source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ClusterFileIO.java@ 31227

Last change on this file since 31227 was 31227, checked in by davidb, 7 years ago

Code tidy up

  • Property svn:executable set to *
File size: 5.8 KB
Line 
1package org.hathitrust.extractedfeatures;
2
3import java.io.BufferedInputStream;
4import java.io.BufferedOutputStream;
5import java.io.BufferedReader;
6import java.io.BufferedWriter;
7import java.io.FileOutputStream;
8import java.io.IOException;
9import java.io.InputStreamReader;
10import java.io.OutputStreamWriter;
11import java.net.URI;
12import java.net.URISyntaxException;
13
14import org.apache.commons.compress.compressors.CompressorException;
15import org.apache.commons.compress.compressors.CompressorInputStream;
16import org.apache.commons.compress.compressors.CompressorOutputStream;
17import org.apache.commons.compress.compressors.CompressorStreamFactory;
18import org.apache.hadoop.conf.Configuration;
19import org.apache.hadoop.fs.FSDataInputStream;
20import org.apache.hadoop.fs.FSDataOutputStream;
21import org.apache.hadoop.fs.FileSystem;
22import org.apache.hadoop.fs.Path;
23
24public class ClusterFileIO {
25
26
27 public static void memory_usage(String prefix)
28 {
29 Runtime runtime = Runtime.getRuntime();
30
31 java.text.NumberFormat format = java.text.NumberFormat.getInstance();
32
33 StringBuilder sb = new StringBuilder();
34 long maxMemory = runtime.maxMemory();
35 long allocatedMemory = runtime.totalMemory();
36 long freeMemory = runtime.freeMemory();
37
38 sb.append(prefix+" free memory: " + format.format(freeMemory / 1024) + "\n");
39 sb.append(prefix+" allocated memory: " + format.format(allocatedMemory / 1024) + "\n");
40 sb.append(prefix+" max memory: " + format.format(maxMemory / 1024) + "\n");
41 sb.append(prefix+" total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024) + "\n");
42
43 System.out.print(sb.toString());
44 }
45
46
47 protected static FileSystem getFileSystemInstance(String input_file_or_dir)
48 {
49 FileSystem fs = null;
50
51 try {
52 Configuration conf = new Configuration();
53 URI uri = new URI(input_file_or_dir);
54 fs = FileSystem.newInstance(uri,conf);
55 }
56 catch (URISyntaxException e) {
57 e.printStackTrace();
58 }
59 catch (IOException e) {
60 e.printStackTrace();
61 }
62
63 return fs;
64 }
65
66 public static boolean isHDFS(String fileIn)
67 {
68 return fileIn.startsWith("hdfs://");
69 }
70
71 public static boolean exists(String file)
72 {
73 FileSystem fs = getFileSystemInstance(file);
74
75 boolean exists = false;
76
77 try {
78 Path path = new Path(file);
79 exists = fs.exists(path);
80 } catch (IllegalArgumentException e) {
81 exists = false;
82 } catch (IOException e) {
83 exists = false;
84 }
85
86 return exists;
87 }
88
89 public static String removeSuffix(String file,String suffix)
90 {
91 return file.substring(0,file.length() - suffix.length());
92 }
93
94 public static boolean createDirectoryAll(String dir)
95 {
96 FileSystem fs = getFileSystemInstance(dir);
97 boolean created_dir = false;
98
99 if (!exists(dir)) {
100 try {
101 URI uri = new URI(dir);
102 Path path = new Path(uri);
103 fs.mkdirs(path);
104 created_dir = true;
105 } catch (URISyntaxException e) {
106 e.printStackTrace();
107 } catch (IOException e) {
108 e.printStackTrace();
109 }
110 }
111
112 return created_dir;
113 }
114
115 public static BufferedInputStream getBufferedInputStream(String fileIn)
116 throws IOException
117 {
118 FileSystem fs = getFileSystemInstance(fileIn);
119
120 BufferedInputStream bis = null;
121
122 if (isHDFS(fileIn)) {
123 URI uri = URI.create (fileIn);
124 //Configuration conf = new Configuration();
125 //FileSystem file = FileSystem.get(uri, conf);
126 //FSDataInputStream fin = file.open(new Path(uri));
127
128 //FSDataInputStream fin = _fs.open(new Path(fileIn));
129 FSDataInputStream fin = fs.open(new Path(uri));
130
131 bis = new BufferedInputStream(fin);
132 }
133 else {
134 /*
135 // Trim 'file://' off the front
136 String local_file_in = fileIn;
137 if (local_file_in.startsWith("file://")) {
138 local_file_in = fileIn.substring("file://".length());
139 }
140 FileInputStream fin = new FileInputStream(local_file_in);
141 bis = new BufferedInputStream(fin);
142 */
143
144 FSDataInputStream fin = fs.open(new Path(fileIn));
145
146 bis = new BufferedInputStream(fin);
147 }
148
149 return bis;
150 }
151
152 public static BufferedOutputStream getBufferedOutputStream(String fileOut)
153 throws IOException
154 {
155 BufferedOutputStream bos = null;
156
157 if (fileOut.startsWith("hdfs://")) {
158 URI uri = URI.create (fileOut);
159 Configuration conf = new Configuration();
160 FileSystem file = FileSystem.get(uri, conf);
161 FSDataOutputStream fout = file.create(new Path(uri));
162
163 bos = new BufferedOutputStream(fout);
164 }
165 else {
166 // Trim 'file://' off the front
167 String local_file_out = fileOut;
168 if (local_file_out.startsWith("file://")) {
169 local_file_out = fileOut.substring("file://".length());
170 }
171 FileOutputStream fout = new FileOutputStream(local_file_out);
172 bos = new BufferedOutputStream(fout);
173 }
174
175 return bos;
176 }
177
178 public static BufferedReader getBufferedReaderForCompressedFile(String fileIn)
179 throws IOException, CompressorException
180 {
181 BufferedInputStream bis = getBufferedInputStream(fileIn);
182 CompressorInputStream cis = new CompressorStreamFactory().createCompressorInputStream(bis);
183 BufferedReader br = new BufferedReader(new InputStreamReader(cis,"UTF8"));
184 return br;
185 }
186
187 public static BufferedWriter getBufferedWriterForCompressedFile(String fileOut)
188 throws IOException, CompressorException
189 {
190 BufferedOutputStream bos = getBufferedOutputStream(fileOut);
191 CompressorStreamFactory csf = new CompressorStreamFactory();
192 CompressorOutputStream cos = csf.createCompressorOutputStream(CompressorStreamFactory.BZIP2,bos);
193 BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(cos,"UTF8"));
194 return bw;
195 }
196
197}
Note: See TracBrowser for help on using the repository browser.