source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ClusterFileIO.java@ 32101

Last change on this file since 32101 was 32101, checked in by davidb, 6 years ago

Tweaks to allow serial ingest to run

  • Property svn:executable set to *
File size: 6.5 KB
Line 
1package org.hathitrust.extractedfeatures;
2
3import java.io.BufferedInputStream;
4import java.io.BufferedOutputStream;
5import java.io.BufferedReader;
6import java.io.BufferedWriter;
7import java.io.FileInputStream;
8import java.io.FileOutputStream;
9import java.io.IOException;
10import java.io.InputStreamReader;
11import java.io.OutputStreamWriter;
12import java.net.URI;
13import java.net.URISyntaxException;
14
15import org.apache.commons.compress.compressors.CompressorException;
16import org.apache.commons.compress.compressors.CompressorInputStream;
17import org.apache.commons.compress.compressors.CompressorOutputStream;
18import org.apache.commons.compress.compressors.CompressorStreamFactory;
19import org.apache.hadoop.conf.Configuration;
20import org.apache.hadoop.fs.FSDataInputStream;
21import org.apache.hadoop.fs.FSDataOutputStream;
22import org.apache.hadoop.fs.FileSystem;
23import org.apache.hadoop.fs.Path;
24import org.json.JSONObject;
25
26public class ClusterFileIO {
27
28
29 public static void memory_usage(String prefix)
30 {
31 Runtime runtime = Runtime.getRuntime();
32
33 java.text.NumberFormat format = java.text.NumberFormat.getInstance();
34
35 StringBuilder sb = new StringBuilder();
36 long maxMemory = runtime.maxMemory();
37 long allocatedMemory = runtime.totalMemory();
38 long freeMemory = runtime.freeMemory();
39
40 sb.append(prefix+" free memory: " + format.format(freeMemory / 1024) + "\n");
41 sb.append(prefix+" allocated memory: " + format.format(allocatedMemory / 1024) + "\n");
42 sb.append(prefix+" max memory: " + format.format(maxMemory / 1024) + "\n");
43 sb.append(prefix+" total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024) + "\n");
44
45 System.out.print(sb.toString());
46 }
47
48
49 protected static FileSystem getFileSystemInstance(String input_file_or_dir)
50 {
51 FileSystem fs = null;
52
53 try {
54 Configuration conf = new Configuration();
55 URI uri = new URI(input_file_or_dir);
56 fs = FileSystem.newInstance(uri,conf);
57 }
58 catch (URISyntaxException e) {
59 e.printStackTrace();
60 }
61 catch (IOException e) {
62 e.printStackTrace();
63 }
64
65 return fs;
66 }
67
68 public static boolean isHDFS(String fileIn)
69 {
70 return fileIn.startsWith("hdfs://");
71 }
72
73 public static boolean exists(String file)
74 {
75 FileSystem fs = getFileSystemInstance(file);
76
77 boolean exists = false;
78
79 try {
80 Path path = new Path(file);
81 exists = fs.exists(path);
82 } catch (IllegalArgumentException e) {
83 exists = false;
84 } catch (IOException e) {
85 exists = false;
86 }
87
88 return exists;
89 }
90
91 public static String removeSuffix(String file,String suffix)
92 {
93 return file.substring(0,file.length() - suffix.length());
94 }
95
96 public static boolean createDirectoryAll(String dir)
97 {
98 FileSystem fs = getFileSystemInstance(dir);
99 boolean created_dir = false;
100
101 if (!exists(dir)) {
102 try {
103 URI uri = new URI(dir);
104 Path path = new Path(uri);
105 fs.mkdirs(path);
106 created_dir = true;
107 } catch (URISyntaxException e) {
108 e.printStackTrace();
109 } catch (IOException e) {
110 e.printStackTrace();
111 }
112 }
113
114 return created_dir;
115 }
116
117 public static BufferedInputStream getBufferedInputStream(String fileIn)
118 throws IOException
119 {
120 FileSystem fs = getFileSystemInstance(fileIn);
121
122 BufferedInputStream bis = null;
123
124 if (isHDFS(fileIn)) {
125 URI uri = URI.create (fileIn);
126 //Configuration conf = new Configuration();
127 //FileSystem file = FileSystem.get(uri, conf);
128 //FSDataInputStream fin = file.open(new Path(uri));
129
130 //FSDataInputStream fin = _fs.open(new Path(fileIn));
131
132 Path path = new Path(uri);
133 FSDataInputStream fin = fs.open(path);
134
135 bis = new BufferedInputStream(fin);
136 }
137 else {
138
139
140 // Trim 'file://' off the front
141 /*
142 String local_file_in = fileIn;
143 if (local_file_in.startsWith("file://")) {
144 local_file_in = fileIn.substring("file://".length());
145 }
146 else if (local_file_in.startsWith("file:/")) {
147 local_file_in = fileIn.substring("file:/".length());
148 }
149 FileInputStream fin = new FileInputStream(local_file_in);
150 bis = new BufferedInputStream(fin);
151 */
152
153
154 URI uri = URI.create (fileIn);
155 Path path = new Path(uri);
156
157 FSDataInputStream fin = fs.open(path);
158 bis = new BufferedInputStream(fin);
159
160
161 }
162
163 return bis;
164 }
165
166 protected static String readTextFile(String filename)
167 {
168 StringBuilder sb = new StringBuilder();
169
170 try {
171 BufferedReader br = ClusterFileIO.getBufferedReaderForCompressedFile(filename);
172
173 int cp;
174 while ((cp = br.read()) != -1) {
175 sb.append((char) cp);
176 }
177
178 br.close();
179 }
180 catch (Exception e) {
181 e.printStackTrace();
182 }
183
184 return sb.toString();
185 }
186
187 public static BufferedOutputStream getBufferedOutputStream(String fileOut)
188 throws IOException
189 {
190 BufferedOutputStream bos = null;
191
192 if (fileOut.startsWith("hdfs://")) {
193 URI uri = URI.create (fileOut);
194 Configuration conf = new Configuration();
195 FileSystem file = FileSystem.get(uri, conf);
196 FSDataOutputStream fout = file.create(new Path(uri));
197
198 bos = new BufferedOutputStream(fout);
199 }
200 else {
201 // Trim 'file://' off the front
202 String local_file_out = fileOut;
203 if (local_file_out.startsWith("file://")) {
204 local_file_out = fileOut.substring("file://".length());
205 }
206 FileOutputStream fout = new FileOutputStream(local_file_out);
207 bos = new BufferedOutputStream(fout);
208 }
209
210 return bos;
211 }
212
213 public static BufferedReader getBufferedReaderForCompressedFile(String fileIn)
214 throws IOException, CompressorException
215 {
216 BufferedInputStream bis = getBufferedInputStream(fileIn);
217 CompressorInputStream cis = new CompressorStreamFactory().createCompressorInputStream(bis);
218 BufferedReader br = new BufferedReader(new InputStreamReader(cis,"UTF8"));
219 return br;
220 }
221
222 public static BufferedWriter getBufferedWriterForCompressedFile(String fileOut)
223 throws IOException, CompressorException
224 {
225 BufferedOutputStream bos = getBufferedOutputStream(fileOut);
226 CompressorStreamFactory csf = new CompressorStreamFactory();
227 CompressorOutputStream cos = csf.createCompressorOutputStream(CompressorStreamFactory.BZIP2,bos);
228 BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(cos,"UTF8"));
229 return bw;
230 }
231
232}
Note: See TracBrowser for help on using the repository browser.