source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ClusterFileIO.java@ 31228

Last change on this file since 31228 was 31228, checked in by davidb, 7 years ago

Change to see if code can be made more unified. If so, then getBufferedInputStream can be simplified

  • Property svn:executable set to *
File size: 5.9 KB
Line 
1package org.hathitrust.extractedfeatures;
2
3import java.io.BufferedInputStream;
4import java.io.BufferedOutputStream;
5import java.io.BufferedReader;
6import java.io.BufferedWriter;
7import java.io.FileOutputStream;
8import java.io.IOException;
9import java.io.InputStreamReader;
10import java.io.OutputStreamWriter;
11import java.net.URI;
12import java.net.URISyntaxException;
13
14import org.apache.commons.compress.compressors.CompressorException;
15import org.apache.commons.compress.compressors.CompressorInputStream;
16import org.apache.commons.compress.compressors.CompressorOutputStream;
17import org.apache.commons.compress.compressors.CompressorStreamFactory;
18import org.apache.hadoop.conf.Configuration;
19import org.apache.hadoop.fs.FSDataInputStream;
20import org.apache.hadoop.fs.FSDataOutputStream;
21import org.apache.hadoop.fs.FileSystem;
22import org.apache.hadoop.fs.Path;
23
24public class ClusterFileIO {
25
26
27 public static void memory_usage(String prefix)
28 {
29 Runtime runtime = Runtime.getRuntime();
30
31 java.text.NumberFormat format = java.text.NumberFormat.getInstance();
32
33 StringBuilder sb = new StringBuilder();
34 long maxMemory = runtime.maxMemory();
35 long allocatedMemory = runtime.totalMemory();
36 long freeMemory = runtime.freeMemory();
37
38 sb.append(prefix+" free memory: " + format.format(freeMemory / 1024) + "\n");
39 sb.append(prefix+" allocated memory: " + format.format(allocatedMemory / 1024) + "\n");
40 sb.append(prefix+" max memory: " + format.format(maxMemory / 1024) + "\n");
41 sb.append(prefix+" total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024) + "\n");
42
43 System.out.print(sb.toString());
44 }
45
46
47 protected static FileSystem getFileSystemInstance(String input_file_or_dir)
48 {
49 FileSystem fs = null;
50
51 try {
52 Configuration conf = new Configuration();
53 URI uri = new URI(input_file_or_dir);
54 fs = FileSystem.newInstance(uri,conf);
55 }
56 catch (URISyntaxException e) {
57 e.printStackTrace();
58 }
59 catch (IOException e) {
60 e.printStackTrace();
61 }
62
63 return fs;
64 }
65
66 public static boolean isHDFS(String fileIn)
67 {
68 return fileIn.startsWith("hdfs://");
69 }
70
71 public static boolean exists(String file)
72 {
73 FileSystem fs = getFileSystemInstance(file);
74
75 boolean exists = false;
76
77 try {
78 Path path = new Path(file);
79 exists = fs.exists(path);
80 } catch (IllegalArgumentException e) {
81 exists = false;
82 } catch (IOException e) {
83 exists = false;
84 }
85
86 return exists;
87 }
88
89 public static String removeSuffix(String file,String suffix)
90 {
91 return file.substring(0,file.length() - suffix.length());
92 }
93
94 public static boolean createDirectoryAll(String dir)
95 {
96 FileSystem fs = getFileSystemInstance(dir);
97 boolean created_dir = false;
98
99 if (!exists(dir)) {
100 try {
101 URI uri = new URI(dir);
102 Path path = new Path(uri);
103 fs.mkdirs(path);
104 created_dir = true;
105 } catch (URISyntaxException e) {
106 e.printStackTrace();
107 } catch (IOException e) {
108 e.printStackTrace();
109 }
110 }
111
112 return created_dir;
113 }
114
115 public static BufferedInputStream getBufferedInputStream(String fileIn)
116 throws IOException
117 {
118 FileSystem fs = getFileSystemInstance(fileIn);
119
120 BufferedInputStream bis = null;
121
122 if (isHDFS(fileIn)) {
123 URI uri = URI.create (fileIn);
124 //Configuration conf = new Configuration();
125 //FileSystem file = FileSystem.get(uri, conf);
126 //FSDataInputStream fin = file.open(new Path(uri));
127
128 //FSDataInputStream fin = _fs.open(new Path(fileIn));
129
130 Path path = new Path(uri);
131 FSDataInputStream fin = fs.open(path);
132
133 bis = new BufferedInputStream(fin);
134 }
135 else {
136 /*
137 // Trim 'file://' off the front
138 String local_file_in = fileIn;
139 if (local_file_in.startsWith("file://")) {
140 local_file_in = fileIn.substring("file://".length());
141 }
142 FileInputStream fin = new FileInputStream(local_file_in);
143 bis = new BufferedInputStream(fin);
144 */
145 URI uri = URI.create (fileIn);
146 Path path = new Path(uri);
147
148 FSDataInputStream fin = fs.open(path);
149
150 bis = new BufferedInputStream(fin);
151 }
152
153 return bis;
154 }
155
156 public static BufferedOutputStream getBufferedOutputStream(String fileOut)
157 throws IOException
158 {
159 BufferedOutputStream bos = null;
160
161 if (fileOut.startsWith("hdfs://")) {
162 URI uri = URI.create (fileOut);
163 Configuration conf = new Configuration();
164 FileSystem file = FileSystem.get(uri, conf);
165 FSDataOutputStream fout = file.create(new Path(uri));
166
167 bos = new BufferedOutputStream(fout);
168 }
169 else {
170 // Trim 'file://' off the front
171 String local_file_out = fileOut;
172 if (local_file_out.startsWith("file://")) {
173 local_file_out = fileOut.substring("file://".length());
174 }
175 FileOutputStream fout = new FileOutputStream(local_file_out);
176 bos = new BufferedOutputStream(fout);
177 }
178
179 return bos;
180 }
181
182 public static BufferedReader getBufferedReaderForCompressedFile(String fileIn)
183 throws IOException, CompressorException
184 {
185 BufferedInputStream bis = getBufferedInputStream(fileIn);
186 CompressorInputStream cis = new CompressorStreamFactory().createCompressorInputStream(bis);
187 BufferedReader br = new BufferedReader(new InputStreamReader(cis,"UTF8"));
188 return br;
189 }
190
191 public static BufferedWriter getBufferedWriterForCompressedFile(String fileOut)
192 throws IOException, CompressorException
193 {
194 BufferedOutputStream bos = getBufferedOutputStream(fileOut);
195 CompressorStreamFactory csf = new CompressorStreamFactory();
196 CompressorOutputStream cos = csf.createCompressorOutputStream(CompressorStreamFactory.BZIP2,bos);
197 BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(cos,"UTF8"));
198 return bw;
199 }
200
201}
Note: See TracBrowser for help on using the repository browser.