source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ClusterFileIO.java@31088

Last change on this file since 31088 was 31088, checked in by davidb, 7 years ago

Shift to newInstance for FileSystem, due to a StackOverflow page describing how the instance returned by get() is shared. memory_usage() method added. Removal of some commented-out code.

  • Property svn:executable set to *
File size: 5.9 KB
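
For context on the change described above: FileSystem.get(uri, conf) hands back a cached instance shared by every caller using the same URI scheme and authority, so closing it affects other users, whereas FileSystem.newInstance(uri, conf) bypasses the cache and gives the caller an instance it owns. A minimal sketch of the difference (not part of the file below), assuming a Hadoop 2.x classpath and a hypothetical namenode address:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class FileSystemSharingSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        URI uri = new URI("hdfs://localhost:9000/"); // hypothetical namenode address

        // get() consults Hadoop's internal FileSystem cache:
        // both calls return the same shared object
        FileSystem shared_a = FileSystem.get(uri, conf);
        FileSystem shared_b = FileSystem.get(uri, conf);
        System.out.println("get() shared:      " + (shared_a == shared_b)); // true

        // newInstance() bypasses the cache: the caller gets its own instance
        // and can close it without breaking other users of the same URI
        FileSystem private_fs = FileSystem.newInstance(uri, conf);
        System.out.println("newInstance() own: " + (private_fs == shared_a)); // false
        private_fs.close();
    }
}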
package org.hathitrust.extractedfeatures;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ClusterFileIO {

    // Print current JVM memory statistics (in KB), labelled with the given prefix
    public static void memory_usage(String prefix)
    {
        Runtime runtime = Runtime.getRuntime();

        java.text.NumberFormat format = java.text.NumberFormat.getInstance();

        StringBuilder sb = new StringBuilder();
        long maxMemory = runtime.maxMemory();
        long allocatedMemory = runtime.totalMemory();
        long freeMemory = runtime.freeMemory();

        sb.append(prefix + " free memory: " + format.format(freeMemory / 1024) + "\n");
        sb.append(prefix + " allocated memory: " + format.format(allocatedMemory / 1024) + "\n");
        sb.append(prefix + " max memory: " + format.format(maxMemory / 1024) + "\n");
        sb.append(prefix + " total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024) + "\n");

        System.out.print(sb.toString());
    }

    // Uses newInstance() rather than get(): get() returns a cached FileSystem
    // that is shared with other callers and may be closed out from under us
    protected static FileSystem getFileSystemInstance(String input_dir)
    {
        FileSystem fs = null;

        try {
            Configuration conf = new Configuration();
            URI uri = new URI(input_dir);
            fs = FileSystem.newInstance(uri, conf);
        }
        catch (URISyntaxException e) {
            e.printStackTrace();
        }
        catch (IOException e) {
            e.printStackTrace();
        }

        return fs;
    }

    public static boolean isHDFS(String fileIn)
    {
        return fileIn.startsWith("hdfs://");
    }

    public static boolean exists(String file)
    {
        FileSystem fs = getFileSystemInstance(file);

        boolean exists = false;

        try {
            exists = fs.exists(new Path(file));
        } catch (IllegalArgumentException e) {
            exists = false;
        } catch (IOException e) {
            exists = false;
        }

        return exists;
    }

    // Assumes 'file' actually ends with 'suffix'
    public static String removeSuffix(String file, String suffix)
    {
        return file.substring(0, file.length() - suffix.length());
    }

    public static boolean createDirectoryAll(String dir)
    {
        FileSystem fs = getFileSystemInstance(dir);
        boolean created_dir = false;

        if (!exists(dir)) {
            try {
                URI uri = new URI(dir);
                Path path = new Path(uri);
                fs.mkdirs(path);
                created_dir = true;
            } catch (URISyntaxException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        return created_dir;
    }

    // Both HDFS and local paths are opened through the Hadoop FileSystem
    // abstraction returned by getFileSystemInstance()
    public static BufferedInputStream getBufferedInputStream(String fileIn)
        throws IOException
    {
        FileSystem fs = getFileSystemInstance(fileIn);

        BufferedInputStream bis = null;

        if (isHDFS(fileIn)) {
            URI uri = URI.create(fileIn);
            FSDataInputStream fin = fs.open(new Path(uri));

            bis = new BufferedInputStream(fin);
        }
        else {
            FSDataInputStream fin = fs.open(new Path(fileIn));

            bis = new BufferedInputStream(fin);
        }

        return bis;
    }

    public static BufferedOutputStream getBufferedOutputStream(String fileOut)
        throws IOException
    {
        BufferedOutputStream bos = null;

        if (fileOut.startsWith("hdfs://")) {
            URI uri = URI.create(fileOut);
            Configuration conf = new Configuration();
            FileSystem file = FileSystem.get(uri, conf);
            FSDataOutputStream fout = file.create(new Path(uri));

            bos = new BufferedOutputStream(fout);
        }
        else {
            // Trim 'file://' off the front; local output bypasses Hadoop
            // and writes through a plain FileOutputStream
            String local_file_out = fileOut;
            if (local_file_out.startsWith("file://")) {
                local_file_out = fileOut.substring("file://".length());
            }
            FileOutputStream fout = new FileOutputStream(local_file_out);
            bos = new BufferedOutputStream(fout);
        }

        return bos;
    }

    // Compression format of the input is auto-detected by Commons Compress
    public static BufferedReader getBufferedReaderForCompressedFile(String fileIn)
        throws IOException, CompressorException
    {
        BufferedInputStream bis = getBufferedInputStream(fileIn);
        CompressorInputStream cis = new CompressorStreamFactory().createCompressorInputStream(bis);
        BufferedReader br = new BufferedReader(new InputStreamReader(cis, "UTF8"));
        return br;
    }

    // Output is always written bzip2-compressed
    public static BufferedWriter getBufferedWriterForCompressedFile(String fileOut)
        throws IOException, CompressorException
    {
        BufferedOutputStream bos = getBufferedOutputStream(fileOut);
        CompressorStreamFactory csf = new CompressorStreamFactory();
        CompressorOutputStream cos = csf.createCompressorOutputStream(CompressorStreamFactory.BZIP2, bos);
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(cos, "UTF8"));
        return bw;
    }

}
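
A short usage sketch (not part of the file above) showing how the helpers are intended to compose; the class name ClusterFileIOUsageSketch and the HDFS paths are hypothetical placeholders:

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;

import org.apache.commons.compress.compressors.CompressorException;
import org.hathitrust.extractedfeatures.ClusterFileIO;

public class ClusterFileIOUsageSketch {
    public static void main(String[] args) {
        // Hypothetical paths; any "hdfs://" or "file://" URI the cluster can resolve would do
        String json_in  = "hdfs://namenode:9000/ef/sample.json.bz2";
        String json_out = "hdfs://namenode:9000/ef-out/sample.json.bz2";

        ClusterFileIO.memory_usage("Before copy");

        if (ClusterFileIO.exists(json_in)) {
            // Read the bzip2-compressed input line by line and re-write it compressed
            try (BufferedReader br = ClusterFileIO.getBufferedReaderForCompressedFile(json_in);
                 BufferedWriter bw = ClusterFileIO.getBufferedWriterForCompressedFile(json_out)) {
                String line;
                while ((line = br.readLine()) != null) {
                    bw.write(line);
                    bw.newLine();
                }
            }
            catch (IOException | CompressorException e) {
                e.printStackTrace();
            }
        }

        ClusterFileIO.memory_usage("After copy");
    }
}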