source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ClusterFileIO.java@ 31223

Last change on this file since 31223 was 31223, checked in by davidb, 7 years ago

Exception printStackTrace

  • Property svn:executable set to *
File size: 6.0 KB
Line 
package org.hathitrust.extractedfeatures;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;

import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

24public class ClusterFileIO {
25
26
27 public static void memory_usage(String prefix)
28 {
29 Runtime runtime = Runtime.getRuntime();
30
31 java.text.NumberFormat format = java.text.NumberFormat.getInstance();
32
33 StringBuilder sb = new StringBuilder();
34 long maxMemory = runtime.maxMemory();
35 long allocatedMemory = runtime.totalMemory();
36 long freeMemory = runtime.freeMemory();
37
38 sb.append(prefix+" free memory: " + format.format(freeMemory / 1024) + "\n");
39 sb.append(prefix+" allocated memory: " + format.format(allocatedMemory / 1024) + "\n");
40 sb.append(prefix+" max memory: " + format.format(maxMemory / 1024) + "\n");
41 sb.append(prefix+" total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024) + "\n");
42
43 System.out.print(sb.toString());
44 }
45
46
47 public static FileSystem getFileSystemInstance(String input_file_or_dir)
48 {
49 FileSystem fs = null;
50
51 try {
52 Configuration conf = new Configuration();
53 URI uri = new URI(input_file_or_dir);
54 fs = FileSystem.newInstance(uri,conf);
55 }
56 catch (URISyntaxException e) {
57 e.printStackTrace();
58 }
59 catch (IOException e) {
60 e.printStackTrace();
61 }
62
63 return fs;
64 }
65
66 public static boolean isHDFS(String fileIn)
67 {
68 return fileIn.startsWith("hdfs://");
69 }
70
71 public static boolean exists(String file)
72 {
73 FileSystem fs = getFileSystemInstance(file);
74
75 //Configuration conf = jsc.hadoopConfiguration();
76 //FileSystem fs = org.apache.hadoop.fs.FileSystem.get(conf);
77 boolean exists = false;
78
79 try {
80 Path path = new Path(file);
81 exists = fs.exists(path);
82 } catch (IllegalArgumentException e) {
83 e.printStackTrace();
84 exists = false;
85 } catch (IOException e) {
86 e.printStackTrace();
87 exists = false;
88 }
89
90 return exists;
91 }
92
93 public static String removeSuffix(String file,String suffix)
94 {
95 return file.substring(0,file.length() - suffix.length());
96 }
97
98 public static boolean createDirectoryAll(String dir)
99 {
100 FileSystem fs = getFileSystemInstance(dir);
101 boolean created_dir = false;
102
103 if (!exists(dir)) {
104 try {
105 URI uri = new URI(dir);
106 Path path = new Path(uri);
107 fs.mkdirs(path);
108 created_dir = true;
109 } catch (URISyntaxException e) {
110 e.printStackTrace();
111 } catch (IOException e) {
112 e.printStackTrace();
113 }
114 }
115
116 return created_dir;
117 }
118
119 public static BufferedInputStream getBufferedInputStream(String fileIn)
120 throws IOException
121 {
122 FileSystem fs = getFileSystemInstance(fileIn);
123
124 BufferedInputStream bis = null;
125
126 if (isHDFS(fileIn)) {
127 URI uri = URI.create (fileIn);
128 //Configuration conf = new Configuration();
129 //FileSystem file = FileSystem.get(uri, conf);
130 //FSDataInputStream fin = file.open(new Path(uri));
131
132 //FSDataInputStream fin = _fs.open(new Path(fileIn));
133 FSDataInputStream fin = fs.open(new Path(uri));
134
135 bis = new BufferedInputStream(fin);
136 }
137 else {
138 /*
139 // Trim 'file://' off the front
140 String local_file_in = fileIn;
141 if (local_file_in.startsWith("file://")) {
142 local_file_in = fileIn.substring("file://".length());
143 }
144 FileInputStream fin = new FileInputStream(local_file_in);
145 bis = new BufferedInputStream(fin);
146 */
147
148 FSDataInputStream fin = fs.open(new Path(fileIn));
149
150 bis = new BufferedInputStream(fin);
151 }
152
153 return bis;
154 }
155
156 public static BufferedOutputStream getBufferedOutputStream(String fileOut)
157 throws IOException
158 {
159 BufferedOutputStream bos = null;
160
161 if (fileOut.startsWith("hdfs://")) {
162 URI uri = URI.create (fileOut);
163 Configuration conf = new Configuration();
164 FileSystem file = FileSystem.get(uri, conf);
165 FSDataOutputStream fout = file.create(new Path(uri));
166
167 bos = new BufferedOutputStream(fout);
168 }
169 else {
170 // Trim 'file://' off the front
171 String local_file_out = fileOut;
172 if (local_file_out.startsWith("file://")) {
173 local_file_out = fileOut.substring("file://".length());
174 }
175 FileOutputStream fout = new FileOutputStream(local_file_out);
176 bos = new BufferedOutputStream(fout);
177 }
178
179 return bos;
180 }
181
182 public static BufferedReader getBufferedReaderForCompressedFile(String fileIn)
183 throws IOException, CompressorException
184 {
185 BufferedInputStream bis = getBufferedInputStream(fileIn);
186 CompressorInputStream cis = new CompressorStreamFactory().createCompressorInputStream(bis);
187 BufferedReader br = new BufferedReader(new InputStreamReader(cis,"UTF8"));
188 return br;
189 }
190
191 public static BufferedWriter getBufferedWriterForCompressedFile(String fileOut)
192 throws IOException, CompressorException
193 {
194 BufferedOutputStream bos = getBufferedOutputStream(fileOut);
195 CompressorStreamFactory csf = new CompressorStreamFactory();
196 CompressorOutputStream cos = csf.createCompressorOutputStream(CompressorStreamFactory.BZIP2,bos);
197 BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(cos,"UTF8"));
198 return bw;
199 }
200
201}
Note: See TracBrowser for help on using the repository browser.