root/other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ClusterFileIO.java @ 32101

Revision 32101, 6.5 KB (checked in by davidb, 9 months ago)

Tweaks to allow serial ingest to run

  • Property svn:executable set to *
Line 
1package org.hathitrust.extractedfeatures;
2
3import java.io.BufferedInputStream;
4import java.io.BufferedOutputStream;
5import java.io.BufferedReader;
6import java.io.BufferedWriter;
7import java.io.FileInputStream;
8import java.io.FileOutputStream;
9import java.io.IOException;
10import java.io.InputStreamReader;
11import java.io.OutputStreamWriter;
12import java.net.URI;
13import java.net.URISyntaxException;
14
15import org.apache.commons.compress.compressors.CompressorException;
16import org.apache.commons.compress.compressors.CompressorInputStream;
17import org.apache.commons.compress.compressors.CompressorOutputStream;
18import org.apache.commons.compress.compressors.CompressorStreamFactory;
19import org.apache.hadoop.conf.Configuration;
20import org.apache.hadoop.fs.FSDataInputStream;
21import org.apache.hadoop.fs.FSDataOutputStream;
22import org.apache.hadoop.fs.FileSystem;
23import org.apache.hadoop.fs.Path;
24import org.json.JSONObject;
25
26public class ClusterFileIO {
27
28
29    public static void memory_usage(String prefix)
30    {
31        Runtime runtime = Runtime.getRuntime();
32       
33        java.text.NumberFormat format = java.text.NumberFormat.getInstance();
34       
35        StringBuilder sb = new StringBuilder();
36        long maxMemory = runtime.maxMemory();
37        long allocatedMemory = runtime.totalMemory();
38        long freeMemory = runtime.freeMemory();
39       
40        sb.append(prefix+" free memory: " + format.format(freeMemory / 1024) + "\n");
41        sb.append(prefix+" allocated memory: " + format.format(allocatedMemory / 1024) + "\n");
42        sb.append(prefix+" max memory: " + format.format(maxMemory / 1024) + "\n");
43        sb.append(prefix+" total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024) + "\n");
44
45        System.out.print(sb.toString());
46    }
47
48   
49    protected static FileSystem getFileSystemInstance(String input_file_or_dir)
50    {
51        FileSystem fs = null;
52
53        try {
54            Configuration conf = new Configuration();
55            URI uri = new URI(input_file_or_dir);
56            fs = FileSystem.newInstance(uri,conf);
57        }
58        catch (URISyntaxException e) {
59            e.printStackTrace();   
60        }
61        catch (IOException e) {
62            e.printStackTrace();
63        }
64
65        return fs;
66    }
67   
68    public static boolean isHDFS(String fileIn)
69    {
70     return fileIn.startsWith("hdfs://");
71    }
72   
73    public static boolean exists(String file)
74    {
75        FileSystem fs = getFileSystemInstance(file);
76       
77        boolean exists = false;
78       
79        try {
80            Path path = new Path(file);
81            exists = fs.exists(path);
82        } catch (IllegalArgumentException e) {
83            exists = false;
84        } catch (IOException e) {
85            exists = false;
86        }
87
88        return exists;
89    }
90   
91    public static String removeSuffix(String file,String suffix)
92    {
93        return file.substring(0,file.length() - suffix.length());
94    }
95   
96    public static boolean createDirectoryAll(String dir)
97    {
98        FileSystem fs = getFileSystemInstance(dir);
99        boolean created_dir = false;
100
101        if (!exists(dir)) {
102            try {
103                URI uri = new URI(dir);
104                Path path = new Path(uri);
105                fs.mkdirs(path);
106                created_dir = true;
107            } catch (URISyntaxException e) {
108                e.printStackTrace();
109            } catch (IOException e) {
110                e.printStackTrace();
111            }
112        }
113       
114        return created_dir;
115    }
116   
117    public static BufferedInputStream getBufferedInputStream(String fileIn)
118            throws IOException
119    {
120        FileSystem fs = getFileSystemInstance(fileIn);
121       
122        BufferedInputStream bis = null;
123       
124        if (isHDFS(fileIn)) {
125            URI uri = URI.create (fileIn);
126            //Configuration conf = new Configuration();
127            //FileSystem file = FileSystem.get(uri, conf);
128            //FSDataInputStream fin = file.open(new Path(uri));
129           
130            //FSDataInputStream fin = _fs.open(new Path(fileIn));
131           
132            Path path = new Path(uri);
133            FSDataInputStream fin = fs.open(path);
134           
135            bis = new BufferedInputStream(fin);
136        }
137        else {
138           
139           
140            // Trim 'file://' off the front
141            /*
142            String local_file_in = fileIn;
143            if (local_file_in.startsWith("file://")) {
144                local_file_in = fileIn.substring("file://".length());
145            }
146            else if (local_file_in.startsWith("file:/")) {
147                local_file_in = fileIn.substring("file:/".length());
148            }
149            FileInputStream fin = new FileInputStream(local_file_in);
150            bis = new BufferedInputStream(fin);
151            */
152           
153           
154            URI uri = URI.create (fileIn);
155            Path path = new Path(uri);
156           
157            FSDataInputStream fin = fs.open(path);
158            bis = new BufferedInputStream(fin);
159           
160           
161        }
162   
163        return bis;
164    }
165
166    protected static String readTextFile(String filename)
167    {
168        StringBuilder sb = new StringBuilder();
169       
170        try {   
171            BufferedReader br = ClusterFileIO.getBufferedReaderForCompressedFile(filename);
172
173            int cp;
174            while ((cp = br.read()) != -1) {
175                sb.append((char) cp);
176            }
177   
178            br.close();
179        }
180        catch (Exception e) {
181            e.printStackTrace();
182        }
183       
184        return sb.toString();
185    }
186   
187    public static BufferedOutputStream getBufferedOutputStream(String fileOut)
188            throws IOException
189    {
190        BufferedOutputStream bos = null;
191       
192        if (fileOut.startsWith("hdfs://")) {
193            URI uri = URI.create (fileOut);
194            Configuration conf = new Configuration();
195            FileSystem file = FileSystem.get(uri, conf);
196            FSDataOutputStream fout = file.create(new Path(uri));
197   
198            bos = new BufferedOutputStream(fout);
199        }
200        else {
201            // Trim 'file://' off the front
202            String local_file_out = fileOut;
203            if (local_file_out.startsWith("file://")) {
204                local_file_out = fileOut.substring("file://".length());
205            }
206            FileOutputStream fout = new FileOutputStream(local_file_out);
207            bos = new BufferedOutputStream(fout);
208        }
209   
210        return bos;
211    }
212   
213    public static BufferedReader getBufferedReaderForCompressedFile(String fileIn)
214            throws IOException, CompressorException
215    {
216        BufferedInputStream bis = getBufferedInputStream(fileIn);
217        CompressorInputStream cis = new CompressorStreamFactory().createCompressorInputStream(bis);
218        BufferedReader br = new BufferedReader(new InputStreamReader(cis,"UTF8"));
219        return br;
220    }
221
222    public static BufferedWriter getBufferedWriterForCompressedFile(String fileOut)
223            throws IOException, CompressorException
224    {
225        BufferedOutputStream bos = getBufferedOutputStream(fileOut);
226        CompressorStreamFactory csf = new CompressorStreamFactory();
227        CompressorOutputStream cos = csf.createCompressorOutputStream(CompressorStreamFactory.BZIP2,bos);
228        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(cos,"UTF8"));
229        return bw;
230    }
231
232}
Note: See TracBrowser for help on using the browser.