Changeset 30921 for other-projects

Show
Ignore:
Timestamp:
25.10.2016 23:23:08 (3 years ago)
Author:
davidb
Message:

Code change to read in JSON file over HDFS

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PagedJSON.java

    r30918 r30921  
     8  8 import java.io.InputStreamReader;
     9  9 import java.io.UnsupportedEncodingException;
       10 import java.net.URI;
    10 11 import java.nio.charset.StandardCharsets;
    11    import java.nio.file.Files;
    12    import java.nio.file.Path;
    13    import java.nio.file.Paths;
       12 //import java.nio.file.Files;
       13 //import java.nio.file.Path;
       14 //import java.nio.file.Paths;
    14 15 import java.util.ArrayList;
    15 16 import java.util.Arrays;
     
    20 21 import org.apache.commons.compress.compressors.CompressorInputStream;
    21 22 import org.apache.commons.compress.compressors.CompressorStreamFactory;
       23 import org.apache.hadoop.conf.Configuration;
       24 import org.apache.hadoop.fs.FSDataInputStream;
       25 import org.apache.hadoop.fs.FileSystem;
       26 import org.apache.hadoop.fs.Path;
    22 27 import org.apache.spark.api.java.function.FlatMapFunction;
    23 28 import org.json.JSONArray;
     
    35 40
    36 41 /*
    37    URI uri = URI.create (“hdfs://host:port/file path”);
       42 URI uri = URI.create ("hdfs://host:port/file path");
    38 43 Configuration conf = new Configuration();
    39 44 FileSystem file = FileSystem.get(uri, conf);
     
    54 59
    55 60     protected static BufferedReader getBufferedReaderForCompressedFile(String fileIn)
    56                throws FileNotFoundException, UnsupportedEncodingException, CompressorException {
    57            FileInputStream fin = new FileInputStream(fileIn);
       61             throws CompressorException, IOException
       62     {
       63         URI uri = URI.create (fileIn);
       64         Configuration conf = new Configuration();
       65         FileSystem file = FileSystem.get(uri, conf);
       66         FSDataInputStream fin = file.open(new Path(uri));
       67
       68         //FileInputStream fin = new FileInputStream(fileIn);
    58 69         BufferedInputStream bis = new BufferedInputStream(fin);
    59 70         CompressorInputStream input = new CompressorStreamFactory().createCompressorInputStream(bis);