Ignore:
Timestamp:
2016-12-12T23:22:33+13:00 (7 years ago)
Author:
davidb
Message:

Changed to use ClusterFileIO supporting methods

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/WhitelistBloomFilter.java

    r31215 r31222  
    99import java.io.IOException;
    1010import java.io.InputStream;
    11 import java.io.Serializable;
    12 import java.nio.charset.Charset;
     11import java.net.URI;
    1312import java.nio.charset.StandardCharsets;
    1413import java.nio.file.Files;
     
    1615import java.util.stream.Stream;
    1716
    18 import javax.annotation.Nullable;
     17import org.apache.hadoop.fs.FSDataInputStream;
     18import org.apache.hadoop.fs.FSDataOutputStream;
     19import org.apache.hadoop.fs.FileSystem;
     20import org.apache.hadoop.fs.Path;
    1921
    20 import com.google.common.base.Preconditions;
    2122import com.google.common.hash.BloomFilter;
    2223import com.google.common.hash.Funnel;
    2324import com.google.common.hash.Funnels;
    24 import com.google.common.hash.PrimitiveSink;
    2525
    2626public class WhitelistBloomFilter {
     
    3030    protected static final double FALSE_POSITIVE_PERCENTAGE = 0.01;
    3131   
    32    
    3332    // http://stackoverflow.com/questions/453018/number-of-lines-in-a-file-in-java
    34     public static int countLines(String filename) throws IOException
     33    protected static int countLines(String filename) throws IOException
    3534    {
    3635        InputStream is = new BufferedInputStream(new FileInputStream(filename));
     
    5958        System.out.println("Constructing: WhitelistBloomFilter");
    6059
    61         File ser_dictionary_file = new File(dictionary_filename + SERIALIZED_SUFFIX);
    6260       
    63         if (ser_dictionary_file.exists()) {
     61        String ser_dictionary_filename = dictionary_filename + SERIALIZED_SUFFIX;
     62       
     63       
     64        //File ser_dictionary_file = new File(dictionary_filename + SERIALIZED_SUFFIX);
     65       
     66        //if (ser_dictionary_file.exists()) {
     67        if (ClusterFileIO.exists(ser_dictionary_filename)) {
    6468            System.out.println("Loading Serialized Bloom filter ...");
    65             _bloomFilter = serializeIn(ser_dictionary_file);
     69            _bloomFilter = serializeIn(ser_dictionary_filename);
    6670            System.out.println("... done");
    6771        }
     
    103107            System.out.println("Serializing Bloom filter ...");
    104108
    105             File ser_dictionary = new File(filename + SERIALIZED_SUFFIX);
    106             serializeOut(ser_dictionary);
     109            String ser_filename = filename + SERIALIZED_SUFFIX;
     110           
     111            //File ser_dictionary = new File(ser_filename);
     112            serializeOut(ser_filename);
    107113
    108114            System.out.println("... done");
     
    116122    }
    117123
    118     protected void serializeOut(File ser_file)
     124    //protected void serializeOut(File ser_file)
     125    protected void serializeOut(String ser_filename)
    119126    {
     127        //FileSystem fs = ClusterFileIO.getFileSystemInstance(ser_filename);
     128       
    120129        try {
    121             FileOutputStream fos = new FileOutputStream(ser_file);
     130            //URI ser_uri = URI.create (ser_filename);
     131            //Path ser_path = new Path(ser_uri);
     132           
     133            BufferedOutputStream bos = ClusterFileIO.getBufferedOutputStream(ser_filename);
     134           
     135            //FileOutputStream fos = new FileOutputStream(ser_file);
    122136
    123             BufferedOutputStream bfos = new BufferedOutputStream(fos);
     137            //BufferedOutputStream bfos = new BufferedOutputStream(fos);
    124138
    125             _bloomFilter.writeTo(bfos);
     139            _bloomFilter.writeTo(bos);
    126140
    127             bfos.close();
     141            bos.close();
    128142        }
    129143        catch (FileNotFoundException e) {
    130             System.err.println("Unable to open Bloom file:" + ser_file.getAbsolutePath());
     144            System.err.println("Unable to open Bloom file:" + ser_filename);
     145            //System.err.println("Unable to open Bloom file:" + ser_file.getAbsolutePath());
     146           
    131147            e.printStackTrace();
    132148        } catch (IOException e) {
    133             System.err.println("Error reading in Bloom file:" + ser_file.getAbsolutePath());
     149            System.err.println("Error reading in Bloom file:" + ser_filename);
     150            //System.err.println("Error reading in Bloom file:" + ser_file.getAbsolutePath());
    134151            e.printStackTrace();
    135152        }
    136153    }
    137154
    138     protected static BloomFilter<CharSequence> serializeIn(File ser_file)
     155    //protected static BloomFilter<CharSequence> serializeIn(File ser_file)
     156    protected static BloomFilter<CharSequence> serializeIn(String ser_filename)
    139157    {
    140158        BloomFilter<CharSequence> bloomFilter = null;
    141159   
     160        //FileSystem fs = ClusterFileIO.getFileSystemInstance(ser_filename);
     161       
    142162        try {
    143             FileInputStream fis = new FileInputStream(ser_file);
    144             BufferedInputStream bfis = new BufferedInputStream(fis);
     163            //URI ser_uri = URI.create (ser_filename);
     164            //Path ser_path = new Path(ser_uri);
     165
     166            //FSDataInputStream fsdis = fs.open(ser_path);
     167            //BufferedInputStream bis = new BufferedInputStream(fsdis);
     168           
     169            BufferedInputStream bis = ClusterFileIO.getBufferedInputStream(ser_filename);
     170           
     171            //FileInputStream fis = new FileInputStream(ser_file);
     172            //BufferedInputStream bfis = new BufferedInputStream(fis);
    145173
    146174            Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8);
    147             bloomFilter = BloomFilter.readFrom(bfis,string_funnel);
     175            bloomFilter = BloomFilter.readFrom(bis,string_funnel);
    148176
    149             bfis.close();
     177            bis.close();
    150178        }
    151179        catch (FileNotFoundException e) {
    152             System.err.println("Unable to open Bloom file:" + ser_file.getAbsolutePath());
     180            //System.err.println("Unable to open Bloom file:" + ser_file.getAbsolutePath());
     181            //System.err.println("Unable to open Bloom file:" + ser_path.getName());
     182            System.err.println("Unable to open Bloom file:" + ser_filename);
     183           
    153184            e.printStackTrace();
    154185        } catch (IOException e) {
    155             System.err.println("Error writing out Bloom file:" + ser_file.getAbsolutePath());
     186            //System.err.println("Error writing out Bloom file:" + ser_file.getAbsolutePath());
     187            //System.err.println("Error writing out Bloom file:" + ser_path.getName());
     188            System.err.println("Error writing out Bloom file:" + ser_filename);
    156189            e.printStackTrace();
    157190        }
Note: See TracChangeset for help on using the changeset viewer.