- Timestamp:
- 2016-12-13T14:00:15+13:00 (7 years ago)
- Location:
- other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ClusterFileIO.java
r31223 r31227 45 45 46 46 47 public static FileSystem getFileSystemInstance(String input_file_or_dir)47 protected static FileSystem getFileSystemInstance(String input_file_or_dir) 48 48 { 49 49 FileSystem fs = null; … … 73 73 FileSystem fs = getFileSystemInstance(file); 74 74 75 //Configuration conf = jsc.hadoopConfiguration();76 //FileSystem fs = org.apache.hadoop.fs.FileSystem.get(conf);77 75 boolean exists = false; 78 76 … … 81 79 exists = fs.exists(path); 82 80 } catch (IllegalArgumentException e) { 83 e.printStackTrace();84 81 exists = false; 85 82 } catch (IOException e) { 86 e.printStackTrace();87 83 exists = false; 88 84 } -
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/WhitelistBloomFilter.java
r31224 r31227 3 3 import java.io.BufferedInputStream; 4 4 import java.io.BufferedOutputStream; 5 import java.io.File;6 5 import java.io.FileInputStream; 7 6 import java.io.FileNotFoundException; 8 import java.io.FileOutputStream;9 7 import java.io.IOException; 10 8 import java.io.InputStream; 11 import java.net.URI;12 9 import java.nio.charset.StandardCharsets; 13 10 import java.nio.file.Files; 14 11 import java.nio.file.Paths; 15 12 import java.util.stream.Stream; 16 17 import org.apache.hadoop.fs.FSDataInputStream;18 import org.apache.hadoop.fs.FSDataOutputStream;19 import org.apache.hadoop.fs.FileSystem;20 import org.apache.hadoop.fs.Path;21 13 22 14 import com.google.common.hash.BloomFilter; … … 57 49 public WhitelistBloomFilter(String dictionary_filename, boolean serialize) { 58 50 System.out.println("Constructing: WhitelistBloomFilter"); 59 60 51 61 52 String ser_dictionary_filename = dictionary_filename + SERIALIZED_SUFFIX; 62 53 63 64 //File ser_dictionary_file = new File(dictionary_filename + SERIALIZED_SUFFIX);65 66 //if (ser_dictionary_file.exists()) {67 System.err.println("***** checking serialized dictionary:" + ser_dictionary_filename);68 54 if (ClusterFileIO.exists(ser_dictionary_filename)) { 69 55 System.out.println("Loading Serialized Bloom filter ..."); … … 109 95 110 96 String ser_filename = filename + SERIALIZED_SUFFIX; 111 112 //File ser_dictionary = new File(ser_filename);113 97 serializeOut(ser_filename); 114 98 … … 123 107 } 124 108 125 //protected void serializeOut(File ser_file)126 109 protected void serializeOut(String ser_filename) 127 110 { 128 //FileSystem fs = ClusterFileIO.getFileSystemInstance(ser_filename);129 130 111 try { 131 //URI ser_uri = URI.create (ser_filename);132 //Path ser_path = new Path(ser_uri);133 134 112 BufferedOutputStream bos = ClusterFileIO.getBufferedOutputStream(ser_filename); 135 136 //FileOutputStream fos = new FileOutputStream(ser_file);137 138 //BufferedOutputStream bfos = new BufferedOutputStream(fos);139 
140 113 _bloomFilter.writeTo(bos); 141 142 114 bos.close(); 143 115 } 144 116 catch (FileNotFoundException e) { 145 117 System.err.println("Unable to open Bloom file:" + ser_filename); 146 //System.err.println("Unable to open Bloom file:" + ser_file.getAbsolutePath()); 147 148 e.printStackTrace(); 118 //e.printStackTrace(); 149 119 } catch (IOException e) { 150 120 System.err.println("Error reading in Bloom file:" + ser_filename); 151 //System.err.println("Error reading in Bloom file:" + ser_file.getAbsolutePath()); 152 e.printStackTrace(); 121 //e.printStackTrace(); 153 122 } 154 123 } 155 124 156 //protected static BloomFilter<CharSequence> serializeIn(File ser_file)157 125 protected static BloomFilter<CharSequence> serializeIn(String ser_filename) 158 126 { 159 127 BloomFilter<CharSequence> bloomFilter = null; 160 128 161 //FileSystem fs = ClusterFileIO.getFileSystemInstance(ser_filename);162 163 129 try { 164 //URI ser_uri = URI.create (ser_filename);165 //Path ser_path = new Path(ser_uri);166 167 //FSDataInputStream fsdis = fs.open(ser_path);168 //BufferedInputStream bis = new BufferedInputStream(fsdis);169 170 130 BufferedInputStream bis = ClusterFileIO.getBufferedInputStream(ser_filename); 171 172 //FileInputStream fis = new FileInputStream(ser_file); 173 //BufferedInputStream bfis = new BufferedInputStream(fis); 174 131 175 132 Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 176 133 bloomFilter = BloomFilter.readFrom(bis,string_funnel); … … 179 136 } 180 137 catch (FileNotFoundException e) { 181 //System.err.println("Unable to open Bloom file:" + ser_file.getAbsolutePath()); 182 //System.err.println("Unable to open Bloom file:" + ser_path.getName()); 183 System.err.println("Unable to open Bloom file:" + ser_filename); 184 185 e.printStackTrace(); 138 System.err.println("Unable to open Bloom file:" + ser_filename); 139 //e.printStackTrace(); 186 140 } catch (IOException e) { 187 //System.err.println("Error writing out Bloom 
file:" + ser_file.getAbsolutePath());188 //System.err.println("Error writing out Bloom file:" + ser_path.getName());189 141 System.err.println("Error writing out Bloom file:" + ser_filename); 190 e.printStackTrace();142 //e.printStackTrace(); 191 143 } 192 144 return bloomFilter; 193 145 } 194 195 196 197 198 199 146 }
Note:
See TracChangeset
for help on using the changeset viewer.