Changeset 31319
- Timestamp:
- 2017-01-22T15:19:50+13:00 (7 years ago)
- Location:
- other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeMongoDBDocumentsMap.java
r31318 r31319 (columns: old line, new line, marker, content; "-" = removed, "+" = added, blank marker = unmodified)
2 2
3 3   import java.io.IOException;
4   - import java.io.Reader;
5   - import java.io.StringReader;
6   - import java.util.ArrayList;
7   - import java.util.Iterator;
8 4   import java.util.List;
9 5   import java.util.Set;
10 6
11   - import org.apache.lucene.analysis.TokenStream;
12   - import org.apache.lucene.analysis.core.LowerCaseFilter;
13   - import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
14   - import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
15 7   import org.apache.spark.api.java.function.Function;
16 8   import org.apache.spark.util.DoubleAccumulator;
17 9   import org.bson.Document;
18   - import org.json.JSONArray;
19 10   import org.json.JSONObject;
20 11
… …
57 48   String[] key_array = key_set.toArray(new String[key_set.size()]);
58 49
59   -
60   - //Set<String> key_set = ef_count.keySet();
61   - //for (String key : key_set) {
62   -
63   - //Iterator<String> key_iterator = ef_count.keySet().iterator();
64   - //while (key_iterator.hasNext()) {
65 50   for (int i=0; i<key_array.length; i++) {
66 51
… …
69 54   if (key.contains(".")) {
70 55   String new_key = key.replaceAll("\\.", "<PERIOD>");
71   - System.out.println("**** old key:" + key + "=> new key:" + new_key);
  56 + //System.out.println("**** old key:" + key + "=> new key:" + new_key);
72 57   ef_count.put(new_key, ef_count.get(key));
73 58   ef_count.remove(key);
… …
202 187   fixup_volume(json_file_in,doc);
203 188
204   - collection.insertOne(doc);
  189 + collection.findOneAndReplace(Document.parse("{_id:" + doc.getString("_id") + "}"),doc);
  190 +
  191 + //collection.insertOne(doc);
205 192
206 193   /*
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForMongoDBIngest.java
r31310 r31319 (columns: old line, new line, marker, content; "-" = removed, "+" = added, blank marker = unmodified)
106 106
107 107   System.out.println("################");
108   - System.out.println("# Total Page Count = " + total_page_count);
  108 + System.out.println("# Total Volume Count = " + total_page_count);
109 109   System.out.println("################");
110 110   }
Note: See TracChangeset for help on using the changeset viewer.