Changeset 31319

Show
Ignore:
Timestamp:
22.01.2017 15:19:50 (3 years ago)
Author:
davidb
Message:

Changed to replace existing MongoDB entry. Fixed up print statement

Location:
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeMongoDBDocumentsMap.java

    r31318 r31319  
    22 
    33import java.io.IOException; 
    4 import java.io.Reader; 
    5 import java.io.StringReader; 
    6 import java.util.ArrayList; 
    7 import java.util.Iterator; 
    84import java.util.List; 
    95import java.util.Set; 
    106 
    11 import org.apache.lucene.analysis.TokenStream; 
    12 import org.apache.lucene.analysis.core.LowerCaseFilter; 
    13 import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer; 
    14 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 
    157import org.apache.spark.api.java.function.Function; 
    168import org.apache.spark.util.DoubleAccumulator; 
    179import org.bson.Document; 
    18 import org.json.JSONArray; 
    1910import org.json.JSONObject; 
    2011 
     
    5748        String[] key_array = key_set.toArray(new String[key_set.size()]); 
    5849         
    59          
    60         //Set<String> key_set = ef_count.keySet(); 
    61         //for (String key : key_set) { 
    62              
    63         //Iterator<String> key_iterator = ef_count.keySet().iterator(); 
    64         //while (key_iterator.hasNext()) { 
    6550        for (int i=0; i<key_array.length; i++) { 
    6651             
     
    6954            if (key.contains(".")) { 
    7055                String new_key = key.replaceAll("\\.", "<PERIOD>"); 
    71                 System.out.println("**** old key:" + key + "=> new key:" + new_key); 
     56                //System.out.println("**** old key:" + key + "=> new key:" + new_key); 
    7257                ef_count.put(new_key, ef_count.get(key)); 
    7358                ef_count.remove(key); 
     
    202187            fixup_volume(json_file_in,doc); 
    203188             
    204             collection.insertOne(doc);       
     189            collection.findOneAndReplace(Document.parse("{_id:" + doc.getString("_id") + "}"),doc); 
     190             
     191            //collection.insertOne(doc);         
    205192             
    206193            /* 
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForMongoDBIngest.java

    r31310 r31319  
    106106         
    107107        System.out.println("################"); 
    108         System.out.println("# Total Page Count = " + total_page_count); 
     108        System.out.println("# Total Volume Count = " + total_page_count); 
    109109        System.out.println("################"); 
    110110    }