Changeset 31269 for other-projects

Show
Ignore:
Timestamp:
28.12.2016 10:30:08 (3 years ago)
Author:
davidb
Message:

Some variable name changes, and printing tidy-up

Location:
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java

    r31266 r31269  
    113113                ids.add(page_id);  
    114114 
    115                 if (i==0) { 
    116                     System.out.println("Sample output JSON page file: " + output_json_bz2); 
     115                if (_verbosity >=2) { 
     116                    if (i==0) { 
     117                        System.out.println("Sample output JSON page file [i=0]: " + output_json_bz2); 
     118                    } 
    117119                } 
    118  
    119120                JSONObject ef_page = ef_pages.getJSONObject(i); 
    120121 
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeLangStreamFlatmap.java

    r31260 r31269  
    4141        JSONObject extracted_feature_record = JSONClusterFileIO.readJSONFile(full_json_file_in); 
    4242         
    43         ArrayList<String> all_word_list = new ArrayList<String>(); 
     43        ArrayList<String> all_lang_list = new ArrayList<String>(); 
    4444         
    4545        if (extracted_feature_record != null) { 
     
    8787                if (ef_page != null) { 
    8888                     
    89                     ArrayList<String> page_word_list = SolrDocJSON.generateTokenPosCountLangLabels(volume_id, page_id, ef_page);                     
    90                     all_word_list.addAll(page_word_list); 
     89                    ArrayList<String> page_lang_labels = SolrDocJSON.generateTokenPosCountLangLabels(volume_id, page_id, ef_page);                   
     90                    all_lang_list.addAll(page_lang_labels); 
    9191                } 
    9292                else { 
     
    109109        _progress_accum.add(_progress_step); 
    110110         
    111         return all_word_list.iterator(); 
     111        return all_lang_list.iterator(); 
    112112    } 
    113113     
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java

    r31266 r31269  
    113113        DoubleAccumulator progress_accum = jsc.sc().doubleAccumulator("Progress Percent"); 
    114114         
    115         System.err.println(); 
    116         System.err.println(); 
    117         System.err.println(); 
    118         System.err.println("****##### _input_dir =  " + _input_dir); 
    119         System.err.println(); 
    120         System.err.println(); 
    121         System.err.println(); 
    122          
    123115        boolean icu_tokenize = Boolean.getBoolean("wcsa-ef-ingest.icu-tokenize"); 
    124116        boolean strict_file_io = Boolean.getBoolean("wcsa-ef-ingest.strict-file-io"); 
     
    130122        //json_list_data_rp.foreach(per_vol_json); 
    131123        JavaRDD<String> per_page_ids = json_list_data_rp.flatMap(per_vol_json); 
    132         long num_page_ids = per_page_ids.count(); 
    133          
    134         long num_ids = num_volumes; 
     124        long num_page_ids = per_page_ids.count(); // trigger lazy eval of: flatmap:per-vol  
     125         
     126        //long num_ids = num_volumes; 
    135127         
    136128        System.out.println("");