Ignore:
Timestamp:
2016-12-28T10:30:08+13:00 (7 years ago)
Author:
davidb
Message:

Some variable name changes, and printing tidy-up

Location:
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java

    r31266 r31269  
    113113                ids.add(page_id);
    114114
    115                 if (i==0) {
    116                     System.out.println("Sample output JSON page file: " + output_json_bz2);
     115                if (_verbosity >=2) {
     116                    if (i==0) {
     117                        System.out.println("Sample output JSON page file [i=0]: " + output_json_bz2);
     118                    }
    117119                }
    118 
    119120                JSONObject ef_page = ef_pages.getJSONObject(i);
    120121
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeLangStreamFlatmap.java

    r31260 r31269  
    4141        JSONObject extracted_feature_record = JSONClusterFileIO.readJSONFile(full_json_file_in);
    4242       
    43         ArrayList<String> all_word_list = new ArrayList<String>();
     43        ArrayList<String> all_lang_list = new ArrayList<String>();
    4444       
    4545        if (extracted_feature_record != null) {
     
    8787                if (ef_page != null) {
    8888                   
    89                     ArrayList<String> page_word_list = SolrDocJSON.generateTokenPosCountLangLabels(volume_id, page_id, ef_page);                   
    90                     all_word_list.addAll(page_word_list);
     89                    ArrayList<String> page_lang_labels = SolrDocJSON.generateTokenPosCountLangLabels(volume_id, page_id, ef_page);                 
     90                    all_lang_list.addAll(page_lang_labels);
    9191                }
    9292                else {
     
    109109        _progress_accum.add(_progress_step);
    110110       
    111         return all_word_list.iterator();
     111        return all_lang_list.iterator();
    112112    }
    113113   
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java

    r31266 r31269  
    113113        DoubleAccumulator progress_accum = jsc.sc().doubleAccumulator("Progress Percent");
    114114       
    115         System.err.println();
    116         System.err.println();
    117         System.err.println();
    118         System.err.println("****##### _input_dir =  " + _input_dir);
    119         System.err.println();
    120         System.err.println();
    121         System.err.println();
    122        
    123115        boolean icu_tokenize = Boolean.getBoolean("wcsa-ef-ingest.icu-tokenize");
    124116        boolean strict_file_io = Boolean.getBoolean("wcsa-ef-ingest.strict-file-io");
     
    130122        //json_list_data_rp.foreach(per_vol_json);
    131123        JavaRDD<String> per_page_ids = json_list_data_rp.flatMap(per_vol_json);
    132         long num_page_ids = per_page_ids.count();
    133        
    134         long num_ids = num_volumes;
     124        long num_page_ids = per_page_ids.count(); // trigger lazy eval of: flatmap:per-vol
     125       
     126        //long num_ids = num_volumes;
    135127       
    136128        System.out.println("");
Note: See TracChangeset for help on using the changeset viewer.