Ignore:
Timestamp:
2016-12-28T14:04:19+13:00 (7 years ago)
Author:
davidb
Message:

Updating of POS code to new files-per-partition parameter, plus some other related tweaks

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForLangCount.java

    r31270 r31271  
    6969       
    7070        //int num_partitions = Integer.getInteger("wcsa-ef-ingest.num-partitions", DEFAULT_NUM_PARTITIONS);
    71         int files_per_partition = Integer.getInteger("wcsa-ef-ingest.num-partitions", DEFAULT_FILES_PER_PARTITION);
     71        int files_per_partition = Integer.getInteger("wcsa-ef-ingest.files-per-partition", DEFAULT_FILES_PER_PARTITION);
    7272       
    73        
    74         JavaRDD<String> json_list_data = jsc.textFile(_json_list_filename,files_per_partition).cache();
     73        JavaRDD<String> json_list_data = jsc.textFile(_json_list_filename).cache();
    7574        json_list_data.setName("JSON-file-list");
    7675       
     
    8180       
    8281        JavaRDD<String> json_list_data_rp = json_list_data.repartition(num_partitions);
     82        json_list_data_rp.setName("JSON-file-list--repartitioned");
    8383       
    8484        DoubleAccumulator per_vol_progress_accum = jsc.sc().doubleAccumulator("Per Volume Progress Percent");
Note: See TracChangeset for help on using the changeset viewer.