Changeset 31013

Show
Ignore:
Timestamp:
31.10.2016 20:51:39 (3 years ago)
Author:
davidb
Message:

Accumulator for PerPageMap

Location:
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures/PerPageJSONMap.java

    r31011 r31013  
    6464        } 
    6565         
     66        _progress_accum.add(_progress_step); 
     67         
    6668        return output_json_bz2; 
    6769    } 
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java

    r31011 r31013  
    117117        double per_vol = 100.0/(double)num_volumes; 
    118118         
    119         DoubleAccumulator progress_accum = jsc.sc().doubleAccumulator("Progress Percent"); 
    120          
    121         PerPageJSONFlatmap paged_solr_json_flatmap = new PerPageJSONFlatmap(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol); 
     119        DoubleAccumulator per_vol_progress_accum = jsc.sc().doubleAccumulator("Per Volume Progress Percent"); 
     120         
     121        PerPageJSONFlatmap paged_solr_json_flatmap  
     122            = new PerPageJSONFlatmap(_input_dir,_solr_url,_output_dir,_verbosity, per_vol_progress_accum,per_vol); 
    122123        JavaRDD<JSONObject> per_page_jsonobjects = json_list_data.flatMap(paged_solr_json_flatmap).cache(); 
    123124         
    124         PerPageJSONMap paged_json_id_map = new PerPageJSONMap(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol); 
     125        //long num_page_ids = per_page_jsonobjects.count(); // trigger lazy eval of: flatmap:per-vol 
     126 
     127        DoubleAccumulator per_page_progress_accum = jsc.sc().doubleAccumulator("Pages Processed"); 
     128         
     129        PerPageJSONMap paged_json_id_map  
     130            = new PerPageJSONMap(_input_dir,_solr_url,_output_dir,_verbosity, per_page_progress_accum,1.0); 
    125131        JavaRDD<String> per_page_ids = per_page_jsonobjects.map(paged_json_id_map); 
    126132