Ignore:
Timestamp:
10/31/16 20:51:39 (4 years ago)
Author:
davidb
Message:

Add a progress accumulator for PerPageJSONMap

Location:
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures/PerPageJSONMap.java

    r31011 r31013  
    6464        }
    6565       
     66        _progress_accum.add(_progress_step);
     67       
    6668        return output_json_bz2;
    6769    }
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java

    r31011 r31013  
    117117        double per_vol = 100.0/(double)num_volumes;
    118118       
    119         DoubleAccumulator progress_accum = jsc.sc().doubleAccumulator("Progress Percent");
    120        
    121         PerPageJSONFlatmap paged_solr_json_flatmap = new PerPageJSONFlatmap(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol);
     119        DoubleAccumulator per_vol_progress_accum = jsc.sc().doubleAccumulator("Per Volume Progress Percent");
     120       
     121        PerPageJSONFlatmap paged_solr_json_flatmap
     122            = new PerPageJSONFlatmap(_input_dir,_solr_url,_output_dir,_verbosity, per_vol_progress_accum,per_vol);
    122123        JavaRDD<JSONObject> per_page_jsonobjects = json_list_data.flatMap(paged_solr_json_flatmap).cache();
    123124       
    124         PerPageJSONMap paged_json_id_map = new PerPageJSONMap(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol);
     125        //long num_page_ids = per_page_jsonobjects.count(); // trigger lazy eval of: flatmap:per-vol
     126
     127        DoubleAccumulator per_page_progress_accum = jsc.sc().doubleAccumulator("Pages Processed");
     128       
     129        PerPageJSONMap paged_json_id_map
     130            = new PerPageJSONMap(_input_dir,_solr_url,_output_dir,_verbosity, per_page_progress_accum,1.0);
    125131        JavaRDD<String> per_page_ids = per_page_jsonobjects.map(paged_json_id_map);
    126132
Note: See TracChangeset for help on using the changeset viewer.