Ignore:
Timestamp:
2016-10-31T15:40:36+13:00 (7 years ago)
Author:
davidb
Message:

Further RDD flatMap/map restructuring and refactoring for per-page processing

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java

    r31010 r31011  
    119119        DoubleAccumulator progress_accum = jsc.sc().doubleAccumulator("Progress Percent");
    120120       
    121         PerPageJSONFlatmap paged_json = new PerPageJSONFlatmap(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol);
    122         JavaRDD<JSONObject> json_per_page_ids = json_list_data.flatMap(paged_json).cache();
    123        
    124         PerPageJSONForeach paged_json_foreach = new PerPageJSONForeach(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol);
    125         json_per_page_ids.foreach(paged_json_foreach);
     121        PerPageJSONFlatmap paged_solr_json_flatmap = new PerPageJSONFlatmap(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol);
     122        JavaRDD<JSONObject> per_page_jsonobjects = json_list_data.flatMap(paged_solr_json_flatmap).cache();
     123       
     124        PerPageJSONMap paged_json_id_map = new PerPageJSONMap(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol);
     125        JavaRDD<String> per_page_ids = per_page_jsonobjects.map(paged_json_id_map);
    126126
    127127/*
     
    133133*/
    134134       
    135         long num_page_ids = json_per_page_ids.count();
     135        long num_page_ids = per_page_ids.count(); // trigger lazy eval of: flatmap:per-vol -> map:per-page
    136136       
    137137        System.out.println("");
Note: See TracChangeset for help on using the changeset viewer.