Ignore:
Timestamp:
2016-11-02T14:17:45+13:00 (7 years ago)
Author:
davidb
Message:

Support for randomly choosing Solr endpoints added in

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java

    r31027 r31028  
    66import java.io.IOException;
    77import java.io.Serializable;
     8import java.util.ArrayList;
     9
    810import org.apache.commons.cli.*;
    911
    1012import org.apache.spark.api.java.*;
    1113import org.apache.spark.util.DoubleAccumulator;
     14import org.apache.spark.util.LongAccumulator;
    1215import org.hathitrust.extractedfeatures.PerPageJSONFlatmap;
    1316import org.json.JSONObject;
     
    103106        jsc.close();
    104107    }
    105 
     108    public ArrayList<String> extrapolateSolrEndpoints()
     109    {
     110        ArrayList<String> solr_endpoints = new ArrayList<String>();
     111       
     112        String solr_endpoint_ips = System.getProperty("wcsa-ef-ingest.solr-endpoint-ips",null);
     113        if (solr_endpoint_ips != null) {
     114            String [] ips = solr_endpoint_ips.split(",");
     115            for (String ip : ips) {
     116                String solr_endpoint = _solr_url.replaceFirst("//.*?:", "//"+ip+":");
     117                solr_endpoints.add(solr_endpoint);
     118            }
     119        }
     120        else {
     121            if (_solr_url != null) {
     122                solr_endpoints.add(_solr_url);
     123            }
     124        }
     125
     126        return solr_endpoints;
     127    }
     128   
    106129    public void execPerPage()
    107130    {   
     
    129152        //long num_page_ids = per_page_jsonobjects.count(); // trigger lazy eval of: flatmap:per-vol
    130153
    131         DoubleAccumulator per_page_progress_accum = jsc.sc().doubleAccumulator("Pages Processed");
     154        LongAccumulator per_page_progress_accum = jsc.sc().longAccumulator("Pages Processed");
     155        ArrayList<String> solr_endpoints = extrapolateSolrEndpoints();
    132156       
    133157        PerPageJSONMap paged_json_id_map
    134             = new PerPageJSONMap(_input_dir,_solr_url,_output_dir,_verbosity, per_page_progress_accum,1.0);
     158            = new PerPageJSONMap(_input_dir,solr_endpoints,_output_dir,_verbosity, per_page_progress_accum,1);
    135159        JavaRDD<String> per_page_ids = per_page_jsonobjects.map(paged_json_id_map);
    136160
     
    219243
    220244       
    221         String verbosity_str = cmd.getOptionValue("verbosity","0");
     245        String verbosity_str = cmd.getOptionValue("verbosity","1");
    222246        int verbosity = Integer.parseInt(verbosity_str);
    223247
     
    269293            = new ProcessForSolrIngest(input_dir,json_list_filename,solr_url,output_dir,verbosity);
    270294       
    271         String process_json_mode = System.getProperty("ef-solr.process-json-mode","per-page");
    272         if (process_json_mode.equals("per-volume")) {
     295        String process_ef_json_mode = System.getProperty("wcsa-ef-ingest.process-ef-json-mode","per-page");
     296        if (process_ef_json_mode.equals("per-volume")) {
    273297            prep_for_ingest.execPerVolume();
    274298        }
Note: See TracChangeset for help on using the changeset viewer.