Ignore:
Timestamp:
2017-01-31T21:35:50+13:00 (7 years ago)
Author:
davidb
Message:

Initial cut at adding POS information to the Solr index

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java

    r31374 r31375  
    21 21    private static final long serialVersionUID = 1L;
    22 22
    23     // Following details on number of partitions to use given in
    24     //  "Parallelized collections" section of:
    25     //   https://spark.apache.org/docs/2.0.1/programming-guide.html
    26     //
    27     // For a more detailed discussion see:
    28     //   http://blog.cloudera.com/blog/2015/03/how-to-tune-your-apache-spark-jobs-part-2/
    29    
    30 23    protected static final int DEFAULT_NUM_CORES = 10;
    31 24    protected static final int MINIMUM_NUM_PARTITIONS = 10*DEFAULT_NUM_CORES;
     
    36 29    //protected String _json_list_filename;
    37 30    protected String _whitelist_filename;
     31    protected String _langmap_directory;
     32   
    38 33    protected String _solr_url;
    39 34    protected String _output_dir;
     
    49 44        boolean use_whitelist = Boolean.getBoolean("wcsa-ef-ingest.use-whitelist");
    50 45        _whitelist_filename = (use_whitelist) ?  System.getProperty("wcsa-ef-ingest.whitelist-filename") : null;
     46       
     47        boolean use_langmap = Boolean.getBoolean("wcsa-ef-ingest.use-langmap");
     48        _langmap_directory = (use_langmap) ?  System.getProperty("wcsa-ef-ingest.langmap-directory") : null;
     49       
    51 50       
    52 51        _solr_url   = solr_url;
     
    110 109        boolean strict_file_io = Boolean.getBoolean("wcsa-ef-ingest.strict-file-io");
    111 110       
    112         PerVolumeJSON per_vol_json = new PerVolumeJSON(_input_dir,_whitelist_filename,
     111        PerVolumeJSON per_vol_json = new PerVolumeJSON(_input_dir,_whitelist_filename, _langmap_directory,
    113 112                                                       _solr_url,_output_dir,_verbosity,
    114 113                                                       icu_tokenize,strict_file_io);
Note: See TracChangeset for help on using the changeset viewer.