Ignore:
Timestamp:
2016-12-20T14:15:05+13:00 (7 years ago)
Author:
davidb
Message:

Support for icu-tokenize property added, plus relevant refactoring.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerPageJSONFlatmap.java

    r31226 r31252  
    38 38    protected double            _progress_step;
    39 39   
       40    boolean _icu_tokenize;
    40 41    boolean _strict_file_io;
    41 42   
     
    43 44                              String solr_url, String output_dir, int verbosity,
    44 45                              DoubleAccumulator progress_accum, double progress_step,
    45                                 boolean strict_file_io)
       46                              boolean icu_tokenize, boolean strict_file_io)
    46 47    {
    47 48        _input_dir  = input_dir;
     
    55 56        _progress_step  = progress_step;
    56 57       
       58        _icu_tokenize   = icu_tokenize;
    57 59        _strict_file_io = strict_file_io;
    58 60       
     
    132 134                    // Convert to Solr add form
    133 135                    JSONObject solr_add_doc_json
    134                           = SolrDocJSON.generateSolrDocJSON(volume_id, page_id, ef_page, _whitelist_bloomfilter);
        136                       = SolrDocJSON.generateSolrDocJSON(volume_id, page_id, ef_page, _whitelist_bloomfilter,_icu_tokenize);
    135 137                    solr_add_doc_json.put("filename_json_bz2", output_json_bz2);
    136 138
Note: See TracChangeset for help on using the changeset viewer.