Changeset 31597


Ignore:
Timestamp:
2017-04-11T23:41:07+12:00 (7 years ago)
Author:
davidb
Message:

Added _s and _ss fields to help with faceting. Temporarily commented out the full-text page part.

Location:
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java

    r31505 r31597  
    134134                    }
    135135                }
    136                
     136/*             
    137137                //
    138138                // Now move on to POS extracted features per-page
     
    185185
    186186                }
     187                */
    187188            }
    188189        }
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java

    r31502 r31597  
    124124                                                       solr_endpoints,_output_dir,_verbosity,
    125125                                                       icu_tokenize,strict_file_io);
    126 
    127126       
    128127        JavaRDD<Integer> per_volume_page_count = json_text_rdd.map(per_vol_json);
    129128       
    130         Integer num_page_ids = per_volume_page_count.reduce((a, b) -> a + b);
    131        
    132         System.out.println("");
    133         System.out.println("############");
    134         System.out.println("# Number of page ids: " + num_page_ids);
     129        //Integer num_page_ids = per_volume_page_count.reduce((a, b) -> a + b);
     130        long num_vol_ids = per_volume_page_count.count();
     131       
     132        System.out.println("");
     133        System.out.println("############");
     134        //System.out.println("# Number of page ids: " + num_page_ids);
     135        System.out.println("# Number of volume ids: " + num_vol_ids);
    135136        System.out.println("############");
    136137        System.out.println("");
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java

    r31510 r31597  
    7373                if (metavalue != null) {
    7474                    solr_doc_json.put(metaname+"_t",metavalue);
     75                    solr_doc_json.put(metaname+"_s",metavalue);
    7576                }
    7677            }
     
    8081                if (metavalues != null) {
    8182                    solr_doc_json.put(metaname+"_t",metavalues);
    82                 }
    83             }
    84            
    85             solr_add_json.put("commitWithin", 5000);
     83                    solr_doc_json.put(metaname+"_ss",metavalues);
     84                }
     85            }
     86           
     87            solr_add_json.put("commitWithin", 60000); // used to be 5000
    8688            solr_add_json.put("doc", solr_doc_json);
    8789
Note: See TracChangeset for help on using the changeset viewer.