Changeset 31267

Show
Ignore:
Timestamp:
27.12.2016 18:52:41 (2 years ago)
Author:
davidb
Message:

Values trialed on gsliscluster1. Rekindling the idea of per-volume processing.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/ef-solr.properties

    r31253 r31267  
    11 
    2 #wcsa-ef-ingest.process-ef-json-mode = per-volume 
    3 wcsa-ef-ingest.process-ef-json-mode = per-page 
     2wcsa-ef-ingest.process-ef-json-mode = per-volume 
     3#wcsa-ef-ingest.process-ef-json-mode = per-page 
     4wcsa-ef-ingest.use-whitelist = true 
     5wcsa-ef-ingest.whitelist-filename = file:/hdfsd05/dbbridge/whitelist-peter1.txt 
     6#wcsa-ef-ingest.whitelist-filename = file:///home/dbbridge/extracted-features-solr/solr-ingest/whitelist-peter1.txt 
     7 
    48 
    59#wcsa-ef-ingest.solr-cloud-nodes = 10.11.0.53:8983,10.11.0.54:8983,10.11.0.55:8983 
    610wcsa-ef-ingest.solr-cloud-nodes = gc0:8983,gc1:8983,gc2:8983,gc3:8983,gc4:8983,gc5:8983,gc6:8983,gc7:8983,gc8:8983,gc9:8983 
     11wcsa-ef-ingest.icu-tokenize = true 
    712wcsa-ef-ingest.strict-file-io = false 
    8 wcsa-ef-ingest.icu-tokenize = true 
     13 
    914 
    1015# For a guide on the number of partitions to use, see the "Parallelized collections" section of: 
     
    1621     
    1722# wcsa-ef-ingest.num-partitions = 12 
    18 wcsa-ef-ingest.num-partitions = 120 
     23#wcsa-ef-ingest.num-partitions = 110 
     24#wcsa-ef-ingest.num-partitions = 220 
     25#wcsa-ef-ingest.num-partitions = 400 
     26#wcsa-ef-ingest.num-partitions = 1000 
     27wcsa-ef-ingest.files-per-partition = 1300 
    1928 
    20 spark.executor.cores=11 
     29spark.executor.cores=10 
     30 
     31spark.driver.memory=50g 
     32spark.executor.memory=70g 
     33##spark.network.timeout=240s 
     34 
     35#spark.local.dir=/var/tmp