source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/ef-solr.properties

Last change on this file was 31267, checked in by davidb, 7 years ago

Values trialed on gsliscluster1. Rekindling idea of per-vol processing

File size: 1.3 KB
Line 
1
2wcsa-ef-ingest.process-ef-json-mode = per-volume
3#wcsa-ef-ingest.process-ef-json-mode = per-page
4wcsa-ef-ingest.use-whitelist = true
5wcsa-ef-ingest.whitelist-filename = file:/hdfsd05/dbbridge/whitelist-peter1.txt
6#wcsa-ef-ingest.whitelist-filename = file:///home/dbbridge/extracted-features-solr/solr-ingest/whitelist-peter1.txt
7
8
9#wcsa-ef-ingest.solr-cloud-nodes = 10.11.0.53:8983,10.11.0.54:8983,10.11.0.55:8983
10wcsa-ef-ingest.solr-cloud-nodes = gc0:8983,gc1:8983,gc2:8983,gc3:8983,gc4:8983,gc5:8983,gc6:8983,gc7:8983,gc8:8983,gc9:8983
11wcsa-ef-ingest.icu-tokenize = true
12wcsa-ef-ingest.strict-file-io = false
13
14
15# For guidance on the number of partitions to use, see the "Parallelized collections" section of:
16# https://spark.apache.org/docs/2.0.1/programming-guide.html
17# which suggests 2-4 partitions per CPU core in the cluster (i.e. 2-4 * num_cores)
18#
19# For a more detailed discussion see:
20# http://blog.cloudera.com/blog/2015/03/how-to-tune-your-apache-spark-jobs-part-2/
21
22# wcsa-ef-ingest.num-partitions = 12
23#wcsa-ef-ingest.num-partitions = 110
24#wcsa-ef-ingest.num-partitions = 220
25#wcsa-ef-ingest.num-partitions = 400
26#wcsa-ef-ingest.num-partitions = 1000
27wcsa-ef-ingest.files-per-partition = 1300
28
29spark.executor.cores=10
30
31spark.driver.memory=50g
32spark.executor.memory=70g
33##spark.network.timeout=240s
34
35#spark.local.dir=/var/tmp
Note: See TracBrowser for help on using the repository browser.