root/other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/scripts/FULL-RUN-YARN-SPARK.sh @ 31452

Revision 31452, 1.1 KB (checked in by davidb, 3 years ago)

Additional Spark progs to run

  • Property svn:executable set to *
Line 
#!/bin/bash
#
# FULL-RUN-YARN-SPARK.sh
#
# Sets up the configuration for a full-size ingest run submitted to YARN and
# then hands control to scripts/_RUN.sh, which is sourced so that it sees the
# variables defined here.
#
# Usage:
#   scripts/FULL-RUN-YARN-SPARK.sh [json_filelist [extra-args...]]
#
#   json_filelist  File listing the JSON inputs to process
#                  (default: full-listing-step100000.txt).
#   extra-args     Left in "$@" for scripts/_RUN.sh (presumably consumed
#                  there -- that script is not visible from this file).
#
# Environment:
#   SPARK_MASTER_HOST  Host name used to build the HDFS input URL.
#
# Must be started from the directory that contains scripts/, because
# scripts/_RUN.sh is located relative to the current working directory.
#
# NOTE: strict mode (set -euo pipefail) is deliberately not enabled here:
# shell options would leak into the sourced scripts/_RUN.sh.

json_filelist=${1:-full-listing-step100000.txt}
# Drop the file-list argument only when one was actually supplied; a bare
# `shift` with no positional parameters returns a non-zero status.
if (( $# > 0 )); then
  shift
fi

# input_dir embeds SPARK_MASTER_HOST; without it the URL has an empty host.
# Only warn (do not abort): scripts/_RUN.sh may still override the value.
if [[ -z "${SPARK_MASTER_HOST:-}" ]]; then
  printf 'WARNING: SPARK_MASTER_HOST is not set; input_dir will have an empty host\n' >&2
fi

input_dir="hdfs://${SPARK_MASTER_HOST}:9000/user/dbbridge/packed-full-ef-part-00000"
# Alternative input locations, kept for reference:
#input_dir="hdfs://$SPARK_MASTER_HOST:9000/user/dbbridge/unpacked-ef-10000"

#input_dir="hdfs://$SPARK_MASTER_HOST:9000/user/dbbridge/full-ef-json-files"
#input_dir="hdfs://$SPARK_MASTER_HOST:9000/tmp/dbbridge/full-ef-json-files"
#input_dir="hdfs://master:9000/user/htrc/full-ef-json-files"
#input_dir="hdfs://10.10.0.52:9000/user/htrc/full-ef-json-files"

#output_dir=hdfs://master:9000/user/htrc/full-solr-json-files
#solr_url="http://gc0:8983/solr/htrc-full-ef/update"
solr_url="http://solr1-s:8983/solr/htrc-full-ef/update"

# Spark submission options. Kept as a single string because that is the form
# this script has always exported to scripts/_RUN.sh.
# Earlier standalone-cluster settings, kept for reference:
#master_opt="--driver-memory 50g --executor-memory 12g --conf spark.executor.extraJavaOptions=-XX:+HeapDumpOnOutOfMemoryError --master spark://$SPARK_MASTER_HOST:7077"
#master_opt="--driver-memory 50g --executor-memory 90g --master spark://$SPARK_MASTER_HOST:7077"
master_opt="--num-executors 33 --executor-cores 4 --driver-memory 20g --executor-memory 8g --master yarn --deploy-mode cluster"

# Source (not execute) the shared runner so it inherits the variables above
# and the remaining positional parameters.
. scripts/_RUN.sh
Note: See TracBrowser for help on using the browser.