source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/scripts/JSONLIST-RUN-YARN-SPARK.sh@ 32109

Last change on this file since 32109 was 32109, checked in by davidb, 6 years ago

Changes made after testing through YARN

  • Property svn:executable set to *
File size: 1.3 KB
Line 
#!/bin/bash
#
# JSONLIST-RUN-YARN-SPARK.sh
#
# Sets the shell variables for a YARN cluster-mode run of
# org.hathitrust.extractedfeatures.ProcessForSolrIngestJSONFilelist and then
# sources scripts/_RUN.sh in the current shell.
#
# Usage:
#   JSONLIST-RUN-YARN-SPARK.sh [json_filelist] [input_dir] [extra-arg]
#
# Positional arguments:
#   $1  json_filelist - defaults to "json-filelist.txt" when unset or empty.
#   After $1 has been consumed:
#     * exactly one argument left -> input_dir takes the built-in HDFS default
#       and that argument stays in "$@" for the sourced script;
#     * otherwise                 -> the next argument becomes input_dir and is
#       shifted off (input_dir is empty when nothing is left).
#
# Variables left set for scripts/_RUN.sh (how that script uses them is not
# visible from this file - confirm there):
#   json_filelist, input_dir, solr_base_url, master_opt, classmain
#
# The helper is sourced through a relative path, so run this script from the
# directory that contains scripts/.

json_filelist=${1:-json-filelist.txt}
# Only shift when there is something to shift; a bare `shift` with no
# positional parameters returns a failure status.
if [ "$#" -gt 0 ] ; then
  shift
fi

#seq_file=${1:-hdfs:///user/dbbridge/json-files}

# NOTE(review): the default is chosen when exactly ONE argument remains, not
# when none remain - presumably the lone argument is meant for
# scripts/_RUN.sh rather than being the input directory. Confirm intent.
if [ "$#" -eq 1 ] ; then
  input_dir="hdfs:///user/dbbridge/json-files"
else
  input_dir=${1:-}
  if [ "$#" -gt 0 ] ; then
    shift
  fi
fi

# Earlier input/output locations and Solr endpoints, kept for reference:
##input_dir="hdfs://$SPARK_MASTER_HOST:9000/user/dbbridge/packed-full-ef-part-00000"
###input_dir="hdfs://$SPARK_MASTER_HOST:9000/user/dbbridge/unpacked-ef-10000"
#
###input_dir="hdfs://$SPARK_MASTER_HOST:9000/user/dbbridge/full-ef-json-files"
###input_dir="hdfs://$SPARK_MASTER_HOST:9000/tmp/dbbridge/full-ef-json-files"
###input_dir="hdfs://master:9000/user/htrc/full-ef-json-files"
###input_dir="hdfs://10.10.0.52:9000/user/htrc/full-ef-json-files"

###output_dir=hdfs://master:9000/user/htrc/full-solr-json-files
###solr_url="http://gc0:8983/solr/htrc-full-ef/update"
###solr_url="http://solr1-s:8983/solr/htrc-full-ef/update"

solr_base_url="http://solr1-s:8983/solr"


#

# Earlier stand-alone Spark master settings, kept for reference:
#master_opt="--driver-memory 50g --executor-memory 12g --conf spark.executor.extraJavaOptions=-XX:+HeapDumpOnOutOfMemoryError --master spark://$SPARK_MASTER_HOST:7077"
#master_opt="--driver-memory 50g --executor-memory 90g --master spark://$SPARK_MASTER_HOST:7077"

# Current settings: YARN master, cluster deploy mode.
master_opt="--num-executors 33 --executor-cores 4 --driver-memory 20g --executor-memory 8g --master yarn --deploy-mode cluster"

classmain="org.hathitrust.extractedfeatures.ProcessForSolrIngestJSONFilelist"

# Sourced (not executed) so the variables above and any remaining positional
# parameters are visible to it.
. scripts/_RUN.sh

Note: See TracBrowser for help on using the repository browser.