Changeset 30929


Ignore:
Timestamp:
10/26/16 09:47:36 (5 years ago)
Author:
davidb
Message:

Tweaks made while testing the script

Location:
other-projects/hathitrust/solr-extracted-features/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/RUN-PD-CLUSTER.bash

    r30928 r30929  
     2  2
     3  3  json_filelist=${1:-pd-file-listing-step10000.txt}
        4  shift
     4  5
     5  6  input_dir="hdfs://master:9000/user/htrc/pd-ef-json-files"
     
    11 12  master_opt="--master spark://10.10.0.52:7077"
    12 13
    13     ./_RUN.bash
       14  . ./_RUN.bash
  • other-projects/hathitrust/solr-extracted-features/trunk/_RUN.bash

    r30927 r30929  
    24 24  fi
    25 25
       26  echo
       27  echo "****"
       28  echo "* Checking for Spark and Hadoop daemons"
       29  echo "****"
       30  jps | sed 's/^/* /g'
       31  echo "****"
       32  echo "* Done"
       33  echo "****"
       34  echo
       35
    26 36  self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar
    27 37  base_cmd="spark-submit --class org.hathitrust.PrepareForIngest $master_opt $self_contained_jar"
    28 38
    29     $base_cmd --json-filelist="$json_filelist" "$input_dir" "$output_dir" $*
       39  cmd="$base_cmd --json-filelist=\"$json_filelist\" $input_dir $output_dir $*"
       40
       41  echo "****"
       42  echo "* Lauching:"
       43  echo "*   $cmd"
       44  echo "****"
       45  echo "* Monitor progress through:"
       46  echo "*   http://10.10.0.52:8080/"
       47  echo "****"
       48  echo
       49
       50  $cmd
    30 51
    31 52  #    spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step10000.txt pd-ef-json-files pd-solr-json-files $*
Note: See TracChangeset for help on using the changeset viewer.