root/other-projects/hathitrust/solr-extracted-features/trunk/_RUN.bash @ 30936

Revision 30936, 2.1 KB (checked in by davidb, 4 years ago)

Refinement of Spark Monitor echo statements

  • Property svn:executable set to *
Line 
#!/bin/bash

# To work, the following bash variables need to have been set:
#
#   json_filelist input_dir output_dir
#
# Typically done through running a wrapper script, such as:
#
#   RUN-PD-CLUSTER.bash

# Stop with a non-zero status as soon as a required variable is found to be
# unset or empty.  An explicit status is needed: a bare 'exit' would hand back
# the status of the preceding 'echo' (success), hiding the failure from
# whatever invoked this script.
for required_var in json_filelist input_dir output_dir ; do
    # ${!required_var} is indirect expansion: the value of the variable whose
    # name is held in 'required_var'
    if [ -z "${!required_var}" ] ; then
        echo "_RUN.bash: Failed to set '$required_var'" 1>&2
        exit 1
    fi
done
unset required_var

# Work out which cluster daemons the job relies on, judging by the variables
# supplied to this script.
#
# Globals read:
#   input_dir, output_dir   - an 'hdfs://' prefix on either one means the job
#                             runs over HDFS
#   master_opt              - a '--master spark://' prefix means the job is
#                             submitted to a Spark cluster
# Globals written:
#   run_jps                 - 1 if a 'jps' daemon check is called for, else 0
#   run_jps_daemons         - daemon name(s) for display, e.g. "Hadoop and Spark"
#   run_jps_daemons_suffix  - "daemon" or "daemons", to agree with the above
_run_detect_daemons() {
    run_jps=0
    run_jps_daemons=""
    run_jps_daemons_suffix="daemon"

    # [[ == pattern ]] is used rather than the "x${var##pattern}" = "x" idiom,
    # because the latter is also true when the variable is empty or unset.
    if [[ "$input_dir" == hdfs://* || "$output_dir" == hdfs://* ]] ; then
        # Evidence of running command over HDFS (a Hadoop service)
        run_jps=1
        run_jps_daemons="Hadoop"
    fi

    if [[ "$master_opt" == "--master spark://"* ]] ; then
        # Evidence of running command submitted to Spark cluster
        run_jps=1
        if [[ -n "$run_jps_daemons" ]] ; then
            run_jps_daemons="$run_jps_daemons and Spark"
            run_jps_daemons_suffix="daemons"
        else
            run_jps_daemons="Spark"
        fi
    fi
}

_run_detect_daemons

# If the earlier checks flagged cluster use (run_jps is 1), print a banner
# containing the output of 'jps', each line of it prefixed with "* ".
if [[ "$run_jps" == "1" ]] ; then
  printf '%s\n' \
    "" \
    "****" \
    "* Checking for $run_jps_daemons $run_jps_daemons_suffix" \
    "****"
  # The prefix keeps the jps listing visually inside the banner
  jps | sed 's/^/* /'
  printf '%s\n' "****" "* Done" "****" ""

  # Brief pause before carrying on (presumably so the listing can be read)
  sleep 1
fi

self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar

# The command is assembled as an array so that every argument reaches
# spark-submit exactly as supplied.  Building one flat string and running it
# unquoted re-splits each value on whitespace and also leaves values such as
# 'local[4]' open to filename globbing.
#
# 'master_opt' carries option text of more than one word (for instance
# "--master spark://host:port"), so it does have to be split on whitespace;
# 'json_filelist' is split the same way in case a caller passes it as
# "option value".  'read -a' does the splitting without any glob expansion.
read -r -a master_args <<< "$master_opt"
read -r -a json_filelist_args <<< "$json_filelist"

cmd_argv=(
    spark-submit --class org.hathitrust.PrepareForIngest
    "${master_args[@]}" "$self_contained_jar"
    --verbosity 1 "${json_filelist_args[@]}" "$input_dir" "$output_dir"
    "$@"    # any extra arguments given to this script, passed through intact
)

# Space-joined copy of the command, used for display only
cmd="${cmd_argv[*]}"

# Message text below is kept exactly as it was (including the spelling of
# "Lauching") in case existing logs are matched against it.
echo "****"
echo "* Lauching:"
echo "*   $cmd"
echo "****"
if [ "$run_jps" = "1" ] ; then
  echo "* Monitor progress on Spark cluster through:"
  echo "*   http://10.10.0.52:8080/"
  echo "****"
fi
echo
sleep 1

# Last command in the script, so its exit status becomes the script's own
"${cmd_argv[@]}"

# Earlier, fully spelled-out forms of the command, kept for reference:
#
#    spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step10000.txt pd-ef-json-files pd-solr-json-files $*
#
# spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target\htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step1000.txt json-files solr-files $*
Note: See TracBrowser for help on using the browser.