root/other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/scripts/_RUN.sh @ 31057

Revision 31057, 2.3 KB (checked in by davidb, 4 years ago)

Tweak to jps output formatting

  • Property svn:executable set to *
Line 
1#!/bin/bash
2
# To work, the following bash variables need to have been set:
4#
5#  json_filelist input_dir output_dir
6#
7# Typically done through running a wrapper script, such as:
8#
9#  RUN-PD-CLUSTER.bash
10
# Abort when a required variable is missing.  BUGFIX: the original used a
# bare 'exit', which returns status 0 -- wrapper scripts could not detect
# the failure.  Exit 1 so the error is visible to callers.
# NOTE(review): the messages identify the script as "_RUN.bash" but this
# file is _RUN.sh -- confirm the intended name before changing the text.
if [ "x$json_filelist" = "x" ] ; then
    echo "_RUN.bash: Failed to set 'json_filelist'" 1>&2
    exit 1
fi

if [ "x$input_dir" = "x" ] ; then
    echo "_RUN.bash: Failed to set 'input_dir'" 1>&2
    exit 1
fi

# 'output_dir' is optional (its uses below are all guarded), so this check
# remains disabled.
#if [ "x$output_dir" = "x" ] ; then
#    echo "_RUN.bash: Failed to set 'output_dir'" 1>&2
#    exit
#fi
25
# Flags controlling the pre-flight 'jps' daemon report.
run_jps=0
run_jps_daemons=""
run_jps_daemons_suffix="daemon"
using_hdfs=0

# Returns 0 when the given path uses the hdfs:// scheme.
# BUGFIX: the previous test ("x${var##hdfs://*}" = "x") was also true for
# an empty/unset variable (a non-matching '##' leaves the empty value
# unchanged), so an unset output_dir wrongly enabled HDFS mode.
_path_is_hdfs() {
    case "$1" in
        hdfs://*) return 0 ;;
        *)        return 1 ;;
    esac
}

if _path_is_hdfs "$input_dir" || _path_is_hdfs "$output_dir" ; then
    # Evidence of running command over HDFS
    run_jps=1
    run_jps_daemons="Spark"
    using_hdfs=1
fi
37
# Returns 0 when the Spark master option submits to a spark:// cluster URL.
# BUGFIX: the previous test ("x${master_opt##--master spark://*}" = "x")
# was also true when master_opt was empty/unset, wrongly enabling the
# cluster-daemon check for local runs.
_master_is_spark_cluster() {
    case "$1" in
        "--master spark://"*) return 0 ;;
        *)                    return 1 ;;
    esac
}

if _master_is_spark_cluster "$master_opt" ; then
    # Evidence of running command submitted to Spark cluster
    run_jps=1
    if [ "x$run_jps_daemons" != "x" ] ; then
        run_jps_daemons="$run_jps_daemons and Hadoop"
        # (was mis-indented in the original; it belongs to this branch)
        run_jps_daemons_suffix="daemons"
    else
        run_jps_daemons="Hadoop"
    fi
fi
48
# Pretty-print 'jps' output read from stdin: drop the 'Jps' tool's own
# entry, prefix every line with "* ", and tag known Spark/HDFS daemons.
# (egrep is deprecated -- use grep -E; the four sed processes are merged
# into one sed with chained -e expressions, applied in the same order.)
_format_jps_output() {
    grep -E -v " Jps$" \
      | sed -e 's/^/* /g' \
            -e 's/ Master/ [Spark] Master/' \
            -e 's/ NameNode/ [HDFS]  NameNode/' \
            -e 's/ SecondaryNameNode/ [HDFS]  SecondaryNameNode/'
}

if [ "$run_jps" = "1" ] ; then
  echo
  echo "****"
  echo "* Checking for $run_jps_daemons $run_jps_daemons_suffix, by running 'jps':"
  echo "****"
  jps | _format_jps_output

  echo "****"
  echo "* Done"
  echo "****"
  echo

  sleep 1
fi
66
# When running over HDFS, make sure the output directory exists.
if [ "$using_hdfs" = "1" ] ; then
    if [ "x$output_dir" != "x" ] ; then
        hadoop fs -test -d "$output_dir"

        if [ $? != 0 ] ; then
            echo "Creating directory:"
            echo "  $output_dir"
            # BUGFIX(review): the message above was printed but the
            # directory was never actually created -- presumably the
            # mkdir was the intent; confirm against the ingest job's
            # expectations.
            hadoop fs -mkdir -p "$output_dir"
        fi
    fi
fi
77   
# Assemble the spark-submit invocation as a flat command string; optional
# flags are appended only when their controlling variables are non-empty.
self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar

cmd="spark-submit --class org.hathitrust.extractedfeatures.ProcessForSolrIngest $master_opt $self_contained_jar"

# Optional Solr endpoint and output directory.
[ "x$solr_url" = "x" ]   || cmd="$cmd --solr-url $solr_url"
[ "x$output_dir" = "x" ] || cmd="$cmd --output-dir $output_dir"

# Fixed properties file, positional inputs, then any extra script arguments.
cmd="$cmd --properties ef-solr.properties $input_dir $json_filelist $*"
91
# Announce the command, pause briefly so the banner is readable, then run.
echo "****"
# BUGFIX: user-facing message previously read "Lauching".
echo "* Launching:"
echo "*   $cmd"
echo "****"

if [ "$run_jps" = "1" ] ; then
  echo "* Monitor progress on Spark cluster through:"
  echo "*   http://10.10.0.52:8080/"
  echo "****"
fi
echo
sleep 2

# Intentionally unquoted: $cmd is a flat string whose words form the
# spark-submit command line (word-splitting is required here).
$cmd
106
Note: See TracBrowser for help on using the browser.