root/other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/scripts/_RUN.sh @ 32109

Revision 32109, 3.5 KB (checked in by davidb, 3 years ago)

Changes made after testing through YARN

  • Property svn:executable set to *
Line 
1#!/bin/bash
2
# To work, the following bash variables need to have been set:
4#
5#  seq_file (output_dir optional)
6#
7# Or:
8#
9#  json_filelist input_dir (output_dir optional)
10#
11# Typically done through running a wrapper script, such as:
12#
13#  FULL-RUN-YARN-SPARK.sh
14
15
# Decide which invocation mode we are in, based on which environment
# variables the wrapper script (e.g. FULL-RUN-YARN-SPARK.sh) exported:
#   seq mode:  'seq_file' is set
#   json mode: 'json_filelist' AND 'input_dir' are both set
show_usage=1
class_mode=""
if [ "x$seq_file" != "x" ] ; then
    show_usage=0
    class_mode="seq"
else
    if [ "x$json_filelist" != "x" ] ; then
        class_mode="json"
    fi

    # json mode additionally requires 'input_dir' before usage is satisfied
    if [ "x$input_dir" != "x" ] ; then
        # $class_mode must be quoted: it is empty when 'json_filelist' was
        # not set, and an unquoted empty expansion makes '[' fail with
        # "unary operator expected" instead of taking the else path
        if [ "$class_mode" = "json" ] ; then
            show_usage=0
        fi
    fi
fi

if [ "$show_usage" = "1" ] ; then
    echo "_RUN.bash: Failed to set 'seq_file' or 'input_dir json_filelist'" 1>&2
    exit 1
fi
40
41
42#if [ "x$output_dir" = "x" ] ; then
43#    echo "_RUN.bash: Failed to set 'output_dir'" 1>&2
44#    exit
45#fi
46
run_jps=0
run_jps_daemons=""
run_jps_daemons_suffix="daemon"
using_hdfs=0

if [ "$class_mode" = "seq" ] ; then
  # ${var##hdfs://*} strips the whole value when it starts with hdfs://,
  # leaving an empty remainder -- i.e. "remainder is empty" means "path is
  # on HDFS".  output_dir is optional, so it must also be checked for
  # non-emptiness: the remainder of an unset output_dir is empty too, which
  # previously mis-counted a missing output_dir as HDFS evidence.
  if [ "x${seq_file##hdfs://*}" = "x" ] || \
     { [ "x$output_dir" != "x" ] && [ "x${output_dir##hdfs://*}" = "x" ]; } ; then
    # Evidence of running command over HDFS
    run_jps=1
    run_jps_daemons="Spark"
    using_hdfs=1
  fi
fi
60
if [ "$class_mode" = "json" ] ; then
  # Same HDFS sniffing as the seq branch: a path remainder of "" after
  # stripping an hdfs:// prefix means the path lives on HDFS.  output_dir
  # is optional, so require it to be non-empty before testing it --
  # otherwise an unset output_dir was mis-counted as HDFS evidence.
  if [ "x${input_dir##hdfs://*}" = "x" ] || \
     { [ "x$output_dir" != "x" ] && [ "x${output_dir##hdfs://*}" = "x" ]; } ; then
    # Evidence of running command over HDFS
    run_jps=1
    run_jps_daemons="Spark"
    using_hdfs=1
  fi
fi
69
# A '--master spark://...' option means the job is being submitted to a
# Spark standalone cluster, so the Hadoop daemons should be checked too.
# Guard against an empty $master_opt: the ## expansion of an empty string
# is also empty, which previously mis-detected cluster mode whenever
# master_opt was simply unset.
if [ "x$master_opt" != "x" ] && [ "x${master_opt##--master spark://*}" = "x" ] ; then
    # Evidence of running command submitted to Spark cluster
    run_jps=1
    if [ "x$run_jps_daemons" != "x" ] ; then
        run_jps_daemons="$run_jps_daemons and Hadoop"
        run_jps_daemons_suffix="daemons"
    else
        run_jps_daemons="Hadoop"
    fi
fi
80
if [ "$run_jps" = "1" ] ; then
  echo
  echo "****"
  echo "* Checking for $run_jps_daemons $run_jps_daemons_suffix, by running 'jps':"
  echo "****"
  # List the running JVMs (minus jps itself) and label the known Spark/HDFS
  # daemons.  'egrep' is deprecated -- use 'grep -E'; the four piped sed
  # processes are collapsed into one sed with multiple expressions, applied
  # in the same order, and the useless /g flag on the ^-anchored
  # substitution is dropped.
  jps | grep -E -v " Jps$" \
    | sed -e 's/^/* /' \
          -e 's/ Master/ [Spark] Master/' \
          -e 's/ NameNode/ [HDFS]  NameNode/' \
          -e 's/ SecondaryNameNode/ [HDFS]  SecondaryNameNode/'

  echo "****"
  echo "* Done"
  echo "****"
  echo

  sleep 1
fi
98
if [ "$using_hdfs" = "1" ] ; then
  if [ "x$output_dir" != "x" ] ; then
    # -test -d returns non-zero when the directory does not yet exist
    hadoop fs -test -d "$output_dir"

    if [ $? != 0 ] ; then
      echo "Creating directory:"
      echo "  $output_dir"
      # Bug fix: the directory was announced but never actually created,
      # leaving the Spark job to fail later when writing its output
      hadoop fs -mkdir -p "$output_dir"
    fi
  fi
fi
109
# Default Spark driver class when the wrapper did not specify one
# (':=' assigns when classmain is unset or empty, matching the old
# "x$classmain" = "x" test)
: "${classmain:=org.hathitrust.extractedfeatures.ProcessForSolrIngest}"
113
# Assemble the spark-submit command line as a flat string; it is
# word-split when finally executed at the bottom of the script.
self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar
cmd="spark-submit --class $classmain $master_opt $self_contained_jar"

# Only the two Solr-ingest driver classes understand the --solr-base-url
# and --output-dir options, so append them for those classes alone.
case "$classmain" in
  org.hathitrust.extractedfeatures.ProcessForSolrIngest|org.hathitrust.extractedfeatures.ProcessForSolrIngestJSONFilelist)
    if [ "x$solr_base_url" != "x" ] ; then
      cmd="$cmd --solr-base-url $solr_base_url"
    fi

    if [ "x$output_dir" != "x" ] ; then
      cmd="$cmd --output-dir $output_dir"
    fi
    ;;
esac
126
127
# Final arguments: properties file, the mode-specific input(s), then any
# extra arguments passed straight through from the wrapper script.
case "$class_mode" in
  seq)
    cmd="$cmd --properties ef-solr.properties $seq_file $*"
    ;;
  *)
    cmd="$cmd --properties ef-solr.properties $input_dir $json_filelist $*"
    ;;
esac
136
echo "****"
echo "* Launching:"
echo "*   $cmd"
echo "****"

if [ "$run_jps" = "1" ] ; then
  echo "* Monitor progress on Spark cluster through:"
  echo "*   http://$SPARK_MASTER_HOST:8080/"
  echo "****"
fi
echo
# Brief pause so the banner can be read before Spark's own output starts
sleep 2

# Deliberately unquoted: $cmd is a flat string that must word-split back
# into spark-submit and its arguments
$cmd
151
Note: See TracBrowser for help on using the browser.