source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/scripts/_RUN.sh@ 31184

Last change on this file since 31184 was 31184, checked in by davidb, 7 years ago

New provision to run different main classes in _RUN.sh; New top-level script for white list generation

  • Property svn:executable set to *
File size: 2.4 KB
RevLine 
#!/bin/bash

# To work, the following bash variables need to have been set:
#
#   json_filelist input_dir output_dir
#
# Typically done through running a wrapper script, such as:
#
#   RUN-PD-CLUSTER.bash
#
# json_filelist and input_dir are mandatory: the script aborts with a
# non-zero exit status (so callers can detect the failure) when either
# one is empty or unset.

if [ -z "$json_filelist" ] ; then
  echo "_RUN.bash: Failed to set 'json_filelist'" 1>&2
  exit 1
fi

if [ -z "$input_dir" ] ; then
  echo "_RUN.bash: Failed to set 'input_dir'" 1>&2
  exit 1
fi

# The equivalent check for output_dir is intentionally left disabled, so
# an empty output_dir is accepted here:
#
#if [ "x$output_dir" = "x" ] ; then
#  echo "_RUN.bash: Failed to set 'output_dir'" 1>&2
#  exit 1
#fi

# Work out, from the variables supplied by the wrapper script, whether the
# job touches HDFS and/or is submitted to a Spark cluster.
#
# Reads:  input_dir, output_dir, master_opt
# Sets:   using_hdfs              1 if input_dir or output_dir is an hdfs:// URL
#         run_jps                 1 if a 'jps' daemon listing should be shown
#         run_jps_daemons         human-readable name(s) of the daemons to look for
#         run_jps_daemons_suffix  "daemon" or "daemons", to match the above
#
# An empty or unset variable is never treated as a match: only a value that
# actually starts with the expected prefix counts as evidence.
detect_cluster_usage() {
  local on_spark_cluster=0

  run_jps=0
  run_jps_daemons=""
  run_jps_daemons_suffix="daemon"
  using_hdfs=0

  if [[ "$input_dir" == hdfs://* || "$output_dir" == hdfs://* ]] ; then
    # Evidence of running command over HDFS
    using_hdfs=1
  fi

  if [[ "$master_opt" == "--master spark://"* ]] ; then
    # Evidence of running command submitted to Spark cluster
    on_spark_cluster=1
  fi

  if [ "$using_hdfs" = "1" ] && [ "$on_spark_cluster" = "1" ] ; then
    run_jps=1
    run_jps_daemons="Spark and Hadoop"
    run_jps_daemons_suffix="daemons"
  elif [ "$using_hdfs" = "1" ] ; then
    # HDFS is served by Hadoop daemons (NameNode etc.)
    run_jps=1
    run_jps_daemons="Hadoop"
  elif [ "$on_spark_cluster" = "1" ] ; then
    run_jps=1
    run_jps_daemons="Spark"
  fi
}

detect_cluster_usage

# Print the JVM processes reported by 'jps', one per line, prefixed with
# "* ". The entry for 'jps' itself is dropped, and the Spark Master and the
# HDFS NameNode / SecondaryNameNode entries are tagged with the service
# they belong to.
list_jvm_daemons() {
  jps \
    | grep -v ' Jps$' \
    | sed -e 's/^/* /' \
          -e 's/ Master/ [Spark] Master/' \
          -e 's/ NameNode/ [HDFS] NameNode/' \
          -e 's/ SecondaryNameNode/ [HDFS] SecondaryNameNode/'
}

# When run_jps is "1", show which daemons are currently running so the user
# can confirm the expected services are up before the job is launched.
if [ "$run_jps" = "1" ] ; then
  echo
  echo "****"
  echo "* Checking for $run_jps_daemons $run_jps_daemons_suffix, by running 'jps':"
  echo "****"
  if command -v jps >/dev/null 2>&1 ; then
    list_jvm_daemons
  else
    # Carry on regardless: the listing is informational only.
    echo "* 'jps' not found on PATH; unable to list running daemons" 1>&2
  fi

  echo "****"
  echo "* Done"
  echo "****"
  echo

  # Brief pause so the listing can be read before more output follows.
  sleep 1
fi

# When working over HDFS with an output directory configured, probe whether
# that directory already exists and print a notice if it does not.
#
# NOTE(review): this block only prints the "Creating directory:" notice; it
# does not itself create anything. Confirm whether the directory is expected
# to be created by the submitted job or whether a 'hadoop fs -mkdir' step is
# missing here.
if [ "$using_hdfs" = "1" ] && [ -n "$output_dir" ] ; then
  if ! hadoop fs -test -d "$output_dir" ; then
    echo "Creating directory:"
    echo " $output_dir"
  fi
fi

# Main class to run; callers may override it by setting 'classmain'.
classmain=${classmain:-org.hathitrust.extractedfeatures.ProcessForSolrIngest}

self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar

# Assemble the spark-submit command line into the global array 'cmd'.
#
# Reads:      classmain, master_opt, self_contained_jar, solr_url,
#             output_dir, input_dir, json_filelist
# Arguments:  any extra arguments, appended verbatim to the command
# Sets:       cmd (array, one element per argument)
#
# An array is used instead of a single string so that values containing
# spaces or glob characters reach spark-submit as exactly one argument each.
build_spark_cmd() {
  local -a master_args=()

  # master_opt carries an option and its value in a single string
  # (e.g. "--master spark://host:7077"), so it is split on whitespace here,
  # without the pathname expansion an unquoted $master_opt would trigger.
  IFS=$' \t\n' read -r -a master_args <<< "$master_opt"

  cmd=(spark-submit --class "$classmain" "${master_args[@]}" "$self_contained_jar")

  if [ -n "$solr_url" ] ; then
    cmd+=(--solr-url "$solr_url")
  fi

  if [ -n "$output_dir" ] ; then
    cmd+=(--output-dir "$output_dir")
  fi

  cmd+=(--properties ef-solr.properties "$input_dir" "$json_filelist" "$@")
}

build_spark_cmd "$@"

echo "****"
echo "* Lauching:"
echo "* ${cmd[*]}"
echo "****"

if [ "$run_jps" = "1" ] ; then
  echo "* Monitor progress on Spark cluster through:"
  echo "* http://$SPARK_MASTER_HOST:8080/"
  echo "****"
fi
echo
# Short pause so the banner can be read before Spark's own output starts.
sleep 2

# Last command in the script: its exit status becomes the script's.
"${cmd[@]}"

Note: See TracBrowser for help on using the repository browser.