source: other-projects/hathitrust/solr-extracted-features/trunk/_RUN.bash@ 30950

Last change on this file since 30950 was 30950, checked in by davidb, 5 years ago

Tweak to text

  • Property svn:executable set to *
File size: 1.9 KB
Line 
1#!/bin/bash
2
3# To work, the follow bash variables need to have been set:
4#
5# json_filelist input_dir output_dir
6#
7# Typically done through running a wrapper script, such as:
8#
9# RUN-PD-CLUSTER.bash
10
# Abort unless the caller (e.g. RUN-PD-CLUSTER.bash) exported every
# variable this script needs: json_filelist, input_dir, output_dir.
# Bug fix: these previously ran a bare 'exit' (status 0), so a wrapper
# script could not tell that the run was misconfigured; exit 1 instead.
if [ -z "$json_filelist" ] ; then
    echo "_RUN.bash: Failed to set 'json_filelist'" 1>&2
    exit 1
fi

if [ -z "$input_dir" ] ; then
    echo "_RUN.bash: Failed to set 'input_dir'" 1>&2
    exit 1
fi

if [ -z "$output_dir" ] ; then
    echo "_RUN.bash: Failed to set 'output_dir'" 1>&2
    exit 1
fi
25
# Decide whether to sanity-check running Java daemons with 'jps', and
# build a human-readable description of which daemons are expected.
run_jps=0
run_jps_daemons=""
run_jps_daemons_suffix="daemon"
using_hdfs=0

# ${var##hdfs://*} expands to "" when the value matches hdfs://* (or is
# itself empty, which earlier checks rule out) -- i.e. the job reads
# from or writes to HDFS.
# NOTE(review): HDFS usage would normally imply *Hadoop* daemons; the
# original script labels this case "Spark" (and the Spark-master case
# "Hadoop" below) -- preserved as-is, but worth confirming.
if [[ -z "${input_dir##hdfs://*}" || -z "${output_dir##hdfs://*}" ]] ; then
    run_jps=1
    run_jps_daemons="Spark"
    using_hdfs=1
fi

# A '--master spark://...' option means the job is submitted to a Spark
# cluster rather than run locally.
if [[ -z "${master_opt##--master spark://*}" ]] ; then
    run_jps=1
    if [[ -n "$run_jps_daemons" ]] ; then
        run_jps_daemons="$run_jps_daemons and Hadoop"
        run_jps_daemons_suffix="daemons"
    else
        run_jps_daemons="Hadoop"
    fi
fi
48
# When daemons are expected, show 'jps' output (minus the Jps process
# itself) so the user can confirm they are running before the job starts.
if [ "$run_jps" = "1" ] ; then
    printf '%s\n' "" \
        "****" \
        "* Checking for $run_jps_daemons $run_jps_daemons_suffix" \
        "* Running 'jps':" \
        "****"
    jps | grep -Ev " Jps$" | sed 's/^/* /g'
    printf '%s\n' "****" \
        "* Done" \
        "****" \
        ""

    sleep 1
fi
63
# When writing to HDFS, make sure the output directory exists up front.
if [ "$using_hdfs" = "1" ] ; then
    hadoop fs -test -d "$output_dir"

    if [ $? != 0 ] ; then
        echo "Creating directory:"
        echo " $output_dir"
        # Bug fix: the message above was previously printed without the
        # directory actually being created.
        hadoop fs -mkdir -p "$output_dir"
    fi
fi
72
# Build and launch the Spark ingest job.  Any extra arguments passed to
# this script ($*) are forwarded to PrepareForIngest unchanged.
self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar
base_cmd="spark-submit --class org.hathitrust.PrepareForIngest $master_opt $self_contained_jar"

cmd="$base_cmd --verbosity 1 $json_filelist $input_dir $output_dir $*"

echo "****"
# Bug fix: message previously read "Lauching".
echo "* Launching:"
echo "* $cmd"
echo "****"
if [ "$run_jps" = "1" ] ; then
    echo "* Monitor progress on Spark cluster through:"
    echo "* http://10.10.0.52:8080/"
    echo "****"
fi
echo
sleep 2

# Intentionally unquoted: $cmd is a flat string that relies on
# word-splitting to become the spark-submit argument list.
$cmd
91
Note: See TracBrowser for help on using the repository browser.