Context Navigation

source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/scripts/_RUN.sh@ 31184

Last change on this file since 31184 was 31184, checked in by davidb, 7 years ago
New provision to run different main classes in _RUN.sh; New top-level script for white list generation
Property svn:executable set to `*`
File size: 2.4 KB

Rev	Line
[30912]	1	#!/bin/bash
	2
# To work, the following bash variables need to have been set (non-empty):
#
#   json_filelist  input_dir
#
# output_dir may also be set; its check further down is commented out, so an
# empty output_dir does not stop the run.
#
# Typically done through running a wrapper script, such as:
#
#   RUN-PD-CLUSTER.bash

if [ -z "${json_filelist:-}" ] ; then
  echo "_RUN.bash: Failed to set 'json_filelist'" 1>&2
  # Exit with an explicit failure status: a bare `exit` would hand back the
  # status of the echo above (success), hiding the misconfiguration from
  # whatever invoked this script.
  exit 1
fi

if [ -z "${input_dir:-}" ] ; then
  echo "_RUN.bash: Failed to set 'input_dir'" 1>&2
  exit 1
fi

#if [ "x$output_dir" = "x" ] ; then
#  echo "_RUN.bash: Failed to set 'output_dir'" 1>&2
#  exit
#fi
[30926]	25
# Decide which cluster services this run relies on, based on the caller's
# input_dir, output_dir and master_opt variables.
#
# Sets (globals):
#   using_hdfs              1 when input_dir or output_dir is an hdfs:// URL
#   run_jps                 1 when a 'jps' daemon check should be shown
#   run_jps_daemons         label used in the 'jps' banner
#   run_jps_daemons_suffix  "daemon" or "daemons", matching the label
detect_cluster_services() {
  local uses_spark_cluster=0

  run_jps=0
  run_jps_daemons=""
  run_jps_daemons_suffix="daemon"
  using_hdfs=0

  # Match on the URL *prefix*. Stripping a literal "hdfs://" and comparing
  # the remainder with "" only matched the bare string "hdfs://" -- or an
  # empty variable -- so real HDFS paths were missed and an unset
  # output_dir was mistaken for HDFS.
  if [[ "${input_dir:-}" == "hdfs://"* || "${output_dir:-}" == "hdfs://"* ]] ; then
    # Evidence of running command over HDFS
    using_hdfs=1
  fi

  # Require the option to actually be present; an empty master_opt is not
  # evidence of a Spark cluster.
  if [[ "${master_opt:-}" == "--master spark://"* ]] ; then
    # Evidence of running command submitted to Spark cluster
    uses_spark_cluster=1
  fi

  # HDFS is served by Hadoop daemons, a spark:// master by Spark daemons.
  if [[ "$using_hdfs" == "1" && "$uses_spark_cluster" == "1" ]] ; then
    run_jps=1
    run_jps_daemons="Spark and Hadoop"
    run_jps_daemons_suffix="daemons"
  elif [[ "$using_hdfs" == "1" ]] ; then
    run_jps=1
    run_jps_daemons="Hadoop"
  elif [[ "$uses_spark_cluster" == "1" ]] ; then
    run_jps=1
    run_jps_daemons="Spark"
  fi
}

detect_cluster_services
	48
	49	if [ "$run_jps" = "1" ] ; then
	50	echo
	51	echo "****"
[30952]	52	echo "* Checking for $run_jps_daemons $run_jps_daemons_suffix, by running 'jps':"
[30934]	53	echo "****"
[31057]	54	jps \| egrep -v " Jps$" \| sed 's/^/* /g' \
	55	\| sed 's/ Master/ [Spark] Master/' \
	56	\| sed 's/ NameNode/ [HDFS] NameNode/' \
	57	\| sed 's/ SecondaryNameNode/ [HDFS] SecondaryNameNode/'
	58
[30934]	59	echo "****"
	60	echo "* Done"
	61	echo "****"
	62	echo
	63
[30935]	64	sleep 1
[30934]	65	fi
	66
# When running over HDFS with an output directory configured, probe for the
# directory and report when it is not there yet.
# NOTE(review): only the message is printed; nothing in this block creates
# the directory -- presumably the launched job does so; confirm.
if [[ "$using_hdfs" == "1" && -n "$output_dir" ]] ; then
  if ! hadoop fs -test -d "$output_dir" ; then
    printf '%s\n' "Creating directory:" " $output_dir"
  fi
fi
[31184]	77
	78	if [ "x$classmain" = "x" ] ; then
	79	classmain="org.hathitrust.extractedfeatures.ProcessForSolrIngest"
	80	fi
	81
[30918]	82	self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar
[31184]	83	cmd="spark-submit --class $classmain $master_opt $self_contained_jar"
[30918]	84
[30975]	85	if [ "x$solr_url" != "x" ] ; then
	86	cmd="$cmd --solr-url $solr_url"
	87	fi
[30918]	88
[30975]	89	if [ "x$output_dir" != "x" ] ; then
	90	cmd="$cmd --output-dir $output_dir"
	91	fi
	92
	93
[31028]	94	cmd="$cmd --properties ef-solr.properties $input_dir $json_filelist $*"
[30975]	95
[30929]	96	echo "****"
	97	echo "* Lauching:"
	98	echo "* $cmd"
	99	echo "****"
[30975]	100
[30936]	101	if [ "$run_jps" = "1" ] ; then
	102	echo "* Monitor progress on Spark cluster through:"
[31093]	103	echo "* http://$SPARK_MASTER_HOST:8080/"
[30936]	104	echo "****"
	105	fi
[30929]	106	echo
[30939]	107	sleep 2
[30929]	108
	109	$cmd
	110

Note: See TracBrowser for help on using the repository browser.

Download in other formats: