Context Navigation

source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/scripts/_RUN.sh@ 31102

Last change on this file since 31102 was 31093, checked in by davidb, 8 years ago
Changes triggered by running on gsliscluster1
Property svn:executable set to *
File size: 2.3 KB

Rev	Line
#!/bin/bash

# Builds and runs the spark-submit command for
# org.hathitrust.extractedfeatures.ProcessForSolrIngest.
#
# To work, the following bash variables need to have been set:
#
#   json_filelist  input_dir
#
# The following bash variables are optional:
#
#   output_dir  -- passed on as --output-dir when non-empty
#   solr_url    -- passed on as --solr-url when non-empty
#   master_opt  -- spark-submit master option, e.g. "--master spark://host:port"
#
# Typically done through running a wrapper script, such as:
#
#   RUN-PD-CLUSTER.bash
#
# Any command-line arguments are appended to the spark-submit command line.
# The exit status is that of spark-submit, or 1 if a required variable
# is missing.

# Name used in error messages; BASH_SOURCE is correct whether this file is
# executed directly or sourced by a wrapper.
script_name=${BASH_SOURCE[0]##*/}

if [[ -z "$json_filelist" ]]; then
  echo "$script_name: Failed to set 'json_filelist'" 1>&2
  exit 1
fi

if [[ -z "$input_dir" ]]; then
  echo "$script_name: Failed to set 'input_dir'" 1>&2
  exit 1
fi

# 'output_dir' is deliberately not checked here: it is optional.

run_jps=0
run_jps_daemons=""
run_jps_daemons_suffix="daemon"
using_hdfs=0

# Match on the URL prefix. An empty or unset variable must NOT count as a
# match, and a full URL such as hdfs://host/path must.
if [[ "$input_dir" == hdfs://* || "$output_dir" == hdfs://* ]]; then
  # Evidence of running command over HDFS
  run_jps=1
  run_jps_daemons="Hadoop"
  using_hdfs=1
fi

if [[ "$master_opt" == "--master spark://"* ]]; then
  # Evidence of running command submitted to Spark cluster
  run_jps=1
  if [[ -n "$run_jps_daemons" ]]; then
    run_jps_daemons="Spark and $run_jps_daemons"
    run_jps_daemons_suffix="daemons"
  else
    run_jps_daemons="Spark"
  fi
fi

if [[ "$run_jps" == "1" ]]; then
  echo
  echo "****"
  echo "* Checking for $run_jps_daemons $run_jps_daemons_suffix, by running 'jps':"
  echo "****"
  # List the running JVMs (minus jps itself), prefix each line with "* " and
  # tag the daemons of interest with the service they belong to.
  jps \
    | grep -v ' Jps$' \
    | sed -e 's/^/* /' \
          -e 's/ Master/ [Spark] Master/' \
          -e 's/ NameNode/ [HDFS] NameNode/' \
          -e 's/ SecondaryNameNode/ [HDFS] SecondaryNameNode/'

  echo "****"
  echo "* Done"
  echo "****"
  echo

  sleep 1
fi

if [[ "$using_hdfs" == "1" && -n "$output_dir" ]]; then
  # NOTE(review): this only announces the directory; nothing in this script
  # creates it. Presumably the Spark job does so -- confirm.
  if ! hadoop fs -test -d "$output_dir"; then
    echo "Creating directory:"
    echo " $output_dir"
  fi
fi

self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar

# Build the command as an array so that paths and extra arguments containing
# whitespace or glob characters reach spark-submit unchanged.
cmd=(spark-submit --class org.hathitrust.extractedfeatures.ProcessForSolrIngest)

# master_opt carries an option and its value in one string, so it is split on
# whitespace deliberately; 'read -a' splits without glob-expanding values
# such as local[*].
master_args=()
if [[ -n "$master_opt" ]]; then
  read -r -a master_args <<< "$master_opt"
fi
cmd+=("${master_args[@]}" "$self_contained_jar")

if [[ -n "$solr_url" ]]; then
  cmd+=(--solr-url "$solr_url")
fi

if [[ -n "$output_dir" ]]; then
  cmd+=(--output-dir "$output_dir")
fi

cmd+=(--properties ef-solr.properties "$input_dir" "$json_filelist" "$@")

echo "****"
echo "* Launching:"
echo "* ${cmd[*]}"
echo "****"

if [[ "$run_jps" == "1" ]]; then
  echo "* Monitor progress on Spark cluster through:"
  echo "* http://$SPARK_MASTER_HOST:8080/"
  echo "****"
fi
echo
sleep 2

# Must stay the last command: its exit status becomes the script's.
"${cmd[@]}"

Note: See TracBrowser for help on using the repository browser.

Download in other formats: