source: other-projects/hathitrust/solr-extracted-features/trunk/_RUN.bash@ 30929

Last change on this file since 30929 was 30929, checked in by davidb, 8 years ago

Tweaks made while testing the script

  • Property svn:executable set to *
File size: 1.4 KB
Line 
#!/bin/bash

# Launches the org.hathitrust.PrepareForIngest Spark job through spark-submit.
#
# To work, the following bash variables need to have been set:
#
#   json_filelist  input_dir  output_dir
#
# Optionally, 'master_opt' may hold extra spark-submit options that are placed
# in front of the jar (e.g. "--master local[4]"); it is split on whitespace.
#
# Typically done through running a wrapper script, such as:
#
#   RUN-PD-CLUSTER.bash
#
# Any command-line arguments given to this script are appended, unchanged, to
# the spark-submit command line.
#
# Exit status: 1 if a required variable is missing, otherwise the exit status
# of spark-submit.

# Refuse to run (with a non-zero status, so that wrappers can detect it) when
# any of the required variables is unset or empty.
for required_var in json_filelist input_dir output_dir; do
  if [[ -z "${!required_var}" ]]; then
    echo "_RUN.bash: Failed to set '$required_var'" 1>&2
    exit 1
  fi
done

echo
echo "****"
echo "* Checking for Spark and Hadoop daemons"
echo "****"
jps | sed 's/^/* /g'
echo "****"
echo "* Done"
echo "****"
echo

self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar

# 'master_opt' is a whitespace-separated option string. Split it into words
# without pathname expansion, so that a value such as "local[4]" can never be
# replaced by a matching file name from the current directory.
master_args=()
if [[ -n "${master_opt:-}" ]]; then
  read -r -a master_args <<< "$master_opt"
fi

# Build the command as an array: every element reaches spark-submit as exactly
# one argument, with no stray literal quote characters and no re-splitting of
# values (or of the caller's own arguments) that contain whitespace.
cmd=(
  spark-submit
  --class org.hathitrust.PrepareForIngest
  "${master_args[@]}"
  "$self_contained_jar"
  "--json-filelist=$json_filelist"
  "$input_dir"
  "$output_dir"
  "$@"
)

# Shell-quoted rendering of the command, suitable for copy-and-paste.
printf -v cmd_display '%q ' "${cmd[@]}"

echo "****"
echo "* Lauching:"
echo "* ${cmd_display% }"
echo "****"
echo "* Monitor progress through:"
echo "* http://10.10.0.52:8080/"
echo "****"
echo

"${cmd[@]}"

# Example invocations kept for reference:
#
# spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step10000.txt pd-ef-json-files pd-solr-json-files $*

# spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target\htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step1000.txt json-files solr-files $*
Note: See TracBrowser for help on using the repository browser.