Last change
on this file since 30929 was 30929, checked in by davidb, 7 years ago |
Tweaks made while testing the script
|
-
Property svn:executable
set to
*
|
File size:
1.4 KB
|
Line | |
---|
#!/bin/bash

# Launches the org.hathitrust.PrepareForIngest Spark job through spark-submit.
#
# To work, the following bash variables need to have been set:
#
#   json_filelist  input_dir  output_dir
#
# Optionally, 'master_opt' may hold extra spark-submit options (presumably
# something like "--master local[4]", as in the examples at the end of this
# file). It is split on whitespace into separate arguments.
#
# Typically done through running a wrapper script, such as:
#
#   RUN-PD-CLUSTER.bash
#
# Any positional parameters are passed through, unchanged, after output_dir.
#
# Exit status: 1 if a required variable is missing, otherwise the status of
# spark-submit.
#
# NOTE: 'set -u' / 'set -e' are deliberately not enabled here: 'master_opt' is
# allowed to be unset, and a wrapper may source this file, in which case
# changing shell options would leak into the caller.

# Validate the required variables. ${!name} is indirect expansion: it yields
# the value of the variable whose name is stored in 'required_var'.
for required_var in json_filelist input_dir output_dir; do
  if [[ -z "${!required_var}" ]]; then
    echo "_RUN.bash: Failed to set '$required_var'" 1>&2
    # Explicit non-zero status: a bare 'exit' would return the status of the
    # echo above (0) and make the failure look like success.
    exit 1
  fi
done
unset required_var

echo
echo "****"
echo "* Checking for Spark and Hadoop daemons"
echo "****"
jps | sed 's/^/* /'
echo "****"
echo "* Done"
echo "****"
echo

self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar

# Split master_opt on whitespace into separate words WITHOUT pathname
# expansion, so a value such as local[4] can never be globbed against files
# in the current directory. '-d ""' makes read consume the whole value (not
# just its first line); it returns non-zero at end of input, which is expected
# and ignored.
master_args=()
if [[ -n "${master_opt:-}" ]]; then
  IFS=$' \t\n' read -r -d '' -a master_args <<< "$master_opt"
fi

# Build the command as an array so that every argument reaches spark-submit
# exactly as given: no literal quote characters embedded in the value of
# --json-filelist, and no re-splitting of paths or caller arguments that
# contain whitespace.
cmd=(
  spark-submit
  --class org.hathitrust.PrepareForIngest
  "${master_args[@]}"
  "$self_contained_jar"
  "--json-filelist=$json_filelist"
  "$input_dir"
  "$output_dir"
  "$@"
)

# Shell-quoted rendering of the command, suitable for copy-and-paste.
printf -v cmd_display '%q ' "${cmd[@]}"

echo "****"
echo "* Launching:"
echo "*   ${cmd_display% }"
echo "****"
echo "* Monitor progress through:"
echo "*   http://10.10.0.52:8080/"
echo "****"
echo

"${cmd[@]}"

# Example invocations, kept for reference:
#
# spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step10000.txt pd-ef-json-files pd-solr-json-files $*
#
# spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target\htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step1000.txt json-files solr-files $*
Note:
See
TracBrowser
for help on using the repository browser.