#!/bin/bash

# To work, the following bash variables need to have been set:
#
#   json_filelist input_dir output_dir
#
# Optionally, 'master_opt' can also be set, e.g. to submit to a Spark cluster.
#
# Typically this is done by running a wrapper script, such as:
#
#   RUN-PD-CLUSTER.bash

if [ "x$json_filelist" = "x" ] ; then
    echo "_RUN.bash: Failed to set 'json_filelist'" 1>&2
    exit 1
fi

if [ "x$input_dir" = "x" ] ; then
    echo "_RUN.bash: Failed to set 'input_dir'" 1>&2
    exit 1
fi

if [ "x$output_dir" = "x" ] ; then
    echo "_RUN.bash: Failed to set 'output_dir'" 1>&2
    exit 1
fi

run_jps=0
run_jps_daemons=""
run_jps_daemons_suffix="daemon"

# '##' strips the longest prefix matching 'hdfs://*'; an empty result means
# the directory lives on HDFS, so the Hadoop daemons should be running
if [ "x${input_dir##hdfs://*}" = "x" ] || [ "x${output_dir##hdfs://*}" = "x" ] ; then
    # Evidence of running command over HDFS
    run_jps=1
    run_jps_daemons="Hadoop"
fi

# A '--master spark://...' option is evidence the job is being submitted
# to a Spark cluster, so the Spark daemons should be running too
if [ "x${master_opt##--master spark://*}" = "x" ] ; then
    # Evidence of running command submitted to Spark cluster
    run_jps=1
    if [ "x$run_jps_daemons" != "x" ] ; then
        run_jps_daemons="$run_jps_daemons and Spark"
        run_jps_daemons_suffix="daemons"
    else
        run_jps_daemons="Spark"
    fi
fi

if [ "$run_jps" = "1" ] ; then
    echo
    echo "****"
    echo "* Checking for $run_jps_daemons $run_jps_daemons_suffix"
    echo "****"
    jps | sed 's/^/* /g'
    echo "****"
    echo "* Done"
    echo "****"
    echo
    sleep 1
fi

self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar
base_cmd="spark-submit --class org.hathitrust.PrepareForIngest $master_opt $self_contained_jar"
cmd="$base_cmd --verbosity 1 $json_filelist $input_dir $output_dir $*"

echo "****"
echo "* Launching:"
echo "* $cmd"
echo "****"
if [ "$run_jps" = "1" ] ; then
    echo "* Monitor progress on Spark cluster through:"
    echo "* http://10.10.0.52:8080/"
    echo "****"
fi
echo
sleep 1

$cmd

# Example invocations:
# spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step10000.txt pd-ef-json-files pd-solr-json-files $*
# spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step1000.txt json-files solr-files $*
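
# For reference, a minimal sketch of what a wrapper script such as
# RUN-PD-CLUSTER.bash might look like. All values below are hypothetical
# (host, ports, and file names are assumptions, not taken from this repo);
# the real wrapper may differ:
#
#   #!/bin/bash
#   json_filelist=pd-file-listing.txt
#   input_dir=hdfs://10.10.0.52:9000/user/htrc/pd-ef-json-files
#   output_dir=hdfs://10.10.0.52:9000/user/htrc/pd-solr-json-files
#   master_opt="--master spark://10.10.0.52:7077"
#   . ./_RUN.bash "$@"
#
# Sourcing (rather than executing) _RUN.bash means the variables set above
# are visible to it without needing to be exported.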