Changeset 30934


Ignore:
Timestamp:
10/26/16 11:05:28 (5 years ago)
Author:
davidb
Message:

Providing json-filelist is now a compulsory argument, rather than an option

Location:
other-projects/hathitrust/solr-extracted-features/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/_RUN.bash

    r30929 r30934  
    2424fi
    2525
    26 echo
    27 echo "****"
    28 echo "* Checking for Spark and Hadoop daemons"
    29 echo "****"
    30 jps | sed 's/^/* /g'
    31 echo "****"
    32 echo "* Done"
    33 echo "****"
    34 echo
     26run_jps=0
     27run_jps_daemons=""
     28run_jps_daemons_suffix="daemon"
     29
     30if [ "x${input_dir##hdfs://*}" = "x" ] || [ "x${output_dir##hdfs://*}" = "x" ] ; then
     31    # Evidence of running command over HDFS
     32    run_jps=1
     33    run_jps_daemons="Spark"
     34fi
     35
     36if [ "x${master_op##--master spark://*}" = "x" ] ; then
     37    # Evidence of running command submitted to Spark cluster
     38    run_jps=1
     39    if [ "x$run_jps_daemons" != "x" ] ; then
     40        run_jps_daemons="$run_jps_daemons and Hadoop"
     41    run_jps_daemons_suffix="daemons"
     42    else
     43        run_jps_daemons="Hadoop"
     44    fi
     45fi
     46
     47if [ "$run_jps" = "1" ] ; then
     48  echo
     49  echo "****"
     50  echo "* Checking for $run_jps_daemons $run_jps_daemons_suffix"
     51  echo "****"
     52  jps | sed 's/^/* /g'
     53  echo "****"
     54  echo "* Done"
     55  echo "****"
     56  echo
     57
     58fi
    3559
    3660self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar
    3761base_cmd="spark-submit --class org.hathitrust.PrepareForIngest $master_opt $self_contained_jar"
    3862
    39 cmd="$base_cmd --json-filelist=\"$json_filelist\" $input_dir $output_dir $*"
     63cmd="$base_cmd --verbosity 1 $json_filelist $input_dir $output_dir $*"
    4064
    4165echo "****"
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PrepareForIngest.java

    r30918 r30934  
    1919    private static final long serialVersionUID = 1L;
    2020
    21 /*
    22     class ContainsA implements Function<String, Boolean> {
    23        
    24         private static final long serialVersionUID = 1L;
    25 
    26         public Boolean call(String s) { return s.contains("a"); }
    27     }
    28 
    29     class ConvertJSON implements Function<String, Boolean> {
    30 
    31         private static final long serialVersionUID = 1L;
    32 
    33         public Boolean call(String s) { return s.contains("a"); }
    34     }
    35 */
    36 
    37     //protected int _num_cores;
    3821    protected String _input_dir;
    3922    protected String _json_list_filename;
    4023    protected String _output_dir;
     24    protected int    _verbosity;
    4125   
    42     public PrepareForIngest(String input_dir, String json_list_filename, String output_dir)
     26    public PrepareForIngest(String input_dir, String json_list_filename, String output_dir, int verbosity)
    4327    {
    44         //_num_cores = num_cores;
    4528        _input_dir = input_dir;
    4629        _json_list_filename = (json_list_filename != null) ? json_list_filename : input_dir;
    4730        _output_dir = output_dir;
     31        _verbosity = verbosity;
    4832    }
    4933
     
    6650        }).count();
    6751
    68         System.out.println("####**** Lines with a: " + numAs + ", lines with b: " + numBs);
     52        System.out.println("#### Lines with a: " + numAs + ", lines with b: " + numBs);
    6953    */
    7054        long num_ids = json_ids.count();
    71         System.out.println("####**** number of IDS: " + num_ids);
     55        System.out.println("");
     56        System.out.println("############");
     57        System.out.println("# number of IDS: " + num_ids);
     58        System.out.println("############");
     59        System.out.println("");
    7260       
    7361        sc.close();
     
    8169        //.withType(Integer.class)
    8270
    83         options.addOption(OptionBuilder.withLongOpt("json-filelist")
    84                 .withDescription("Explicit list of JSON files to read in")
     71        options.addOption(OptionBuilder.withLongOpt("verbosity")
     72                .withDescription("Set to control the level of debugging output [0=none, 1=some, 2=lots]")
    8573                .hasArg()
    86                 .withArgName("f")
     74                .withArgName("v")
    8775                .isRequired(false)
    8876                .create());
     
    10290        catch (ParseException e) {
    10391            System.err.println(e.getMessage());
    104             System.err.println("Usage: RUN.bat [options] input-dir output-dir");
    105             formatter.printHelp("utility-name", options);
     92            //System.err.println("Usage: RUN.bat [options] json-file-list.txt input-dir output-dir");
     93            formatter.printHelp("RUN.bash/RUN.bat [options] json-file-list.txt input-dir output-dir", options);
    10694            //System.err.println("  Where 'filename.txt' contains a list of JSON files, one per line,");
    10795            //System.err.println("  which use the HathiTrust Extracted Feature JSON format");
     
    115103
    116104        //cmd.hasOption("json-filelist")
    117         String json_list_filename = cmd.getOptionValue("json-filelist");
    118         //int num_cores = Integer.parseInt(num_cores_str);
     105        String verbosity_str = cmd.getOptionValue("verbosity","0");
     106        int verbosity = Integer.parseInt(verbosity_str);
    119107
    120108        //System.out.println(inputFilePath);
     
    125113        String[] filtered_args = cmd.getArgs();
    126114       
    127         if (filtered_args.length != 2) {
    128                 System.err.println("Usage: RUN.bat [options] input-dir output-dir");
    129             formatter.printHelp("utility-name", options);
     115        if (filtered_args.length != 3) {
     116                //System.err.println("Usage: RUN.bat [options] json-filelist.txt input-dir output-dir");
     117                formatter.printHelp("RUN.bash/RUN.bat  [options] json-filelist.txt input-dir output-dir", options);
    130118
    131119            //System.err.println("Usage: RUN.bat [options] input-dir output-dir");
    132120            //System.err.println("  Where 'filename.txt' contains a list of JSON files, one per line,");
    133121            //System.err.println("  which use the HathiTrust Extracted Feature JSON format");
    134             System.exit(1);
     122                System.exit(1);
    135123        }
    136         String input_dir  = filtered_args[0];
    137         String output_dir = filtered_args[1];
     124        String json_list_filename = filtered_args[0];
     125        String input_dir  = filtered_args[1];
     126        String output_dir = filtered_args[2];
    138127       
    139128
     
    142131        //int num_cores = 2;
    143132
    144         PrepareForIngest prep_for_ingest = new PrepareForIngest(input_dir,json_list_filename,output_dir);
     133        PrepareForIngest prep_for_ingest = new PrepareForIngest(input_dir,json_list_filename,output_dir,verbosity);
    145134        prep_for_ingest.exec();
    146135
Note: See TracChangeset for help on using the changeset viewer.