Show
Ignore:
Timestamp:
26.10.2016 11:05:28 (3 years ago)
Author:
davidb
Message:

Providing json-filelist is now a compulsory argument, rather than an option

Location:
other-projects/hathitrust/solr-extracted-features/trunk
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/_RUN.bash

    r30929 r30934  
    2424fi 
    2525 
    26 echo 
    27 echo "****" 
    28 echo "* Checking for Spark and Hadoop daemons" 
    29 echo "****" 
    30 jps | sed 's/^/* /g' 
    31 echo "****" 
    32 echo "* Done" 
    33 echo "****" 
    34 echo 
     26run_jps=0 
     27run_jps_daemons="" 
     28run_jps_daemons_suffix="daemon" 
     29 
     30if [ "x${input_dir##hdfs://*}" = "x" ] || [ "x${output_dir##hdfs://*}" = "x" ] ; then 
     31    # Evidence of running command over HDFS 
     32    run_jps=1 
     33    run_jps_daemons="Spark" 
     34fi 
     35 
     36if [ "x${master_op##--master spark://*}" = "x" ] ; then 
     37    # Evidence of running command submitted to Spark cluster 
     38    run_jps=1 
     39    if [ "x$run_jps_daemons" != "x" ] ; then 
     40        run_jps_daemons="$run_jps_daemons and Hadoop" 
     41    run_jps_daemons_suffix="daemons" 
     42    else 
     43        run_jps_daemons="Hadoop" 
     44    fi 
     45fi 
     46 
     47if [ "$run_jps" = "1" ] ; then 
     48  echo 
     49  echo "****" 
     50  echo "* Checking for $run_jps_daemons $run_jps_daemons_suffix" 
     51  echo "****" 
     52  jps | sed 's/^/* /g' 
     53  echo "****" 
     54  echo "* Done" 
     55  echo "****" 
     56  echo 
     57 
     58fi 
    3559 
    3660self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar 
    3761base_cmd="spark-submit --class org.hathitrust.PrepareForIngest $master_opt $self_contained_jar" 
    3862 
    39 cmd="$base_cmd --json-filelist=\"$json_filelist\" $input_dir $output_dir $*" 
     63cmd="$base_cmd --verbosity 1 $json_filelist $input_dir $output_dir $*" 
    4064 
    4165echo "****" 
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PrepareForIngest.java

    r30918 r30934  
    1919    private static final long serialVersionUID = 1L; 
    2020 
    21 /* 
    22     class ContainsA implements Function<String, Boolean> { 
    23          
    24         private static final long serialVersionUID = 1L; 
    25  
    26         public Boolean call(String s) { return s.contains("a"); } 
    27     } 
    28  
    29     class ConvertJSON implements Function<String, Boolean> { 
    30  
    31         private static final long serialVersionUID = 1L; 
    32  
    33         public Boolean call(String s) { return s.contains("a"); } 
    34     } 
    35 */ 
    36  
    37     //protected int _num_cores; 
    3821    protected String _input_dir; 
    3922    protected String _json_list_filename; 
    4023    protected String _output_dir; 
     24    protected int    _verbosity; 
    4125     
    42     public PrepareForIngest(String input_dir, String json_list_filename, String output_dir) 
     26    public PrepareForIngest(String input_dir, String json_list_filename, String output_dir, int verbosity) 
    4327    { 
    44         //_num_cores = num_cores; 
    4528        _input_dir = input_dir; 
    4629        _json_list_filename = (json_list_filename != null) ? json_list_filename : input_dir; 
    4730        _output_dir = output_dir; 
     31        _verbosity = verbosity; 
    4832    } 
    4933 
     
    6650        }).count(); 
    6751 
    68         System.out.println("####**** Lines with a: " + numAs + ", lines with b: " + numBs); 
     52        System.out.println("#### Lines with a: " + numAs + ", lines with b: " + numBs); 
    6953    */ 
    7054        long num_ids = json_ids.count(); 
    71         System.out.println("####**** number of IDS: " + num_ids); 
     55        System.out.println(""); 
     56        System.out.println("############"); 
     57        System.out.println("# number of IDS: " + num_ids); 
     58        System.out.println("############"); 
     59        System.out.println(""); 
    7260         
    7361        sc.close(); 
     
    8169        //.withType(Integer.class) 
    8270 
    83         options.addOption(OptionBuilder.withLongOpt("json-filelist") 
    84                 .withDescription("Explicit list of JSON files to read in") 
     71        options.addOption(OptionBuilder.withLongOpt("verbosity") 
     72                .withDescription("Set to control the level of debugging output [0=none, 1=some, 2=lots]") 
    8573                .hasArg() 
    86                 .withArgName("f") 
     74                .withArgName("v") 
    8775                .isRequired(false) 
    8876                .create()); 
     
    10290        catch (ParseException e) { 
    10391            System.err.println(e.getMessage()); 
    104             System.err.println("Usage: RUN.bat [options] input-dir output-dir"); 
    105             formatter.printHelp("utility-name", options); 
     92            //System.err.println("Usage: RUN.bat [options] json-file-list.txt input-dir output-dir"); 
     93            formatter.printHelp("RUN.bash/RUN.bat [options] json-file-list.txt input-dir output-dir", options); 
    10694            //System.err.println("  Where 'filename.txt' contains a list of JSON files, one per line,"); 
    10795            //System.err.println("  which use the HathiTrust Extracted Feature JSON format"); 
     
    115103 
    116104        //cmd.hasOption("json-filelist") 
    117         String json_list_filename = cmd.getOptionValue("json-filelist"); 
    118         //int num_cores = Integer.parseInt(num_cores_str); 
     105        String verbosity_str = cmd.getOptionValue("verbosity","0"); 
     106        int verbosity = Integer.parseInt(verbosity_str); 
    119107 
    120108        //System.out.println(inputFilePath); 
     
    125113        String[] filtered_args = cmd.getArgs(); 
    126114         
    127         if (filtered_args.length != 2) { 
    128                 System.err.println("Usage: RUN.bat [options] input-dir output-dir"); 
    129             formatter.printHelp("utility-name", options); 
     115        if (filtered_args.length != 3) { 
     116                //System.err.println("Usage: RUN.bat [options] json-filelist.txt input-dir output-dir"); 
     117                formatter.printHelp("RUN.bash/RUN.bat  [options] json-filelist.txt input-dir output-dir", options); 
    130118 
    131119            //System.err.println("Usage: RUN.bat [options] input-dir output-dir"); 
    132120            //System.err.println("  Where 'filename.txt' contains a list of JSON files, one per line,"); 
    133121            //System.err.println("  which use the HathiTrust Extracted Feature JSON format"); 
    134             System.exit(1); 
     122                System.exit(1); 
    135123        } 
    136         String input_dir  = filtered_args[0]; 
    137         String output_dir = filtered_args[1]; 
     124        String json_list_filename = filtered_args[0]; 
     125        String input_dir  = filtered_args[1]; 
     126        String output_dir = filtered_args[2]; 
    138127         
    139128 
     
    142131        //int num_cores = 2; 
    143132 
    144         PrepareForIngest prep_for_ingest = new PrepareForIngest(input_dir,json_list_filename,output_dir); 
     133        PrepareForIngest prep_for_ingest = new PrepareForIngest(input_dir,json_list_filename,output_dir,verbosity); 
    145134        prep_for_ingest.exec(); 
    146135