Changeset 30934 for other-projects/hathitrust
- Timestamp:
- 2016-10-26T11:05:28+13:00 (7 years ago)
- Location:
- other-projects/hathitrust/solr-extracted-features/trunk
- Files:
- 2 edited
Legend:
- Unmodified
- Added
- Removed
other-projects/hathitrust/solr-extracted-features/trunk/_RUN.bash
r30929 r30934 24 24 fi 25 25 26 echo 27 echo "****" 28 echo "* Checking for Spark and Hadoop daemons" 29 echo "****" 30 jps | sed 's/^/* /g' 31 echo "****" 32 echo "* Done" 33 echo "****" 34 echo 26 run_jps=0 27 run_jps_daemons="" 28 run_jps_daemons_suffix="daemon" 29 30 if [ "x${input_dir##hdfs://*}" = "x" ] || [ "x${output_dir##hdfs://*}" = "x" ] ; then 31 # Evidence of running command over HDFS 32 run_jps=1 33 run_jps_daemons="Spark" 34 fi 35 36 if [ "x${master_op##--master spark://*}" = "x" ] ; then 37 # Evidence of running command submitted to Spark cluster 38 run_jps=1 39 if [ "x$run_jps_daemons" != "x" ] ; then 40 run_jps_daemons="$run_jps_daemons and Hadoop" 41 run_jps_daemons_suffix="daemons" 42 else 43 run_jps_daemons="Hadoop" 44 fi 45 fi 46 47 if [ "$run_jps" = "1" ] ; then 48 echo 49 echo "****" 50 echo "* Checking for $run_jps_daemons $run_jps_daemons_suffix" 51 echo "****" 52 jps | sed 's/^/* /g' 53 echo "****" 54 echo "* Done" 55 echo "****" 56 echo 57 58 fi 35 59 36 60 self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar 37 61 base_cmd="spark-submit --class org.hathitrust.PrepareForIngest $master_opt $self_contained_jar" 38 62 39 cmd="$base_cmd -- json-filelist=\"$json_filelist\"$input_dir $output_dir $*"63 cmd="$base_cmd --verbosity 1 $json_filelist $input_dir $output_dir $*" 40 64 41 65 echo "****" -
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PrepareForIngest.java
r30918 r30934 19 19 private static final long serialVersionUID = 1L; 20 20 21 /*22 class ContainsA implements Function<String, Boolean> {23 24 private static final long serialVersionUID = 1L;25 26 public Boolean call(String s) { return s.contains("a"); }27 }28 29 class ConvertJSON implements Function<String, Boolean> {30 31 private static final long serialVersionUID = 1L;32 33 public Boolean call(String s) { return s.contains("a"); }34 }35 */36 37 //protected int _num_cores;38 21 protected String _input_dir; 39 22 protected String _json_list_filename; 40 23 protected String _output_dir; 24 protected int _verbosity; 41 25 42 public PrepareForIngest(String input_dir, String json_list_filename, String output_dir )26 public PrepareForIngest(String input_dir, String json_list_filename, String output_dir, int verbosity) 43 27 { 44 //_num_cores = num_cores;45 28 _input_dir = input_dir; 46 29 _json_list_filename = (json_list_filename != null) ? json_list_filename : input_dir; 47 30 _output_dir = output_dir; 31 _verbosity = verbosity; 48 32 } 49 33 … … 66 50 }).count(); 67 51 68 System.out.println("#### ****Lines with a: " + numAs + ", lines with b: " + numBs);52 System.out.println("#### Lines with a: " + numAs + ", lines with b: " + numBs); 69 53 */ 70 54 long num_ids = json_ids.count(); 71 System.out.println("####**** number of IDS: " + num_ids); 55 System.out.println(""); 56 System.out.println("############"); 57 System.out.println("# number of IDS: " + num_ids); 58 System.out.println("############"); 59 System.out.println(""); 72 60 73 61 sc.close(); … … 81 69 //.withType(Integer.class) 82 70 83 options.addOption(OptionBuilder.withLongOpt(" json-filelist")84 .withDescription(" Explicit list of JSON files to read in")71 options.addOption(OptionBuilder.withLongOpt("verbosity") 72 .withDescription("Set to control the level of debugging output [0=none, 1=some, 2=lots]") 85 73 .hasArg() 86 .withArgName(" f")74 .withArgName("v") 87 75 .isRequired(false) 88 76 .create()); … … 
102 90 catch (ParseException e) { 103 91 System.err.println(e.getMessage()); 104 System.err.println("Usage: RUN.bat [options]input-dir output-dir");105 formatter.printHelp(" utility-name", options);92 //System.err.println("Usage: RUN.bat [options] json-file-list.txt input-dir output-dir"); 93 formatter.printHelp("RUN.bash/RUN.bat [options] json-file-list.txt input-dir output-dir", options); 106 94 //System.err.println(" Where 'filename.txt' contains a list of JSON files, one per line,"); 107 95 //System.err.println(" which use the HathiTrust Extracted Feature JSON format"); … … 115 103 116 104 //cmd.hasOption("json-filelist") 117 String json_list_filename = cmd.getOptionValue("json-filelist");118 //int num_cores = Integer.parseInt(num_cores_str);105 String verbosity_str = cmd.getOptionValue("verbosity","0"); 106 int verbosity = Integer.parseInt(verbosity_str); 119 107 120 108 //System.out.println(inputFilePath); … … 125 113 String[] filtered_args = cmd.getArgs(); 126 114 127 if (filtered_args.length != 2) {128 System.err.println("Usage: RUN.bat [options]input-dir output-dir");129 formatter.printHelp("utility-name", options);115 if (filtered_args.length != 3) { 116 //System.err.println("Usage: RUN.bat [options] json-filelist.txt input-dir output-dir"); 117 formatter.printHelp("RUN.bash/RUN.bat [options] json-filelist.txt input-dir output-dir", options); 130 118 131 119 //System.err.println("Usage: RUN.bat [options] input-dir output-dir"); 132 120 //System.err.println(" Where 'filename.txt' contains a list of JSON files, one per line,"); 133 121 //System.err.println(" which use the HathiTrust Extracted Feature JSON format"); 134 122 System.exit(1); 135 123 } 136 String input_dir = filtered_args[0]; 137 String output_dir = filtered_args[1]; 124 String json_list_filename = filtered_args[0]; 125 String input_dir = filtered_args[1]; 126 String output_dir = filtered_args[2]; 138 127 139 128 … … 142 131 //int num_cores = 2; 143 132 144 PrepareForIngest prep_for_ingest = 
new PrepareForIngest(input_dir,json_list_filename,output_dir );133 PrepareForIngest prep_for_ingest = new PrepareForIngest(input_dir,json_list_filename,output_dir,verbosity); 145 134 prep_for_ingest.exec(); 146 135
Note:
See TracChangeset
for help on using the changeset viewer.