Changeset 30918
- Timestamp:
- 2016-10-25T14:49:36+13:00 (6 years ago)
- Location:
- other-projects/hathitrust/solr-extracted-features/trunk
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/solr-extracted-features/trunk/RUN.bash
r30912 r30918 1 1 #!/bin/bash 2 2 3 spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step10000.txt pd-ef-json-files pd-solr-files $* 3 input_dir=pd-ef-json-files 4 output_dir=pd-solr-json-files 5 6 master_opt="--master local[4]" 7 self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar 8 base_cmd="spark-submit --class org.hathitrust.PrepareForIngest $master_opt $self_contained_jar" 9 10 if [ $# -ge 1 ] ; then 11 file_listing=shift $* 12 $base_cmd --json-filelist="$file_listing" $input_dir $output_dir $* 13 else 14 echo "****" 15 echo "* Processing all files in: $input_dir" 16 echo "****" 17 $base_cmd $input_dir $output_dir $* 18 fi 19 20 # spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step10000.txt pd-ef-json-files pd-solr-json-files $* 4 21 5 22 # spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target\htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step1000.txt json-files solr-files $* -
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PagedJSON.java
r30898 r30918 32 32 } 33 33 */ 34 35 36 /* 37 URI uri = URI.create ("hdfs://host:port/file path"); 38 Configuration conf = new Configuration(); 39 FileSystem file = FileSystem.get(uri, conf); 40 FSDataInputStream in = file.open(new Path(uri)); 41 42 */ 34 43 35 44 class PagedJSON implements FlatMapFunction<String, String> -
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PrepareForIngest.java
r30898 r30918 35 35 */ 36 36 37 38 39 37 //protected int _num_cores; 38 protected String _input_dir; 39 protected String _json_list_filename; 40 40 protected String _output_dir; 41 41 … … 44 44 //_num_cores = num_cores; 45 45 _input_dir = input_dir; 46 _json_list_filename = json_list_filename;46 _json_list_filename = (json_list_filename != null) ? json_list_filename : input_dir; 47 47 _output_dir = output_dir; 48 48 } … … 115 115 116 116 //cmd.hasOption("json-filelist") 117 String json_list_filename = cmd.getOptionValue("json-filelist" ,"pd-file-listing.txt");117 String json_list_filename = cmd.getOptionValue("json-filelist"); 118 118 //int num_cores = Integer.parseInt(num_cores_str); 119 119 … … 126 126 127 127 if (filtered_args.length != 2) { 128 System.err.println("Usage: RUN.bat [options] input-dir output-dir"); 128 System.err.println("Usage: RUN.bat [options] input-dir output-dir"); 129 formatter.printHelp("utility-name", options); 130 131 //System.err.println("Usage: RUN.bat [options] input-dir output-dir"); 129 132 //System.err.println(" Where 'filename.txt' contains a list of JSON files, one per line,"); 130 133 //System.err.println(" which use the HathiTrust Extracted Feature JSON format");
Note:
See TracChangeset
for help on using the changeset viewer.