Changeset 30918 for other-projects

Show
Ignore:
Timestamp:
25.10.2016 14:49:36 (3 years ago)
Author:
davidb
Message:

More flexible command-line args

Location:
other-projects/hathitrust/solr-extracted-features/trunk
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/RUN.bash

    r30912 r30918  
    11#!/bin/bash 
    22 
    3 spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step10000.txt pd-ef-json-files pd-solr-files $*  
     3input_dir=pd-ef-json-files 
     4output_dir=pd-solr-json-files 
     5 
     6master_opt="--master local[4]" 
     7self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar 
     8base_cmd="spark-submit --class org.hathitrust.PrepareForIngest $master_opt $self_contained_jar" 
     9 
     10if [ $# -ge 1 ] ; then 
     11    file_listing=shift $* 
     12    $base_cmd --json-filelist="$file_listing" $input_dir $output_dir $* 
     13else 
     14    echo "****" 
     15    echo "* Processing all files in: $input_dir" 
     16    echo "****" 
     17    $base_cmd $input_dir $output_dir $* 
     18fi 
     19 
     20#    spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step10000.txt pd-ef-json-files pd-solr-json-files $*  
    421 
    522# spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target\htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step1000.txt json-files solr-files $* 
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PagedJSON.java

    r30898 r30918  
    3232} 
    3333 */ 
     34 
     35 
     36/*  
     37URI uri = URI.create ("hdfs://host:port/file path"); 
     38Configuration conf = new Configuration(); 
     39FileSystem file = FileSystem.get(uri, conf); 
     40FSDataInputStream in = file.open(new Path(uri)); 
     41 
     42*/ 
    3443 
    3544class PagedJSON implements FlatMapFunction<String, String>  
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PrepareForIngest.java

    r30898 r30918  
    3535*/ 
    3636 
    37     //protected int _num_cores; 
    38     protected String _input_dir; 
    39     protected String _json_list_filename; 
     37    //protected int _num_cores; 
     38    protected String _input_dir; 
     39    protected String _json_list_filename; 
    4040    protected String _output_dir; 
    4141     
     
    4444        //_num_cores = num_cores; 
    4545        _input_dir = input_dir; 
    46         _json_list_filename = json_list_filename; 
     46        _json_list_filename = (json_list_filename != null) ? json_list_filename : input_dir; 
    4747        _output_dir = output_dir; 
    4848    } 
     
    115115 
    116116        //cmd.hasOption("json-filelist") 
    117         String json_list_filename = cmd.getOptionValue("json-filelist","pd-file-listing.txt"); 
     117        String json_list_filename = cmd.getOptionValue("json-filelist"); 
    118118        //int num_cores = Integer.parseInt(num_cores_str); 
    119119 
     
    126126         
    127127        if (filtered_args.length != 2) { 
    128             System.err.println("Usage: RUN.bat [options] input-dir output-dir"); 
     128                System.err.println("Usage: RUN.bat [options] input-dir output-dir"); 
     129            formatter.printHelp("utility-name", options); 
     130 
     131            //System.err.println("Usage: RUN.bat [options] input-dir output-dir"); 
    129132            //System.err.println("  Where 'filename.txt' contains a list of JSON files, one per line,"); 
    130133            //System.err.println("  which use the HathiTrust Extracted Feature JSON format");