Changeset 30918 for other-projects


Ignore:
Timestamp:
2016-10-25T14:49:36+13:00 (7 years ago)
Author:
davidb
Message:

More flexible command-line args

Location:
other-projects/hathitrust/solr-extracted-features/trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/RUN.bash

    r30912 r30918  
    11#!/bin/bash
    22
    3 spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step10000.txt pd-ef-json-files pd-solr-files $*
     3input_dir=pd-ef-json-files
     4output_dir=pd-solr-json-files
     5
     6master_opt="--master local[4]"
     7self_contained_jar=target/htrc-ef-ingest-0.9-jar-with-dependencies.jar
     8base_cmd="spark-submit --class org.hathitrust.PrepareForIngest $master_opt $self_contained_jar"
     9
     10if [ $# -ge 1 ] ; then
     11    file_listing=shift $*
     12    $base_cmd --json-filelist="$file_listing" $input_dir $output_dir $*
     13else
     14    echo "****"
     15    echo "* Processing all files in: $input_dir"
     16    echo "****"
     17    $base_cmd $input_dir $output_dir $*
     18fi
     19
     20#    spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target/htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step10000.txt pd-ef-json-files pd-solr-json-files $*
    421
    522# spark-submit --class org.hathitrust.PrepareForIngest --master local[4] target\htrc-ef-ingest-0.9-jar-with-dependencies.jar --json-filelist=pd-file-listing-step1000.txt json-files solr-files $*
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PagedJSON.java

    r30898 r30918  
    3232}
    3333 */
     34
     35
     36/*
     37URI uri = URI.create (“hdfs://host:port/file path”);
     38Configuration conf = new Configuration();
     39FileSystem file = FileSystem.get(uri, conf);
     40FSDataInputStream in = file.open(new Path(uri));
     41
     42*/
    3443
    3544class PagedJSON implements FlatMapFunction<String, String>
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PrepareForIngest.java

    r30898 r30918  
    3535*/
    3636
    37     //protected int _num_cores;
    38     protected String _input_dir;
    39     protected String _json_list_filename;
     37    //protected int _num_cores;
     38    protected String _input_dir;
     39    protected String _json_list_filename;
    4040    protected String _output_dir;
    4141   
     
    4444        //_num_cores = num_cores;
    4545        _input_dir = input_dir;
    46         _json_list_filename = json_list_filename;
     46        _json_list_filename = (json_list_filename != null) ? json_list_filename : input_dir;
    4747        _output_dir = output_dir;
    4848    }
     
    115115
    116116        //cmd.hasOption("json-filelist")
    117         String json_list_filename = cmd.getOptionValue("json-filelist","pd-file-listing.txt");
     117        String json_list_filename = cmd.getOptionValue("json-filelist");
    118118        //int num_cores = Integer.parseInt(num_cores_str);
    119119
     
    126126       
    127127        if (filtered_args.length != 2) {
    128             System.err.println("Usage: RUN.bat [options] input-dir output-dir");
     128                System.err.println("Usage: RUN.bat [options] input-dir output-dir");
     129            formatter.printHelp("utility-name", options);
     130
     131            //System.err.println("Usage: RUN.bat [options] input-dir output-dir");
    129132            //System.err.println("  Where 'filename.txt' contains a list of JSON files, one per line,");
    130133            //System.err.println("  which use the HathiTrust Extracted Feature JSON format");
Note: See TracChangeset for help on using the changeset viewer.