Ignore:
Timestamp:
2016-10-30T21:43:02+13:00 (7 years ago)
Author:
davidb
Message:

Adjustment of NUM_PARTITIONS to be based on Spark's recommended calculation

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PrepareForIngest.java

    r30990 r30995  
    1212    private static final long serialVersionUID = 1L;
    1313
    14     public static final int NUM_PARTITIONS = 6; // default would appear to be 2
     14    // Following details on number of partitions to use given in
     15    //  "Parallelized collections" section of:
     16    //   https://spark.apache.org/docs/2.0.1/programming-guide.html
     17    //
     18    // For a more detailed discussion see:
     19    //   http://blog.cloudera.com/blog/2015/03/how-to-tune-your-apache-spark-jobs-part-2/
     20   
     21    public static final int NUM_CORES = 6;
     22    public static final int NUM_PARTITIONS = 2*NUM_CORES; // default would appear to be 2
    1523   
    1624    protected String _input_dir;
     
    155163            System.exit(1);
    156164        }
     165        if (read_only) {
     166            // For this case, need to ensure solr-url and output-dir are null
     167            output_dir = null;
     168            solr_url = null;
     169        }
    157170       
    158171        String input_dir  = filtered_args[0];
Note: See TracChangeset for help on using the changeset viewer.