Changeset 30995 for other-projects

Show
Ignore:
Timestamp:
30.10.2016 21:43:02 (3 years ago)
Author:
davidb
Message:

Adjustment of NUM_PARTITIONS to be based on Spark recommended calculation

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PrepareForIngest.java

    r30990 r30995  
    12 12    private static final long serialVersionUID = 1L; 
    13 13 
    14     public static final int NUM_PARTITIONS = 6; // default would appear to be 2 
     14    // The number of partitions to use follows the guidance given in the 
     15    //  "Parallelized collections" section of: 
     16    //   https://spark.apache.org/docs/2.0.1/programming-guide.html 
     17    // 
     18    // For a more detailed discussion see: 
     19    //   http://blog.cloudera.com/blog/2015/03/how-to-tune-your-apache-spark-jobs-part-2/ 
     20     
     21    public static final int NUM_CORES = 6; 
     22    public static final int NUM_PARTITIONS = 2*NUM_CORES; // default would appear to be 2 
    15 23     
    16 24    protected String _input_dir; 
     
    155 163            System.exit(1); 
    156 164        } 
     165        if (read_only) { 
     166            // For this case, need to ensure solr-url and output-dir are null 
     167            output_dir = null; 
     168            solr_url = null; 
     169        } 
    157 170         
    158 171        String input_dir  = filtered_args[0];