Show
Ignore:
Timestamp:
21.12.2016 13:26:31 (3 years ago)
Author:
davidb
Message:

Change to using long for higher word counts

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForPOSCount.java

    r31260 r31263  
    98  98         */ 
    99  99          
    100            JavaPairRDD<String, Integer> pos_pairs = pos_list.mapToPair(s -> new Tuple2<String, Integer>(s, 1)); 
        100        JavaPairRDD<String, Long> pos_pairs = pos_list.mapToPair(s -> new Tuple2<String, Long>(s, (long)1)); 
    101 101        pos_pairs.setName("single-pos-count"); 
    102 102         
    103            JavaPairRDD<String, Integer> pos_counts = pos_pairs.reduceByKey((a, b) -> a + b); 
        103        JavaPairRDD<String, Long> pos_counts = pos_pairs.reduceByKey((a, b) -> a + b); 
    104 104        pos_counts.setName("pos-frequency"); 
    105 105         
    106            JavaPairRDD<Integer, String> pos_counts_swapped_pair 
        106        JavaPairRDD<Long, String> pos_counts_swapped_pair 
    107 107            = pos_counts.mapToPair(item -> item.swap()); 
    108 108        pos_counts_swapped_pair.setName("frequency-pos-swap"); 
    109 109         
    110            JavaPairRDD<Integer, String> pos_counts_swapped_pair_sorted  
        110        JavaPairRDD<Long, String> pos_counts_swapped_pair_sorted  
    111 111            = pos_counts_swapped_pair.sortByKey(false, num_partitions); 
    112 112        pos_counts_swapped_pair_sorted.setName("descending-sorted-frequency-pos"); 
    113 113         
    114            JavaPairRDD<String, Integer> pos_count_sorted  
        114        JavaPairRDD<String, Long> pos_count_sorted  
    115 115            = pos_counts_swapped_pair_sorted.mapToPair(item -> item.swap()); 
    116 116        pos_count_sorted.setName("descending-pos-frequency");