Ignore:
Timestamp:
2016-12-21T13:26:31+13:00 (7 years ago)
Author:
davidb
Message:

Change to using long for higher word counts

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForPOSCount.java

    r31260 r31263  
     98  98        */
     99  99
    100            JavaPairRDD<String, Integer> pos_pairs = pos_list.mapToPair(s -> new Tuple2<String, Integer>(s, 1));
        100        JavaPairRDD<String, Long> pos_pairs = pos_list.mapToPair(s -> new Tuple2<String, Long>(s, (long)1));
    101 101        pos_pairs.setName("single-pos-count");
    102 102
    103            JavaPairRDD<String, Integer> pos_counts = pos_pairs.reduceByKey((a, b) -> a + b);
        103        JavaPairRDD<String, Long> pos_counts = pos_pairs.reduceByKey((a, b) -> a + b);
    104 104        pos_counts.setName("pos-frequency");
    105 105
    106            JavaPairRDD<Integer, String> pos_counts_swapped_pair
        106        JavaPairRDD<Long, String> pos_counts_swapped_pair
    107 107            = pos_counts.mapToPair(item -> item.swap());
    108 108        pos_counts_swapped_pair.setName("frequency-pos-swap");
    109 109
    110            JavaPairRDD<Integer, String> pos_counts_swapped_pair_sorted
        110        JavaPairRDD<Long, String> pos_counts_swapped_pair_sorted
    111 111            = pos_counts_swapped_pair.sortByKey(false, num_partitions);
    112 112        pos_counts_swapped_pair_sorted.setName("descending-sorted-frequency-pos");
    113 113
    114            JavaPairRDD<String, Integer> pos_count_sorted
        114        JavaPairRDD<String, Long> pos_count_sorted
    115 115            = pos_counts_swapped_pair_sorted.mapToPair(item -> item.swap());
    116 116        pos_count_sorted.setName("descending-pos-frequency");
Note: See TracChangeset for help on using the changeset viewer.