Ignore:
Timestamp:
2016-12-21T13:47:56+13:00 (7 years ago)
Author:
davidb
Message:

Switching counts from 'Integer' to 'Long' so that larger values can be represented

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForLangCount.java

    r31261 r31264  
    86 86   
    87 87       
    88         JavaPairRDD<String, Integer> lang_pairs = lang_list.mapToPair(s -> new Tuple2<String, Integer>(s, 1));
     88        JavaPairRDD<String, Long> lang_pairs = lang_list.mapToPair(s -> new Tuple2<String, Long>(s, 1L));
    89 89        lang_pairs.setName("single-lang-count");
    90 90       
    91         JavaPairRDD<String, Integer> lang_counts = lang_pairs.reduceByKey((a, b) -> a + b);
     91        JavaPairRDD<String, Long> lang_counts = lang_pairs.reduceByKey((a, b) -> a + b);
    92 92        lang_counts.setName("lang-frequency");
    93 93       
    94         JavaPairRDD<Integer, String> lang_counts_swapped_pair
     94        JavaPairRDD<Long, String> lang_counts_swapped_pair
    95 95            = lang_counts.mapToPair(item -> item.swap());
    96 96        lang_counts_swapped_pair.setName("frequency-lang-swap");
    97 97       
    98         JavaPairRDD<Integer, String> lang_counts_swapped_pair_sorted
     98        JavaPairRDD<Long, String> lang_counts_swapped_pair_sorted
    99 99            = lang_counts_swapped_pair.sortByKey(false, num_partitions);
    100 100        lang_counts_swapped_pair_sorted.setName("descending-sorted-frequency-lang");
    101 101       
    102         JavaPairRDD<String, Integer> lang_count_sorted
     102        JavaPairRDD<String, Long> lang_count_sorted
    103 103            = lang_counts_swapped_pair_sorted.mapToPair(item -> item.swap());
    104 104        lang_count_sorted.setName("descending-lang-frequency");
Note: See TracChangeset for help on using the changeset viewer.