Changeset 31264 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForLangCount.java
- Timestamp: 2016-12-21T13:47:56+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForLangCount.java
r31261  r31264
    86      86
    87      87
    88          - JavaPairRDD<String, Integer> lang_pairs = lang_list.mapToPair(s -> new Tuple2<String, Integer>(s, 1));
            88  + JavaPairRDD<String, Long> lang_pairs = lang_list.mapToPair(s -> new Tuple2<String, Long>(s, 1L));
    89      89    lang_pairs.setName("single-lang-count");
    90      90
    91          - JavaPairRDD<String, Integer> lang_counts = lang_pairs.reduceByKey((a, b) -> a + b);
            91  + JavaPairRDD<String, Long> lang_counts = lang_pairs.reduceByKey((a, b) -> a + b);
    92      92    lang_counts.setName("lang-frequency");
    93      93
    94          - JavaPairRDD< Integer, String> lang_counts_swapped_pair
            94  + JavaPairRDD<Long, String> lang_counts_swapped_pair
    95      95    = lang_counts.mapToPair(item -> item.swap());
    96      96    lang_counts_swapped_pair.setName("frequency-lang-swap");
    97      97
    98          - JavaPairRDD< Integer, String> lang_counts_swapped_pair_sorted
            98  + JavaPairRDD<Long, String> lang_counts_swapped_pair_sorted
    99      99    = lang_counts_swapped_pair.sortByKey(false, num_partitions);
   100     100    lang_counts_swapped_pair_sorted.setName("descending-sorted-frequency-lang");
   101     101
   102          - JavaPairRDD<String, Integer> lang_count_sorted
           102  + JavaPairRDD<String, Long> lang_count_sorted
   103     103    = lang_counts_swapped_pair_sorted.mapToPair(item -> item.swap());
   104     104    lang_count_sorted.setName("descending-lang-frequency");
Note:
See TracChangeset
for help on using the changeset viewer.