Changeset 31264 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForWhitelist.java
- Timestamp: 2016-12-21T13:47:56+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForWhitelist.java
r31259 r31264 91 91 words.setName("tokenized-words"); 92 92 93 JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {94 public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); }93 JavaPairRDD<String, Long> pairs = words.mapToPair(new PairFunction<String, String, Long>() { 94 public Tuple2<String, Long> call(String s) { return new Tuple2<String, Long>(s, 1L); } 95 95 }); 96 96 pairs.setName("single-word-count"); 97 97 98 JavaPairRDD<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {99 public Integer call(Integer a, Integerb) { return a + b; }98 JavaPairRDD<String, Long> counts = pairs.reduceByKey(new Function2<Long, Long, Long>() { 99 public Long call(Long a, Long b) { return a + b; } 100 100 }); 101 101 counts.setName("word-frequency"); 102 102 103 103 /* 104 JavaPairRDD< Integer, String> swapped_pair = counts.mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() {104 JavaPairRDD<Long, String> swapped_pair = counts.mapToPair(new PairFunction<Tuple2<String, Long>, Long, String>() { 105 105 @Override 106 public Tuple2< Integer, String> call(Tuple2<String, Integer> item) throws Exception {106 public Tuple2<Long, String> call(Tuple2<String, Long> item) throws Exception { 107 107 return item.swap(); 108 108 } … … 111 111 swapped_pair.setName("frequency-word-swap"); 112 112 113 JavaPairRDD< Integer, String> sorted_swapped_pair = swapped_pair.sortByKey(false,num_partitions);113 JavaPairRDD<Long, String> sorted_swapped_pair = swapped_pair.sortByKey(false,num_partitions); 114 114 sorted_swapped_pair.setName("descending-sorted-frequency-word"); 115 115 116 JavaPairRDD<String, Integer> sorted_swaped_back_pair = sorted_swapped_pair.mapToPair(new PairFunction<Tuple2<Integer, String>, String, Integer>() {116 JavaPairRDD<String, Long> sorted_swaped_back_pair = sorted_swapped_pair.mapToPair(new PairFunction<Tuple2<Long, String>, String, Long>() { 117 
117 @Override 118 public Tuple2<String, Integer> call(Tuple2<Integer, String> item) throws Exception {118 public Tuple2<String, Long> call(Tuple2<Long, String> item) throws Exception { 119 119 return item.swap(); 120 120 } … … 124 124 125 125 126 JavaPairRDD< Integer, String> counts_swapped_pair126 JavaPairRDD<Long, String> counts_swapped_pair 127 127 = counts.mapToPair(item -> item.swap()); 128 128 counts_swapped_pair.setName("frequency-word-swap"); 129 129 130 JavaPairRDD< Integer, String> counts_swapped_pair_sorted130 JavaPairRDD<Long, String> counts_swapped_pair_sorted 131 131 = counts_swapped_pair.sortByKey(false, num_partitions); 132 132 counts_swapped_pair_sorted.setName("descending-sorted-frequency-word"); 133 133 134 JavaPairRDD<String, Integer> count_sorted134 JavaPairRDD<String, Long> count_sorted 135 135 = counts_swapped_pair_sorted.mapToPair(item -> item.swap()); 136 136 count_sorted.setName("descending-word-frequency");
Note: See TracChangeset for help on using the changeset viewer.