Changeset 31264 for other-projects
- Timestamp: 2016-12-21T13:47:56+13:00 (7 years ago)
- Location: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForLangCount.java
```diff
--- ProcessForLangCount.java (r31261)
+++ ProcessForLangCount.java (r31264)
@@ -86,19 +86,19 @@
 
 
-JavaPairRDD<String, Integer> lang_pairs = lang_list.mapToPair(s -> new Tuple2<String, Integer>(s, 1));
+JavaPairRDD<String, Long> lang_pairs = lang_list.mapToPair(s -> new Tuple2<String, Long>(s, 1L));
 lang_pairs.setName("single-lang-count");
 
-JavaPairRDD<String, Integer> lang_counts = lang_pairs.reduceByKey((a, b) -> a + b);
+JavaPairRDD<String, Long> lang_counts = lang_pairs.reduceByKey((a, b) -> a + b);
 lang_counts.setName("lang-frequency");
 
-JavaPairRDD<Integer, String> lang_counts_swapped_pair
+JavaPairRDD<Long, String> lang_counts_swapped_pair
 = lang_counts.mapToPair(item -> item.swap());
 lang_counts_swapped_pair.setName("frequency-lang-swap");
 
-JavaPairRDD<Integer, String> lang_counts_swapped_pair_sorted
+JavaPairRDD<Long, String> lang_counts_swapped_pair_sorted
 = lang_counts_swapped_pair.sortByKey(false, num_partitions);
 lang_counts_swapped_pair_sorted.setName("descending-sorted-frequency-lang");
 
-JavaPairRDD<String, Integer> lang_count_sorted
+JavaPairRDD<String, Long> lang_count_sorted
 = lang_counts_swapped_pair_sorted.mapToPair(item -> item.swap());
 lang_count_sorted.setName("descending-lang-frequency");
```
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForPOSCount.java
```diff
--- ProcessForPOSCount.java (r31263)
+++ ProcessForPOSCount.java (r31264)
@@ -98,5 +98,5 @@
 */
 
-JavaPairRDD<String, Long> pos_pairs = pos_list.mapToPair(s -> new Tuple2<String, Long>(s, (long)1));
+JavaPairRDD<String, Long> pos_pairs = pos_list.mapToPair(s -> new Tuple2<String, Long>(s, 1L));
 pos_pairs.setName("single-pos-count");
 
```
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForWhitelist.java
```diff
--- ProcessForWhitelist.java (r31259)
+++ ProcessForWhitelist.java (r31264)
@@ -91,18 +91,18 @@
 words.setName("tokenized-words");
 
-JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
-public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); }
+JavaPairRDD<String, Long> pairs = words.mapToPair(new PairFunction<String, String, Long>() {
+public Tuple2<String, Long> call(String s) { return new Tuple2<String, Long>(s, 1L); }
 });
 pairs.setName("single-word-count");
 
-JavaPairRDD<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
-public Integer call(Integer a, Integer b) { return a + b; }
+JavaPairRDD<String, Long> counts = pairs.reduceByKey(new Function2<Long, Long, Long>() {
+public Long call(Long a, Long b) { return a + b; }
 });
 counts.setName("word-frequency");
 
 /*
-JavaPairRDD<Integer, String> swapped_pair = counts.mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() {
+JavaPairRDD<Long, String> swapped_pair = counts.mapToPair(new PairFunction<Tuple2<String, Long>, Long, String>() {
 @Override
-public Tuple2<Integer, String> call(Tuple2<String, Integer> item) throws Exception {
+public Tuple2<Long, String> call(Tuple2<String, Long> item) throws Exception {
 return item.swap();
 }
@@ -111,10 +111,10 @@
 swapped_pair.setName("frequency-word-swap");
 
-JavaPairRDD<Integer, String> sorted_swapped_pair = swapped_pair.sortByKey(false,num_partitions);
+JavaPairRDD<Long, String> sorted_swapped_pair = swapped_pair.sortByKey(false,num_partitions);
 sorted_swapped_pair.setName("descending-sorted-frequency-word");
 
-JavaPairRDD<String, Integer> sorted_swaped_back_pair = sorted_swapped_pair.mapToPair(new PairFunction<Tuple2<Integer, String>, String, Integer>() {
+JavaPairRDD<String, Long> sorted_swaped_back_pair = sorted_swapped_pair.mapToPair(new PairFunction<Tuple2<Long, String>, String, Long>() {
 @Override
-public Tuple2<String, Integer> call(Tuple2<Integer, String> item) throws Exception {
+public Tuple2<String, Long> call(Tuple2<Long, String> item) throws Exception {
 return item.swap();
 }
@@ -124,13 +124,13 @@
 
 
-JavaPairRDD<Integer, String> counts_swapped_pair
+JavaPairRDD<Long, String> counts_swapped_pair
 = counts.mapToPair(item -> item.swap());
 counts_swapped_pair.setName("frequency-word-swap");
 
-JavaPairRDD<Integer, String> counts_swapped_pair_sorted
+JavaPairRDD<Long, String> counts_swapped_pair_sorted
 = counts_swapped_pair.sortByKey(false, num_partitions);
 counts_swapped_pair_sorted.setName("descending-sorted-frequency-word");
 
-JavaPairRDD<String, Integer> count_sorted
+JavaPairRDD<String, Long> count_sorted
 = counts_swapped_pair_sorted.mapToPair(item -> item.swap());
 count_sorted.setName("descending-word-frequency");
```
Note: See TracChangeset for help on using the changeset viewer.