Changeset 31255 for other-projects/hathitrust
- Timestamp: 2016-12-20T15:37:26+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForWhitelist.java
r31252 r31255 92 92 counts.setName("word-frequency"); 93 93 94 /* 94 95 JavaPairRDD<Integer, String> swapped_pair = counts.mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() { 95 96 @Override … … 102 103 103 104 JavaPairRDD<Integer, String> sorted_swapped_pair = swapped_pair.sortByKey(false,num_partitions); 104 105 105 sorted_swapped_pair.setName("descending-sorted-frequency-word"); 106 106 … … 112 112 }); 113 113 sorted_swaped_back_pair.setName("descending-word-frequency"); 114 115 /* 116 JavaPairRDD<Integer, String> counts_swapped_pair 114 */ 115 116 117 JavaPairRDD<Integer, String> counts_swapped_pair 117 118 = counts.mapToPair(item -> item.swap()); 119 counts_swapped_pair.setName("frequency-word-swap"); 120 118 121 JavaPairRDD<Integer, String> counts_swapped_pair_sorted 119 = counts_swapped_pair.sortByKey(true, 1); 120 JavaPairRDD<String, Integer> count_sorted = counts_swapped_pair_sorted.mapToPair(item -> item.swap()); 121 */ 122 122 = counts_swapped_pair.sortByKey(true, num_partitions); 123 counts_swapped_pair_sorted.setName("descending-sorted-frequency-word"); 124 125 JavaPairRDD<String, Integer> count_sorted 126 = counts_swapped_pair_sorted.mapToPair(item -> item.swap()); 127 count_sorted.setName("descending-word-frequency"); 123 128 124 129 String filename_root = _json_list_filename.replaceAll(".*/","").replaceAll("\\..*$",""); 125 130 String output_directory = "whitelist-" + filename_root + "-out"; 126 sorted_swaped_back_pair.saveAsTextFile(output_directory); 127 131 132 //sorted_swaped_back_pair.saveAsTextFile(output_directory); 133 count_sorted.saveAsTextFile(output_directory); 134 128 135 129 136 //System.out.println("");
Note: See TracChangeset for help on using the changeset viewer.