Changeset 31246 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main
- Timestamp: 2016-12-18T18:25:02+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForWhitelist.java
r31175 r31246 95 95 }); 96 96 97 98 //JavaRDD<String> per_page_ids = per_page_jsonobjects.map(paged_json_id_map); 99 100 //long num_page_ids = per_page_ids.count(); // trigger lazy eval of: flatmap:per-vol -> map:per-page 101 102 counts.saveAsTextFile(_json_list_filename + ".out"); 103 104 97 //counts.map(lambda (x,y): (y,x)); 98 99 100 JavaPairRDD<Integer, String> swappedPair = counts.mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() { 101 @Override 102 public Tuple2<Integer, String> call(Tuple2<String, Integer> item) throws Exception { 103 return item.swap(); 104 } 105 106 }); 107 JavaPairRDD<Integer, String> sorted_swapped_pair = swappedPair.sortByKey(true, 1); // 1st arg configures ascending sort, 2nd arg configures one task 108 109 JavaPairRDD<String, Integer> sorted_swaped_back_pair = sorted_swapped_pair.mapToPair(new PairFunction<Tuple2<Integer, String>, String, Integer>() { 110 @Override 111 public Tuple2<String, Integer> call(Tuple2<Integer, String> item) throws Exception { 112 return item.swap(); 113 } 114 115 }); 116 117 /* 118 119 JavaPairRDD<String, Integer> sorted_counts 120 = counts.map(item -> item.swap()) // interchanges position of entries in each tuple 121 .sortByKey(true, 1) // 1st arg configures ascending sort, 2nd arg configures one task 122 .map(item -> item.swap()); 123 124 */ 125 126 127 //sorted_counts.saveAsTextFile(_json_list_filename + ".out"); 128 129 sorted_swaped_back_pair.saveAsTextFile(_json_list_filename + ".out"); 130 131 105 132 //System.out.println(""); 106 133 //System.out.println("############");
Note: See TracChangeset for help on using the changeset viewer.