Changeset 31362
- Timestamp:
- 2017-01-27T16:38:08+13:00 (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForCatalogLangCount.java
r31361 r31362 122 122 String packed_sequence_path = "hdfs:///user/capitanu/data/packed-ef"; 123 123 124 JavaPairRDD<String, Text> inputRdd = jsc.sequenceFile(packed_sequence_path, String.class, Text.class); 125 JavaRDD<Text> jsonTextRdd = inputRdd.map(Tuple2::_2); 126 124 JavaPairRDD<Text, Text> input_pair_rdd = jsc.sequenceFile(packed_sequence_path, Text.class, Text.class); 125 //JavaRDD<Text> jsonTextRdd = input_pair_rdd.map(Tuple2::_2); 126 JavaRDD<Text> json_text_rdd = input_pair_rdd.map(item -> item._2); 127 128 JavaRDD<Text> json_text_sample_rdd = json_text_rdd.sample(false,0.0001); 129 127 130 /* 128 131 jsonTextRdd.map( … … 135 138 PerVolumeCatalogLangSequenceFileMap volume_catalog_langfreq_map 136 139 = new PerVolumeCatalogLangSequenceFileMap(_input_dir,_verbosity,strict_file_io); 137 JavaRDD<String> catalog_lang_list = jsonTextRdd.map(volume_catalog_langfreq_map); 140 JavaRDD<String> catalog_lang_list = json_text_sample_rdd.map(volume_catalog_langfreq_map); 138 141 //catalog_lang_list.persist(StorageLevel.MEMORY_AND_DISK()); 139 142 catalog_lang_list.setName("catalog-lang-stream");
Note:
See TracChangeset
for help on using the changeset viewer.