Changeset 31369


Ignore:
Timestamp:
2017-01-30T10:34:01+13:00 (7 years ago)
Author:
davidb
Message:

Trial new save

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForCatalogLangCount.java

    r31368 r31369  
    130130    }
    131131   
     132     public static class ConvertToWritableTypes implements PairFunction<Tuple2<Text, Text>, String, String> {
     133            /**
     134         *
     135         */
     136        private static final long serialVersionUID = 1L;
     137
     138            public Tuple2<String, String> call(Tuple2<Text, Text> record) {
     139              return new Tuple2(record._1.toString(), record._2.toString());
     140            }
     141          }
     142     
    132143    public void sampleDown100()
    133144    {
     
    146157        JavaPairRDD<Text, Text> json_text_sample_repart_rdd = json_text_sample_rdd.repartition(120);
    147158       
     159        //JavaPairRDD<Text, Text> json_text_sample_repart_rdd = json_text_sample_rdd.repartition(120);
     160       
    148161        String output_directory = "packed-full-ef-100";
    149162        //json_text_sample_repart_rdd.saveAsTextFile(output_directory);
    150163        //json_text_sample_repart_rdd.saveAsSequenceFile(output_directory);
    151         json_text_sample_repart_rdd.saveAsHadoopFile(output_directory, Text.class, Text.class, SequenceFileOutputFormat.class);
    152        
    153        
     164        //json_text_sample_repart_rdd.saveAsHadoopFile(output_directory, Text.class, Text.class, SequenceFileOutputFormat.class);
     165       
     166       
     167        JavaPairRDD<String,String> result = json_text_sample_repart_rdd.mapToPair(new ConvertToWritableTypes());
     168        result.saveAsHadoopFile(output_directory, String.class, String.class, SequenceFileOutputFormat.class);
     169       
    154170        jsc.close();
    155171    }
Note: See TracChangeset for help on using the changeset viewer.