Changeset 31371


Ignore:
Timestamp:
2017-01-31T00:06:08+13:00 (6 years ago)
Author:
davidb
Message:

Trying to get saveAsSequenceFile working

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForCatalogLangCount.java

    r31369 r31371  
    8 8 import org.apache.commons.cli.*;
    9 9 import org.apache.hadoop.io.Text;
     10 import org.apache.hadoop.io.compress.BZip2Codec;
     11 import org.apache.hadoop.mapreduce.OutputFormat;
    10 12 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
     13 //import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
    11 14 import org.apache.spark.api.java.*;
    12 15 import org.apache.spark.api.java.function.Function2;
     
    162 165        //json_text_sample_repart_rdd.saveAsTextFile(output_directory);
    163 166        //json_text_sample_repart_rdd.saveAsSequenceFile(output_directory);
    164         //json_text_sample_repart_rdd.saveAsHadoopFile(output_directory, Text.class, Text.class, SequenceFileOutputFormat.class);
    165        
    166        
    167         JavaPairRDD<String,String> result = json_text_sample_repart_rdd.mapToPair(new ConvertToWritableTypes());
    168         result.saveAsHadoopFile(output_directory, String.class, String.class, SequenceFileOutputFormat.class);
     167        json_text_sample_repart_rdd.saveAsHadoopFile(output_directory, Text.class, Text.class, SequenceFileOutputFormat.class);
     168       
     169        //SequenceFileOutputFormat<Text,Text> sfof = new SequenceFileOutputFormat<Text,Text>();
     170        // //sfof.setOutputCompressionClass(BZip2Codec.class);
     171        // //sfof.setOutputCompressorClass(conf);
     172        // json_text_sample_repart_rdd.saveAsNewAPIHadoopFile(output_directory, Text.class, Text.class, sfof);
     173        //org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat
     174        //org.apache.hadoop.mapred.
     175        //json_text_sample_repart_rdd.saveAsObjectFile(output_directory);
     176       
     177        //JavaPairRDD<String,String> result = json_text_sample_repart_rdd.mapToPair(new ConvertToWritableTypes());
     178        //result.saveAsHadoopFile(output_directory, String.class, String.class, SequenceFileOutputFormat.class);
    169 179       
    170 180        jsc.close();
Note: See TracChangeset for help on using the changeset viewer.