Changeset 31371

Show
Ignore:
Timestamp:
31.01.2017 00:06:08 (3 years ago)
Author:
davidb
Message:

Trying to get saveAsSequenceFile working

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForCatalogLangCount.java

    r31369 r31371  
    8 8 import org.apache.commons.cli.*; 
    9 9 import org.apache.hadoop.io.Text; 
     10import org.apache.hadoop.io.compress.BZip2Codec; 
     11import org.apache.hadoop.mapreduce.OutputFormat; 
    10 12 import org.apache.hadoop.mapred.SequenceFileOutputFormat; 
     13//import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; 
    11 14 import org.apache.spark.api.java.*; 
    12 15 import org.apache.spark.api.java.function.Function2; 
     
    162 165        //json_text_sample_repart_rdd.saveAsTextFile(output_directory); 
    163 166        //json_text_sample_repart_rdd.saveAsSequenceFile(output_directory); 
    164         //json_text_sample_repart_rdd.saveAsHadoopFile(output_directory, Text.class, Text.class, SequenceFileOutputFormat.class); 
    165          
    166          
    167         JavaPairRDD<String,String> result = json_text_sample_repart_rdd.mapToPair(new ConvertToWritableTypes()); 
    168         result.saveAsHadoopFile(output_directory, String.class, String.class, SequenceFileOutputFormat.class); 
     167        json_text_sample_repart_rdd.saveAsHadoopFile(output_directory, Text.class, Text.class, SequenceFileOutputFormat.class); 
     168         
     169        //SequenceFileOutputFormat<Text,Text> sfof = new SequenceFileOutputFormat<Text,Text>(); 
     170        // //sfof.setOutputCompressionClass(BZip2Codec.class); 
     171        // //sfof.setOutputCompressorClass(conf); 
     172        // json_text_sample_repart_rdd.saveAsNewAPIHadoopFile(output_directory, Text.class, Text.class, sfof); 
     173        //org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat 
     174        //org.apache.hadoop.mapred. 
     175        //json_text_sample_repart_rdd.saveAsObjectFile(output_directory); 
     176         
     177        //JavaPairRDD<String,String> result = json_text_sample_repart_rdd.mapToPair(new ConvertToWritableTypes()); 
     178        //result.saveAsHadoopFile(output_directory, String.class, String.class, SequenceFileOutputFormat.class); 
    169 179         
    170 180        jsc.close();