Ignore:
Timestamp:
2013-05-24T09:23:29+12:00 (11 years ago)
Author:
jmt12
Message:

Making the Hadoop processing aware that there are two different possible prefixes for HDFS-based files (each using a different 'driver' under the new FileUtils system).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest.java

    r27102 r27413  
    164164      Configuration conf = context.getConfiguration();
    165165      String gsdlhome = conf.get("gsdlhome");
    166       String hdfs_host = conf.get("hdfshost");
    167       String hdfs_port = conf.get("hdfsport");
     166      String hdfs_prefix = conf.get("hdfsprefix");
    168167      String hadoop_prefix = conf.get("hadoopprefix");
    169168      String collection = conf.get("collection");
     
    199198      // - call Greenstone passing in the path to the manifest
    200199      ProcessBuilder import_process_builder
    201         = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-verbosity", "42", "-archivedir", "hdfs://" + hdfs_host + ":" + hdfs_port + "/user/jmt12/gsdl/collect/" + collection + "/archives", collection);
     200        = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-verbosity", "42", "-archivedir", hdfs_prefix + "/user/jmt12/gsdl/collect/" + collection + "/archives", collection);
    202201      fw1.write("[Command:" + import_process_builder.command() + "]\n");
    203202      // - alter environment
     
    284283    if (args.length < 6)
    285284    {
    286       System.out.println("Usage: bin/hadoop jar hadoop-greenstone.jar org.nzdl.gsdl.HadoopGreenstoneIngest <gsdlhome> <hdfs host> <hdfs port> <hadoop prefix> <collection> <hdfsin> <hdfsout>\n");
     285      System.out.println("Usage: bin/hadoop jar hadoop-greenstone.jar org.nzdl.gsdl.HadoopGreenstoneIngest <gsdlhome> <hdfsprefix> <hadoop prefix> <collection> <hdfsin> <hdfsout>\n");
    287286      System.exit(0);
    288287    }
     
    290289    Configuration conf = new Configuration();
    291290    conf.set("gsdlhome", args[0]);
    292     conf.set("hdfshost", args[1]);
    293     conf.set("hdfsport", args[2]);
    294     conf.set("hadoopprefix", args[3]);
    295     conf.set("collection", args[4]);
     291    conf.set("hdfsprefix", args[1]); // HDThriftFS or HDFSShell
     292    conf.set("hadoopprefix", args[2]);
     293    conf.set("collection", args[3]);
    296294    // Set the number of retries to 1 - hopefully one of the following will work
    297295    conf.setInt("mapred.map.max.attempts", 1); // Old Hadoop
     
    323321    // - this input path should be to a file (in HDFS) that lists the paths to
    324322    //   the manifest files
    325     FileInputFormat.setInputPaths(job, new Path(args[5]));
     323    FileInputFormat.setInputPaths(job, new Path(args[4]));
    326324    // - for now the output isn't that important, but in the future I may use
    327325    //   this mechanism to produce a time based log.
    328     FileOutputFormat.setOutputPath(job, new Path(args[6]));
     326    FileOutputFormat.setOutputPath(job, new Path(args[5]));
    329327
    330328    // Recommended notation despite my dislike of ?: syntax
Note: See TracChangeset for help on using the changeset viewer.