Changeset 27413 for gs2-extensions/parallel-building/trunk
- Timestamp:
- 2013-05-24T09:23:29+12:00 (11 years ago)
- Files:
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest.java
Diff: r27102 → r27413

@@ old lines 164-169 / new lines 164-168 @@
      Configuration conf = context.getConfiguration();
      String gsdlhome = conf.get("gsdlhome");
-     String hdfs_host = conf.get("hdfshost");
-     String hdfs_port = conf.get("hdfsport");
+     String hdfs_prefix = conf.get("hdfsprefix");
      String hadoop_prefix = conf.get("hadoopprefix");
      String collection = conf.get("collection");
…
@@ old lines 199-203 / new lines 198-202 @@
      // - call Greenstone passing in the path to the manifest
      ProcessBuilder import_process_builder
-       = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-verbosity", "42", "-archivedir", "hdfs://" + hdfs_host + ":" + hdfs_port + "/user/jmt12/gsdl/collect/" + collection + "/archives", collection);
+       = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-verbosity", "42", "-archivedir", hdfs_prefix + "/user/jmt12/gsdl/collect/" + collection + "/archives", collection);
      fw1.write("[Command:" + import_process_builder.command() + "]\n");
      // - alter environment
…
@@ old lines 284-288 / new lines 283-287 @@
      if (args.length < 6)
      {
-       System.out.println("Usage: bin/hadoop jar hadoop-greenstone.jar org.nzdl.gsdl.HadoopGreenstoneIngest <gsdlhome> <hdfs host> <hdfs port> <hadoop prefix> <collection> <hdfsin> <hdfsout>\n");
+       System.out.println("Usage: bin/hadoop jar hadoop-greenstone.jar org.nzdl.gsdl.HadoopGreenstoneIngest <gsdlhome> <hdfsprefix> <hadoop prefix> <collection> <hdfsin> <hdfsout>\n");
        System.exit(0);
      }
…
@@ old lines 290-296 / new lines 289-294 @@
      Configuration conf = new Configuration();
      conf.set("gsdlhome", args[0]);
-     conf.set("hdfshost", args[1]);
-     conf.set("hdfsport", args[2]);
-     conf.set("hadoopprefix", args[3]);
-     conf.set("collection", args[4]);
+     conf.set("hdfsprefix", args[1]); // HDThriftFS or HDFSShell
+     conf.set("hadoopprefix", args[2]);
+     conf.set("collection", args[3]);
      // Set the number of retries to 1 - hopefully one of the following will work
      conf.setInt("mapred.map.max.attempts", 1); // Old Hadoop
…
@@ old lines 323-330 / new lines 321-328 @@
      // - this input path should be to a file (in HDFS) that lists the paths to
      // the manifest files
-     FileInputFormat.setInputPaths(job, new Path(args[5]));
+     FileInputFormat.setInputPaths(job, new Path(args[4]));
      // - for now the output isn't that important, but in the future I may use
      // this mechanism to produce a time based log.
-     FileOutputFormat.setOutputPath(job, new Path(args[6]));
+     FileOutputFormat.setOutputPath(job, new Path(args[5]));

      // Recommended notation despite my hatiness of ?: syntax
Note:
See TracChangeset
for help on using the changeset viewer.