- Timestamp:
- 2013-03-05T10:09:55+13:00 (11 years ago)
- Location:
- gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl
-
Property svn:ignore set to hgi_classes
-
Property svn:ignore
set to
-
gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest.java
r26954 r26980 161 161 Configuration conf = context.getConfiguration(); 162 162 String gsdlhome = conf.get("gsdlhome"); 163 String hdfs_host = conf.get("hdfshost"); 164 String hdfs_port = conf.get("hdfsport"); 165 String hadoop_prefix = conf.get("hadoopprefix"); 163 166 String collection = conf.get("collection"); 164 167 String task_id = conf.get("mapred.task.id"); … … 193 196 // - call Greenstone passing in the path to the manifest 194 197 ProcessBuilder import_process_builder 195 = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-verbosity", "42", "-archivedir", "hdfs:// localhost:54310/user/jmt12/gsdl/collect/" + collection + "/archives", collection);198 = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-verbosity", "42", "-archivedir", "hdfs://" + hdfs_host + ":" + hdfs_port + "/user/jmt12/gsdl/collect/" + collection + "/archives", collection); 196 199 fw1.write("[Command:" + import_process_builder.command() + "]\n"); 197 200 // - alter environment … … 199 202 // - path 200 203 String path = import_process_env.get("PATH"); 201 path = gsdlhome + "/ext/ hadoop/bin/script:" + path;202 path = gsdlhome + "/ext/ hadoop/linux/bin:" + path;203 path = gsdlhome + "/ext/hadoop/packages/hadoop/bin:" + path;204 path = gsdlhome + "/ext/parallel-building/bin/script:" + path; 205 path = gsdlhome + "/ext/parallel-building/linux/bin:" + path; 206 path = hadoop_prefix + "/bin:" + path; 204 207 path = gsdlhome + "/ext/tdb-edit/linux/bin:" + path; 205 208 path = gsdlhome + "/ext/tdb-edit/bin/script:" + path; … … 209 212 import_process_env.put("PATH", path); 210 213 // - ld_library_path 211 import_process_env.put("LD_LIBRARY_PATH", gsdlhome + "/ext/ hadoop/linux/lib:" + gsdlhome + "/ext/video-and-audio/linux/lib:" + gsdlhome + "/ext/tdb-edit/linux/lib:" + gsdlhome + "/ext/parallel-building/linux/lib:" + gsdlhome + "/lib/linux");214 import_process_env.put("LD_LIBRARY_PATH", gsdlhome + 
"/ext/parallel-building/linux/lib:" + gsdlhome + "/ext/hadoop/linux/lib:" + gsdlhome + "/ext/video-and-audio/linux/lib:" + gsdlhome + "/ext/tdb-edit/linux/lib"); 212 215 // - dyld_library_path 213 216 import_process_env.put("DYLD_LIBRARY_PATH", gsdlhome + "/ext/video-and-audio/linux/lib"); … … 215 218 import_process_env.put("GSDLHOME", gsdlhome); 216 219 import_process_env.put("GSDLOS", "linux"); 217 //import_process_env.put("GSDLEXTS", "hadoop:parallel-building:tdb-edit:video-and-audio"); 218 import_process_env.put("GSDLEXTS", "video-and-audio:tdb-edit:hadoop"); 220 import_process_env.put("GSDLEXTS", "parallel-building:tdb-edit:video-and-audio"); 219 221 // - installed extension paths 220 import_process_env.put("GEXT HADOOP_INSTALLED", gsdlhome + "/ext/hadoop/linux");222 import_process_env.put("GEXTPARALLELBUILDING_INSTALLED", gsdlhome + "/ext/parallel-building/linux"); 221 223 import_process_env.put("GEXTVIDEO_INSTALLED", gsdlhome + "/ext/video-and-audio/linux"); 222 224 import_process_env.put("GEXTTDBEDIT_INSTALLED", gsdlhome + "/ext/tdb-edit/linux"); 223 225 // - Hadoop specific 224 import_process_env.put("HADOOP_PREFIX", gsdlhome + "/ext/hadoop/packages/hadoop");226 import_process_env.put("HADOOP_PREFIX", hadoop_prefix); 225 227 // - change working directory 226 228 import_process_builder.directory(new File(gsdlhome)); … … 268 270 throws Exception 269 271 { 270 if (args.length < 4)271 { 272 System.out.println("Usage: bin/hadoop jar hadoop-greenstone.jar org.nzdl.gsdl.HadoopGreenstoneIngest <gsdlhome> < collection> <hdfsin> <hdfsout>\n");272 if (args.length < 6) 273 { 274 System.out.println("Usage: bin/hadoop jar hadoop-greenstone.jar org.nzdl.gsdl.HadoopGreenstoneIngest <gsdlhome> <hdfs host> <hdfs port> <hadoop prefix> <collection> <hdfsin> <hdfsout>\n"); 273 275 System.exit(0); 274 276 } … … 276 278 Configuration conf = new Configuration(); 277 279 conf.set("gsdlhome", args[0]); 278 conf.set("collection", args[1]); 280 conf.set("hdfshost", args[1]); 281 
conf.set("hdfsport", args[2]); 282 conf.set("hadoopprefix", args[3]); 283 conf.set("collection", args[4]); 279 284 // prevent timeouts 280 285 long milli_seconds = 60*60*1000; // 1 hour … … 303 308 // - this input path should be to a file (in HDFS) that lists the paths to 304 309 // the manifest files 305 FileInputFormat.setInputPaths(job, new Path(args[ 2]));310 FileInputFormat.setInputPaths(job, new Path(args[5])); 306 311 // - for now the output isn't that important, but in the future I may use 307 312 // this mechanism to produce a time based log. 308 FileOutputFormat.setOutputPath(job, new Path(args[ 3]));313 FileOutputFormat.setOutputPath(job, new Path(args[6])); 309 314 310 315 // Recommended notation despite my hatiness of ?: syntax
Note: See TracChangeset for help on using the changeset viewer.