Changeset 26980

Show
Ignore:
Timestamp:
05.03.2013 10:09:55 (7 years ago)
Author:
jmt12
Message:

setting svnignore

Location:
gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl

    • Property svn:ignore set to
      hgi_classes
  • gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest.java

    r26954 r26980  
    161161      Configuration conf = context.getConfiguration(); 
    162162      String gsdlhome = conf.get("gsdlhome"); 
     163      String hdfs_host = conf.get("hdfshost"); 
     164      String hdfs_port = conf.get("hdfsport"); 
     165      String hadoop_prefix = conf.get("hadoopprefix"); 
    163166      String collection = conf.get("collection"); 
    164167      String task_id = conf.get("mapred.task.id"); 
     
    193196      // - call Greenstone passing in the path to the manifest 
    194197      ProcessBuilder import_process_builder 
    195         = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-verbosity", "42", "-archivedir", "hdfs://localhost:54310/user/jmt12/gsdl/collect/" + collection + "/archives", collection); 
     198        = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-verbosity", "42", "-archivedir", "hdfs://" + hdfs_host + ":" + hdfs_port + "/user/jmt12/gsdl/collect/" + collection + "/archives", collection); 
    196199      fw1.write("[Command:" + import_process_builder.command() + "]\n"); 
    197200      // - alter environment 
     
    199202      //   - path 
    200203      String path = import_process_env.get("PATH"); 
    201       path = gsdlhome + "/ext/hadoop/bin/script:" + path; 
    202       path = gsdlhome + "/ext/hadoop/linux/bin:" + path; 
    203       path = gsdlhome + "/ext/hadoop/packages/hadoop/bin:" + path; 
     204      path = gsdlhome + "/ext/parallel-building/bin/script:" + path; 
     205      path = gsdlhome + "/ext/parallel-building/linux/bin:" + path; 
     206      path = hadoop_prefix + "/bin:" + path; 
    204207      path = gsdlhome + "/ext/tdb-edit/linux/bin:" + path; 
    205208      path = gsdlhome + "/ext/tdb-edit/bin/script:" + path; 
     
    209212      import_process_env.put("PATH", path); 
    210213      //   - ld_library_path 
    211       import_process_env.put("LD_LIBRARY_PATH", gsdlhome + "/ext/hadoop/linux/lib:" + gsdlhome + "/ext/video-and-audio/linux/lib:" + gsdlhome + "/ext/tdb-edit/linux/lib:" + gsdlhome + "/ext/parallel-building/linux/lib:" + gsdlhome + "/lib/linux"); 
     214      import_process_env.put("LD_LIBRARY_PATH", gsdlhome + "/ext/parallel-building/linux/lib:" + gsdlhome + "/ext/hadoop/linux/lib:" + gsdlhome + "/ext/video-and-audio/linux/lib:" + gsdlhome + "/ext/tdb-edit/linux/lib"); 
    212215      //   - dyld_library_path 
    213216      import_process_env.put("DYLD_LIBRARY_PATH", gsdlhome + "/ext/video-and-audio/linux/lib"); 
     
    215218      import_process_env.put("GSDLHOME", gsdlhome); 
    216219      import_process_env.put("GSDLOS", "linux"); 
    217       //import_process_env.put("GSDLEXTS", "hadoop:parallel-building:tdb-edit:video-and-audio"); 
    218       import_process_env.put("GSDLEXTS", "video-and-audio:tdb-edit:hadoop"); 
     220      import_process_env.put("GSDLEXTS", "parallel-building:tdb-edit:video-and-audio"); 
    219221      //   - installed extension paths 
    220       import_process_env.put("GEXTHADOOP_INSTALLED", gsdlhome + "/ext/hadoop/linux"); 
     222      import_process_env.put("GEXTPARALLELBUILDING_INSTALLED", gsdlhome + "/ext/parallel-building/linux"); 
    221223      import_process_env.put("GEXTVIDEO_INSTALLED", gsdlhome + "/ext/video-and-audio/linux"); 
    222224      import_process_env.put("GEXTTDBEDIT_INSTALLED", gsdlhome + "/ext/tdb-edit/linux"); 
    223225      // - Hadoop specific 
    224       import_process_env.put("HADOOP_PREFIX", gsdlhome + "/ext/hadoop/packages/hadoop"); 
     226      import_process_env.put("HADOOP_PREFIX", hadoop_prefix); 
    225227      // - change working directory 
    226228      import_process_builder.directory(new File(gsdlhome)); 
     
    268270    throws Exception 
    269271  { 
    270     if (args.length < 4) 
    271     { 
    272       System.out.println("Usage: bin/hadoop jar hadoop-greenstone.jar org.nzdl.gsdl.HadoopGreenstoneIngest <gsdlhome> <collection> <hdfsin> <hdfsout>\n"); 
     272    if (args.length < 6) 
     273    { 
     274      System.out.println("Usage: bin/hadoop jar hadoop-greenstone.jar org.nzdl.gsdl.HadoopGreenstoneIngest <gsdlhome> <hdfs host> <hdfs port> <hadoop prefix> <collection> <hdfsin> <hdfsout>\n"); 
    273275      System.exit(0); 
    274276    } 
     
    276278    Configuration conf = new Configuration(); 
    277279    conf.set("gsdlhome", args[0]); 
    278     conf.set("collection", args[1]); 
     280    conf.set("hdfshost", args[1]); 
     281    conf.set("hdfsport", args[2]); 
     282    conf.set("hadoopprefix", args[3]); 
     283    conf.set("collection", args[4]); 
    279284    // prevent timeouts 
    280285    long milli_seconds = 60*60*1000; // 1 hour 
     
    303308    // - this input path should be to a file (in HDFS) that lists the paths to 
    304309    //   the manifest files 
    305     FileInputFormat.setInputPaths(job, new Path(args[2])); 
     310    FileInputFormat.setInputPaths(job, new Path(args[5])); 
    306311    // - for now the output isn't that important, but in the future I may use 
    307312    //   this mechanism to produce a time based log. 
    308     FileOutputFormat.setOutputPath(job, new Path(args[3])); 
     313    FileOutputFormat.setOutputPath(job, new Path(args[6])); 
    309314 
    310315    // Recommended notation despite my hatiness of ?: syntax