Ignore:
Timestamp:
2015-12-16T16:15:39+13:00 (8 years ago)
Author:
jmt12
Message:

Extending manifest v2 support to allow for directories to be listed in the manifest. Matched with changes in Directory plugin to allow paths into systems like HDFS to be listed in the manifest.

Location:
gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest.java

    r27654 r30354  
    262262
    263263      // - call Greenstone passing in the path to the manifest
    264       ProcessBuilder import_process_builder = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-archivedir", conf.get("archivesdir"), collection);
     264      //ProcessBuilder import_process_builder = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-archivedir", conf.get("archivesdir"), collection);
     265      String environment_script_filename = "setup.bash";
     266      StringBuffer cmd_buffer = new StringBuffer();
     267      cmd_buffer.append("source ./");
     268      cmd_buffer.append(environment_script_filename);
     269      cmd_buffer.append(" && time -p import.pl -keepold -manifest \"");
     270      cmd_buffer.append(manifest_path.toString());
     271      cmd_buffer.append("\" -archivedir \"");
     272      cmd_buffer.append(conf.get("archivesdir"));
     273      cmd_buffer.append("\" ");
     274      cmd_buffer.append(collection);
     275      ProcessBuilder import_process_builder = new ProcessBuilder("bash", "-c", cmd_buffer.toString());
    265276      fw1.write("[Command:" + import_process_builder.command() + "]\n");
     277      /*
    266278      // - alter environment
    267279      Map<String, String> import_process_env = import_process_builder.environment();
    268       //   - path
     280      // - build up the path
    269281      String path = import_process_env.get("PATH");
    270282      path = gsdlhome + "/ext/parallel-building/bin/script:" + path;
     
    294306      import_process_env.put("HADOOP_PREFIX", hadoop_home);
    295307      fw1.write("[HADOOP_PREFIX: " + hadoop_home + "]\n");
     308      */
    296309
    297310      // - change working directory
  • gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest2.java

    r28312 r30354  
    324324      manifest_writer.close();
    325325
     326      /* Original process calling - sets up environment in Java
    326327      // - call Greenstone passing in the path to the manifest
    327328      ProcessBuilder import_process_builder = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-archivedir", conf.get("archivesdir"), collection);
     
    357358      import_process_env.put("HADOOP_PREFIX", hadoop_home);
    358359      fw1.write("[HADOOP_PREFIX: " + hadoop_home + "]\n");
     360      */
     361
     362      /* New process call - adds call to setup.bash first to prepare
     363       * environment... hopefully */
     364      // - call Greenstone passing in the path to the manifest
     365      String environment_script_filename = "setup.bash";
     366      StringBuffer cmd_buffer = new StringBuffer();
     367      cmd_buffer.append("source ./");
     368      cmd_buffer.append(environment_script_filename);
     369      cmd_buffer.append(" && time -p import.pl -keepold -manifest \"");
     370      cmd_buffer.append(manifest_path.toString());
     371      cmd_buffer.append("\" -archivedir \"");
     372      cmd_buffer.append(conf.get("archivesdir"));
     373      cmd_buffer.append("\" ");
     374      cmd_buffer.append(collection);
     375      ProcessBuilder import_process_builder = new ProcessBuilder("bash", "-c", cmd_buffer.toString());
     376      fw1.write("[Command:" + import_process_builder.command() + "]\n");
    359377
    360378      // - change working directory
     
    738756    job.setReducerClass(GSReducer.class);
    739757
    740     // Sets the input and output handlers - may need to adjust input to provide me
    741     // a series of filenames (TextInputFormat will instead read in a text file and
    742     // return each line...)
     758    // Sets the input and output handlers - may need to adjust input to provide
     759    // a series of filenames (TextInputFormat will instead read in a text file
     760    // and return each line...)
    743761    job.setInputFormatClass(GSFileInputFormat.class);
    744762    //job.setOutputFormatClass(NullOutputFormat.class);
Note: See TracChangeset for help on using the changeset viewer.