Changeset 27102


Ignore:
Timestamp:
2013-03-20T13:00:34+13:00 (11 years ago)
Author:
jmt12
Message:

Adding a progress reporter, since the trick of setting the timeout to 10 hours does not work on Medusa

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest.java

    r26982 r27102  
    1313import java.lang.ProcessBuilder;
    1414import java.lang.ProcessBuilder.*;
     15import java.lang.Thread;
    1516import java.net.InetAddress;
    1617import java.util.Map;
     
    2021import org.apache.hadoop.io.*;
    2122import org.apache.hadoop.mapreduce.*;
     23import org.apache.hadoop.mapreduce.Mapper.Context;
    2224import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    2325import org.apache.hadoop.mapreduce.lib.input.FileSplit;
     
    234236      import_process_builder.redirectErrorStream(true);
    235237      import_process_builder.redirectOutput(Redirect.appendTo(import_process_log));
     238
     239      // - create progress reporter (so Hadoop doesn't time us out)
     240      Thread reporter = new HadoopProgressReporter(context, import_process_log);
     241      reporter.start();
     242
    236243      // - run process
    237244      Process import_process = import_process_builder.start();
     
    247254      {
    248255        System.err.println("Error! Import command failed (" + e.toString() + ")");
    249         System.exit(0);
    250       }
     256      }
     257
     258      // - stop the progress reporter as, one way or another, there will be no
     259      //   more progress
     260      reporter.interrupt();
     261      reporter = null; // force gc
    251262
    252263      // - write end time to log
     
    283294    conf.set("hadoopprefix", args[3]);
    284295    conf.set("collection", args[4]);
     296    // Set the number of retries to 1 - hopefully one of the following will work
     297    conf.setInt("mapred.map.max.attempts", 1); // Old Hadoop
     298    conf.setInt("mapreduce.map.maxattempts", 1); // Hadoop 2.0.3-alpha
     299    conf.setInt("mapreduce.map.max.attempts", 1); // Solution on Web
    285300    // prevent timeouts
    286301    long milli_seconds = 60*60*1000; // 1 hour
    287302    conf.setLong("mapred.task.timeout", milli_seconds);
    288 
    289303    Job job = new Job(conf, "hadoopgreenstoneingest");
    290304    job.setJarByClass(HadoopGreenstoneIngest.class);
     
    319333  /** main(String[]) **/
    320334}
     335
     336class HadoopProgressReporter
     337extends Thread
     338{
     339
     340  private Context hadoop_process;
     341
     342  private File log_file;
     343
     344  HadoopProgressReporter(Context hadoop_process, File log_file)
     345  {
     346    this.hadoop_process = hadoop_process;
     347    this.log_file = log_file;
     348  }
     349
     350  public void run()
     351  {
     352    try
     353    {
     354      while (!this.isInterrupted())
     355      {
     356        sleep(60000); // Wait a minute
     357        //FileWriter fw1 = new FileWriter(this.log_file, true);
     358        //long time = System.currentTimeMillis()/1000;
     359        //fw1.write("[" + time + "] HadoopProgressReporter.progress()\n");
     360        //fw1.close();
     361        this.hadoop_process.progress(); // Inform Hadoop we are still processing
     362      }
     363    }
     364    catch (InterruptedException iex)
     365    {
     366      // We've been interrupted: no more progress
     367    }
     368    catch (Exception ex)
     369    {
     370      ex.printStackTrace();
     371    }
     372  }
     373}
Note: See TracChangeset for help on using the changeset viewer.