/** jmt12 **/

package org.nzdl.gsdl;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.lang.ProcessBuilder;
import java.lang.ProcessBuilder.*;
import java.lang.Thread;
import java.net.InetAddress;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.*;

/** @class HadoopGreenstoneIngest
 *  A Hadoop wrapper around Greenstone's import.pl: each map task runs a
 *  complete import over the single (HDFS-hosted) file named by its key.
 */
public class HadoopGreenstoneIngest
{

  /** @class GSFileRecordReader
   *  Presents each input file as exactly one record: the key is the file's
   *  path, the value a constant 1.
   */
  public static class GSFileRecordReader
    extends RecordReader<Text, IntWritable>
  {
    /** The path of the file backing this split, used as the record's key */
    private Text current_key;

    private IntWritable current_value = new IntWritable(1);

    /** Used to indicate progress */
    private boolean is_finished = false;

    /**
     * Record the path of the file backing this split; it becomes the key
     * handed to the mapper.
     */
    @Override
    public void initialize( InputSplit inputSplit, TaskAttemptContext taskAttemptContext )
      throws IOException, InterruptedException
    {
      FileSplit split = (FileSplit) inputSplit;
      current_key = new Text(split.getPath().toString());
    }
    /** initialize() **/

    /**
     * We only ever have a single key/value
     */
    @Override
    public boolean nextKeyValue()
      throws IOException, InterruptedException
    {
      if (!is_finished)
      {
        is_finished = true;
        return true;
      }
      return false;
    }
    /** nextKeyValue() **/

    /** @function getProgress
     * Rather than calculating progress, we just keep it simple
     */
    @Override
    public float getProgress()
      throws IOException, InterruptedException
    {
      return is_finished ? 1 : 0;
    }
    /** getProgress() **/

    /**
     * Returns the current key (the path of the file being processed)
     */
    @Override
    public Text getCurrentKey()
      throws IOException, InterruptedException
    {
      return current_key;
    }
    /** getCurrentKey() **/

    /**
     * Returns the current value (a constant 1)
     */
    @Override
    public IntWritable getCurrentValue()
      throws IOException, InterruptedException
    {
      return current_value;
    }
    /** getCurrentValue() **/

    /**
     * Nothing to close: the reader holds no open resources
     */
    @Override
    public void close()
      throws IOException
    {
      // nothing to do
    }
    /** close() **/

  }
  /** GSFileRecordReader **/

  /** @class GSFileInputFormat
   *  Hands each input file, unsplit, to its own map task via
   *  GSFileRecordReader.
   */
  public static class GSFileInputFormat
    extends FileInputFormat<Text, IntWritable>
  {
    /**
     * Don't split the files
     */
    @Override
    protected boolean isSplitable(JobContext context, Path filename)
    {
      return false;
    }
    /** isSplitable() **/

    /**
     * Create the reader that presents each file as a single record
     */
    @Override
    public RecordReader<Text, IntWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException
    {
      return new GSFileRecordReader();
    }
    /** createRecordReader() **/

  }
  /** class GSFileInputFormat **/

  /** @class GSMap
   *  The mapper: each record names one file to ingest, and the map task
   *  drives a full Greenstone import.pl run over just that file.
   */
  public static class GSMap
    extends Mapper<Text, IntWritable, Text, IntWritable>
  {
    /** @function map
     * The key is the full path (HDFS) of the file to be processed.
     */
    @Override
    public void map(Text key, IntWritable value, Context context)
      throws IOException, InterruptedException
    {
      String file_path = key.toString();
      // - configuration for the task
      Configuration conf = context.getConfiguration();
      String gsdlhome = conf.get("gsdlhome");
      String hdfs_prefix = conf.get("hdfsprefix");
      String hadoop_home = conf.get("hadoophome");
      String collection = conf.get("collection");
      String task_id = conf.get("mapred.task.id");
      task_id = task_id.substring(8); // remove "attempt_" prefix
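      // e.g. an id of the (assumed) form "attempt_201208310412_0001_m_000000_0"
      // becomes "201208310412_0001_m_000000_0"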

      // Programmatically rewrite the protocol as appropriate for the given
      // archives directory (not necessary if the path is local or NFS)
      if (hdfs_prefix.equals("/hdfs"))
      {
        file_path = file_path.replaceFirst("hdfs://[^/]*", hdfs_prefix);
      }
      else
      {
        file_path = file_path.replace("hdfs://", hdfs_prefix);
      }
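      // For example, with the "/hdfs" prefix a key such as
      // "hdfs://namenode:9000/user/gsdl/import/doc.xml" becomes
      // "/hdfs/user/gsdl/import/doc.xml" (hostname and path illustrative)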

      // - create a temporary directory
      File greenstone_tmp_dir = new File("/tmp/greenstone");
      if (!greenstone_tmp_dir.isDirectory())
      {
        greenstone_tmp_dir.mkdir();
      }

      // - open a unique log file
      File import_process_log = new File("/tmp/greenstone/import-hadoop-" + task_id + ".log");
      FileWriter fw1 = new FileWriter(import_process_log, true);
      // MEDUSA Customization: Introduce a slight delay based upon the hostname
      // in order to stagger the startup of Map workers. It looks like the avg
      // IO is around 25 minutes... so let's try to make it so the last mapper
      // starts up 25 minutes after the first (with all others spread in
      // between).
      String hostname = InetAddress.getLocalHost().getHostName();
      // We only do this if there is a sentinel file lurking in tmp
      try
      {
        File delay_file = new File("/tmp/greenstone/delay.me");
        if (delay_file.exists())
        {
          Pattern p = Pattern.compile("compute-0-([0-9]+)\\.local");
          Matcher m = p.matcher(hostname);
          if (m.matches())
          {
            String node_str = m.group(1);
            int node_number = Integer.parseInt(node_str) * 100;
            fw1.write("[DEBUG] Delaying start for " + node_number + " seconds\n");
            // sleep() is a static method, so call it directly rather than
            // through currentThread()
            Thread.sleep(1000 * node_number);
          }
          // We only do this once for each compute node
          delay_file.delete();
        }
      }
      catch (Exception ie)
      {
        System.err.println(ie.toString());
      }
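      // Under this scheme a host named "compute-0-5.local" waits 500 seconds,
      // "compute-0-14.local" waits 1400 seconds, and so on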

      // - start the log by writing the time and the manifest line
      long start_time = System.currentTimeMillis()/1000;
      StringBuffer header_block = new StringBuffer("[Started:");
      header_block.append(start_time);
      header_block.append("]\n[Host:");
      header_block.append(hostname);
      header_block.append("]\n[CPU:");
      String getcpu_executable_cmd = gsdlhome + "/ext/parallel-building/linux/bin/getcpu";
      File getcpu_executable = new File(getcpu_executable_cmd);
      if (getcpu_executable.exists())
      {
        header_block.append(runCommand(getcpu_executable_cmd));
      }
      else
      {
        header_block.append("0");
      }
      header_block.append("]\n[Task:");
      header_block.append(task_id);
      header_block.append("]\n[Map:");
      header_block.append(file_path);
      header_block.append("=>");
      header_block.append(value);
      header_block.append("]\n");
      fw1.write(header_block.toString());
      header_block = null;

      // - create a temporary manifest file to process this file. Overwrite any
      //   existing file
      File manifest_path = new File("/tmp/greenstone/manifest" + task_id + ".xml");
      FileWriter manifest_writer = new FileWriter(manifest_path);
      manifest_writer.write("<Manifest version=\"2.0\">\n");
      manifest_writer.write("\t<Index>\n");
      manifest_writer.write("\t\t<Filename>" + file_path + "</Filename>\n");
      manifest_writer.write("\t</Index>\n");
      manifest_writer.write("</Manifest>\n");
      manifest_writer.close();
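      // The manifest written above takes the form (path illustrative):
      //   <Manifest version="2.0">
      //     <Index>
      //       <Filename>/hdfs/user/gsdl/import/doc.xml</Filename>
      //     </Index>
      //   </Manifest>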

      // - call Greenstone passing in the path to the manifest
      //ProcessBuilder import_process_builder = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-archivedir", conf.get("archivesdir"), collection);
      String environment_script_filename = "setup.bash";
      StringBuffer cmd_buffer = new StringBuffer();
      cmd_buffer.append("source ./");
      cmd_buffer.append(environment_script_filename);
      cmd_buffer.append(" && time -p import.pl -keepold -manifest \"");
      cmd_buffer.append(manifest_path.toString());
      cmd_buffer.append("\" -archivedir \"");
      cmd_buffer.append(conf.get("archivesdir"));
      cmd_buffer.append("\" ");
      cmd_buffer.append(collection);
      ProcessBuilder import_process_builder = new ProcessBuilder("bash", "-c", cmd_buffer.toString());
      fw1.write("[Command:" + import_process_builder.command() + "]\n");
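      // The assembled shell command looks like, e.g.:
      //   source ./setup.bash && time -p import.pl -keepold \
      //     -manifest "/tmp/greenstone/manifest<task_id>.xml" \
      //     -archivedir "<archivesdir>" <collection>
      // where the placeholders stand for the configured values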
      /*
      // - alter environment
      Map<String, String> import_process_env = import_process_builder.environment();
      // - build up the path
      String path = import_process_env.get("PATH");
      path = gsdlhome + "/ext/parallel-building/bin/script:" + path;
      path = gsdlhome + "/ext/parallel-building/linux/bin:" + path;
      path = hadoop_home + "/bin:" + path;
      path = gsdlhome + "/ext/tdb-edit/linux/bin:" + path;
      path = gsdlhome + "/ext/tdb-edit/bin/script:" + path;
      path = gsdlhome + "/ext/video-and-audio/linux/bin:" + path;
      path = gsdlhome + "/bin/script:" + path;
      path = gsdlhome + "/bin/linux:" + path;
      import_process_env.put("PATH", path);
      fw1.write("[PATH: " + path + "]\n");
      // - ld_library_path
      import_process_env.put("LD_LIBRARY_PATH", gsdlhome + "/ext/parallel-building/linux/lib:" + gsdlhome + "/ext/hadoop/linux/lib:" + gsdlhome + "/ext/video-and-audio/linux/lib:" + gsdlhome + "/ext/tdb-edit/linux/lib");
      // - dyld_library_path
      import_process_env.put("DYLD_LIBRARY_PATH", gsdlhome + "/ext/video-and-audio/linux/lib");
      // - misc
      import_process_env.put("GSDLHOME", gsdlhome);
      import_process_env.put("GSDLOS", "linux");
      import_process_env.put("GSDLEXTS", "parallel-building:tdb-edit:video-and-audio");
      // - installed extension paths
      import_process_env.put("GEXTPARALLELBUILDING", gsdlhome + "/ext/parallel-building");
      import_process_env.put("GEXTPARALLELBUILDING_INSTALLED", gsdlhome + "/ext/parallel-building/linux");
      import_process_env.put("GEXTTDBEDIT_INSTALLED", gsdlhome + "/ext/tdb-edit/linux");
      import_process_env.put("GEXTVIDEO_INSTALLED", gsdlhome + "/ext/video-and-audio/linux");
      // - Hadoop specific
      import_process_env.put("HADOOP_PREFIX", hadoop_home);
      fw1.write("[HADOOP_PREFIX: " + hadoop_home + "]\n");
      */

      // - change the working directory
      import_process_builder.directory(new File(gsdlhome));
      // - close our handle on the log before the child process appends to it
      fw1.close();

      // - send the child's stdout and stderr to the log
      import_process_builder.redirectErrorStream(true);
      import_process_builder.redirectOutput(Redirect.appendTo(import_process_log));

      // - create progress reporter (so Hadoop doesn't time us out)
      Thread reporter = new HadoopProgressReporter(context, import_process_log);
      reporter.start();
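      // (HadoopProgressReporter, defined at the bottom of this file, calls
      //  context.progress() once a minute until interrupted)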

      // - run process
      Process import_process = import_process_builder.start();
      try
      {
        int import_status = import_process.waitFor();
        if (import_status != 0)
        {
          throw new Exception("exit status: " + import_status);
        }
      }
      catch (Exception e)
      {
        System.err.println("Error! Import command failed (" + e.toString() + ")");
      }

      // - stop the progress reporter as, one way or another, there will be no
      //   more progress
      reporter.interrupt();
      reporter = null; // make the thread eligible for garbage collection

      // - write the end time to the log
      FileWriter fw2 = new FileWriter(import_process_log, true);
      long end_time = System.currentTimeMillis()/1000;
      fw2.write("[Completed:" + end_time + "]\n");
      fw2.close();

      // - for now return a dummy output. In the future I may want to parse
      //   the output from Greenstone and allow reducing to make me a pretty
      //   time-based log
      context.write(key, value);
    }
    /** map(Text,IntWritable,Context) **/

  }
  /** class GSMap **/

  /** @function main
   */
  public static void main(String[] args)
    throws Exception
  {
    // Seven arguments are required (args[0] through args[6] are read below)
    if (args.length < 7)
    {
      System.out.println("Usage: bin/hadoop jar hadoop-greenstone.jar org.nzdl.gsdl.HadoopGreenstoneIngest <gsdlhome> <hadoophome> <collection> <archivesdir> <hdfsprefix> <hdfsin> <hdfsout>\n");
      System.exit(0);
    }

    Configuration conf = new Configuration();
    conf.set("gsdlhome", args[0]);
    conf.set("hadoophome", args[1]);
    conf.set("collection", args[2]);
    conf.set("archivesdir", args[3]);
    conf.set("hdfsprefix", args[4]); // "HDThriftFS", "HDFSShell", or ""
    conf.set("hdfsin", args[5]);
    conf.set("hdfsout", args[6]);
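    // An invocation might look like (all paths illustrative):
    //   bin/hadoop jar hadoop-greenstone.jar org.nzdl.gsdl.HadoopGreenstoneIngest \
    //     /home/gsdl /opt/hadoop mycollection /hdfs/archives "/hdfs" \
    //     hdfs:///user/gsdl/import hdfs:///user/gsdl/out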

    // Set the maximum number of map attempts to 1; the property name varies
    // between Hadoop versions, so set all of the likely candidates
    conf.setInt("mapred.map.max.attempts", 1); // Old Hadoop
    conf.setInt("mapreduce.map.maxattempts", 1); // Hadoop 2.0.3-alpha
    conf.setInt("mapreduce.map.max.attempts", 1); // Solution found on the web
    // prevent timeouts
    long milli_seconds = 4*60*60*1000; // 4 hours
    conf.setLong("mapred.task.timeout", milli_seconds);
    Job job = new Job(conf, "hadoopgreenstoneingest");
    job.setJarByClass(HadoopGreenstoneIngest.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Register the map, combiner, and reducer classes
    job.setMapperClass(GSMap.class);
    // - in theory this uses the IdentityReducer by default, which simply
    //   returns the input as the output (so no processing)
    job.setNumReduceTasks(0);

    // Set the input and output handlers - the input may need adjusting to
    // provide a series of filenames (TextInputFormat would instead read in a
    // text file and return each line...)
    job.setInputFormatClass(GSFileInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);

    // Register the input and output paths
    // - this input path should be to a file (in HDFS) that lists the paths to
    //   the manifest files
    FileInputFormat.setInputPaths(job, new Path(conf.get("hdfsin")));
    // - for now the output isn't that important, but in the future I may use
    //   this mechanism to produce a time-based log.
    FileOutputFormat.setOutputPath(job, new Path(conf.get("hdfsout")));

    // Recommended notation despite my hatred of the ?: syntax
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
  /** main(String[]) **/

  /** @function runCommand()
   *
   * A convenience method that calls an external command and returns its
   * standard out as a string. Warning! Not safe if the command can write a
   * large amount of text to STDERR - it may block indefinitely.
   */
  public static String runCommand(String command)
  {
    StringBuffer result = new StringBuffer();
    try
    {
      Runtime run = Runtime.getRuntime();
      Process pr = run.exec(command);
      BufferedReader buf = new BufferedReader(new InputStreamReader(pr.getInputStream()));
      String line;
      while ((line = buf.readLine()) != null)
      {
        result.append(line);
      }
      // wait for the child only after its stdout has been drained, otherwise
      // a full pipe buffer could block both processes
      pr.waitFor();
    }
    catch (Exception ex)
    {
      System.err.println("Error! " + ex.getMessage());
    }
    return result.toString();
  }
  /** runCommand() **/
}
/** class HadoopGreenstoneIngest **/

/** @class HadoopProgressReporter
 *  Calls context.progress() once a minute so Hadoop doesn't kill the task
 *  while the (potentially long-running) import process is still working.
 */
class HadoopProgressReporter
  extends Thread
{

  private Context hadoop_process;

  private File log_file;

  HadoopProgressReporter(Context hadoop_process, File log_file)
  {
    this.hadoop_process = hadoop_process;
    //this.log_file = log_file;
    this.log_file = new File("/tmp/hadoop_progress_reporter.log");
  }

  public void run()
  {
    try
    {
      while (!this.isInterrupted())
      {
        sleep(60000); // Wait a minute
        //FileWriter fw1 = new FileWriter(this.log_file, true);
        //long time = System.currentTimeMillis()/1000;
        //fw1.write("[" + time + "] HadoopProgressReporter.progress()\n");
        //fw1.close();
        this.hadoop_process.progress(); // Inform Hadoop we are still processing
      }
    }
    catch (InterruptedException iex)
    {
      // We've been interrupted: no more progress
    }
    catch (Exception ex)
    {
      ex.printStackTrace();
    }
  }
}
/** HadoopProgressReporter **/