Changeset 27586


Ignore:
Timestamp:
2013-06-10T11:15:36+12:00 (11 years ago)
Author:
jmt12
Message:

Updating script to take the date of the Hadoop job into account when searching for the task tracker log to parse - just in case the log has been rolled

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl

    r27412 r27586  
    5252print " Results directory: " . $results_dir . "\n";
    5353
     54# 1. Determine job ID
    5455my $hadoop_log_path = &fileCat($results_dir, 'hadoop.log');
    5556if (!-e $hadoop_log_path)
     
    5859}
    5960print " Hadoop log path: " . $hadoop_log_path . "\n";
    60 
     61print " * Determine JobID: ";
     62my $job_id;
     63my $result = `grep "Running job:" "$hadoop_log_path"`;
     64if ($result =~ /Running job: job_(\d+_\d+)/)
     65{
     66  $job_id = $1;
     67}
     68else
     69{
     70  die("Error! Failed to locate JobID\n");
     71}
     72print $job_id . "\n";
     73# - we'll need the date to locate the appropriate log file
     74my $log_date_suffix = '';
     75if ($job_id =~ /^(\d\d\d\d)(\d\d)(\d\d)/)
     76{
     77  $log_date_suffix = '.' . $1 . '-' . $2 . '-' . $3;
     78}
     79
     80# 2. Determine appropriate job tracker log
    6181my $username = `whoami`;
    6282chomp($username);
     
    6585chomp($hostname);
    6686print " Hostname: " . $hostname . "\n";
    67 my $jobtracker_log_path = &fileCat($ENV{'HADOOP_PREFIX'}, 'logs', 'hadoop-' . $username . '-jobtracker-' . $hostname . '.log');
     87my $jobtracker_log_path = &fileCat($ENV{'HADOOP_PREFIX'}, 'logs', 'hadoop-' . $username . '-jobtracker-' . $hostname . '.log' . $log_date_suffix);
    6888if (!-e $jobtracker_log_path)
    6989{
    70   die("Error! Hadoop JobTracker log file cannot be found: " . $jobtracker_log_path . "\n");
     90  $jobtracker_log_path = &fileCat($ENV{'HADOOP_PREFIX'}, 'logs', 'hadoop-' . $username . '-jobtracker-' . $hostname . '.log');
     91  if (!-e $jobtracker_log_path)
     92  {
     93    die("Error! Hadoop JobTracker log file cannot be found: " . $jobtracker_log_path . "\n");
     94  }
    7195}
    7296print " Jobtracker log path: " . $jobtracker_log_path . "\n";
     
    7498print " Report path: " . $data_locality_report_path . "\n";
    7599
    76 # 1. Determine job ID
    77 print " * Determine JobID: ";
    78 my $job_id;
    79 my $result = `grep "Running job:" "$hadoop_log_path"`;
    80 if ($result =~ /Running job: job_(\d+_\d+)/)
    81 {
    82   $job_id = $1;
    83 }
    84 else
    85 {
    86   die("Error! Failed to locate JobID\n");
    87 }
    88 print $job_id . "\n";
    89 
    90 # 2. Parse log
     100# 3. Parse log
    91101print " * Parse JobTracker Log... ";
    92102my $tid_2_splits = {};
     
    145155
    146156
    147 # 3. Write CSV of information
     157# 4. Write CSV of information
    148158print " * Writing Job Information... ";
    149159&debugPrint("\nAttemptID\tComputeNode\tSucceeded");
     
    183193print "Done\n";
    184194
    185 # 4. Done
     195# 5. Done
    186196print "===== Complete! =====\n\n";
    187197exit;
Note: See TracChangeset for help on using the changeset viewer.