Changeset 27586

Show
Ignore:
Timestamp:
10.06.2013 11:15:36 (6 years ago)
Author:
jmt12
Message:

Updating script to take the date of the hadoop job into account when searching for the job tracker log to parse - just in case the log has been rolled

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl

    r27412 r27586  
    5252print " Results directory: " . $results_dir . "\n"; 
    5353 
     54# 1. Determine job ID 
    5455my $hadoop_log_path = &fileCat($results_dir, 'hadoop.log'); 
    5556if (!-e $hadoop_log_path) 
     
    5859} 
    5960print " Hadoop log path: " . $hadoop_log_path . "\n"; 
    60  
     61print " * Determine JobID: "; 
     62my $job_id; 
     63my $result = `grep "Running job:" "$hadoop_log_path"`; 
     64if ($result =~ /Running job: job_(\d+_\d+)/) 
     65{ 
     66  $job_id = $1; 
     67} 
     68else 
     69{ 
     70  die("Error! Failed to locate JobID\n"); 
     71} 
     72print $job_id . "\n"; 
     73# - we'll need the date to locate the appropriate log file 
     74my $log_date_suffix = ''; 
     75if ($job_id =~ /^(\d\d\d\d)(\d\d)(\d\d)/) 
     76{ 
     77  $log_date_suffix = '.' . $1 . '-' . $2 . '-' . $3; 
     78} 
     79 
     80# 2. Determine appropriate job tracker log 
    6181my $username = `whoami`; 
    6282chomp($username); 
     
    6585chomp($hostname); 
    6686print " Hostname: " . $hostname . "\n"; 
    67 my $jobtracker_log_path = &fileCat($ENV{'HADOOP_PREFIX'}, 'logs', 'hadoop-' . $username . '-jobtracker-' . $hostname . '.log'); 
     87my $jobtracker_log_path = &fileCat($ENV{'HADOOP_PREFIX'}, 'logs', 'hadoop-' . $username . '-jobtracker-' . $hostname . '.log' . $log_date_suffix); 
    6888if (!-e $jobtracker_log_path) 
    6989{ 
    70   die("Error! Hadoop JobTracker log file cannot be found: " . $jobtracker_log_path . "\n"); 
     90  $jobtracker_log_path = &fileCat($ENV{'HADOOP_PREFIX'}, 'logs', 'hadoop-' . $username . '-jobtracker-' . $hostname . '.log'); 
     91  if (!-e $jobtracker_log_path) 
     92  { 
     93    die("Error! Hadoop JobTracker log file cannot be found: " . $jobtracker_log_path . "\n"); 
     94  } 
    7195} 
    7296print " Jobtracker log path: " . $jobtracker_log_path . "\n"; 
     
    7498print " Report path: " . $data_locality_report_path . "\n"; 
    7599 
    76 # 1. Determine job ID 
    77 print " * Determine JobID: "; 
    78 my $job_id; 
    79 my $result = `grep "Running job:" "$hadoop_log_path"`; 
    80 if ($result =~ /Running job: job_(\d+_\d+)/) 
    81 { 
    82   $job_id = $1; 
    83 } 
    84 else 
    85 { 
    86   die("Error! Failed to locate JobID\n"); 
    87 } 
    88 print $job_id . "\n"; 
    89  
    90 # 2. Parse log 
     100# 3. Parse log 
    91101print " * Parse JobTracker Log... "; 
    92102my $tid_2_splits = {}; 
     
    145155 
    146156 
    147 # 3. Write CSV of information 
     157# 4. Write CSV of information 
    148158print " * Writing Job Information... "; 
    149159&debugPrint("\nAttemptID\tComputeNode\tSucceeded"); 
     
    183193print "Done\n"; 
    184194 
    185 # 4. Done 
     195# 5. Done 
    186196print "===== Complete! =====\n\n"; 
    187197exit;