Changeset 27586 for gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl
- Timestamp: 2013-06-10T11:15:36+12:00 (11 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl
r27412 r27586 52 52 print " Results directory: " . $results_dir . "\n"; 53 53 54 # 1. Determine job ID 54 55 my $hadoop_log_path = &fileCat($results_dir, 'hadoop.log'); 55 56 if (!-e $hadoop_log_path) … … 58 59 } 59 60 print " Hadoop log path: " . $hadoop_log_path . "\n"; 60 61 print " * Determine JobID: "; 62 my $job_id; 63 my $result = `grep "Running job:" "$hadoop_log_path"`; 64 if ($result =~ /Running job: job_(\d+_\d+)/) 65 { 66 $job_id = $1; 67 } 68 else 69 { 70 die("Error! Failed to locate JobID\n"); 71 } 72 print $job_id . "\n"; 73 # - we'll need the date to locate the appopriate log file 74 my $log_date_suffix = ''; 75 if ($job_id =~ /^(\d\d\d\d)(\d\d)(\d\d)/) 76 { 77 $log_date_suffix = '.' . $1 . '-' . $2 . '-' . $3; 78 } 79 80 # 2. Determine appropriate job tracker log 61 81 my $username = `whoami`; 62 82 chomp($username); … … 65 85 chomp($hostname); 66 86 print " Hostname: " . $hostname . "\n"; 67 my $jobtracker_log_path = &fileCat($ENV{'HADOOP_PREFIX'}, 'logs', 'hadoop-' . $username . '-jobtracker-' . $hostname . '.log' );87 my $jobtracker_log_path = &fileCat($ENV{'HADOOP_PREFIX'}, 'logs', 'hadoop-' . $username . '-jobtracker-' . $hostname . '.log' . $log_date_suffix); 68 88 if (!-e $jobtracker_log_path) 69 89 { 70 die("Error! Hadoop JobTracker log file cannot be found: " . $jobtracker_log_path . "\n"); 90 $jobtracker_log_path = &fileCat($ENV{'HADOOP_PREFIX'}, 'logs', 'hadoop-' . $username . '-jobtracker-' . $hostname . '.log'); 91 if (!-e $jobtracker_log_path) 92 { 93 die("Error! Hadoop JobTracker log file cannot be found: " . $jobtracker_log_path . "\n"); 94 } 71 95 } 72 96 print " Jobtracker log path: " . $jobtracker_log_path . "\n"; … … 74 98 print " Report path: " . $data_locality_report_path . "\n"; 75 99 76 # 1. Determine job ID 77 print " * Determine JobID: "; 78 my $job_id; 79 my $result = `grep "Running job:" "$hadoop_log_path"`; 80 if ($result =~ /Running job: job_(\d+_\d+)/) 81 { 82 $job_id = $1; 83 } 84 else 85 { 86 die("Error! 
Failed to locate JobID\n"); 87 } 88 print $job_id . "\n"; 89 90 # 2. Parse log 100 # 3. Parse log 91 101 print " * Parse JobTracker Log... "; 92 102 my $tid_2_splits = {}; … … 145 155 146 156 147 # 3. Write CSV of information157 # 4. Write CSV of information 148 158 print " * Writing Job Information... "; 149 159 &debugPrint("\nAttemptID\tComputeNode\tSucceeded"); … … 183 193 print "Done\n"; 184 194 185 # 4. Done195 # 5. Done 186 196 print "===== Complete! =====\n\n"; 187 197 exit;
Note: See TracChangeset for help on using the changeset viewer.