Changeset 27752 for gs2-extensions/parallel-building/trunk
- Timestamp: 2013-07-04T13:44:22+12:00 (11 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl
r27684 r27752 89 89 90 90 # Read in data_locality.csv (will be matched to task logs) 91 print ' * Reading and parsing "data_locality.csv"... ';92 91 my $data_was_local = {}; 93 92 my $data_locality_csv_path = $results_path . '/data_locality.csv'; 94 if (open(DLIN, '<:utf8', $data_locality_csv_path)) 95 { 96 while (my $line = <DLIN>) 97 { 98 if ($line =~ /(\d+),\d,(\d)/) 99 { 100 $data_was_local->{$1} = $2; 101 } 102 } 103 close(DLIN); 93 if (-f $data_locality_csv_path) 94 { 95 print ' * Reading and parsing "data_locality.csv"... '; 96 if (open(DLIN, '<:utf8', $data_locality_csv_path)) 97 { 98 while (my $line = <DLIN>) 99 { 100 if ($line =~ /(\d+),\d,(\d)/) 101 { 102 $data_was_local->{$1} = $2; 103 } 104 } 105 close(DLIN); 106 } 107 else 108 { 109 die('Error! Failed to open file for reading: ' . $data_locality_csv_path); 110 } 111 print "Done!\n"; 104 112 } 105 113 else 106 114 { 107 die('Error! Failed to open file for reading: ' . $data_locality_csv_path); 108 } 109 print "Done!\n"; 115 print " * Data locality not available or not applicable\n"; 116 } 110 117 111 118 # Read in all task logs and parse task records … … 121 128 my $job_no = $1; 122 129 my $task_no = $2; 123 my $is_data_local = $data_was_local->{$task_no}; 124 my $task_record = {'host'=>'', 'cpu'=>0, 'job' => $job_no, 'task' => $task_no, 'start'=>0, 'end'=>0, 'cpu_time'=>0, 'data_locality'=>$is_data_local, 'file'=>''}; 130 my $is_data_local = 0; 131 if (defined ($data_was_local->{$task_no})) 132 { 133 $is_data_local = $data_was_local->{$task_no}; 134 } 135 my $task_record = {'host'=>'', 'cpu'=>0, 'job' => $job_no, 'task' => $task_no, 'start'=>0, 'end'=>0, 'cpu_time'=>0, 'data_locality'=>$is_data_local, 'file'=>'', 'percom'=>'NA'}; 125 136 print ' - Reading and parsing "' . $file . '"... '; 126 137 my $task_log_path = $results_path . '/' . 
$file; … … 169 180 # Calculate CPU time (total time - IO time) 170 181 $task_record->{'cpu_time'} = $task_record->{'end'} - $task_record->{'start'} - $io_time; 182 183 # We should now have the filename - use this and try and locate a 184 # convert log for this item (assuming it is multimedia, which it may 185 # not be) 186 if (defined $task_record->{'file'} && $task_record->{'file'} =~ /\/([^\/]+)\.ts/) 187 { 188 my $filename_sans_extension = $1; 189 my $convert_log = $results_path . '/convert-' . $filename_sans_extension . '.log'; 190 if (-f $convert_log) 191 { 192 print '[Reading and parsing convert log]... '; 193 if (open(CLIN, '<:utf8', $convert_log)) 194 { 195 my $max_percent = 0.00; 196 while (my $line = <CLIN>) 197 { 198 if ($line =~ /.*Encoding: task 1 of 1, (\d+\.\d\d) \%/) 199 { 200 my $percent = $1; 201 if ($percent > $max_percent) 202 { 203 $max_percent = $percent; 204 } 205 } 206 } 207 close(CLIN); 208 $task_record->{'percom'} = $max_percent; 209 } 210 else 211 { 212 print STDERR "Warning! Failed to open log file for reading: " . $convert_log . "\n"; 213 } 214 } 215 } 216 171 217 # Store this record 172 218 $task_records->{$task_no} = $task_record; … … 232 278 my $row_counter = 1; 233 279 # Header 234 print CSVOUT "id,name,hostname,start,end,cputime,dl,pid,filename \n";280 print CSVOUT "id,name,hostname,start,end,cputime,dl,pid,filename,percom\n"; 235 281 # Master Record 236 print CSVOUT $row_counter . ',M0,' . $job_record->{'host'} . ',' . $job_record->{'start'} . ',' . $job_record->{'end'} . ',' . ($job_record->{'cpu_time'} / 1000) . ",0,0,NA \n";282 print CSVOUT $row_counter . ',M0,' . $job_record->{'host'} . ',' . $job_record->{'start'} . ',' . $job_record->{'end'} . ',' . ($job_record->{'cpu_time'} / 1000) . ",0,0,NA,NA\n"; 237 283 $row_counter++; 238 284 # For each compute node record … … 259 305 } 260 306 $known_workers->{$csv_worker_id} = 1; 261 print CSVOUT $node_id . ',' . $csv_worker_id . ',' . $node_record->{'host'} . ',' . 
$node_record->{'start'} . ',' . $node_record->{'end'} . ',' . $node_record->{'cpu_time'} . ",0,1,NA \n";307 print CSVOUT $node_id . ',' . $csv_worker_id . ',' . $node_record->{'host'} . ',' . $node_record->{'start'} . ',' . $node_record->{'end'} . ',' . $node_record->{'cpu_time'} . ",0,1,NA,NA\n"; 262 308 # List the child task records 263 309 foreach my $taskno (sort keys %{$task_records}) … … 266 312 if ($task_record->{'host'} . '#' . $task_record->{'cpu'} eq $worker_id) 267 313 { 268 print CSVOUT $row_counter . ',T' . ($task_record->{'task'} + 0) . ',' . $task_record->{'host'} . ',' . $task_record->{'start'} . ',' . $task_record->{'end'} . ',' . $task_record->{'cpu_time'} . ',' . $task_record->{'data_locality'} . ',' . $node_id . ',' . $task_record->{'file'} . "\n";314 print CSVOUT $row_counter . ',T' . ($task_record->{'task'} + 0) . ',' . $task_record->{'host'} . ',' . $task_record->{'start'} . ',' . $task_record->{'end'} . ',' . $task_record->{'cpu_time'} . ',' . $task_record->{'data_locality'} . ',' . $node_id . ',' . $task_record->{'file'} . ',' . $task_record->{'percom'} . "\n"; 269 315 $row_counter++; 270 316 }
Note: See TracChangeset for help on using the changeset viewer.