source: gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl@ 30353

Last change on this file since 30353 was 29663, checked in by jmt12, 10 years ago

Supporting grayscale printing, fixing mismatched tags and speechmarks, and making data locality and incomplete import occurrences more obvious

  • Property svn:executable set to *
File size: 18.5 KB
Line 
1#!/usr/bin/perl
2
3# Pragma
4use strict;
5use warnings;
6
7# Modules
8use Sort::Naturally;
9use POSIX qw(floor strftime);
10
11print "\n===== Generate Timing (GANTT) =====\n";
12
# 0. Init
# - configurables
my $chart_width = 1600;
# - any video more than 95% complete is probably complete with rounding errors
my $complete_threshold = 95;

my $debug = 0;
# - remembers whether -grayscale was given so the banner can report it
my $grayscale = 0;
my $color_master = 'blue';
my $color_worker = 'green';
my $color_nlocal = 'red';
my $disable_header = 0;
my $max_worker_count = 0;
# - globals
my $chart_count = 0;
my @months = ("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec");

# 1. Parse options
# - shifts the value following an option off @ARGV and checks that it is a
#   non-negative integer; printUsage() dies if it is missing or malformed
my $take_number = sub
{
  my ($missing_msg, $invalid_msg) = @_;
  if (!defined $ARGV[0])
  {
    printUsage($missing_msg);
  }
  my $value = shift(@ARGV);
  if ($value !~ /^\d+$/)
  {
    printUsage($invalid_msg);
  }
  return $value;
};
while (defined $ARGV[0] && $ARGV[0] =~ /^-/)
{
  my $option = shift(@ARGV);
  if ($option eq '-debug')
  {
    $debug = 1;
  }
  elsif ($option eq '-width')
  {
    $chart_width = $take_number->('Error! No width value specified', 'Error! Chart width not a number');
  }
  elsif ($option eq '-grayscale')
  {
    $grayscale = 1;
    $color_master = '#1D1D1D';
    $color_worker = '#969696';
    $color_nlocal = '#4C4C4C';
  }
  elsif ($option eq '-noheader')
  {
    $disable_header = 1;
  }
  elsif ($option eq '-maxworkers')
  {
    $max_worker_count = $take_number->('Error! No maxworkers value specified', 'Error! Maxworkers not a number');
  }
  else
  {
    printUsage('Error! Unknown option: ' . $option);
  }
}
print "Chart Width: " . $chart_width . "px\n";
# (this line previously reported the debug flag instead of the grayscale one)
print "Grayscale? " . ($grayscale ? 'Yes' : 'No') . "\n";
print "Debug? " . ($debug ? 'Yes' : 'No') . "\n";
print "===================================\n\n";

# 2. Search for valid directories (containing timing.csv)
while (defined $ARGV[0])
{
  my $dir = shift(@ARGV);
  if (!-d $dir)
  {
    printUsage('Error! Not a directory: ' . $dir);
  }
  # Drop trailing path separators, but never reduce a bare root ("/") to an
  # empty string
  if ($dir =~ /^(.+?)[\\\/]+$/)
  {
    $dir = $1;
  }
  searchForTimingCSV($dir);
}

# 3. Done
print "Complete!\n\n";
print "===================================\n";
print 'Generated ' . $chart_count . " charts\n";
print "===================================\n\n";
exit;
## main() ##
105
106
## @function searchForTimingCSV()
#
# Generates a chart for $dir if it directly contains a timing.csv, then
# recurses into every sub-directory whose name does not start with a dot.
#
# @param $dir path of the directory to search (no trailing separator)
#
# Dies (via printError) if the directory cannot be opened.
#
sub searchForTimingCSV
{
  my ($dir) = @_;
  # For every directory where we find a timing.csv we generate a gantt chart
  my $timing_path = filenameCat($dir, 'timing.csv');
  if (-e $timing_path)
  {
    generateChart($dir, $timing_path);
  }
  # We also recursively search for other directories containing timing.csv's
  opendir(my $dh, $dir) or printError('Failed to open directory for reading: ' . $dir . ' (' . $! . ')');
  my @files = readdir($dh);
  # Release the handle before recursing so that a deep tree does not keep one
  # directory handle open per level
  closedir($dh);
  # Sorted so charts are generated in a repeatable order
  foreach my $file (sort @files)
  {
    # Skips '.', '..' and hidden entries
    next if ($file =~ /^\./);
    my $path = filenameCat($dir, $file);
    if (-d $path)
    {
      searchForTimingCSV($path);
    }
  }
}
## searchForTimingCSV() ##
134
135
## @function generateChart()
#
# Parses a timing.csv and writes "<epoc>-gantt.html" into the same directory,
# where <epoc> is the run of digits that ends the directory name (empty if
# there is none).
#
# Three kinds of CSV record are recognised by their second field:
#  - 'M0'    the master thread: name, start, end in fields 2-4
#  - 'W<n>'  a worker thread: record id in field 0, hostname, start, end
#  - 'T<n>'  a processed file: start (3), stop (4), processing time (5),
#            data-local flag (6), id of the owning worker record (7),
#            file path (8) and optionally percent complete (9)
#
# @param $dir             directory the chart is written into
# @param $timing_csv_path path of the timing.csv to read
#
# Dies if the CSV cannot be read or the HTML cannot be written. Increments
# the file-level $chart_count on success.
#
sub generateChart
{
  my ($dir, $timing_csv_path) = @_;
  my $import_dir;
  my ($epoc) = $dir =~ /(\d+)$/;
  # A directory without a numeric suffix still gets a chart ("-gantt.html")
  if (!defined $epoc)
  {
    $epoc = '';
  }
  my $gantt_path = $dir . '/' . $epoc . '-gantt.html';

  print ' * Generating chart for: ' . $dir . "\n";
  print ' - timing file: ' . $timing_csv_path . "\n";
  print ' - gantt chart: ' . $gantt_path . "\n";

  # Read in timing.csv and parse information into data structure
  print ' - parsing timing.csv... ';
  my $timing_data = {};
  my $id_2_worker_id = {};
  open(my $tin, '<:utf8', $timing_csv_path) or die('Error! Failed to open file for reading: ' . $timing_csv_path . ' (' . $! . ')');
  while (my $line = <$tin>)
  {
    # Strip the line ending up front so the last field never carries it
    $line =~ s/\r?\n\z//;
    my @parts = split(/,/, $line);
    # Blank or truncated lines carry no record type
    next if (!defined $parts[1]);
    if ($parts[1] eq 'M0')
    {
      $timing_data->{'M'} = {'N'=>$parts[2], 'S'=>$parts[3], 'E'=>$parts[4]};
    }
    elsif ($parts[1] =~ /W\d+/)
    {
      my $worker_id = $parts[1];
      my $hostname = $parts[2];
      # Alter the worker name for compute nodes so they can be naturally
      # sorted
      if (defined $hostname && $hostname =~ /compute-0-(\d+)/)
      {
        $worker_id = 'W' . $1;
      }
      $timing_data->{$worker_id} = {'N'=>$hostname, 'S'=>$parts[3], 'E'=>$parts[4], 'F'=>{}};
      $id_2_worker_id->{$parts[0]} = $worker_id;
    }
    elsif ($parts[1] =~ /T\d+/)
    {
      my $worker_id = defined $parts[7] ? $id_2_worker_id->{$parts[7]} : undef;
      # A task naming a worker we have not seen would otherwise autovivify a
      # nameless pseudo-worker
      if (!defined $worker_id)
      {
        debugPrint('skipping task with unknown worker: ' . $line);
        next;
      }
      my $stop = $parts[4];
      my $filepath = defined $parts[8] ? $parts[8] : '';
      $filepath =~ s/^\s+|\s+$//g;
      my $percent_complete = 'NA';
      if (defined($parts[9]))
      {
        $percent_complete = $parts[9];
        # Anything at or above the threshold counts as complete
        if ($percent_complete >= $complete_threshold)
        {
          $percent_complete = 'NA';
        }
      }
      $import_dir = longestCommonPath($filepath, $import_dir);
      # Jobs are keyed by start time; nudge the key until it is unique
      my $start_time = $parts[3];
      while (defined $timing_data->{$worker_id}->{'F'}->{$start_time})
      {
        $start_time += 0.000001;
      }
      $timing_data->{$worker_id}->{'F'}->{$start_time} = {'FN'=>$filepath, 'S'=>$parts[3], 'PS'=>($stop - $parts[5]), 'PE'=>$stop, 'E'=>$stop, 'DL'=>$parts[6], 'PC'=>$percent_complete};
    }
  }
  close($tin);
  # No task records at all leaves the common path unset
  if (!defined $import_dir)
  {
    $import_dir = '';
  }
  my $number_of_workers = scalar(keys(%{$id_2_worker_id}));
  print "Done\n";

  # 3. Produce pretty HTML chart of timing information including jobs
  print " - generating timing information as chart in HTML... ";
  open(my $html_out, '>:utf8', $gantt_path) or die('Error! Failed to open file for writing: ' . $gantt_path . ' (' . $! . ')');
  print $html_out "<html>\n";
  print $html_out '<head>' . "\n";
  print $html_out '<style type="text/css">' . "\n";
  print $html_out "body {margin:0px;padding:4px}\n";
  print $html_out 'div.thread {position:relative}' . "\n";
  print $html_out 'div.master {border:1px solid gray;color:white;font-weight:bold}' . "\n";
  print $html_out 'div.worker {border:1px solid black;background-color:' . $color_worker . ';color:white;font-weight:bold;margin-bottom:1px;}' . "\n";
  print $html_out 'div.time {font-size:smaller;font-weight:normal}' . "\n";
  print $html_out 'div.job {background-color:transparent;color:black;border:1px solid black;display:block;font-size:smaller;font-weight:normal;position:relative;text-align:left;margin-bottom:1px;}' . "\n";
  print $html_out 'span.process {z-index:-1;background-color:#C7C7C7;position:absolute}' . "\n";
  print $html_out 'div.label {z-index:1;background-color:transparent;white-space:nowrap;text-align:center}' . "\n";
  print $html_out "th {text-align:left}\n";
  print $html_out "tr.toprule th,tr.toprule td {border-top:2px solid black;width:17%}\n";
  print $html_out "tr.bottomrule th,tr.bottomrule td {border-bottom:2px solid black}\n";
  print $html_out '</style>' . "\n";
  print $html_out '</head>' . "\n";
  print $html_out "<body>\n";
  ##print $html_out "<h2>Parallel Import Timing Chart</h2>\n";
  print $html_out "<table style=\"border-collapse:collapse;width:" . $chart_width . "px;";
  if ($disable_header)
  {
    print $html_out "display:none;";
  }
  print $html_out "\">\n";

  # Gather summary statistics over every job of every worker
  my $total_duration = $timing_data->{'M'}->{'E'} - $timing_data->{'M'}->{'S'};
  my $file_count = 0;
  my $data_locality = 0;
  my $total_io_time = 0;
  my $total_process_time = 0;
  my $fastest_file = 0;
  my $slowest_file = 0;
  my $problem_files = 0;
  foreach my $worker_id (keys %{$timing_data})
  {
    next if ($worker_id eq 'M');
    my $jobs = $timing_data->{$worker_id}->{'F'};
    foreach my $job_start (keys %{$jobs})
    {
      my $job = $jobs->{$job_start};
      my $process_start = $job->{'PS'};
      my $process_end = $job->{'PE'};
      my $job_end = $job->{'E'};
      my $percent_complete = $job->{'PC'};
      if ($process_start == 0 || $process_end == 0 || $job_end == 0 || ($percent_complete =~ /^\d+$/ && $percent_complete < $complete_threshold))
      {
        $problem_files++;
      }
      else
      {
        my $io_duration = ($process_start - $job_start) + ($job_end - $process_end);
        my $process_duration = $process_end - $process_start;
        # Named so as not to shadow the chart-wide $total_duration above
        my $file_duration = $io_duration + $process_duration;
        debugPrint("filename: " . $job->{'FN'});
        debugPrint("start: $job_start ps: $process_start pe: $process_end end: $job_end");
        debugPrint("io: $io_duration process: $process_duration duration: $file_duration");
        # Running stats
        $total_io_time += $io_duration;
        $total_process_time += $process_duration;
        if ($fastest_file == 0 || $file_duration < $fastest_file)
        {
          $fastest_file = $file_duration;
        }
        if ($slowest_file == 0 || $file_duration > $slowest_file)
        {
          $slowest_file = $file_duration;
        }
      }
      # Shorten filename
      my $filename = $job->{'FN'};
      if (defined $filename && $filename ne '')
      {
        if (length($filename) > length($import_dir))
        {
          $job->{'FN'} = substr($filename, length($import_dir) + 1);
        }
        else
        {
          # The common path is this very file (e.g. a single-file import), so
          # there is no prefix to remove; show just the last path component
          ($job->{'FN'}) = $filename =~ /([^\/]*)$/;
        }
      }
      $file_count++;
      if (defined $job->{'DL'} && $job->{'DL'} == 1)
      {
        $data_locality++;
      }
    }
  }
  # Guard the divisions below against empty imports
  if ($file_count <= 0)
  {
    $file_count = 1;
  }
  if ($total_process_time <= 0)
  {
    $total_process_time = 1;
  }
  my $avg_processing_time = floor(($total_io_time + $total_process_time) / $file_count);
  my $avg_io_time = int(($total_io_time / $file_count) + 0.5);
  my $avg_cpu_time = int(($total_process_time / $file_count) + 0.5);

  # Formats an epoch time as e.g. "2013Sep24 13:05:59" in local time
  my $format_timestamp = sub
  {
    my ($epoch_time) = @_;
    my ($sec, $min, $hour, $day, $month, $year) = (localtime($epoch_time))[0,1,2,3,4,5];
    return sprintf('%04d%s%02d %02d:%02d:%02d', ($year+1900), $months[$month], $day, $hour, $min, $sec);
  };

  print $html_out "<tr class=\"toprule\">\n";
  print $html_out ' <th style="width:17%;">Import Directory:</th><td style="width:83%;" colspan="5">' . $import_dir . "</td>\n";
  print $html_out "</tr>\n";

  print $html_out "<tr>\n";
  print $html_out ' <th>Start Time:</th><td>' . $format_timestamp->($timing_data->{'M'}->{'S'}) . "</td>\n";
  print $html_out ' <th>End Time:</th><td>' . $format_timestamp->($timing_data->{'M'}->{'E'}) . "</td>\n";
  print $html_out " <th>Processing Time:</th><td>" . renderTime($total_duration) . "</td>\n";
  print $html_out "</tr>\n";

  print $html_out "<tr>\n";
  print $html_out " <th>Processing Threads:</th><td>" . $number_of_workers . "</td>\n";
  print $html_out " <th>Files Processed:</th><td>" . $file_count . "</td>\n";
  print $html_out " <th>Problem Files:</th><td>" . $problem_files . "</td>\n";
  print $html_out "</tr>\n";

  print $html_out "<tr>\n";
  print $html_out ' <th>Serial Processing Time:</th><td>' . renderTime($total_process_time) . "</td>\n";
  print $html_out ' <th>Serial IO Time:</th><td>' . renderTime($total_io_time) . "</td>\n";
  print $html_out ' <th>IO Percentage:</th><td>' . sprintf('%d%%', (($total_io_time / $total_process_time) * 100)) . "</td>\n";
  print $html_out "</tr>\n";

  print $html_out "<tr>\n";
  print $html_out " <th>Avg Processing Time:</th><td>" . renderTime($avg_processing_time) . "</td>\n";
  print $html_out " <th>Avg File IO Time:</th><td>" . renderTime($avg_io_time) . "</td>\n";
  print $html_out " <th>Avg File CPU Time:</th><td>" . renderTime($avg_cpu_time) . "</td>\n";
  print $html_out "</tr>\n";

  print $html_out "<tr class=\"bottomrule\">\n";
  print $html_out " <th>Fastest File:</th><td>" . renderTime($fastest_file) . "</td>\n";
  print $html_out " <th>Slowest File:</th><td>" . renderTime($slowest_file) . "</td>\n";
  #if ($data_locality > 0)
  #{
  print $html_out " <th>Data Locality:</th><td>" . sprintf('%d%% [%d out of %d]', (($data_locality / $file_count) * 100), $data_locality, $file_count) . "</td>\n";
  #}
  #else
  #{
  # print $html_out " <th>Data Locality:</th><td><i>Not Applicable</i></td>\n";
  #}
  print $html_out "</tr>\n";

  print $html_out "</table>\n";
  # The master's outer div is left open by renderLine(); the worker bars are
  # nested inside it and it is closed by the '</div>' after the loop
  print $html_out renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'master', $timing_data->{'M'}->{'N'}, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, {}, $data_locality);
  my $worker_count = 0;
  foreach my $worker_id (nsort keys %{$timing_data})
  {
    next if ($worker_id eq 'M');
    # A limit below 1 means "no limit"; otherwise render at most that many
    # workers (this used '<=' and so rendered one too many)
    last if ($max_worker_count >= 1 && $worker_count >= $max_worker_count);
    my $data = $timing_data->{$worker_id};
    print $html_out renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'worker', $worker_id . ' [' . $data->{'N'} . ']', $data->{'S'}, $data->{'E'}, $data->{'F'}, 2); #$data_locality);
    $worker_count++;
  }
  print $html_out '</div>' . "\n";
  print $html_out "</body>\n";
  print $html_out "</html>";
  # Buffered write errors only surface when the handle is closed
  close($html_out) or die('Error! Failed to finish writing file: ' . $gantt_path . ' (' . $! . ')');
  print "Done!\n\n";
  $chart_count++;
}
## generateChart() ##
372
373
## @function debugPrint()
#
# Writes a '[DEBUG] '-prefixed line to STDERR, but only while the file-level
# $debug flag is set.
#
# @param $msg the text to report
#
sub debugPrint
{
  my ($message) = @_;
  print STDERR '[DEBUG] ' . $message . "\n" if ($debug);
}
## debugPrint() ##
385
386
## @function filenameCat
#
# Glues the given path components together with forward slashes and then
# squeezes every run of slashes and/or backslashes down to a single '/'.
#
# @param @_ the path components, in order
# @return the combined path
#
sub filenameCat
{
  my @components = @_;
  my $joined = join('/', @components);
  # Also normalises backslash separators to forward slashes
  $joined =~ s/[\/\\]+/\//g;
  return $joined;
}
## filenameCat() ##
396
397
## @function printError()
#
# Aborts the script with the given message, prefixed by 'Error! '. The
# message ends in a newline so die() does not append the file and line.
#
# @param $msg description of what went wrong
#
sub printError
{
  my ($msg) = @_;
  die("Error! $msg\n\n");
}
## printError() ##
406
407
## @function printUsage()
#
# Optionally prints an error message to STDOUT, then dies with the usage
# summary.
#
# @param $msg optional error message. The callers in this script already
#             start their messages with 'Error! ', so the prefix is only
#             added when it is missing (previously it was always added,
#             giving 'Error! Error! ...').
#
sub printUsage
{
  my ($msg) = @_;
  if (defined $msg)
  {
    if ($msg !~ /^Error!/)
    {
      $msg = 'Error! ' . $msg;
    }
    print $msg . "\n";
  }
  # Lists every option accepted by the option parser
  die("Usage: generate_gantt.pl [-debug] [-width <width in pixels>] [-grayscale] [-noheader] [-maxworkers <count>] <dir> [<dir> ...]\n\n");
}
## printUsage() ##
420
421
## @function longestCommonPath
#
# Returns the leading path components that two paths share.
#
# @param $path_new     the path to compare
# @param $path_current the common path found so far, or undef on the first
#                      call (in which case $path_new is returned unchanged)
# @return the shared prefix, rebuilt with filenameCat(); an empty string when
#         the paths share nothing
#
sub longestCommonPath
{
  my ($path_new, $path_current) = @_;
  if (!defined $path_current)
  {
    return $path_new;
  }
  # Hide protocol before we split by slash, remembering whether there was one
  # so that we only restore what we actually hid
  my $had_protocol = ($path_new =~ s/:\/\//:/) ? 1 : 0;
  $path_current =~ s/:\/\//:/;
  my @path_new_parts = split(/\//, $path_new);
  my @path_current_parts = split(/\//, $path_current);
  my @path_parts;
  for (my $i = 0; $i < scalar(@path_current_parts); $i++)
  {
    # Stop when the new path runs out of components or the paths diverge
    last if ($i > $#path_new_parts);
    last if ($path_current_parts[$i] ne $path_new_parts[$i]);
    push(@path_parts, $path_new_parts[$i]);
  }
  my $result = filenameCat(@path_parts);
  # Restore protocol - but never touch a colon that is merely part of a
  # directory name
  if ($had_protocol)
  {
    $result =~ s/:/:\/\//;
  }
  return $result;
}
## longestCommonPath() ##
458
459
## @function renderLine()
#
# Builds the HTML for one thread's bar plus one bar per job it ran.
#
# @param $table_width   width in pixels that the master's duration maps onto
# @param $start         master start time
# @param $end           master end time
# @param $class         'master' or 'worker'; used as CSS class and label
# @param $tname         display name of this thread
# @param $tstart        this thread's start time
# @param $tend          this thread's end time
# @param $jobs          hashref of jobs keyed by start time, each holding
#                       'FN', 'PS', 'PE', 'E', 'DL' and 'PC'
# @param $data_locality when greater than 1, jobs whose 'DL' is not 1 are
#                       marked as non-local
# @return the HTML fragment. For the 'master' class the outer thread div is
#         left open; the caller is expected to close it.
#
sub renderLine
{
  my ($table_width, $start, $end, $class, $tname, $tstart, $tend, $jobs, $data_locality) = @_;
  debugPrint("renderLine($table_width, $start, $end, $class, $tname, $tstart, $tend, <jobs>, $data_locality)");
  # All timings need to be relative to 0 (relative start)
  my $duration = $end - $start;
  my $rtstart = $tstart - $start;
  my $rtend = $tend - $start;
  # Scales a span of seconds to pixels. A zero-length master span would
  # otherwise be a division by zero, so everything collapses to 0 pixels.
  my $to_pixels = sub
  {
    my ($seconds) = @_;
    return 0 if ($duration == 0);
    return ($seconds / $duration) * $table_width;
  };
  # We need to scale these depending on the timing of this thread relative to
  # the master thread
  my $width = $table_width;
  my $left = 0;
  if ($start != $tstart)
  {
    $left = $to_pixels->($rtstart);
  }
  # - subtract any left offset from width
  $width = $width - $left;
  # - right offset directly subtracted from width
  if ($end != $tend)
  {
    $width = $width - $to_pixels->($duration - $rtend);
  }
  # Round things off to the nearest pixel
  $left = int($left + 0.5);
  $width = int($width + 0.5);
  # Output the bar for this master/worker
  my $html = '<div class="thread ' . $class . '" style="left:' . $left . 'px;width:' . $width . 'px;">';
  if ($class eq 'master')
  {
    $html .= '<div style="background-color:' . $color_master . ';margin-bottom:1px">';
  }
  $html .= '<div class="time" style="display:table-cell">' . renderTime($rtstart) . '</div><div style="display:table-cell;padding-left:20px;width:100%;">' . ucfirst($class) . ': ' . $tname . '</div><div class="time" style="display:table-cell">' . renderTime($rtend) . '</div></div>';
  # Keys are start times, so they must be ordered numerically
  foreach my $jstart (sort { $a <=> $b } keys %{$jobs})
  {
    my $job = $jobs->{$jstart};
    my $filename = defined $job->{'FN'} ? $job->{'FN'} : '';
    my $rjstart = $jstart - $start;
    my $rpstart = $job->{'PS'} - $start;
    my $rpend = $job->{'PE'} - $start;
    my $rjend = $job->{'E'} - $start;
    my $jduration = $job->{'E'} - $jstart;
    my $io_duration = $rpstart - $rjstart;
    my $cpu_duration = $rpend - $rpstart;
    # Scale Job co-ordinates
    my $jleft = int($to_pixels->($rjstart) + 0.5);
    # -2 for left and right 1 pixel border
    my $jwidth = int($to_pixels->($jduration) + 0.5) - 2;
    if ($jleft + $jwidth > $left + $width)
    {
      $jwidth = ($left + $width) - $jleft;
    }
    # Very short jobs can end up narrower than their own border; a negative
    # width is not valid CSS
    if ($jwidth < 0)
    {
      $jwidth = 0;
    }
    # Then scale process timings within that!
    my $rpleft = int($to_pixels->($rpstart - $rjstart) + 0.5);
    my $rpwidth = $jwidth - $rpleft;
    if ($rpwidth < 0)
    {
      $rpwidth = 0;
    }
    # A zero-width job has no meaningful ratio (and must not divide by zero)
    my $cpu_percent = ($jwidth > 0) ? int((($rpwidth / $jwidth) * 100) + 0.5) : 0;
    # Only flag non-local jobs when the caller asked for locality marking
    my $non_local = ($data_locality > 1 && !(defined $job->{'DL'} && $job->{'DL'} == 1)) ? 1 : 0;
    $html .= '<div class="job" style="left:' . $jleft . 'px;width:' . $jwidth . 'px;';
    if ($non_local)
    {
      $html .= 'border:1px solid #C7C7C7;';
    }
    $html .= '" title="FN:' . $filename . ', S:' . renderTime($rjstart) . ', E:' . renderTime($rjend) . ', CPU: ' . $cpu_percent . '% [' . renderTime($io_duration) . ', ' . renderTime($cpu_duration) . ', PC: ' . $job->{'PC'} . '%]"><span class="process" style="left:' . $rpleft . 'px;width:' . $rpwidth . 'px">&nbsp;</span><div class="label" style="width:' . $jwidth;
    if ($non_local)
    {
      $html .= ';color:' . $color_nlocal;
    }
    $html .= '">' . $filename;
    if ($job->{'PC'} ne 'NA')
    {
      $html .= ' <b>[Incomplete! ' . $job->{'PC'} . '%]</b>';
    }
    if ($non_local)
    {
      $html .= ' [NL]';
    }
    $html .= '</div></div>';
  }
  return $html;
}
## renderLine() ##
548
549
## @function renderTime()
#
# Formats a number of seconds as "<h>h<mm>m<ss>s", or as "<m>m<ss>s" when
# the value is less than one hour. Fractions of a second are truncated by
# the %d conversion.
#
# @param $seconds the duration in seconds
# @return the formatted duration
#
sub renderTime
{
  my ($seconds) = @_;
  # Peel off whole hours, then whole minutes, leaving the seconds
  my $hours = floor($seconds / (60 * 60));
  $seconds -= $hours * (60 * 60);
  my $minutes = floor($seconds / 60);
  $seconds -= $minutes * 60;
  return sprintf('%dh%02dm%02ds', $hours, $minutes, $seconds) if ($hours > 0);
  return sprintf('%dm%02ds', $minutes, $seconds);
}
## renderTime() ##
Note: See TracBrowser for help on using the repository browser.