source: gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl@ 29663

Last change on this file since 29663 was 29663, checked in by jmt12, 9 years ago

Supporting grayscale printing, fixing mismatched tags and speechmarks, and making data locality and incomplete import occurrences more obvious

  • Property svn:executable set to *
File size: 18.5 KB
Line 
1#!/usr/bin/perl
2
3# Pragma
4use strict;
5use warnings;
6
7# Modules
8use Sort::Naturally;
9use POSIX qw(floor strftime);
10
print "\n===== Generate Timing (GANTT) =====\n";

# 0. Init
# - settings that the command line options parsed below may override
my $chart_width = 1600;       # width of the generated chart, in pixels
my $debug = 0;                # when true, debugPrint() writes to STDERR
my $disable_header = 0;       # when true, the summary table is hidden
my $max_worker_count = 0;     # values below 1 mean 'no limit'
my $color_master = 'blue';
my $color_worker = 'green';
my $color_nlocal = 'red';
# - any video more than 95% complete is probably complete with rounding errors
my $complete_threshold = 95;
# - shared state
my $chart_count = 0;          # number of charts written so far
my @months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec");
28
# 1. Parse options
# Leading arguments that start with '-' are treated as options; parsing stops
# at the first argument that does not. Anything invalid ends the script via
# printUsage().
# - remember whether -grayscale was given so the summary below reports it
#   (the summary previously echoed the debug flag by mistake)
my $grayscale = 0;
while (defined $ARGV[0] && $ARGV[0] =~ /^-/)
{
  my $option = shift(@ARGV);
  if ($option eq '-debug')
  {
    $debug = 1;
  }
  elsif ($option eq '-width')
  {
    if (!defined $ARGV[0])
    {
      &printUsage('Error! No width value specified');
    }
    my $value = shift(@ARGV);
    if ($value !~ /^\d+$/)
    {
      &printUsage('Error! Chart width not a number');
    }
    $chart_width = $value;
  }
  elsif ($option eq '-grayscale')
  {
    # Shades chosen so master, worker and non-local jobs remain
    # distinguishable when printed without color
    $grayscale = 1;
    $color_master = '#1D1D1D';
    $color_worker = '#969696';
    $color_nlocal = '#4C4C4C';
  }
  elsif ($option eq '-noheader')
  {
    $disable_header = 1;
  }
  elsif ($option eq '-maxworkers')
  {
    if (!defined $ARGV[0])
    {
      &printUsage('Error! No maxworkers value specified');
    }
    my $value = shift(@ARGV);
    if ($value !~ /^\d+$/)
    {
      &printUsage('Error! Maxworkers not a number');
    }
    $max_worker_count = $value;
  }
  else
  {
    &printUsage('Error! Unknown option: ' . $option);
  }
}
print "Chart Width: " . $chart_width . "px\n";
print "Grayscale? " . ($grayscale ? 'Yes' : 'No') . "\n";
print "Debug? " . ($debug ? 'Yes' : 'No') . "\n";
print "===================================\n\n";
82
# 2. Search for valid directories (containing timing.csv)
# - every remaining argument must name a directory; each one is searched
#   recursively
while (defined(my $dir = shift(@ARGV)))
{
  &printUsage('Error! Not a directory: ' . $dir) unless (-d $dir);
  # - drop one trailing slash or backslash before searching
  $dir = $1 if ($dir =~ /(.*)[\\\/]$/);
  &searchForTimingCSV($dir);
}

# 3. Done
# - report how many charts were written, then stop
print "Complete!\n\n",
      "===================================\n",
      'Generated ' . $chart_count . " charts\n",
      "===================================\n\n";
exit;
## main() ##
105
106
## @function searchForTimingCSV()
#
# Recursively walk a directory tree, generating a chart for every directory
# that contains a timing.csv file.
#
# @param $dir the directory to search; entries whose names start with a dot
#        are ignored
#
sub searchForTimingCSV
{
  my ($dir) = @_;
  # For every directory where we find a timing.csv we generate a gantt chart
  my $timing_path = &filenameCat($dir, 'timing.csv');
  if (-e $timing_path)
  {
    &generateChart($dir, $timing_path);
  }
  # We also recursively search for other directories containing timing.csv's
  opendir(my $dh, $dir) or &printError('Failed to open directory for reading: ' . $dir . ' (' . $! . ')');
  my @files = readdir($dh);
  # - the listing is fully read, so release the handle before recursing rather
  #   than holding one open handle per level of directory depth
  closedir($dh);
  foreach my $file (@files)
  {
    # - skips '.', '..' and any other hidden entry
    next if ($file =~ /^\./);
    my $path = &filenameCat($dir, $file);
    if (-d $path)
    {
      &searchForTimingCSV($path);
    }
  }
}
## searchForTimingCSV() ##
134
135
## @function generateChart()
#
# Parse a timing.csv file and write an HTML gantt chart next to it, named
# '<digits>-gantt.html' where <digits> are the trailing digits of the
# directory name (empty if there are none). Increments the global chart
# counter on success.
#
# Rows of timing.csv are distinguished by their second field:
#  - 'M0'     the master thread (name, start, end in fields 3-5)
#  - 'W<n>'   a worker thread   (hostname, start, end in fields 3-5)
#  - 'T<n>'   a task/file processed by the worker whose id is in field 8
#
# @param $dir the directory the chart is written to
# @param $timing_csv_path the path to the timing.csv to parse
#
sub generateChart
{
  my ($dir, $timing_csv_path) = @_;
  my $import_dir;
  my ($epoc) = $dir =~ /(\d+)$/;
  # - a directory without a numeric suffix still gets a (prefix-less) chart
  if (!defined $epoc)
  {
    $epoc = '';
  }
  my $gantt_path = $dir . '/' . $epoc . '-gantt.html';

  print ' * Generating chart for: ' . $dir . "\n";
  print ' - timing file: ' . $timing_csv_path . "\n";
  print ' - gantt chart: ' . $gantt_path . "\n";

  # Read in timing.csv and parse information into data structure
  print ' - parsing timing.csv... ';
  my $timing_data = {};
  my $id_2_worker_id = {};
  open(my $tin, '<:utf8', $timing_csv_path) or die('Error! Failed to open file for reading: ' . $timing_csv_path . ' (' . $! . ')');
  while (my $line = <$tin>)
  {
    my @parts = split(/,/, $line);
    # - blank or truncated lines carry no record type
    if (!defined $parts[1])
    {
      next;
    }
    if ($parts[1] eq 'M0')
    {
      $timing_data->{'M'} = {'N'=>$parts[2], 'S'=>$parts[3], 'E'=>$parts[4]};
    }
    elsif ($parts[1] =~ /W\d+/)
    {
      my $worker_id = $parts[1];
      my $hostname = $parts[2];
      # Alter the worker name for compute nodes so they can be naturally
      # sorted
      if ($hostname =~ /compute-0-(\d+)/)
      {
        $worker_id = 'W' . $1;
      }
      $timing_data->{$worker_id} = {'N'=>$hostname, 'S'=>$parts[3], 'E'=>$parts[4], 'F'=>{}};
      $id_2_worker_id->{$parts[0]} = $worker_id;
    }
    elsif ($parts[1] =~ /T\d+/)
    {
      my $worker_id = $id_2_worker_id->{$parts[7]};
      my $stop = $parts[4];
      my $filepath = defined $parts[8] ? $parts[8] : '';
      $filepath =~ s/^\s+|\s+$//g;
      # 'NA' means 'no completeness problem to report'
      my $percent_complete = 'NA';
      if (defined($parts[9]))
      {
        $percent_complete = $parts[9];
        # - trims the line ending (including any carriage return)
        $percent_complete =~ s/^\s+|\s+$//g;
        if ($percent_complete eq '' || $percent_complete >= $complete_threshold)
        {
          $percent_complete = 'NA';
        }
      }
      $import_dir = &longestCommonPath($filepath, $import_dir);
      # - jobs are keyed by start time, so nudge the key until it is unique
      my $start_time = $parts[3];
      while (defined $timing_data->{$worker_id}->{'F'}->{$start_time})
      {
        $start_time += 0.000001;
      }
      $timing_data->{$worker_id}->{'F'}->{$start_time} = {'FN'=>$filepath, 'S'=>$parts[3], 'PS'=>($stop - $parts[5]), 'PE'=>$stop, 'E'=>$stop, 'DL'=>$parts[6], 'PC'=>$percent_complete};
    }
  }
  close($tin);
  my $number_of_workers = scalar(keys(%{$id_2_worker_id}));
  # - every position on the chart is scaled against the master's start and
  #   end, so nothing meaningful can be drawn without the master record
  if (!defined $timing_data->{'M'})
  {
    print "Failed! No master (M0) record found in timing.csv\n\n";
    return;
  }
  if (!defined $import_dir)
  {
    $import_dir = '';
  }
  print "Done\n";

  # 3. Produce pretty HTML chart of timing information including jobs
  print " - generating timing information as chart in HTML... ";
  open(my $html_out, '>:utf8', $gantt_path) or die('Error! Failed to open file for writing: ' . $gantt_path . ' (' . $! . ')');
  print {$html_out} "<html>\n";
  print {$html_out} '<head>' . "\n";
  print {$html_out} '<style type="text/css">' . "\n";
  print {$html_out} "body {margin:0px;padding:4px}\n";
  print {$html_out} 'div.thread {position:relative}' . "\n";
  print {$html_out} 'div.master {border:1px solid gray;color:white;font-weight:bold}' . "\n";
  print {$html_out} 'div.worker {border:1px solid black;background-color:' . $color_worker . ';color:white;font-weight:bold;margin-bottom:1px;}' . "\n";
  print {$html_out} 'div.time {font-size:smaller;font-weight:normal}' . "\n";
  print {$html_out} 'div.job {background-color:transparent;color:black;border:1px solid black;display:block;font-size:smaller;font-weight:normal;position:relative;text-align:left;margin-bottom:1px;}' . "\n";
  print {$html_out} 'span.process {z-index:-1;background-color:#C7C7C7;position:absolute}' . "\n";
  print {$html_out} 'div.label {z-index:1;background-color:transparent;white-space:nowrap;text-align:center}' . "\n";
  print {$html_out} "th {text-align:left}\n";
  print {$html_out} "tr.toprule th,tr.toprule td {border-top:2px solid black;width:17%}\n";
  print {$html_out} "tr.bottomrule th,tr.bottomrule td {border-bottom:2px solid black}\n";
  print {$html_out} '</style>' . "\n";
  print {$html_out} '</head>' . "\n";
  print {$html_out} "<body>\n";
  print {$html_out} "<table style=\"border-collapse:collapse;width:" . $chart_width . "px;";
  if ($disable_header)
  {
    print {$html_out} "display:none;";
  }
  print {$html_out} "\">\n";

  # Gather summary statistics over every job of every worker
  my $total_duration = $timing_data->{'M'}->{'E'} - $timing_data->{'M'}->{'S'};
  my $file_count = 0;
  my $data_locality = 0;
  my $total_io_time = 0;
  my $total_process_time = 0;
  my $fastest_file = 0;
  my $slowest_file = 0;
  my $problem_files = 0;
  # - prefix removed from each filename to shorten the chart labels
  my $import_prefix = $import_dir . '/';
  foreach my $worker_id (keys %{$timing_data})
  {
    if ($worker_id eq 'M')
    {
      next;
    }
    foreach my $job_start ( keys %{$timing_data->{$worker_id}->{'F'}} )
    {
      my $job = $timing_data->{$worker_id}->{'F'}->{$job_start};
      my $process_start = $job->{'PS'};
      my $process_end = $job->{'PE'};
      my $job_end = $job->{'E'};
      my $percent_complete = $job->{'PC'};
      # - decimal percentages count too, not just whole numbers
      if ($process_start == 0 || $process_end == 0 || $job_end == 0 || ($percent_complete =~ /^\d+(?:\.\d+)?$/ && $percent_complete < $complete_threshold))
      {
        $problem_files++;
      }
      else
      {
        my $io_duration = ($process_start - $job_start) + ($job_end - $process_end);
        my $process_duration = $process_end - $process_start;
        my $file_duration = $io_duration + $process_duration;
        &debugPrint("filename: " . $job->{'FN'});
        &debugPrint("start: $job_start ps: $process_start pe: $process_end end: $job_end");
        &debugPrint("io: $io_duration process: $process_duration duration: $file_duration");
        # Running stats
        $total_io_time += $io_duration;
        $total_process_time += $process_duration;
        if ($fastest_file == 0 || $file_duration < $fastest_file)
        {
          $fastest_file = $file_duration;
        }
        if ($slowest_file == 0 || $file_duration > $slowest_file)
        {
          $slowest_file = $file_duration;
        }
      }
      # Shorten filename - but only when it really does sit below the import
      # directory, otherwise leave it untouched rather than clipping it
      if (defined $job->{'FN'} && index($job->{'FN'}, $import_prefix) == 0)
      {
        $job->{'FN'} = substr($job->{'FN'}, length($import_prefix));
      }
      $file_count++;
      if ($job->{'DL'} == 1)
      {
        $data_locality++;
      }
    }
  }
  # - guard the divisions below without falsifying the displayed totals
  my $file_divisor = ($file_count > 0) ? $file_count : 1;
  my $process_divisor = ($total_process_time > 0) ? $total_process_time : 1;
  my $avg_processing_time = floor(($total_io_time + $total_process_time) / $file_divisor);
  my $avg_io_time = int(($total_io_time / $file_divisor) + 0.5);
  my $avg_cpu_time = int(($total_process_time / $file_divisor) + 0.5);

  # - formats an epoch time as e.g. 2013Sep24 14:05:09 (local time)
  my $render_timestamp = sub
  {
    my ($epoch_time) = @_;
    my ($sec, $min, $hour, $day, $month, $year) = (localtime($epoch_time))[0,1,2,3,4,5];
    return sprintf('%04d%s%02d %02d:%02d:%02d', ($year+1900), $months[$month], $day, $hour, $min, $sec);
  };

  print {$html_out} "<tr class=\"toprule\">\n";
  print {$html_out} ' <th style="width:17%;">Import Directory:</th><td style="width:83%;" colspan="5">' . $import_dir . "</td>\n";
  print {$html_out} "</tr>\n";

  print {$html_out} "<tr>\n";
  print {$html_out} ' <th>Start Time:</th><td>' . $render_timestamp->($timing_data->{'M'}->{'S'}) . "</td>\n";
  print {$html_out} ' <th>End Time:</th><td>' . $render_timestamp->($timing_data->{'M'}->{'E'}) . "</td>\n";
  print {$html_out} " <th>Processing Time:</th><td>" . &renderTime($total_duration) . "</td>\n";
  print {$html_out} "</tr>\n";

  print {$html_out} "<tr>\n";
  print {$html_out} " <th>Processing Threads:</th><td>" . $number_of_workers . "</td>\n";
  print {$html_out} " <th>Files Processed:</th><td>" . $file_count . "</td>\n";
  print {$html_out} " <th>Problem Files:</th><td>" . $problem_files . "</td>\n";
  print {$html_out} "</tr>\n";

  print {$html_out} "<tr>\n";
  print {$html_out} ' <th>Serial Processing Time:</th><td>' . &renderTime($total_process_time) . "</td>\n";
  print {$html_out} ' <th>Serial IO Time:</th><td>' . &renderTime($total_io_time) . "</td>\n";
  print {$html_out} ' <th>IO Percentage:</th><td>' . sprintf('%d%%', (($total_io_time / $process_divisor) * 100)) . "</td>\n";
  print {$html_out} "</tr>\n";

  print {$html_out} "<tr>\n";
  print {$html_out} " <th>Avg Processing Time:</th><td>" . &renderTime($avg_processing_time) . "</td>\n";
  print {$html_out} " <th>Avg File IO Time:</th><td>" . &renderTime($avg_io_time) . "</td>\n";
  print {$html_out} " <th>Avg File CPU Time:</th><td>" . &renderTime($avg_cpu_time) . "</td>\n";
  print {$html_out} "</tr>\n";

  print {$html_out} "<tr class=\"bottomrule\">\n";
  print {$html_out} " <th>Fastest File:</th><td>" . &renderTime($fastest_file) . "</td>\n";
  print {$html_out} " <th>Slowest File:</th><td>" . &renderTime($slowest_file) . "</td>\n";
  print {$html_out} " <th>Data Locality:</th><td>" . sprintf('%d%% [%d out of %d]', (($data_locality / $file_divisor) * 100), $data_locality, $file_count) . "</td>\n";
  print {$html_out} "</tr>\n";

  print {$html_out} "</table>\n";
  # The master's outer div is left open by renderLine() so that the worker
  # bars nest inside it; it is closed by the '</div>' printed after the loop
  print {$html_out} renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'master', $timing_data->{'M'}->{'N'}, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, {}, $data_locality);
  my $worker_count = 0;
  foreach my $worker_id (nsort keys %{$timing_data})
  {
    if ($worker_id eq 'M')
    {
      next;
    }
    # - honour -maxworkers: draw exactly that many workers (values below 1
    #   mean no limit)
    if ($max_worker_count > 0 && $worker_count >= $max_worker_count)
    {
      last;
    }
    my $data = $timing_data->{$worker_id};
    # - the literal 2 always enables the non-local job highlighting
    print {$html_out} renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'worker', $worker_id . ' [' . $data->{'N'} . ']', $data->{'S'}, $data->{'E'}, $data->{'F'}, 2);
    $worker_count++;
  }
  print {$html_out} '</div>' . "\n";
  print {$html_out} "</body>\n";
  print {$html_out} "</html>";
  # - buffered write errors only surface when the file is closed
  close($html_out) or die('Error! Failed to write file: ' . $gantt_path . ' (' . $! . ')');
  print "Done!\n\n";
  $chart_count++;
}
## generateChart() ##
372
373
## @function debugPrint()
#
# Write a '[DEBUG] '-prefixed message to STDERR, but only when the global
# debug flag is set.
#
# @param $msg the text to report
#
sub debugPrint
{
  my ($msg) = @_;
  print STDERR '[DEBUG] ' . $msg . "\n" if ($debug);
}
## debugPrint() ##
385
386
## @function filenameCat
#
# Join the given path fragments with forward slashes, then collapse every run
# of slashes and/or backslashes in the result into a single forward slash.
#
# @param @_ the path fragments, in order
# @return the joined path
#
sub filenameCat
{
  my @fragments = @_;
  (my $joined = join('/', @fragments)) =~ s/[\/\\]+/\//g;
  return $joined;
}
## filenameCat() ##
396
397
## @function printError()
#
# Abort the script, reporting the given message prefixed with 'Error! '. The
# message ends in newlines, so Perl does not append the file and line number.
#
# @param $msg a description of what went wrong
#
sub printError
{
  my ($msg) = @_;
  my $report = 'Error! ' . $msg . "\n\n";
  die($report);
}
## printError() ##
406
407
## @function printUsage()
#
# Optionally print an error message, then abort the script with the usage
# summary.
#
# @param $msg (optional) the problem to report before the usage line. The
#        'Error! ' prefix is added only when the message does not already
#        start with it, as the callers in this script pass pre-prefixed
#        messages and the prefix used to be printed twice.
#
sub printUsage
{
  my $msg = shift(@_);
  if (defined $msg)
  {
    if ($msg !~ /^Error!/)
    {
      $msg = 'Error! ' . $msg;
    }
    print $msg . "\n";
  }
  die("Usage: generate_gantt.pl [-debug] [-width <width in pixels>] [-grayscale] [-noheader] [-maxworkers <count>] <dir> [<dir> ...]\n\n");
}
## printUsage() ##
420
421
## @function longestCommonPath
#
# Determine the longest leading run of path components shared by two paths.
# Used as a running fold: the result for one file is passed back in as
# $path_current for the next.
#
# @param $path_new the path to compare
# @param $path_current the common path found so far; when undefined,
#        $path_new is returned unchanged
# @return the common path ('' when the paths share no leading component)
#
sub longestCommonPath
{
  my ($path_new, $path_current) = @_;
  if (!defined $path_current)
  {
    return $path_new;
  }
  if (!defined $path_new)
  {
    $path_new = '';
  }
  # Hide protocol before we split by slash, remembering whether there was one
  # so that only a genuine protocol separator is restored afterwards
  my $had_protocol = ($path_current =~ s/:\/\//:/) ? 1 : 0;
  $path_new =~ s/:\/\//:/;
  my @path_new_parts = split(/\//, $path_new);
  my @path_current_parts = split(/\//, $path_current);
  my @path_parts;
  # - stop at the end of the shorter path as well as at the first difference
  for (my $i = 0; $i < scalar(@path_current_parts) && $i < scalar(@path_new_parts); $i++)
  {
    if ($path_current_parts[$i] ne $path_new_parts[$i])
    {
      last;
    }
    push(@path_parts, $path_new_parts[$i]);
  }
  my $result = &filenameCat(@path_parts);
  # Restore protocol - previously the first colon was rewritten even in paths
  # that never had a protocol, corrupting e.g. '/data/a:b'
  if ($had_protocol)
  {
    $result =~ s/:/:\/\//;
  }
  return $result;
}
## longestCommonPath() ##
458
459
## @function renderLine()
#
# Render one thread (the master or a worker) and its jobs as HTML.
#
# The master's outer 'thread' div is deliberately left open: only its inner
# colored div is closed here, and the caller emits the closing tag once the
# workers have been rendered inside it. A worker's 'thread' div is closed
# before its jobs are appended.
#
# @param $table_width the width in pixels that [$start, $end] is scaled to
# @param $start the start time of the whole run (the master's start)
# @param $end the end time of the whole run (the master's end)
# @param $class 'master' or 'worker'
# @param $tname the label for this thread
# @param $tstart the start time of this thread
# @param $tend the end time of this thread
# @param $jobs a hash reference of jobs keyed by start time, each with the
#        keys FN, PS, PE, E, DL and PC
# @param $data_locality non-local jobs (DL not 1) are highlighted only when
#        this is greater than 1
# @return the generated HTML
#
sub renderLine
{
  my ($table_width, $start, $end, $class, $tname, $tstart, $tend, $jobs, $data_locality) = @_;
  &debugPrint("renderLine($table_width, $start, $end, $class, $tname, $tstart, $tend, <jobs>, $data_locality)");
  # - names come from timing.csv and end up in HTML text and attributes
  my $escape = sub
  {
    my ($text) = @_;
    if (!defined $text)
    {
      return '';
    }
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
  };
  # All timings need to be relative to 0 (relative start)
  my $duration = $end - $start;
  my $rtstart = $tstart - $start;
  my $rtend = $tend - $start;
  # - a zero-length run would otherwise abort with a division by zero
  my $scale = ($duration != 0) ? $duration : 1;
  # We need to scale these depending on the timing of this thread relative to
  # the master thread
  my $width = $table_width;
  my $left = 0;
  if ($start != $tstart)
  {
    $left = ($rtstart / $scale) * $table_width;
  }
  # - subtract any left offset from width
  $width = $width - $left;
  # - right offset directly subtracted from width
  if ($end != $tend)
  {
    $width = $width - ((($duration - $rtend) / $scale) * $table_width);
  }
  # Round to the nearest pixel
  $left = int($left + 0.5);
  $width = int($width + 0.5);
  # Output the bar for this master/worker
  my $html = '<div class="thread ' . $class . '" style="left:' . $left . 'px;width:' . $width . 'px;">';
  if ($class eq 'master')
  {
    $html .= '<div style="background-color:' . $color_master . ';margin-bottom:1px">';
  }
  $html .= '<div class="time" style="display:table-cell">' . &renderTime($rtstart) . '</div><div style="display:table-cell;padding-left:20px;width:100%;">' . ucfirst($class) . ': ' . $escape->($tname) . '</div><div class="time" style="display:table-cell">' . &renderTime($rtend) . '</div></div>';
  # - job keys are (possibly fractional) timestamps, so sort them numerically
  foreach my $jstart (sort { $a <=> $b } keys %{$jobs})
  {
    my $job = $jobs->{$jstart};
    my $rjstart = $jstart - $start;
    my $rpstart = $job->{'PS'} - $start;
    my $rpend = $job->{'PE'} - $start;
    my $rjend = $job->{'E'} - $start;
    my $jduration = $job->{'E'} - $jstart;
    my $io_duration = $rpstart - $rjstart;
    my $cpu_duration = $rpend - $rpstart;
    # Scale Job co-ordinates
    my $jleft = int((($rjstart / $scale) * $table_width) + 0.5);
    # -2 for left and right 1 pixel border
    my $jwidth = int((($jduration / $scale) * $table_width) + 0.5) - 2;
    if ($jleft + $jwidth > $left + $width)
    {
      $jwidth = ($left + $width) - $jleft;
    }
    # - very short jobs can round down to a negative width, which is not
    #   valid CSS
    if ($jwidth < 0)
    {
      $jwidth = 0;
    }
    # Then scale process timings within that!
    my $rpleft = int(((($rpstart - $rjstart) / $scale) * $table_width) + 0.5);
    my $rpwidth = $jwidth - $rpleft;
    if ($rpwidth < 0)
    {
      $rpwidth = 0;
    }
    # - a zero pixel wide job has no meaningful ratio (and must not divide)
    my $cpu_percent = ($jwidth > 0) ? int((($rpwidth / $jwidth) * 100) + 0.5) : 0;
    my $is_nonlocal = ($data_locality > 1 && (!defined $job->{'DL'} || $job->{'DL'} != 1)) ? 1 : 0;
    my $filename = $escape->($job->{'FN'});
    my $percent_complete = $escape->($job->{'PC'});
    $html .= '<div class="job" style="left:' . $jleft . 'px;width:' . $jwidth . 'px;';
    if ($is_nonlocal)
    {
      $html .= 'border:1px solid #C7C7C7;';
    }
    $html .= '" title="FN:' . $filename . ', S:' . &renderTime($rjstart) . ', E:' . &renderTime($rjend) . ', CPU: ' . $cpu_percent . '% [' . &renderTime($io_duration) . ', ' . &renderTime($cpu_duration) . ', PC: ' . $percent_complete . '%]"><span class="process" style="left:' . $rpleft . 'px;width:' . $rpwidth . 'px">&nbsp;</span><div class="label" style="width:' . $jwidth . 'px';
    if ($is_nonlocal)
    {
      $html .= ';color:' . $color_nlocal;
    }
    $html .= '">' . $filename;
    if ($percent_complete ne 'NA')
    {
      $html .= ' <b>[Incomplete! ' . $percent_complete . '%]</b>';
    }
    if ($is_nonlocal)
    {
      $html .= ' [NL]';
    }
    $html .= '</div></div>';
  }
  return $html;
}
## renderLine() ##
548
549
## @function renderTime()
#
# Format a duration in seconds as '<h>h<mm>m<ss>s', or as '<m>m<ss>s' when it
# is shorter than an hour. Fractions of a second are discarded.
#
# @param $seconds the duration; an undefined value is treated as 0. Negative
#        durations are rendered with a leading '-' (previously -5 came out
#        as '59m55s').
# @return the formatted string
#
sub renderTime
{
  my ($seconds) = @_;
  if (!defined $seconds)
  {
    $seconds = 0;
  }
  # - work on the whole-second magnitude and re-attach the sign at the end
  my $remaining = int(abs($seconds));
  my $sign = ($seconds < 0 && $remaining > 0) ? '-' : '';
  # determine how many hours
  my $an_hour = 60 * 60;
  my $hours = floor($remaining / $an_hour);
  $remaining = $remaining - ($hours * $an_hour);
  # then how many minutes of what is left
  my $a_minute = 60;
  my $minutes = floor($remaining / $a_minute);
  $remaining = $remaining - ($minutes * $a_minute);
  if ($hours > 0)
  {
    return $sign . sprintf('%dh%02dm%02ds', $hours, $minutes, $remaining);
  }
  return $sign . sprintf('%dm%02ds', $minutes, $remaining);
}
Note: See TracBrowser for help on using the repository browser.