source: gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl@ 28188

Last change on this file since 28188 was 28188, checked in by jmt12, 11 years ago

Minor fix to allow for tasks that start in the same second (each is now offset by 1 microsecond)

  • Property svn:executable set to *
File size: 16.8 KB
Line 
1#!/usr/bin/perl
2
3# Pragma
4use strict;
5use warnings;
6use 5.012; # so readdir assigns to $_ in a lone while test
7
8# Modules
9use Sort::Naturally;
10use POSIX qw(floor strftime);
11
print "\n===== Generate Timing (GANTT) =====\n";

# 0. Init
# - configurables (may be overridden from the command line)
my $chart_width = 1600;
my $debug       = 0;
# - globals shared with the subroutines below
my $chart_count = 0;
my @months      = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

# 1. Parse options - everything up to the first argument that does not start
#    with a dash
OPTION:
while (defined $ARGV[0] && $ARGV[0] =~ /^-/)
{
  my $option = shift @ARGV;
  if ($option eq '-debug')
  {
    $debug = 1;
    next OPTION;
  }
  # printUsage() never returns, so the checks below act as guard clauses
  printUsage('Error! Unknown option: ' . $option) unless $option eq '-width';
  printUsage('Error! No width value specified') unless defined $ARGV[0];
  my $value = shift @ARGV;
  printUsage('Error! Chart width not a number') unless $value =~ /^\d+$/;
  $chart_width = $value;
}
print "Chart Width: ${chart_width}px\n";
print 'Debug? ', ($debug ? 'Yes' : 'No'), "\n";
print "===================================\n\n";

# 2. Search for valid directories (containing timing.csv) - every remaining
#    argument must name a directory
while (defined(my $dir = shift @ARGV))
{
  printUsage('Error! Not a directory: ' . $dir) unless -d $dir;
  searchForTimingCSV($dir);
}

# 3. Done - report how many charts were written
print "Complete!\n\n";
print "===================================\n";
print "Generated $chart_count charts\n";
print "===================================\n\n";
exit;
## main() ##
70
71
## @function searchForTimingCSV()
#
# Recursively walk a directory tree and generate a gantt chart for every
# directory that contains a timing.csv file.
#
# @param $dir path of the directory to search
#
sub searchForTimingCSV
{
  my ($dir) = @_;
  # For every directory where we find a timing.csv we generate a gantt chart
  my $timing_path = filenameCat($dir, 'timing.csv');
  if (-e $timing_path)
  {
    generateChart($dir, $timing_path);
  }
  # We also recursively search for other directories containing timing.csv's.
  # The listing is read in full and the handle closed *before* recursing so
  # that only one directory handle is ever open, no matter how deep the tree.
  # Hidden entries (including '.' and '..') are skipped.
  opendir(my $dh, $dir) or printError('Failed to open directory for reading: ' . $dir . ' (' . $! . ')');
  my @entries = grep { $_ !~ /^\./ } readdir($dh);
  closedir($dh);
  foreach my $entry (@entries)
  {
    my $path = filenameCat($dir, $entry);
    if (-d $path)
    {
      searchForTimingCSV($path);
    }
  }
}
## searchForTimingCSV() ##
99
100
## @function generateChart()
#
# Parse a timing.csv file and write an HTML page (statistics table plus gantt
# chart) into the same directory as '<digits>-gantt.html', where <digits> are
# the trailing digits of the directory name.
#
# Records are recognised by their second CSV field: 'M0' (master), 'W<n>'
# (worker) and 'T<n>' (task / file processed by a worker).
#
# @param $dir             directory the chart is written to
# @param $timing_csv_path path to the timing.csv to parse
#
# Dies if either file cannot be opened or the chart cannot be written. A
# timing.csv without a master record is skipped with a warning.
#
sub generateChart
{
  my ($dir, $timing_csv_path) = @_;
  my $import_dir;
  # The chart is named after the trailing digits of the directory name; use an
  # empty prefix when there are none
  my ($epoc) = $dir =~ /(\d+)$/;
  $epoc = '' unless defined $epoc;
  my $gantt_path = $dir . '/' . $epoc . '-gantt.html';

  print ' * Generating chart for: ' . $dir . "\n";
  print ' - timing file: ' . $timing_csv_path . "\n";
  print ' - gantt chart: ' . $gantt_path . "\n";

  # Read in timing.csv and parse information into data structure
  print ' - parsing timing.csv... ';
  my $timing_data = {};
  my $id_2_worker_id = {};
  open(my $tin, '<:utf8', $timing_csv_path) or die('Error! Failed to open file for reading: ' . $timing_csv_path . ': ' . $!);
  while (my $line = <$tin>)
  {
    # Strip the line terminator up front so no field carries a stray newline
    $line =~ s/[\r\n]+$//;
    my @parts = split(/,/, $line);
    # Skip blank or truncated lines
    next unless defined $parts[1];
    if ($parts[1] eq 'M0')
    {
      $timing_data->{'M'} = {'N'=>$parts[2], 'S'=>$parts[3], 'E'=>$parts[4]};
    }
    elsif ($parts[1] =~ /W\d+/)
    {
      my $worker_id = $parts[1];
      my $hostname = $parts[2];
      # Alter the worker name for compute nodes so they can be naturally
      # sorted
      if (defined $hostname && $hostname =~ /compute-0-(\d+)/)
      {
        $worker_id = 'W' . $1;
      }
      $timing_data->{$worker_id} = {'N'=>$hostname, 'S'=>$parts[3], 'E'=>$parts[4], 'F'=>{}};
      $id_2_worker_id->{$parts[0]} = $worker_id;
    }
    elsif ($parts[1] =~ /T\d+/)
    {
      my $worker_id = defined $parts[7] ? $id_2_worker_id->{$parts[7]} : undef;
      # A task that names no known worker cannot be placed on the chart
      if (!defined $worker_id)
      {
        print STDERR 'Warning! Ignoring task with unknown worker: ' . $line . "\n";
        next;
      }
      my $stop = $parts[4];
      my $filepath = defined $parts[8] ? $parts[8] : '';
      $filepath =~ s/^\s+|\s+$//g;
      my $percent_complete = defined $parts[9] ? $parts[9] : '';
      $import_dir = longestCommonPath($filepath, $import_dir);
      # Jobs are keyed by start time, so nudge the key by a microsecond at a
      # time until it no longer collides with an earlier job of this worker
      my $jobs = $timing_data->{$worker_id}->{'F'};
      my $start_time = $parts[3];
      while (exists $jobs->{$start_time})
      {
        $start_time += 0.000001;
      }
      $jobs->{$start_time} = {'FN'=>$filepath, 'S'=>$parts[3], 'PS'=>($stop - $parts[5]), 'PE'=>$stop, 'E'=>$stop, 'DL'=>$parts[6], 'PC'=>$percent_complete};
    }
  }
  close($tin);
  # Without a master record there is no time span to draw the chart against
  if (!defined $timing_data->{'M'})
  {
    print "Skipped\n";
    print STDERR 'Warning! No master (M0) record found in: ' . $timing_csv_path . "\n";
    return;
  }
  my $number_of_workers = scalar(keys(%{$id_2_worker_id}));
  print "Done\n";

  # 3. Produce pretty HTML chart of timing information including jobs
  print " - generating timing information as chart in HTML... ";
  open(my $html_out, '>:utf8', $gantt_path) or die('Error! Failed to open file for writing: ' . $gantt_path . ': ' . $!);
  print {$html_out} join("\n",
    '<html>',
    '<head>',
    '<style type="text/css">',
    'div.thread {position:relative}',
    'div.master {border:1px solid gray;color:white;font-weight:bold}',
    'div.worker {border:1px solid black;background-color:green;color:white;font-weight:bold;margin-bottom:1px;}',
    'div.time {font-size:smaller;font-weight:normal}',
    'div.job {background-color:transparent;color:black;border:1px solid black;display:block;font-size:smaller;position:relative;text-align:center;overflow:hidden;margin-bottom:1px;}',
    'span.process {z-index:-1;background-color:#C7C7C7;position:absolute}',
    'span.label {z-index:1;background-color:transparent;overflow:hidden;white-space:nowrap;}',
    'th {text-align:left}',
    '</style>',
    '</head>',
    '<body>',
    '<h2>Statistics</h2>',
    '<table style="width:100%;">'), "\n";

  # Gather per-file statistics
  my $total_duration = $timing_data->{'M'}->{'E'} - $timing_data->{'M'}->{'S'};
  my $file_count = 0;
  my $data_locality = 0;
  my $total_io_time = 0;
  my $total_process_time = 0;
  my $fastest_file = 0;
  my $slowest_file = 0;
  my $problem_files = 0;
  foreach my $worker_id (keys %{$timing_data})
  {
    next if $worker_id eq 'M';
    my $jobs = $timing_data->{$worker_id}->{'F'};
    foreach my $job_start (keys %{$jobs})
    {
      my $job = $jobs->{$job_start};
      my $process_start = $job->{'PS'};
      my $process_end = $job->{'PE'};
      my $job_end = $job->{'E'};
      # A zero timestamp marks a file whose timing was never completed
      if ($process_start == 0 || $process_end == 0 || $job_end == 0)
      {
        $problem_files++;
      }
      else
      {
        my $io_duration = ($process_start - $job_start) + ($job_end - $process_end);
        my $process_duration = $process_end - $process_start;
        my $file_duration = $io_duration + $process_duration;
        debugPrint("filename: " . $job->{'FN'});
        debugPrint("start: $job_start ps: $process_start pe: $process_end end: $job_end");
        debugPrint("io: $io_duration process: $process_duration duration: $file_duration");
        # Running stats
        $total_io_time += $io_duration;
        $total_process_time += $process_duration;
        if ($fastest_file == 0 || $file_duration < $fastest_file)
        {
          $fastest_file = $file_duration;
        }
        if ($slowest_file == 0 || $file_duration > $slowest_file)
        {
          $slowest_file = $file_duration;
        }
      }
      # Shorten filename - but only when it really lives below the common
      # import directory (with a single file the "directory" is the file's own
      # path and there is nothing to strip)
      my $filename = $job->{'FN'};
      if (defined $filename && $filename ne '' && defined $import_dir)
      {
        my $prefix = $import_dir . '/';
        if (index($filename, $prefix) == 0)
        {
          $job->{'FN'} = substr($filename, length($prefix));
        }
      }
      $file_count++;
      if (defined $job->{'DL'} && $job->{'DL'} == 1)
      {
        $data_locality++;
      }
    }
  }
  # Guard the divisions below without distorting the figures that are shown
  my $file_divisor = ($file_count > 0) ? $file_count : 1;
  my $process_divisor = ($total_process_time > 0) ? $total_process_time : 1;
  my $avg_processing_time = floor(($total_io_time + $total_process_time) / $file_divisor);
  my $avg_io_time = int(($total_io_time / $file_divisor) + 0.5);
  my $avg_cpu_time = int(($total_process_time / $file_divisor) + 0.5);

  # The import directory comes from the CSV, so escape it for HTML
  my $import_dir_html = defined $import_dir ? $import_dir : '';
  $import_dir_html =~ s/&/&amp;/g;
  $import_dir_html =~ s/</&lt;/g;
  $import_dir_html =~ s/>/&gt;/g;

  # Renders an epoch time as e.g. 2013Jun05 14:03:59 (local time)
  my $format_timestamp = sub
  {
    my ($epoch) = @_;
    my ($sec, $min, $hour, $day, $month, $year) = (localtime($epoch))[0 .. 5];
    return sprintf('%04d%s%02d %02d:%02d:%02d', ($year + 1900), $months[$month], $day, $hour, $min, $sec);
  };

  print {$html_out} "<tr>\n";
  print {$html_out} ' <th style="width:12%;">Import Directory:</th><td style="width:22%;" colspan="5">' . $import_dir_html . "</td>\n";
  print {$html_out} "</tr>\n";

  print {$html_out} "<tr>\n";
  print {$html_out} ' <th style="width:11%;">Start Time:</th><td style="width:22%;">' . $format_timestamp->($timing_data->{'M'}->{'S'}) . "</td>\n";
  print {$html_out} ' <th style="width:11%;">End Time:</th><td style="width:22%;">' . $format_timestamp->($timing_data->{'M'}->{'E'}) . "</td>\n";
  print {$html_out} " <th>Processing Time:</th><td>" . renderTime($total_duration) . "</td>\n";
  print {$html_out} "</tr>\n";

  print {$html_out} "<tr>\n";
  print {$html_out} " <th>Processing Threads:</th><td>" . $number_of_workers . "</td>\n";
  print {$html_out} " <th>Files Processed:</th><td>" . $file_count . "</td>\n";
  print {$html_out} " <th>Problem Files:</th><td>" . $problem_files . "</td>\n";
  print {$html_out} "</tr>\n";

  print {$html_out} "<tr>\n";
  print {$html_out} ' <th>Serial Processing Time:</th><td>' . renderTime($total_process_time) . "</td>\n";
  print {$html_out} ' <th>Serial IO Time:</th><td>' . renderTime($total_io_time) . "</td>\n";
  print {$html_out} ' <th>IO Percentage:</th><td>' . sprintf('%d%%', (($total_io_time / $process_divisor) * 100)) . "</td>\n";
  print {$html_out} "</tr>\n";

  print {$html_out} "<tr>\n";
  print {$html_out} " <th>Average Processing Time:</th><td>" . renderTime($avg_processing_time) . "</td>\n";
  print {$html_out} " <th>Average File IO Time:</th><td>" . renderTime($avg_io_time) . "</td>\n";
  print {$html_out} " <th>Average File CPU Time:</th><td>" . renderTime($avg_cpu_time) . "</td>\n";
  print {$html_out} "</tr>\n";

  print {$html_out} "<tr>\n";
  print {$html_out} " <th>Fastest File:</th><td>" . renderTime($fastest_file) . "</td>\n";
  print {$html_out} " <th>Slowest File:</th><td>" . renderTime($slowest_file) . "</td>\n";
  if ($data_locality > 0)
  {
    print {$html_out} " <th>Data Locality:</th><td>" . sprintf('%d%% [%d out of %d]', (($data_locality / $file_divisor) * 100), $data_locality, $file_count) . "</td>\n";
  }
  else
  {
    print {$html_out} " <th>Data Locality:</th><td><i>Not Applicable</i></td>\n";
  }
  print {$html_out} "</tr>\n";

  print {$html_out} "</table>\n";
  print {$html_out} "<hr />\n";
  print {$html_out} "<h2>Timing Chart (Gantt)</h2>\n";
  # The master line leaves its outer 'thread' div open so that it encloses
  # every worker line; it is closed again after the loop
  print {$html_out} renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'master', $timing_data->{'M'}->{'N'}, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, {}, $data_locality);
  foreach my $worker_id (nsort(keys %{$timing_data}))
  {
    next if $worker_id eq 'M';
    my $data = $timing_data->{$worker_id};
    print {$html_out} renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'worker', $worker_id . ' [' . $data->{'N'} . ']', $data->{'S'}, $data->{'E'}, $data->{'F'}, $data_locality);
  }
  print {$html_out} '</div>' . "\n";
  print {$html_out} "</body>\n";
  print {$html_out} "</html>";
  # Buffered write errors only surface on close
  close($html_out) or die('Error! Failed to write file: ' . $gantt_path . ': ' . $!);
  print "Done!\n\n";
  $chart_count++;
}
## generateChart() ##
317
318
## @function debugPrint()
#
# Write a '[DEBUG] ' prefixed message to STDERR, but only when the script's
# debug flag is set.
#
# @param $msg the text to report
#
sub debugPrint
{
  my ($msg) = @_;
  print STDERR "[DEBUG] $msg\n" if $debug;
}
## debugPrint() ##
330
331
## @function filenameCat
#
# Join path components with forward slashes, squashing every run of forward
# and/or back slashes in the result down to a single forward slash.
#
# @param @_ the path components to join
# @return the joined path
#
sub filenameCat
{
  my @components = @_;
  my $joined = join('/', @components);
  $joined =~ s{[/\\]+}{/}g;
  return $joined;
}
## filenameCat() ##
341
342
## @function printError()
#
# Abort the script with an 'Error! ' prefixed message. Never returns.
#
# @param $msg description of what went wrong
#
sub printError
{
  my ($msg) = @_;
  die("Error! $msg\n\n");
}
## printError() ##
351
352
## @function printUsage()
#
# Report an optional error message, then abort the script with the usage
# line. Never returns.
#
# Both the message and the usage line go to STDERR so they appear together
# and in order, even when STDOUT is redirected or buffered.
#
# @param $msg (optional) what was wrong with the command line; an 'Error! '
#             prefix is added only if the message does not already carry one
#
sub printUsage
{
  my ($msg) = @_;
  if (defined $msg)
  {
    # Callers in this script already start their messages with 'Error!', so
    # only add the prefix when it is missing
    if ($msg !~ /^Error!/)
    {
      $msg = 'Error! ' . $msg;
    }
    print STDERR $msg . "\n";
  }
  die("Usage: generate_gantt.pl [-debug] [-width <width in pixels>] <dir> [<dir> ...]\n\n");
}
## printUsage() ##
365
366
## @function longestCommonPath
#
# Determine the leading path components shared by two slash separated paths.
#
# A '://' protocol separator is temporarily collapsed to ':' so that the
# protocol and host stay together as the first component, and is put back
# afterwards - but only if both paths actually had one, so that other colons
# (e.g. a drive letter as in 'C:/data') are left untouched.
#
# @param $path_new     the path to compare
# @param $path_current the common path found so far; may be undefined
# @return the shared leading path, or $path_new unchanged when $path_current
#         is undefined
#
sub longestCommonPath
{
  my ($path_new, $path_current) = @_;
  # Nothing to compare against yet - the new path is the best guess so far
  return $path_new unless defined $path_current;
  # Hide protocol before we split by slash, remembering whether we did
  my $new_had_protocol = ($path_new =~ s{://}{:});
  my $current_had_protocol = ($path_current =~ s{://}{:});
  my @path_new_parts = split(/\//, $path_new);
  my @path_current_parts = split(/\//, $path_current);
  my @path_parts;
  foreach my $i (0 .. $#path_current_parts)
  {
    # Stop at the first difference, or as soon as the new path runs out
    last unless defined $path_new_parts[$i];
    last unless $path_current_parts[$i] eq $path_new_parts[$i];
    push(@path_parts, $path_new_parts[$i]);
  }
  my $result = filenameCat(@path_parts);
  # Restore protocol
  if ($new_had_protocol && $current_had_protocol)
  {
    $result =~ s{:}{://};
  }
  return $result;
}
## longestCommonPath() ##
403
404
## @function renderLine()
#
# Build the HTML for one line of the gantt chart: the bar for a master or
# worker thread followed by one box per job the thread processed.
#
# For the 'master' class the outer 'thread' div is left open; the caller is
# expected to close it once every worker line has been written.
#
# @param $table_width   width of the whole chart in pixels
# @param $start         start time of the chart (the master's start)
# @param $end           end time of the chart (the master's end)
# @param $class         'master' or 'worker'; used as CSS class and label
# @param $tname         display name of the thread
# @param $tstart        start time of this thread
# @param $tend          end time of this thread
# @param $jobs          hash ref of jobs keyed by start time, each a hash ref
#                       with the keys FN, PS, PE, E, DL and PC
# @param $data_locality number of data local files in the whole run
# @return the HTML as a string
#
sub renderLine
{
  my ($table_width, $start, $end, $class, $tname, $tstart, $tend, $jobs, $data_locality) = @_;
  debugPrint("renderLine($table_width, $start, $end, $class, $tname, $tstart, $tend, <jobs>)");
  # All timings need to be relative to 0 (relative start)
  my $duration = $end - $start;
  my $rtstart = $tstart - $start;
  my $rtend = $tend - $start;
  # Converts a time span into (unrounded) pixels; a chart without any
  # duration collapses everything to 0 rather than dividing by zero
  my $to_pixels = sub
  {
    my ($span) = @_;
    return ($duration > 0) ? ($span / $duration) * $table_width : 0;
  };
  # We need to scale these depending on the timing of this thread relative to
  # the master thread
  my $width = $table_width;
  my $left = 0;
  if ($start != $tstart)
  {
    $left = $to_pixels->($rtstart);
  }
  # - subtract any left offset from width
  $width = $width - $left;
  # - right offset directly subtracted from width
  if ($end != $tend)
  {
    $width = $width - $to_pixels->($duration - $rtend);
  }
  # Round things off to whole pixels (round half up)
  $left = int($left + 0.5);
  $width = int($width + 0.5);
  # Output the bar for this master/worker
  my $html = '<div class="thread ' . $class . '" style="left:' . $left . 'px;width:' . $width . 'px;">';
  if ($class eq 'master')
  {
    $html .= '<div style="background-color:blue;margin-bottom:1px">';
  }
  $html .= '<div class="time" style="display:table-cell">' . renderTime($rtstart) . '</div><div style="display:table-cell;padding-left:20px;width:100%;">' . ucfirst($class) . ': ' . _escapeHTML($tname) . '</div><div class="time" style="display:table-cell">' . renderTime($rtend) . '</div></div>';
  # Job keys are start times, so they must be ordered numerically
  foreach my $jstart (sort { $a <=> $b } keys %{$jobs})
  {
    my $job = $jobs->{$jstart};
    my $rjstart = $jstart - $start;
    my $rpstart = $job->{'PS'} - $start;
    my $rpend = $job->{'PE'} - $start;
    my $rjend = $job->{'E'} - $start;
    my $jduration = $job->{'E'} - $jstart;
    my $io_duration = $rpstart - $rjstart;
    my $cpu_duration = $rpend - $rpstart;
    # Scale Job co-ordinates
    my $jleft = int($to_pixels->($rjstart) + 0.5);
    # -2 for left and right 1 pixel border
    my $jwidth = int($to_pixels->($jduration) + 0.5) - 2;
    if ($jleft + $jwidth > $left + $width)
    {
      $jwidth = ($left + $width) - $jleft;
    }
    # Very short jobs would otherwise end up zero or negative pixels wide,
    # which is invalid CSS and would divide by zero below
    if ($jwidth < 1)
    {
      $jwidth = 1;
    }
    # Then scale process timings within that!
    my $rpleft = int($to_pixels->($rpstart - $rjstart) + 0.5);
    my $rpwidth = $jwidth - $rpleft;
    if ($rpwidth < 0)
    {
      $rpwidth = 0;
    }
    my $cpu_percent = int((($rpwidth / $jwidth) * 100) + 0.5);
    # Jobs that were not data local are highlighted
    # NOTE(review): this requires more than one data local file in the run,
    # whereas the statistics table reports locality from one file upwards -
    # confirm which threshold is intended
    my $not_local = ($data_locality > 1 && defined $job->{'DL'} && $job->{'DL'} != 1);
    # Everything that originates from timing.csv is escaped for HTML
    my $filename = _escapeHTML($job->{'FN'});
    my $percent_complete = _escapeHTML($job->{'PC'});
    $html .= '<div class="job" style="left:' . $jleft . 'px;width:' . $jwidth . 'px;';
    if ($not_local)
    {
      $html .= 'border:1px dashed black;';
    }
    $html .= '" title="FN:' . $filename . ', S:' . renderTime($rjstart) . ', E:' . renderTime($rjend) . ', CPU: ' . $cpu_percent . '% [' . renderTime($io_duration) . ', ' . renderTime($cpu_duration) . ', PC: ' . $percent_complete . '%]"><span class="process" style="left:' . $rpleft . 'px;width:' . $rpwidth . 'px">&nbsp;</span><span class="label"';
    if ($not_local)
    {
      $html .= ' style="color:#FF0000"';
    }
    $html .= '>' . $filename;
    if ($percent_complete ne 'NA')
    {
      $html .= ' <small>[' . $percent_complete . '%]</small>';
    }
    if ($not_local)
    {
      $html .= ' [NL]';
    }
    $html .= '</span></div>';
  }
  return $html;
}
## renderLine() ##


## @function _escapeHTML()
#
# Private helper of renderLine(): make a string safe for use in HTML text and
# double quoted attribute values.
#
# @param $text the raw string; undefined is treated as the empty string
# @return the escaped string
#
sub _escapeHTML
{
  my ($text) = @_;
  return '' unless defined $text;
  $text =~ s/&/&amp;/g;
  $text =~ s/</&lt;/g;
  $text =~ s/>/&gt;/g;
  $text =~ s/"/&quot;/g;
  return $text;
}
## _escapeHTML() ##
492
493
## @function renderTime()
#
# Format a number of seconds as '<m>m<ss>s', or '<h>h<mm>m<ss>s' once the
# duration reaches a full hour. Fractions of a second are dropped.
#
# @param $seconds the duration in seconds; undefined is treated as 0 and a
#                 negative duration is rendered with a leading '-'
# @return the formatted duration
#
sub renderTime
{
  my ($seconds) = @_;
  $seconds = 0 unless defined $seconds;
  # Work on the magnitude so negative durations (e.g. from clock skew) are
  # shown as such rather than wrapping around to a bogus positive time
  my $whole_seconds = int(abs($seconds));
  my $sign = ($seconds < 0 && $whole_seconds > 0) ? '-' : '';
  # determine how many hours, then minutes, then left over seconds
  my $an_hour = 60 * 60;
  my $a_minute = 60;
  my $hours = int($whole_seconds / $an_hour);
  my $minutes = int(($whole_seconds % $an_hour) / $a_minute);
  my $remainder = $whole_seconds % $a_minute;
  my $time_str;
  if ($hours > 0)
  {
    $time_str = sprintf('%dh%02dm%02ds', $hours, $minutes, $remainder);
  }
  else
  {
    $time_str = sprintf('%dm%02ds', $minutes, $remainder);
  }
  return $sign . $time_str;
}
Note: See TracBrowser for help on using the repository browser.