source: gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl@ 28358

Last change on this file since 28358 was 28358, checked in by jmt12, 11 years ago

Replacing my earlier decision to only have data locality information printed if dl count was greater than 1 with always displaying data locality information (even for machines it doesn't make sense like Karearea) as otherwise my experiments with a forced 0 dl don't render properly.

  • Property svn:executable set to *
File size: 16.9 KB
Line 
1#!/usr/bin/perl
2
3# Pragma
4use strict;
5use warnings;
6
7# Modules
8use Sort::Naturally;
9use POSIX qw(floor strftime);
10
# Banner is printed before any option handling so usage errors appear below it
print "\n===== Generate Timing (GANTT) =====\n";

# 0. Init
# - configurables (can be overridden from the command line)
my $chart_width = 1600;
my $debug = 0;
# - globals shared with the subroutines below
my $chart_count = 0;
my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

# 1. Parse options - everything up to the first argument that does not start
#    with a dash. printUsage() dies, so each failed check ends the script.
OPTION: while (defined $ARGV[0] && $ARGV[0] =~ /^-/)
{
    my $flag = shift(@ARGV);
    if ($flag eq '-debug')
    {
        $debug = 1;
        next OPTION;
    }
    if ($flag ne '-width')
    {
        &printUsage('Error! Unknown option: ' . $flag);
    }
    # -width must be followed by a whole number of pixels
    if (!defined $ARGV[0])
    {
        &printUsage('Error! No width value specified');
    }
    my $pixels = shift(@ARGV);
    if ($pixels !~ /^\d+$/)
    {
        &printUsage('Error! Chart width not a number');
    }
    $chart_width = $pixels;
}
print 'Chart Width: ', $chart_width, "px\n";
print 'Debug? ', ($debug ? 'Yes' : 'No'), "\n";
print "===================================\n\n";

# 2. Search for valid directories (containing timing.csv) - every remaining
#    argument must be a directory and is searched recursively
while (defined(my $dir = shift(@ARGV)))
{
    if (!-d $dir)
    {
        &printUsage('Error! Not a directory: ' . $dir);
    }
    # Drop a single trailing slash (either flavour) from the directory
    my ($without_slash) = $dir =~ /(.*)[\\\/]$/;
    if (defined $without_slash)
    {
        $dir = $without_slash;
    }
    &searchForTimingCSV($dir);
}

# 3. Done - report how many charts were written
print "Complete!\n\n",
      "===================================\n",
      'Generated ', $chart_count, " charts\n",
      "===================================\n\n";
exit;
72## main() ##
73
74
## @function searchForTimingCSV()
#
# Walk a directory tree and generate a Gantt chart for every directory that
# directly contains a 'timing.csv' file.
#
# @param $dir  path of the directory to search
#
# Entries whose name starts with a '.' are ignored, which also covers the
# '.' and '..' entries. Dies (via printError) when a directory cannot be
# opened for reading.
#
sub searchForTimingCSV
{
    my ($dir) = @_;
    # For every directory where we find a timing.csv we generate a gantt chart
    my $timing_path = &filenameCat($dir, 'timing.csv');
    if (-e $timing_path)
    {
        &generateChart($dir, $timing_path);
    }
    # We also recursively search for other directories containing timing.csv's
    opendir(my $dh, $dir) or &printError('Failed to open directory for reading: ' . $dir . ' (' . $! . ')');
    my @entries = grep { $_ !~ /^\./ } readdir($dh);
    # The listing is fully read, so release the handle now rather than keeping
    # one open per level for the whole of the recursive descent
    closedir($dh);
    # readdir order is arbitrary; sort so charts are produced in a stable order
    foreach my $entry (sort @entries)
    {
        my $path = &filenameCat($dir, $entry);
        if (-d $path)
        {
            &searchForTimingCSV($path);
        }
    }
}
101## searchForTimingCSV() ##
102
103
## @function generateChart()
#
# Parse a timing.csv file and write an HTML report (a statistics table
# followed by a Gantt style timing chart) into the same directory.
#
# @param $dir              directory the chart is written to; any digits at
#                          the end of this path prefix the output file name
#                          ("<digits>-gantt.html")
# @param $timing_csv_path  path of the timing.csv file to parse
#
# Records are comma separated and told apart by their second field:
#  - 'M0'   master: [2] name, [3] start, [4] end
#  - 'W<n>' worker: [0] record id, [2] hostname, [3] start, [4] end
#  - 'T<n>' task:   [3] start, [4] end, [5] value subtracted from the end to
#                   obtain the processing start, [6] data locality flag,
#                   [7] record id of the owning worker, [8] file path,
#                   [9] percent complete
#
# Side effects: prints progress to STDOUT, increments the file level
# $chart_count when a chart is written, and dies if either file cannot be
# opened. A timing file without a master record is reported and skipped, and
# task records that refer to an unknown worker are skipped with a warning.
#
sub generateChart
{
    my ($dir, $timing_csv_path) = @_;
    my $import_dir;
    my ($epoc) = $dir =~ /(\d+)$/;
    # Directories not ending in digits still get a chart ("-gantt.html")
    $epoc = '' unless defined $epoc;
    my $gantt_path = $dir . '/' . $epoc . '-gantt.html';

    print ' * Generating chart for: ' . $dir . "\n";
    print ' - timing file: ' . $timing_csv_path . "\n";
    print ' - gantt chart: ' . $gantt_path . "\n";

    # 1. Read in timing.csv and parse information into data structure
    print ' - parsing timing.csv... ';
    my $timing_data = {};
    my $id_2_worker_id = {};
    open(my $tin, '<:utf8', $timing_csv_path) or die('Error! Failed to open file for reading: ' . $timing_csv_path . ' (' . $! . ')');
    while (my $line = <$tin>)
    {
        # Strip the line ending up front so the last field of every record is
        # clean, whichever record type it belongs to
        $line =~ s/[\r\n]+$//;
        my @parts = split(/,/, $line);
        # Blank or truncated lines carry no record type
        next unless defined $parts[1];
        if ($parts[1] eq 'M0')
        {
            $timing_data->{'M'} = {'N'=>$parts[2], 'S'=>$parts[3], 'E'=>$parts[4]};
        }
        elsif ($parts[1] =~ /W\d+/)
        {
            my $worker_id = $parts[1];
            my $hostname = $parts[2];
            # Alter the worker name for compute nodes so they can be naturally
            # sorted
            if (defined $hostname && $hostname =~ /compute-0-(\d+)/)
            {
                $worker_id = 'W' . $1;
            }
            $timing_data->{$worker_id} = {'N'=>$hostname, 'S'=>$parts[3], 'E'=>$parts[4], 'F'=>{}};
            $id_2_worker_id->{$parts[0]} = $worker_id;
        }
        elsif ($parts[1] =~ /T\d+/)
        {
            my $worker_id = defined $parts[7] ? $id_2_worker_id->{$parts[7]} : undef;
            if (!defined $worker_id)
            {
                # Without a known worker there is no row to draw this task on
                warn('Warning! Skipping task for unknown worker: ' . $line . "\n");
                next;
            }
            my $start = $parts[3];
            my $stop = $parts[4];
            my $filepath = defined $parts[8] ? $parts[8] : '';
            $filepath =~ s/^\s+|\s+$//g;
            # 'NA' is the value renderLine treats as "no percentage available"
            my $percent_complete = defined $parts[9] ? $parts[9] : 'NA';
            if ($filepath ne '')
            {
                # Compare containing directories rather than whole file paths
                # so a run with a single file still yields a directory
                (my $file_dir = $filepath) =~ s/[\/\\][^\/\\]*$//;
                $import_dir = &longestCommonPath($file_dir, $import_dir);
            }
            # Jobs are keyed by start time; nudge the key when two jobs on the
            # same worker share one. The key is formatted explicitly because a
            # microsecond added to an epoch time is lost by Perl's default
            # number to string conversion.
            my $jobs = $timing_data->{$worker_id}->{'F'};
            my $job_key = $start;
            while (exists $jobs->{$job_key})
            {
                $job_key = sprintf('%.6f', $job_key + 0.000001);
            }
            $jobs->{$job_key} = {'FN'=>$filepath, 'S'=>$start, 'PS'=>($stop - $parts[5]), 'PE'=>$stop, 'E'=>$stop, 'DL'=>$parts[6], 'PC'=>$percent_complete};
        }
    }
    close($tin);
    # Everything below is measured against the master's start and end
    if (!defined $timing_data->{'M'})
    {
        print "Skipped (no master record found)\n\n";
        return;
    }
    my $number_of_workers = scalar(keys(%{$id_2_worker_id}));
    print "Done\n";

    # 2. Gather statistics over every job of every worker
    my $total_duration = $timing_data->{'M'}->{'E'} - $timing_data->{'M'}->{'S'};
    my $file_count = 0;
    my $data_locality = 0;
    my $total_io_time = 0;
    my $total_process_time = 0;
    my $fastest_file = 0;
    my $slowest_file = 0;
    my $problem_files = 0;
    foreach my $worker_id (keys %{$timing_data})
    {
        next if ($worker_id eq 'M');
        my $jobs = $timing_data->{$worker_id}->{'F'};
        foreach my $job_start (keys %{$jobs})
        {
            my $job = $jobs->{$job_start};
            my $process_start = $job->{'PS'};
            my $process_end = $job->{'PE'};
            my $job_end = $job->{'E'};
            if ($process_start == 0 || $process_end == 0 || $job_end == 0)
            {
                $problem_files++;
            }
            else
            {
                my $io_duration = ($process_start - $job_start) + ($job_end - $process_end);
                my $process_duration = $process_end - $process_start;
                my $file_duration = $io_duration + $process_duration;
                &debugPrint("filename: " . $job->{'FN'});
                &debugPrint("start: $job_start ps: $process_start pe: $process_end end: $job_end");
                &debugPrint("io: $io_duration process: $process_duration duration: $file_duration");
                # Running stats
                $total_io_time += $io_duration;
                $total_process_time += $process_duration;
                if ($fastest_file == 0 || $file_duration < $fastest_file)
                {
                    $fastest_file = $file_duration;
                }
                if ($slowest_file == 0 || $file_duration > $slowest_file)
                {
                    $slowest_file = $file_duration;
                }
            }
            # Shorten filename - but only when it really starts with the import
            # directory, so nothing else is ever cut off the front of a name
            if (defined $import_dir && defined $job->{'FN'} && $job->{'FN'} ne '')
            {
                my $prefix = $import_dir . '/';
                if (index($job->{'FN'}, $prefix) == 0)
                {
                    $job->{'FN'} = substr($job->{'FN'}, length($prefix));
                }
            }
            $file_count++;
            if (defined $job->{'DL'} && $job->{'DL'} == 1)
            {
                $data_locality++;
            }
        }
    }
    # Guard the divisions without altering the totals that get displayed
    my $file_divisor = ($file_count > 0) ? $file_count : 1;
    my $process_divisor = ($total_process_time > 0) ? $total_process_time : 1;
    my $avg_processing_time = floor(($total_io_time + $total_process_time) / $file_divisor);
    my $avg_io_time = int(($total_io_time / $file_divisor) + 0.5);
    my $avg_cpu_time = int(($total_process_time / $file_divisor) + 0.5);
    # Timestamps are shown as e.g. 2013May22 10:15:00 in local time
    my $format_time = sub
    {
        my ($sec, $min, $hour, $day, $month, $year) = (localtime($_[0]))[0,1,2,3,4,5];
        return sprintf('%04d%s%02d %02d:%02d:%02d', ($year+1900), $months[$month], $day, $hour, $min, $sec);
    };

    # 3. Produce pretty HTML chart of timing information including jobs
    print " - generating timing information as chart in HTML... ";
    open(my $html_fh, '>:utf8', $gantt_path) or die('Error! Failed to open file for writing: ' . $gantt_path . ' (' . $! . ')');
    print {$html_fh} map { $_ . "\n" } (
        '<html>',
        '<head>',
        '<style type="text/css">',
        'div.thread {position:relative}',
        'div.master {border:1px solid gray;color:white;font-weight:bold}',
        'div.worker {border:1px solid black;background-color:green;color:white;font-weight:bold;margin-bottom:1px;}',
        'div.time {font-size:smaller;font-weight:normal}',
        'div.job {background-color:transparent;color:black;border:1px solid black;display:block;font-size:smaller;position:relative;text-align:center;overflow:hidden;margin-bottom:1px;}',
        'span.process {z-index:-1;background-color:#C7C7C7;position:absolute}',
        'span.label {z-index:1;background-color:transparent;overflow:hidden;white-space:nowrap;}',
        'th {text-align:left}',
        '</style>',
        '</head>',
        '<body>',
        '<h2>Statistics</h2>',
        '<table style="width:100%;">',
    );

    print {$html_fh} "<tr>\n";
    print {$html_fh} ' <th style="width:12%;">Import Directory:</th><td style="width:22%;" colspan="5">' . (defined $import_dir ? $import_dir : '') . "</td>\n";
    print {$html_fh} "</tr>\n";

    print {$html_fh} "<tr>\n";
    print {$html_fh} ' <th style="width:11%;">Start Time:</th><td style="width:22%;">' . $format_time->($timing_data->{'M'}->{'S'}) . "</td>\n";
    print {$html_fh} ' <th style="width:11%;">End Time:</th><td style="width:22%;">' . $format_time->($timing_data->{'M'}->{'E'}) . "</td>\n";
    print {$html_fh} " <th>Processing Time:</th><td>" . &renderTime($total_duration) . "</td>\n";
    print {$html_fh} "</tr>\n";

    print {$html_fh} "<tr>\n";
    print {$html_fh} " <th>Processing Threads:</th><td>" . $number_of_workers . "</td>\n";
    print {$html_fh} " <th>Files Processed:</th><td>" . $file_count . "</td>\n";
    print {$html_fh} " <th>Problem Files:</th><td>" . $problem_files . "</td>\n";
    print {$html_fh} "</tr>\n";

    print {$html_fh} "<tr>\n";
    print {$html_fh} ' <th>Serial Processing Time:</th><td>' . &renderTime($total_process_time) . "</td>\n";
    print {$html_fh} ' <th>Serial IO Time:</th><td>' . &renderTime($total_io_time) . "</td>\n";
    print {$html_fh} ' <th>IO Percentage:</th><td>' . sprintf('%d%%', (($total_io_time / $process_divisor) * 100)) . "</td>\n";
    print {$html_fh} "</tr>\n";

    print {$html_fh} "<tr>\n";
    print {$html_fh} " <th>Average Processing Time:</th><td>" . &renderTime($avg_processing_time) . "</td>\n";
    print {$html_fh} " <th>Average File IO Time:</th><td>" . &renderTime($avg_io_time) . "</td>\n";
    print {$html_fh} " <th>Average File CPU Time:</th><td>" . &renderTime($avg_cpu_time) . "</td>\n";
    print {$html_fh} "</tr>\n";

    print {$html_fh} "<tr>\n";
    print {$html_fh} " <th>Fastest File:</th><td>" . &renderTime($fastest_file) . "</td>\n";
    print {$html_fh} " <th>Slowest File:</th><td>" . &renderTime($slowest_file) . "</td>\n";
    # Data locality is always reported, even when no file was data local, so
    # that runs with a forced data locality of zero still render
    print {$html_fh} " <th>Data Locality:</th><td>" . sprintf('%d%% [%d out of %d]', (($data_locality / $file_divisor) * 100), $data_locality, $file_count) . "</td>\n";
    print {$html_fh} "</tr>\n";

    print {$html_fh} "</table>\n";
    print {$html_fh} "<hr />\n";
    print {$html_fh} "<h2>Timing Chart (Gantt)</h2>\n";
    # The master row comes first and has no jobs of its own
    print {$html_fh} renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'master', $timing_data->{'M'}->{'N'}, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, {}, $data_locality);
    foreach my $worker_id (nsort keys %{$timing_data})
    {
        next if ($worker_id eq 'M');
        my $data = $timing_data->{$worker_id};
        # A fixed 2 is passed instead of $data_locality: renderLine flags non
        # local jobs whenever this value is greater than 1
        print {$html_fh} renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'worker', $worker_id . ' [' . $data->{'N'} . ']', $data->{'S'}, $data->{'E'}, $data->{'F'}, 2);
    }
    # Close the thread div that renderLine leaves open for the master row
    print {$html_fh} '</div>' . "\n";
    print {$html_fh} "</body>\n";
    print {$html_fh} "</html>";
    # Buffered write errors only surface when the handle is closed
    close($html_fh) or die('Error! Failed to finish writing file: ' . $gantt_path . ' (' . $! . ')');
    print "Done!\n\n";
    $chart_count++;
}
319## generateChart() ##
320
321
## @function debugPrint()
#
# Write a message to STDERR, prefixed with '[DEBUG] ', but only when the
# file level $debug flag is set.
#
# @param $msg  the text to report
#
sub debugPrint
{
    my ($msg) = @_;
    # Stay silent unless debugging was requested
    return unless $debug;
    print STDERR '[DEBUG] ', $msg, "\n";
}
332## debugPrint() ##
333
334
## @function filenameCat
#
# Join any number of path parts with forward slashes, then reduce every run
# of slashes and/or backslashes to a single forward slash.
#
# @param @_  the path parts to join
# @return the joined path ('' when called without arguments)
#
sub filenameCat
{
    my $joined = join('/', @_);
    # Both separators map to '/', and the squeeze flag collapses each run of
    # translated characters down to one
    $joined =~ tr{\\/}{/}s;
    return $joined;
}
343## filenameCat() ##
344
345
## @function printError()
#
# Abort the script with the given message, prefixed with 'Error! ' and
# followed by a blank line.
#
# @param $msg  description of what went wrong
#
sub printError
{
    my ($msg) = @_;
    # The trailing newlines stop die from appending the script name and line
    die "Error! $msg\n\n";
}
353## printError() ##
354
355
## @function printUsage()
#
# Print an optional error message and then abort the script with the usage
# text.
#
# @param $msg  (optional) description of what was wrong with the arguments;
#              it is printed with exactly one 'Error! ' prefix, whether or
#              not the caller already supplied one
#
sub printUsage
{
    my ($msg) = @_;
    if (defined $msg)
    {
        # Callers in this script pass messages that already start with
        # 'Error! ', so drop that before adding our own prefix
        $msg =~ s/^Error!\s*//;
        print 'Error! ' . $msg . "\n";
    }
    die("Usage: generate_gantt.pl [-debug] [-width <width in pixels>] <dir> [<dir> ...]\n\n");
}
367## printUsage() ##
368
369
## @function longestCommonPath
#
# Determine the leading path components shared by two slash separated paths.
# Intended to be called repeatedly, feeding the previous result back in as
# the second argument.
#
# @param $path_new      the path to compare
# @param $path_current  the common path found so far, or undef when there is
#                       nothing to compare against yet
# @return $path_new unchanged when $path_current is undef, otherwise the
#         shared leading components joined with '/' ('' when the first
#         components differ)
#
# A "scheme://" protocol prefix is kept together with the component that
# follows it while comparing, and is put back in the result.
#
sub longestCommonPath
{
    my ($path_new, $path_current) = @_;
    if (!defined $path_current)
    {
        return $path_new;
    }
    # Hide protocol before we split by slash, remembering whether there was
    # one so that an unrelated colon (C:/...) is never mistaken for it later
    my $had_protocol = ($path_new =~ s/:\/\//:/) ? 1 : 0;
    $path_current =~ s/:\/\//:/;
    my @path_new_parts = split(/\//, $path_new);
    my @path_current_parts = split(/\//, $path_current);
    # Only compare positions present in both paths
    my $limit = scalar(@path_new_parts);
    if (scalar(@path_current_parts) < $limit)
    {
        $limit = scalar(@path_current_parts);
    }
    my @path_parts;
    for (my $i = 0; $i < $limit; $i++)
    {
        last if ($path_current_parts[$i] ne $path_new_parts[$i]);
        push(@path_parts, $path_new_parts[$i]);
    }
    my $result = &filenameCat(@path_parts);
    # Restore protocol
    if ($had_protocol)
    {
        $result =~ s/:/:\/\//;
    }
    return $result;
}
405## longestCommonPath() ##
406
407
## @function _escapeHTML()
#
# Make a value safe to place in HTML text or in a double quoted attribute.
#
# @param $text  the raw value (undef is treated as an empty string)
# @return the value with &, <, > and " replaced by character entities
#
sub _escapeHTML
{
    my ($text) = @_;
    return '' unless defined $text;
    # Ampersand first so the entities added below are not escaped again
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}
## _escapeHTML() ##


## @function renderLine()
#
# Build the HTML for one row of the chart: the bar for a master or worker
# thread followed by one box per job run on that thread.
#
# @param $table_width    width in pixels that the span $start..$end maps to
# @param $start          start time of the whole chart
# @param $end            end time of the whole chart
# @param $class          'master' or 'worker'; used as CSS class and label
# @param $tname          name shown on the bar
# @param $tstart         start time of this thread
# @param $tend           end time of this thread
# @param $jobs           hash reference of jobs keyed by start time, each a
#                        hash with the keys FN, PS, PE, E, DL and PC
# @param $data_locality  when greater than 1, jobs whose DL is not 1 are
#                        drawn with a dashed border, red label and '[NL]'
# @return the HTML as a string
#
# For a 'master' row only the inner (blue) div is closed, leaving the outer
# thread div open so that whatever is written next is nested inside it. For
# any other class the thread div is closed before the job boxes are added.
#
sub renderLine
{
    my ($table_width, $start, $end, $class, $tname, $tstart, $tend, $jobs, $data_locality) = @_;
    &debugPrint("renderLine($table_width, $start, $end, $class, $tname, $tstart, $tend, <jobs>, $data_locality)");
    # All timings need to be relative to 0 (relative start)
    my $duration = $end - $start;
    my $rtstart = $tstart - $start;
    my $rtend = $tend - $start;
    # Convert a length of time into pixels. A chart of zero duration has no
    # scale, so everything collapses to zero rather than dividing by zero.
    my $to_pixels = sub
    {
        my ($span) = @_;
        return 0 if ($duration == 0);
        return ($span / $duration) * $table_width;
    };
    # We need to scale these depending on the timing of this thread relative to
    # the master thread
    my $left = 0;
    if ($start != $tstart)
    {
        $left = $to_pixels->($rtstart);
    }
    # - subtract any left offset from width
    my $width = $table_width - $left;
    # - right offset directly subtracted from width
    if ($end != $tend)
    {
        $width = $width - $to_pixels->($duration - $rtend);
    }
    # Round things off (simple dutch rounding)
    $left = int($left + 0.5);
    $width = int($width + 0.5);
    # Output the bar for this master/worker
    my $html = '<div class="thread ' . $class . '" style="left:' . $left . 'px;width:' . $width . 'px;">';
    if ($class eq 'master')
    {
        $html .= '<div style="background-color:blue;margin-bottom:1px">';
    }
    $html .= '<div class="time" style="display:table-cell">' . &renderTime($rtstart) . '</div><div style="display:table-cell;padding-left:20px;width:100%;">' . ucfirst($class) . ': ' . &_escapeHTML($tname) . '</div><div class="time" style="display:table-cell">' . &renderTime($rtend) . '</div></div>';
    # Job keys are times, so they must be ordered as numbers
    foreach my $jstart (sort { $a <=> $b } keys %{$jobs})
    {
        my $job = $jobs->{$jstart};
        my $rjstart = $jstart - $start;
        my $rpstart = $job->{'PS'} - $start;
        my $rpend = $job->{'PE'} - $start;
        my $rjend = $job->{'E'} - $start;
        my $jduration = $job->{'E'} - $jstart;
        my $io_duration = $rpstart - $rjstart;
        my $cpu_duration = $rpend - $rpstart;
        # Scale Job co-ordinates
        my $jleft = int($to_pixels->($rjstart) + 0.5);
        # -2 for left and right 1 pixel border
        my $jwidth = int($to_pixels->($jduration) + 0.5) - 2;
        if ($jleft + $jwidth > $left + $width)
        {
            $jwidth = ($left + $width) - $jleft;
        }
        # Very short jobs can end up below zero, which is not a valid width
        $jwidth = 0 if ($jwidth < 0);
        # Then scale process timings within that!
        my $rpleft = int($to_pixels->($rpstart - $rjstart) + 0.5);
        my $rpwidth = $jwidth - $rpleft;
        $rpwidth = 0 if ($rpwidth < 0);
        # Share of the job spent processing, taken from the drawn widths; jobs
        # too short to have any width fall back to the underlying times
        my $cpu_percent = 0;
        if ($jwidth > 0)
        {
            $cpu_percent = int((($rpwidth / $jwidth) * 100) + 0.5);
        }
        elsif ($jduration > 0)
        {
            $cpu_percent = int((($cpu_duration / $jduration) * 100) + 0.5);
        }
        my $dl = $job->{'DL'};
        &debugPrint('Data Locality? ' . $data_locality . ' DL? ' . (defined $dl ? $dl : ''));
        # A job is flagged when flagging is on and it was not data local
        my $non_local = ($data_locality > 1 && (!defined $dl || $dl != 1)) ? 1 : 0;
        my $filename = &_escapeHTML($job->{'FN'});
        my $percent_complete = defined $job->{'PC'} ? $job->{'PC'} : 'NA';
        $html .= '<div class="job" style="left:' . $jleft . 'px;width:' . $jwidth . 'px;';
        if ($non_local)
        {
            $html .= 'border:1px dashed black;';
        }
        $html .= '" title="FN:' . $filename . ', S:' . &renderTime($rjstart) . ', E:' . &renderTime($rjend) . ', CPU: ' . $cpu_percent . '% [' . &renderTime($io_duration) . ', ' . &renderTime($cpu_duration) . ', PC: ' . &_escapeHTML($percent_complete) . '%]"><span class="process" style="left:' . $rpleft . 'px;width:' . $rpwidth . 'px">&nbsp;</span><span class="label"';
        if ($non_local)
        {
            $html .= ' style="color:#FF0000"';
        }
        $html .= '>' . $filename;
        if ($percent_complete ne 'NA')
        {
            $html .= ' <small>[' . &_escapeHTML($percent_complete) . '%]</small>';
        }
        if ($non_local)
        {
            $html .= ' [NL]';
        }
        $html .= '</span></div>';
    }
    return $html;
}
495## renderLine() ##
496
497
## @function renderTime()
#
# Format a number of seconds for display, as '<h>h<mm>m<ss>s' when at least
# an hour long and as '<m>m<ss>s' otherwise.
#
# @param $seconds  the length of time in seconds
# @return the formatted string
#
sub renderTime
{
    my ($seconds) = @_;
    # Whole hours first, then whole minutes out of what is left over
    my $hours = floor($seconds / (60 * 60));
    my $remainder = $seconds - ($hours * 60 * 60);
    my $minutes = floor($remainder / 60);
    $remainder = $remainder - ($minutes * 60);
    # The hours are only shown when there are any
    return ($hours > 0)
        ? sprintf('%dh%02dm%02ds', $hours, $minutes, $remainder)
        : sprintf('%dm%02ds', $minutes, $remainder);
}
Note: See TracBrowser for help on using the repository browser.