source: gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl@ 27643

Last change on this file since 27643 was 27643, checked in by jmt12, 11 years ago

Changed the script generator so it can recurse through directories and generate several charts at once. Added a few more tidbits of information, such as average IO time per file

  • Property svn:executable set to *
File size: 15.0 KB
RevLine 
[27543]1#!/usr/bin/perl
2
[27643]3# Pragma
[27543]4use strict;
5use warnings;
[27643]6use 5.012; # so readdir assigns to $_ in a lone while test
[27543]7
[27643]8# Modules
[27543]9use Sort::Naturally;
10use POSIX qw(floor strftime);
11
print "\n===== Generate Timing (GANTT) =====\n";

# 0. Init
# - configurables
my $chart_width = 1600;
my $debug = 0;
# - globals (shared with the subroutines defined further down this file)
my $chart_count = 0;
my @months = ("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec");

# 1. Parse options
# - every leading argument starting with a hyphen is treated as an option
# - printUsage() prepends 'Error! ' to its message itself, so the messages
#   passed below must not repeat that prefix
while (defined $ARGV[0] && $ARGV[0] =~ /^-/)
{
  my $option = shift(@ARGV);
  if ($option eq '-debug')
  {
    $debug = 1;
  }
  elsif ($option eq '-width')
  {
    if (!defined $ARGV[0])
    {
      &printUsage('No width value specified');
    }
    my $value = shift(@ARGV);
    if ($value !~ /^\d+$/)
    {
      &printUsage('Chart width not a number');
    }
    $chart_width = $value;
  }
  else
  {
    &printUsage('Unknown option: ' . $option);
  }
}
print "Chart Width: " . $chart_width . "px\n";
print "Debug? " . ($debug ? 'Yes' : 'No') . "\n";
print "===================================\n\n";

# 2. Search for valid directories (containing timing.csv)
# - at least one directory is required, otherwise there is nothing to do
if (!defined $ARGV[0])
{
  &printUsage('No directory specified');
}
while (defined $ARGV[0])
{
  my $dir = shift(@ARGV);
  if (!-d $dir)
  {
    &printUsage('Not a directory: ' . $dir);
  }
  &searchForTimingCSV($dir);
}

# 3. Done
print "Complete!\n\n";
print "===================================\n";
print 'Generated ' . $chart_count . " charts\n";
print "===================================\n\n";
exit;
## main() ##
[27543]70
[27643]71
## @function searchForTimingCSV()
#
# Walks the directory tree rooted at the given directory. A chart is generated
# for every directory found to contain a timing.csv file. Entries whose names
# begin with a period are never descended into.
#
# @param  $dir  path of the directory to start searching from
#
sub searchForTimingCSV
{
  my ($dir) = @_;
  # Chart this directory first if it holds timing information
  my $timing_path = &filenameCat($dir, 'timing.csv');
  &generateChart($dir, $timing_path) if (-e $timing_path);
  # Then look through each subdirectory for further timing.csv files
  opendir(my $dh, $dir) or &printError('Failed to open directory for reading: ' . $dir);
  while (defined(my $entry = readdir($dh)))
  {
    # - ignore anything starting with a period (includes '.' and '..')
    next if ($entry =~ /^\./);
    my $path = &filenameCat($dir, $entry);
    # - only directories are of interest
    next unless (-d $path);
    &searchForTimingCSV($path);
  }
}
## searchForTimingCSV() ##
99
100
## @function generateChart()
#
# Parses a timing.csv file and writes an HTML report into the given directory
# containing a table of summary statistics followed by a gantt-style chart
# with one bar for the master thread and one for each worker thread. The
# global chart counter is incremented once the report has been written.
#
# CSV fields as consumed by this function (0-based):
#  - master row (field 1 is 'M0'):      2 = name, 3 = start, 4 = end
#  - worker row (field 1 matches W<n>): 0 = id, 2 = name, 3 = start, 4 = end
#  - task row (field 1 matches T<n>):   3 = start, 4 = end, 5 = value
#    subtracted from the end to give the process start, 6 = data locality
#    flag, 7 = id of the owning worker, 8 = file path
#
# Dies if either file cannot be opened or if no master row is present.
#
# @param  $dir              directory the chart is written into; any digits
#                           ending this path prefix the chart's filename
# @param  $timing_csv_path  path of the timing.csv file to parse
#
sub generateChart
{
  my $dir = shift(@_);
  my $timing_csv_path = shift(@_);
  my $import_dir;
  # Name the chart after the digits ending the directory path, falling back to
  # a plain 'gantt.html' when there are none (rather than concatenating undef)
  my ($epoc) = $dir =~ /(\d+)$/;
  my $gantt_path = $dir . '/' . (defined $epoc ? $epoc . '-' : '') . 'gantt.html';

  print ' * Generating chart for: ' . $dir . "\n";
  print ' - timing file: ' . $timing_csv_path . "\n";
  print ' - gantt chart: ' . $gantt_path . "\n";

  # Read in timing.csv and parse information into data structure
  print ' - parsing timing.csv... ';
  my $timing_data = {};
  my $id_2_worker_id = {};
  open(my $tin, '<:utf8', $timing_csv_path) or die('Error! Failed to open file for reading: ' . $timing_csv_path);
  while (my $line = <$tin>)
  {
    my @parts = split(/,/, $line);
    # Blank or truncated lines have no record type to dispatch on
    next if (!defined $parts[1]);
    if ($parts[1] eq 'M0')
    {
      $timing_data->{'M'} = {'N'=>$parts[2], 'S'=>$parts[3], 'E'=>$parts[4]};
    }
    elsif ($parts[1] =~ /W\d+/)
    {
      $timing_data->{$parts[1]} = {'N'=>$parts[2], 'S'=>$parts[3], 'E'=>$parts[4], 'F'=>{}};
      $id_2_worker_id->{$parts[0]} = $parts[1];
    }
    elsif ($parts[1] =~ /T\d+/)
    {
      # A task must name a file and belong to a worker we have already seen
      if (!defined $parts[8] || !defined $id_2_worker_id->{$parts[7]})
      {
        &debugPrint('skipping incomplete task record: ' . $line);
        next;
      }
      my $worker_id = $id_2_worker_id->{$parts[7]};
      my $stop = $parts[4];
      my $filepath = $parts[8];
      $filepath =~ s/^\s+|\s+$//g;
      $import_dir = &longestCommonPath($filepath, $import_dir);
      # Note that process end and job end are both recorded as the stop time
      $timing_data->{$worker_id}->{'F'}->{$parts[3]} = {'FN'=>$filepath, 'S'=>$parts[3], 'PS'=>($stop - $parts[5]), 'PE'=>$stop, 'E'=>$stop, 'DL'=>$parts[6]};
    }
  }
  close($tin);
  # Everything in the chart is positioned relative to the master thread
  if (!defined $timing_data->{'M'})
  {
    die('Error! No master thread record found in: ' . $timing_csv_path);
  }
  my $number_of_workers = scalar(keys(%{$id_2_worker_id}));
  print "Done\n";

  # Gather statistics over every file processed by every worker
  my $total_duration = $timing_data->{'M'}->{'E'} - $timing_data->{'M'}->{'S'};
  my $file_count = 0;
  my $data_locality = 0;
  my $total_io_time = 0;
  my $total_process_time = 0;
  my $fastest_file = 0;
  my $slowest_file = 0;
  my $problem_files = 0;
  foreach my $worker_id (keys %{$timing_data})
  {
    next if ($worker_id eq 'M');
    my $files = $timing_data->{$worker_id}->{'F'};
    foreach my $job_start (keys %{$files})
    {
      my $file = $files->{$job_start};
      my $process_start = $file->{'PS'};
      my $process_end = $file->{'PE'};
      my $job_end = $file->{'E'};
      # A zero timestamp means the record is unusable for timing purposes
      if ($process_start == 0 || $process_end == 0 || $job_end == 0)
      {
        $problem_files++;
      }
      else
      {
        my $io_duration = ($process_start - $job_start) + ($job_end - $process_end);
        my $process_duration = $process_end - $process_start;
        my $file_duration = $io_duration + $process_duration;
        &debugPrint("filename: " . $file->{'FN'});
        &debugPrint("start: $job_start ps: $process_start pe: $process_end end: $job_end");
        &debugPrint("io: $io_duration process: $process_duration duration: $file_duration");
        # Running stats
        $total_io_time += $io_duration;
        $total_process_time += $process_duration;
        if ($fastest_file == 0 || $file_duration < $fastest_file)
        {
          $fastest_file = $file_duration;
        }
        if ($slowest_file == 0 || $file_duration > $slowest_file)
        {
          $slowest_file = $file_duration;
        }
      }
      # Shorten filename, but only when the import directory really is a
      # leading path of it (a blind substr can chop characters or yield undef)
      if (defined $file->{'FN'} && defined $import_dir && index($file->{'FN'}, $import_dir . '/') == 0)
      {
        $file->{'FN'} = substr($file->{'FN'}, length($import_dir) + 1);
      }
      $file_count++;
      if ($file->{'DL'} == 1)
      {
        $data_locality++;
      }
    }
  }
  # Averages are left at zero when no files were processed (avoids dividing
  # by zero)
  my $avg_processing_time = 0;
  my $avg_io_time = 0;
  my $avg_cpu_time = 0;
  if ($file_count > 0)
  {
    $avg_processing_time = floor(($total_io_time + $total_process_time) / $file_count);
    $avg_io_time = int(($total_io_time / $file_count) + 0.5);
    $avg_cpu_time = int(($total_process_time / $file_count) + 0.5);
  }
  my $io_percentage = 'N/A';
  if ($total_process_time > 0)
  {
    $io_percentage = sprintf('%d%%', (($total_io_time / $total_process_time) * 100));
  }
  # The import directory comes from the data file, so escape it for HTML
  my $import_dir_html = (defined $import_dir ? $import_dir : '');
  $import_dir_html =~ s/&/&amp;/g;
  $import_dir_html =~ s/</&lt;/g;
  $import_dir_html =~ s/>/&gt;/g;

  # Produce pretty HTML chart of timing information including jobs
  print " - generating timing information as chart in HTML... ";
  open(my $html_out, '>:utf8', $gantt_path) or die('Error! Failed to open file for writing: ' . $gantt_path);
  my @preamble = (
    '<html>',
    '<head>',
    '<style type="text/css">',
    'div.thread {position:relative}',
    'div.master {border:1px solid gray;color:white;font-weight:bold}',
    'div.worker {border:1px solid black;background-color:green;color:white;font-weight:bold}',
    'div.time {font-size:smaller;font-weight:normal}',
    'div.job {background-color:transparent;color:black;border:1px solid black;display:block;font-size:smaller;position:relative;text-align:center;overflow:hidden}',
    'span.process {z-index:-1;background-color:#C7C7C7;position:absolute}',
    'span.label {z-index:1;background-color:transparent;overflow:hidden;white-space:nowrap;}',
    'th {text-align:left}',
    '</style>',
    '</head>',
    '<body>',
    '<h2>Statistics</h2>',
    '<table>',
  );
  foreach my $preamble_line (@preamble)
  {
    print $html_out $preamble_line . "\n";
  }

  print $html_out "<tr><th>Import Directory:</th><td>" . $import_dir_html . "</td></tr>\n";
  my ($sec, $min, $hour, $day, $month, $year) = (localtime($timing_data->{'M'}->{'S'}))[0,1,2,3,4,5];
  print $html_out "<tr><th>Start Time:</th><td>" . sprintf('%04d%s%02d %02d:%02d:%02d', ($year+1900), $months[$month], $day, $hour, $min, $sec) . "</td></tr>\n";
  print $html_out "<tr><th>Processing Time:</th><td>" . &renderTime($total_duration) . "</td></tr>\n";
  print $html_out "<tr><th>Processing Threads:</th><td>" . $number_of_workers . "</td></tr>\n";
  print $html_out "<tr><th>Files Processed:</th><td>" . $file_count . "</td></tr>\n";
  if ($data_locality > 0)
  {
    print $html_out "<tr><th>Data Locality:</th><td>" . sprintf('%d%% [%d out of %d]', (($data_locality / $file_count) * 100), $data_locality, $file_count) . "</td></tr>\n";
  }
  print $html_out "<tr><th>Serial Processing Time:</th><td>" . &renderTime($total_process_time) . "</td></tr>\n";
  print $html_out "<tr><th>Serial IO Time:</th><td>" . &renderTime($total_io_time) . "</td></tr>\n";
  print $html_out '<tr><th>IO Percentage:</th><td>' . $io_percentage . "</td></tr>\n";
  print $html_out "<tr><th>Average File Processing Time:</th><td>" . &renderTime($avg_processing_time) . "</td></tr>\n";
  print $html_out "<tr><th>Average File IO Time:</th><td>" . &renderTime($avg_io_time) . "</td></tr>\n";
  print $html_out "<tr><th>Average File CPU Time:</th><td>" . &renderTime($avg_cpu_time) . "</td></tr>\n";
  print $html_out "<tr><th>Fastest File:</th><td>" . &renderTime($fastest_file) . "</td></tr>\n";
  print $html_out "<tr><th>Slowest File:</th><td>" . &renderTime($slowest_file) . "</td></tr>\n";
  print $html_out "<tr><th>Problem Files:</th><td>" . $problem_files . "</td></tr>\n";

  print $html_out "</table>\n";
  print $html_out "<hr />\n";
  print $html_out "<h2>Timing Chart (Gantt)</h2>\n";
  # The master's bar comes first; renderLine() leaves its outer div open so
  # that the worker bars which follow are nested inside it
  print $html_out renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'master', $timing_data->{'M'}->{'N'}, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, {});
  foreach my $worker_id (nsort(keys(%{$timing_data})))
  {
    if ($worker_id ne 'M')
    {
      my $data = $timing_data->{$worker_id};
      print $html_out renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'worker', $worker_id . ' [' . $data->{'N'} . ']', $data->{'S'}, $data->{'E'}, $data->{'F'});
    }
  }
  # Close the master's outer div
  print $html_out '</div>' . "\n";
  print $html_out "</body>\n";
  print $html_out "</html>";
  # Buffered write errors only surface on close
  close($html_out) or die('Error! Failed to finish writing file: ' . $gantt_path);
  print "Done!\n\n";
  $chart_count++;
}
## generateChart() ##
[27543]271
272
## @function debugPrint()
#
# Writes a message, prefixed with '[DEBUG] ', to STDERR. Nothing is written
# unless the global debug flag is set.
#
# @param  $msg  the text to report
#
sub debugPrint
{
  my ($msg) = @_;
  print STDERR '[DEBUG] ' . $msg . "\n" if ($debug);
}
## debugPrint() ##
284
285
## @function filenameCat
#
# Joins the given path fragments with forward slashes. Every run of forward
# and/or back slashes in the joined string is reduced to one forward slash.
#
# @param  @_  the path fragments to join, in order
# @return the resulting path string
#
sub filenameCat
{
  my @fragments = @_;
  my $joined = join('/', @fragments);
  # Break the string apart on every run of slashes (the negative limit keeps
  # any trailing empty field, so a trailing slash survives) and then stitch
  # the pieces back together using single forward slashes
  my @pieces = split(m{[/\\]+}, $joined, -1);
  return join('/', @pieces);
}
## filenameCat() ##
295
[27643]296
## @function printError()
#
# Aborts the script by dying with the given message, prefixed by 'Error! '
# and followed by a blank line.
#
# @param  $msg  description of what went wrong
#
sub printError
{
  my ($msg) = @_;
  die("Error! $msg\n\n");
}
## printError() ##
305
306
## @function printUsage()
#
# Reports an optional error message and then aborts the script by dying with
# the usage summary. Both are written to STDERR so they stay together and in
# order.
#
# @param  $msg  (optional) description of the problem; it is printed with an
#               'Error! ' prefix, so callers should not include one
#
sub printUsage
{
  my $msg = shift(@_);
  if (defined $msg)
  {
    print STDERR 'Error! ' . $msg . "\n";
  }
  die("Usage: generate_gantt.pl [-debug] [-width <width in pixels>] <dir> [<dir> ...]\n\n");
}
## printUsage() ##
319
320
## @function longestCommonPath
#
# Determines the leading path components shared by two paths. Called
# repeatedly, feeding the previous result back in as the second argument, it
# reduces a list of paths to their common leading path.
#
# @param  $path_new      the path to compare
# @param  $path_current  the path to compare it against; may be undefined
# @return $path_new unchanged when $path_current is undefined, otherwise the
#         shared leading components joined with slashes (possibly an empty
#         string)
#
sub longestCommonPath
{
  my ($path_new, $path_current) = @_;
  # Nothing to compare against yet
  if (!defined $path_current)
  {
    return $path_new;
  }
  # Hide protocol before we split by slash, remembering whether there was one
  # so we only restore it in that case (otherwise any path that merely
  # contains a colon, such as 'C:/data', would gain a bogus '//')
  my $had_protocol = ($path_new =~ s/:\/\//:/);
  $path_current =~ s/:\/\//:/;
  my @path_new_parts = split(/\//, $path_new);
  my @path_current_parts = split(/\//, $path_current);
  my @path_parts;
  # Stop at the end of whichever path is shorter so we never compare against
  # a component that does not exist
  for (my $i = 0; $i < scalar(@path_current_parts) && $i < scalar(@path_new_parts); $i++)
  {
    if ($path_current_parts[$i] ne $path_new_parts[$i])
    {
      last;
    }
    push(@path_parts, $path_new_parts[$i]);
  }
  my $result = &filenameCat(@path_parts);
  # Restore protocol
  if ($had_protocol)
  {
    $result =~ s/:/:\/\//;
  }
  return $result;
}
## longestCommonPath() ##
357
358
## @function renderLine()
#
# Builds the HTML for one row of the chart: a bar representing a thread,
# followed by one box for each job that thread ran. All positions and sizes
# are scaled so the period from $start to $end spans $table_width pixels.
#
# For the 'master' class an extra outer div is opened and deliberately left
# unclosed; the caller is responsible for closing it.
#
# @param  $table_width  width in pixels representing the whole chart period
# @param  $start        start time of the whole chart period
# @param  $end          end time of the whole chart period
# @param  $class        'master' or 'worker'; used as CSS class and label
# @param  $tname        display name for this thread
# @param  $tstart       start time of this thread
# @param  $tend         end time of this thread
# @param  $jobs         reference to a hash of jobs keyed by job start time,
#                       each a hash with keys PS, PE, E, DL and FN
# @return the HTML fragment as a string
#
sub renderLine
{
  my ($table_width, $start, $end, $class, $tname, $tstart, $tend, $jobs) = @_;
  &debugPrint("renderLine($table_width, $start, $end, $class, $tname, $tstart, $tend, <jobs>)");
  # All timings need to be relative to 0 (relative start)
  my $duration = $end - $start;
  my $rtstart = $tstart - $start;
  my $rtend = $tend - $start;
  # Converts a span of time into a (fractional) number of pixels. A chart
  # period of no length maps everything to zero rather than dividing by zero
  my $to_pixels = sub
  {
    my ($span) = @_;
    return 0 if ($duration <= 0);
    return ($span / $duration) * $table_width;
  };
  # Names come from the data file, so make them safe for HTML text and for
  # double-quoted attribute values
  my $escape_html = sub
  {
    my ($text) = @_;
    $text = '' if (!defined $text);
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
  };
  # We need to scale these depending on the timing of this thread relative to
  # the master thread
  my $width = $table_width;
  my $left = 0;
  if ($start != $tstart)
  {
    $left = $to_pixels->($rtstart);
  }
  # - subtract any left offset from width
  $width = $width - $left;
  # - right offset directly subtracted from width
  if ($end != $tend)
  {
    $width = $width - $to_pixels->($duration - $rtend);
  }
  # Round things off (to the nearest pixel, halves rounding up)
  $left = int($left + 0.5);
  $width = int($width + 0.5);
  # Output the bar for this master/worker
  my $html = '<div class="thread ' . $class . '" style="left:' . $left . 'px;width:' . $width . 'px;">';
  if ($class eq 'master')
  {
    $html .= '<div style="background-color:blue">';
  }
  $html .= '<div class="time" style="display:table-cell">' . &renderTime($rtstart) . '</div><div style="display:table-cell;padding-left:20px;width:100%;">' . ucfirst($class) . ': ' . $escape_html->($tname) . '</div><div class="time" style="display:table-cell">' . &renderTime($rtend) . '</div></div>';
  # Job start times are numbers, so order them numerically
  foreach my $jstart (sort { $a <=> $b } keys %{$jobs})
  {
    my $job = $jobs->{$jstart};
    my $rjstart = $jstart - $start;
    my $rpstart = $job->{'PS'} - $start;
    my $rpend = $job->{'PE'} - $start;
    my $rjend = $job->{'E'} - $start;
    my $jduration = $job->{'E'} - $jstart;
    my $io_duration = $rpstart - $rjstart;
    my $cpu_duration = $rpend - $rpstart;
    # Scale Job co-ordinates
    my $jleft = int($to_pixels->($rjstart) + 0.5);
    my $jwidth = int($to_pixels->($jduration) + 0.5);
    # - a job may not extend past the right edge of its thread's bar
    if ($jleft + $jwidth > $left + $width)
    {
      $jwidth = ($left + $width) - $jleft;
    }
    if ($jwidth < 0)
    {
      $jwidth = 0;
    }
    # Then scale process timings within that!
    my $rpleft = int($to_pixels->($rpstart - $rjstart) + 0.5);
    my $rpwidth = $jwidth - $rpleft;
    if ($rpwidth < 0)
    {
      $rpwidth = 0;
    }
    # The CPU share is worked out from the actual durations. Short jobs round
    # down to a width of zero pixels, so pixel widths cannot be divided safely
    my $cpu_percent = 0;
    if ($jduration > 0)
    {
      $cpu_percent = int((($cpu_duration / $jduration) * 100) + 0.5);
    }
    my $not_local = ($job->{'DL'} != 1);
    my $filename = $escape_html->($job->{'FN'});
    $html .= '<div class="job" style="left:' . $jleft . 'px;width:' . $jwidth . 'px;';
    # Jobs whose data was not local get a dashed border, red label and [NL] tag
    if ($not_local)
    {
      $html .= 'border:1px dashed black;';
    }
    $html .= '" title="FN:' . $filename . ', S:' . &renderTime($rjstart) . ', E:' . &renderTime($rjend) . ', CPU: ' . $cpu_percent . '% [' . &renderTime($io_duration) . ', ' . &renderTime($cpu_duration) . ']"><span class="process" style="left:' . $rpleft . 'px;width:' . $rpwidth . 'px">&nbsp;</span><span class="label"';
    if ($not_local)
    {
      $html .= ' style="color:#FF0000"';
    }
    $html .= '>' . $filename;
    if ($not_local)
    {
      $html .= ' [NL]';
    }
    $html .= '</span></div>';
  }
  return $html;
}
## renderLine() ##
[27543]441
[27551]442
## @function renderTime()
#
# Formats a number of seconds as a short human readable string, for example
# '1h01m01s', or '1m05s' when less than an hour. Any fraction of a second is
# discarded. Negative values are formatted by magnitude with a leading '-'
# (previously -1 came out as the misleading '59m59s').
#
# @param  $seconds  the length of time in seconds; undefined is treated as 0
# @return the formatted time string
#
sub renderTime
{
  my ($seconds) = @_;
  # Work in whole seconds, keeping the sign separate from the magnitude
  my $whole_seconds = (defined $seconds ? int($seconds) : 0);
  my $sign = '';
  if ($whole_seconds < 0)
  {
    $sign = '-';
    $whole_seconds = -$whole_seconds;
  }
  # Break the magnitude down into hours, minutes and seconds
  my $an_hour = 60 * 60;
  my $a_minute = 60;
  my $hours = int($whole_seconds / $an_hour);
  $whole_seconds = $whole_seconds - ($hours * $an_hour);
  my $minutes = int($whole_seconds / $a_minute);
  $whole_seconds = $whole_seconds - ($minutes * $a_minute);
  # Hours are only shown when there are some
  my $time_str = '';
  if ($hours > 0)
  {
    $time_str = sprintf('%s%dh%02dm%02ds', $sign, $hours, $minutes, $whole_seconds);
  }
  else
  {
    $time_str = sprintf('%s%dm%02ds', $sign, $minutes, $whole_seconds);
  }
  return $time_str;
}
## renderTime() ##
Note: See TracBrowser for help on using the repository browser.