root/gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl @ 29663

Revision 29663, 18.5 KB (checked in by jmt12, 5 years ago)

Supporting grayscale printing, fixing mismatched tags and speechmarks, and making data locality and incomplete import occurrences more obvious

  • Property svn:executable set to *
Line 
1#!/usr/bin/perl
2
3# Pragma
4use strict;
5use warnings;
6
7# Modules
8use Sort::Naturally;
9use POSIX qw(floor strftime);
10
print "\n===== Generate Timing (GANTT) =====\n";

# 0. Init
# - configurables
my $chart_width = 1600;
# - any video more than 95% complete is probably complete with rounding errors
my $complete_threshold = 95;

my $debug = 0;
# - remembers whether -grayscale was given so the summary below can report it
my $grayscale = 0;
my $color_master = 'blue';
my $color_worker = 'green';
my $color_nlocal = 'red';
my $disable_header = 0;
# - 0 means "no limit" on the number of worker rows drawn
my $max_worker_count = 0;
# - globals
my $chart_count = 0;
my @months = ("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec");

# 1. Parse options
# Note: printUsage() adds the 'Error! ' prefix itself and then dies, so the
# messages passed to it below are given without a prefix.
while (defined $ARGV[0] && $ARGV[0] =~ /^-/)
{
  my $option = shift(@ARGV);
  if ($option eq '-debug')
  {
    $debug = 1;
  }
  elsif ($option eq '-width')
  {
    if (!defined $ARGV[0])
    {
      &printUsage('No width value specified');
    }
    my $value = shift(@ARGV);
    if ($value !~ /^\d+$/)
    {
      &printUsage('Chart width not a number');
    }
    $chart_width = $value;
  }
  elsif ($option eq '-grayscale')
  {
    $grayscale = 1;
    $color_master = '#1D1D1D';
    $color_worker = '#969696';
    $color_nlocal = '#4C4C4C';
  }
  elsif ($option eq '-noheader')
  {
    $disable_header = 1;
  }
  elsif ($option eq '-maxworkers')
  {
    if (!defined $ARGV[0])
    {
      &printUsage('No maxworkers value specified');
    }
    my $value = shift(@ARGV);
    if ($value !~ /^\d+$/)
    {
      &printUsage('Maxworkers not a number');
    }
    $max_worker_count = $value;
  }
  else
  {
    &printUsage('Unknown option: ' . $option);
  }
}
print "Chart Width: " . $chart_width . "px\n";
print "Grayscale? " . ($grayscale ? 'Yes' : 'No') . "\n";
print "Debug? " . ($debug ? 'Yes' : 'No') . "\n";
print "===================================\n\n";

# 2. Search for valid directories (containing timing.csv)
while (defined $ARGV[0])
{
  my $dir = shift(@ARGV);
  if (!-d $dir)
  {
    &printUsage('Not a directory: ' . $dir);
  }
  # Drop a single trailing slash or backslash
  if ($dir =~ /(.*)[\\\/]$/)
  {
    $dir = $1;
  }
  &searchForTimingCSV($dir);
}

# 3. Done
print "Complete!\n\n";
print "===================================\n";
print 'Generated ' . $chart_count . " charts\n";
print "===================================\n\n";
exit;
## main() ##
105
106
## @function searchForTimingCSV()
#
# Recursively searches a directory tree. Each directory that contains a
# timing.csv has a chart generated for it (via generateChart) before its
# subdirectories are searched in turn. Entries whose names begin with a dot
# are not descended into.
#
# @param $dir  path of the directory to search
#
sub searchForTimingCSV
{
  my $dir = shift(@_);
  # For every directory where we find a timing.csv we generate a gantt chart
  my $timing_path = &filenameCat($dir, 'timing.csv');
  if (-e $timing_path)
  {
    &generateChart($dir, $timing_path);
  }
  # We also recursively search for other directories containing timing.csv's
  opendir(my $dh, $dir) or &printError('Failed to open directory for reading: ' . $dir);
  my @files = readdir($dh);
  # The listing is fully read, so release the handle now rather than keeping
  # one directory handle open per level of recursion
  closedir($dh);
  foreach my $file (@files)
  {
    if ($file !~ /^\./)
    {
      my $path = &filenameCat($dir, $file);
      if (-d $path)
      {
        &searchForTimingCSV($path);
      }
    }
  }
}
## searchForTimingCSV() ##
134
135
## @function generateChart()
#
# Parses a timing.csv file and writes an HTML Gantt chart into the same
# directory, then increments the global chart counter.
#
# Comma separated columns as they are read below (0-based):
#  - master record ('M0' in column 1): 2=name, 3=start, 4=end
#  - worker record ('W<n>' in column 1): 0=record id, 2=hostname, 3=start,
#    4=end
#  - task record ('T<n>' in column 1): 3=start, 4=end, 5=value subtracted
#    from the end to obtain the processing start (presumably the processing
#    duration), 6=data-local flag, 7=record id of the owning worker,
#    8=file path, 9=percent complete (optional)
#
# @param $dir              directory the chart is written to; a trailing
#                          number in its name prefixes the chart filename
# @param $timing_csv_path  path of the timing.csv to parse
#
# Dies if either file cannot be opened (or the chart cannot be closed).
#
sub generateChart
{
  my ($dir, $timing_csv_path) = @_;
  my $import_dir;
  my ($epoc) = $dir =~ /(\d+)$/;
  # Directories that do not end in a number simply get an unprefixed chart
  my $gantt_path = $dir . '/' . ((defined $epoc) ? $epoc . '-' : '') . 'gantt.html';

  print ' * Generating chart for: ' . $dir . "\n";
  print ' - timing file: ' . $timing_csv_path . "\n";
  print ' - gantt chart: ' . $gantt_path . "\n";

  # Read in timing.csv and parse information into data structure
  print ' - parsing timing.csv... ';
  my $timing_data = {};
  my $id_2_worker_id = {};
  open(my $tin, '<:utf8', $timing_csv_path) or die('Error! Failed to open file for reading: ' . $timing_csv_path . ' (' . $! . ')');
  while (my $line = <$tin>)
  {
    # Strip the line ending up front so the last field never carries it
    $line =~ s/[\r\n]+$//;
    my @parts = split(/,/, $line);
    # Blank or truncated lines have no record type to dispatch on
    if (!defined $parts[1])
    {
      next;
    }
    if ($parts[1] eq 'M0')
    {
      $timing_data->{'M'} = {'N'=>$parts[2], 'S'=>$parts[3], 'E'=>$parts[4]};
    }
    elsif ($parts[1] =~ /W\d+/)
    {
      my $worker_id = $parts[1];
      my $hostname = $parts[2];
      # Alter the worker name for compute nodes so they can be naturally
      # sorted
      if (defined $hostname && $hostname =~ /compute-0-(\d+)/)
      {
        $worker_id = 'W' . $1;
      }
      $timing_data->{$worker_id} = {'N'=>$hostname, 'S'=>$parts[3], 'E'=>$parts[4], 'F'=>{}};
      $id_2_worker_id->{$parts[0]} = $worker_id;
    }
    elsif ($parts[1] =~ /T\d+/)
    {
      my $worker_id = (defined $parts[7]) ? $id_2_worker_id->{$parts[7]} : undef;
      # A task that refers to a worker we have not seen cannot be placed on
      # any row of the chart
      if (!defined $worker_id)
      {
        &debugPrint('skipping task with unknown worker: ' . $line);
        next;
      }
      my $stop = $parts[4];
      my $filepath = (defined $parts[8]) ? $parts[8] : '';
      $filepath =~ s/^\s+|\s+$//g;
      my $percent_complete = 'NA';
      if (defined($parts[9]))
      {
        $percent_complete = $parts[9];
        # An empty field, or anything at or above the threshold, counts as
        # complete
        if ($percent_complete !~ /\d/ || $percent_complete >= $complete_threshold)
        {
          $percent_complete = 'NA';
        }
      }
      $import_dir = &longestCommonPath($filepath, $import_dir);
      # Jobs are keyed by start time, so nudge duplicates to keep them unique
      my $start_time = $parts[3];
      while (defined $timing_data->{$worker_id}->{'F'}->{$start_time})
      {
        $start_time += 0.000001;
      }
      $timing_data->{$worker_id}->{'F'}->{$start_time} = {'FN'=>$filepath, 'S'=>$parts[3], 'PS'=>($stop - $parts[5]), 'PE'=>$stop, 'E'=>$stop, 'DL'=>$parts[6], 'PC'=>$percent_complete};
    }
  }
  close($tin);
  my $number_of_workers = scalar(keys(%{$id_2_worker_id}));
  print "Done\n";

  # Without the master's start and end there is no time axis to scale against
  if (!defined $timing_data->{'M'} || !defined $timing_data->{'M'}->{'S'} || !defined $timing_data->{'M'}->{'E'})
  {
    print " - skipped: no complete master (M0) record found\n\n";
    return;
  }

  # 3. Produce pretty HTML chart of timing information including jobs
  print " - generating timing information as chart in HTML... ";
  open(my $html_out, '>:utf8', $gantt_path) or die('Error! Failed to open file for writing: ' . $gantt_path . ' (' . $! . ')');
  print $html_out "<html>\n";
  print $html_out '<head>' . "\n";
  print $html_out '<style type="text/css">' . "\n";
  print $html_out "body {margin:0px;padding:4px}\n";
  print $html_out 'div.thread {position:relative}' . "\n";
  print $html_out 'div.master {border:1px solid gray;color:white;font-weight:bold}' . "\n";
  print $html_out 'div.worker {border:1px solid black;background-color:' . $color_worker . ';color:white;font-weight:bold;margin-bottom:1px;}' . "\n";
  print $html_out 'div.time {font-size:smaller;font-weight:normal}' . "\n";
  print $html_out 'div.job {background-color:transparent;color:black;border:1px solid black;display:block;font-size:smaller;font-weight:normal;position:relative;text-align:left;margin-bottom:1px;}' . "\n";
  print $html_out 'span.process {z-index:-1;background-color:#C7C7C7;position:absolute}' . "\n";
  print $html_out 'div.label {z-index:1;background-color:transparent;white-space:nowrap;text-align:center}' . "\n";
  print $html_out "th {text-align:left}\n";
  print $html_out "tr.toprule th,tr.toprule td {border-top:2px solid black;width:17%}\n";
  print $html_out "tr.bottomrule th,tr.bottomrule td {border-bottom:2px solid black}\n";
  print $html_out '</style>' . "\n";
  print $html_out '</head>' . "\n";
  print $html_out "<body>\n";
  ##print $html_out "<h2>Parallel Import Timing Chart</h2>\n";
  print $html_out "<table style=\"border-collapse:collapse;width:" . $chart_width . "px;";
  if ($disable_header)
  {
    print $html_out "display:none;";
  }
  print $html_out "\">\n";

  my $total_duration = $timing_data->{'M'}->{'E'} - $timing_data->{'M'}->{'S'};
  my $file_count = 0;
  my $data_locality = 0;
  my $total_io_time = 0;
  my $total_process_time = 0;
  my $fastest_file = 0;
  my $slowest_file = 0;
  my $problem_files = 0;
  # Number of leading characters (common path plus separator) to trim from
  # each filename
  my $prefix_length = (defined $import_dir) ? length($import_dir) + 1 : 0;
  foreach my $worker_id (keys %{$timing_data})
  {
    if ($worker_id ne 'M')
    {
      foreach my $job_start ( keys %{$timing_data->{$worker_id}->{'F'}} )
      {
        my $job = $timing_data->{$worker_id}->{'F'}->{$job_start};
        my $process_start = $job->{'PS'};
        my $process_end = $job->{'PE'};
        my $job_end = $job->{'E'};
        my $percent_complete = $job->{'PC'};
        if ($process_start == 0 || $process_end == 0 || $job_end == 0 || ($percent_complete =~ /^\d+$/ && $percent_complete < $complete_threshold))
        {
          $problem_files++;
        }
        else
        {
          my $io_duration = ($process_start - $job_start) + ($job_end - $process_end);
          my $process_duration = $process_end - $process_start;
          my $file_duration = $io_duration + $process_duration;
          &debugPrint("filename: " . $job->{'FN'});
          &debugPrint("start: $job_start ps: $process_start pe: $process_end end: $job_end");
          &debugPrint("io: $io_duration process: $process_duration duration: $file_duration");
          # Running stats
          $total_io_time += $io_duration;
          $total_process_time += $process_duration;
          if ($fastest_file == 0 || $file_duration < $fastest_file)
          {
            $fastest_file = $file_duration;
          }
          if ($slowest_file == 0 || $file_duration > $slowest_file)
          {
            $slowest_file = $file_duration;
          }
        }
        # Shorten filename - but only when something remains after the common
        # path, which is not the case when the common path is the file itself
        # (e.g. a run that imported a single file)
        if (defined $job->{'FN'} && $job->{'FN'} ne '' && length($job->{'FN'}) > $prefix_length)
        {
          $job->{'FN'} = substr($job->{'FN'}, $prefix_length);
        }
        $file_count++;
        if (defined $job->{'DL'} && $job->{'DL'} == 1)
        {
          $data_locality++;
        }
      }
    }
  }
  # Guard the divisions below without distorting the totals that are displayed
  my $file_divisor = ($file_count > 0) ? $file_count : 1;
  my $process_divisor = ($total_process_time > 0) ? $total_process_time : 1;
  my $avg_processing_time = floor(($total_io_time + $total_process_time) / $file_divisor);
  my $avg_io_time = int(($total_io_time / $file_divisor) + 0.5);
  my $avg_cpu_time = int(($total_process_time / $file_divisor) + 0.5);

  print $html_out "<tr class=\"toprule\">\n";
  print $html_out ' <th style="width:17%;">Import Directory:</th><td style="width:83%;" colspan="5">' . ((defined $import_dir) ? $import_dir : '') . "</td>\n";
  print $html_out "</tr>\n";

  print $html_out "<tr>\n";
  my ($sec, $min, $hour, $day, $month, $year) = (localtime($timing_data->{'M'}->{'S'}))[0,1,2,3,4,5];
  print $html_out ' <th>Start Time:</th><td>' . sprintf('%04d%s%02d %02d:%02d:%02d', ($year+1900), $months[$month], $day, $hour, $min, $sec) . "</td>\n";
  ($sec, $min, $hour, $day, $month, $year) = (localtime($timing_data->{'M'}->{'E'}))[0,1,2,3,4,5];
  print $html_out ' <th>End Time:</th><td>' . sprintf('%04d%s%02d %02d:%02d:%02d', ($year+1900), $months[$month], $day, $hour, $min, $sec) . "</td>\n";
  print $html_out "  <th>Processing Time:</th><td>" . &renderTime($total_duration) . "</td>\n";
  print $html_out "</tr>\n";

  print $html_out "<tr>\n";
  print $html_out "  <th>Processing Threads:</th><td>" . $number_of_workers . "</td>\n";
  print $html_out "  <th>Files Processed:</th><td>" . $file_count . "</td>\n";
  print $html_out "  <th>Problem Files:</th><td>" . $problem_files . "</td>\n";
  print $html_out "</tr>\n";

  print $html_out "<tr>\n";
  print $html_out '  <th>Serial Processing Time:</th><td>' . &renderTime($total_process_time) . "</td>\n";
  print $html_out '  <th>Serial IO Time:</th><td>' . &renderTime($total_io_time) . "</td>\n";
  print $html_out '  <th>IO Percentage:</th><td>' . sprintf('%d%%', (($total_io_time / $process_divisor) * 100)) . "</td>\n";
  print $html_out "</tr>\n";

  print $html_out "<tr>\n";
  print $html_out "  <th>Avg Processing Time:</th><td>" . &renderTime($avg_processing_time) . "</td>\n";
  print $html_out "  <th>Avg File IO Time:</th><td>" . &renderTime($avg_io_time) . "</td>\n";
  print $html_out "  <th>Avg File CPU Time:</th><td>" . &renderTime($avg_cpu_time) . "</td>\n";
  print $html_out "</tr>\n";

  print $html_out "<tr class=\"bottomrule\">\n";
  print $html_out "  <th>Fastest File:</th><td>" . &renderTime($fastest_file) . "</td>\n";
  print $html_out "  <th>Slowest File:</th><td>" . &renderTime($slowest_file) . "</td>\n";
  print $html_out "  <th>Data Locality:</th><td>" . sprintf('%d%% [%d out of %d]', (($data_locality / $file_divisor) * 100), $data_locality, $file_count) . "</td>\n";
  print $html_out "</tr>\n";

  print $html_out "</table>\n";
  # The master row leaves its outer div open; the worker rows are emitted
  # inside it and it is closed after the loop below
  print $html_out renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'master', $timing_data->{'M'}->{'N'}, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, {}, $data_locality);
  my $worker_count = 0;
  foreach my $worker_id (nsort keys %{$timing_data})
  {
    if ($worker_id eq 'M')
    {
      next;
    }
    # A limit of N draws exactly N workers (0 means no limit)
    if ($max_worker_count > 0 && $worker_count >= $max_worker_count)
    {
      last;
    }
    my $data = $timing_data->{$worker_id};
    print $html_out renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'worker', $worker_id . ' [' . $data->{'N'} . ']', $data->{'S'}, $data->{'E'}, $data->{'F'}, 2); #$data_locality);
    $worker_count++;
  }
  print $html_out '</div>' . "\n";
  print $html_out "</body>\n";
  print $html_out "</html>";
  # Buffered write errors only surface when the handle is closed
  close($html_out) or die('Error! Failed to finish writing file: ' . $gantt_path . ' (' . $! . ')');
  print "Done!\n\n";
  $chart_count++;
}
## generateChart() ##
372
373
## @function debugPrint()
#
# Writes a message, tagged with '[DEBUG] ', to STDERR - but only while the
# script-wide debug flag is switched on.
#
# @param $text  the message to report
#
sub debugPrint
{
  my ($text) = @_;
  print STDERR '[DEBUG] ' . $text . "\n" if ($debug);
}
## debugPrint() ##
385
386
## @function filenameCat
#
# Glues the given path segments together with forward slashes, then squashes
# every run of slashes and/or backslashes down to a single forward slash.
#
# @param  @_  the path segments, in order
# @return the combined path
#
sub filenameCat
{
  my @segments = @_;
  (my $combined = join('/', @segments)) =~ s{[\\/]+}{/}g;
  return $combined;
}
## filenameCat() ##
396
397
## @function printError()
#
# Aborts the script with the given message, prefixed by 'Error! '. The text
# ends in newlines, so Perl does not append the file and line number.
#
# @param $reason  description of what went wrong
#
sub printError
{
  my ($reason) = @_;
  my $report = 'Error! ' . $reason;
  die($report . "\n\n");
}
## printError() ##
406
407
## @function printUsage()
#
# Reports an optional error message and then aborts the script with the usage
# summary.
#
# @param $msg  (optional) what was wrong with the command line. The 'Error! '
#              prefix is added here; a message that already starts with it is
#              printed as given, so the prefix is never doubled.
#
sub printUsage
{
  my $msg = shift(@_);
  if (defined $msg)
  {
    if ($msg !~ /^Error!/)
    {
      $msg = 'Error! ' . $msg;
    }
    print $msg . "\n";
  }
  die("Usage: generate_gantt.pl [-debug] [-width <width in pixels>] [-grayscale] [-noheader] [-maxworkers <count>] <dir> [<dir> ...]\n\n");
}
## printUsage() ##
420
421
## @function longestCommonPath
#
# Returns the leading run of path components shared by two paths. It is called
# repeatedly to narrow a running "common directory" as each new file path is
# seen.
#
# @param  $path_new      the path just encountered
# @param  $path_current  the common path so far, or undef on the first call
# @return $path_new unchanged when $path_current is undef, otherwise the
#         shared leading components joined with '/' (possibly empty)
#
sub longestCommonPath
{
  my ($path_new, $path_current) = @_;
  if (!defined $path_current)
  {
    return $path_new;
  }
  # Hide protocol before we split by slash, remembering whether there was one
  # so that only a genuine protocol separator is restored afterwards
  my $had_protocol = ($path_new =~ s/:\/\//:/) ? 1 : 0;
  $path_current =~ s/:\/\//:/;
  my @path_new_parts = split(/\//, $path_new);
  my @path_current_parts = split(/\//, $path_current);
  # Only compare positions that exist in both paths
  my $limit = scalar(@path_current_parts);
  if (scalar(@path_new_parts) < $limit)
  {
    $limit = scalar(@path_new_parts);
  }
  my @path_parts;
  for (my $i = 0; $i < $limit; $i++)
  {
    if ($path_current_parts[$i] ne $path_new_parts[$i])
    {
      last;
    }
    push(@path_parts, $path_new_parts[$i]);
  }
  my $result = &filenameCat(@path_parts);
  # Restore protocol - a colon elsewhere in an ordinary path is left untouched
  if ($had_protocol)
  {
    $result =~ s/:/:\/\//;
  }
  return $result;
}
## longestCommonPath() ##
458
459
## @function renderLine()
#
# Builds the HTML for one row of the chart: the bar for a master or worker
# thread followed by one box per job that thread ran.
#
# @param $table_width    width of the chart in pixels
# @param $start          start time of the whole chart (the master's start)
# @param $end            end time of the whole chart (the master's end)
# @param $class          'master' or 'worker'; used as CSS class and label
# @param $tname          name shown on the bar
# @param $tstart         start time of this thread
# @param $tend           end time of this thread
# @param $jobs           hash ref of jobs keyed by start time; each job has
#                        the keys FN, PS, PE, E, DL and PC
# @param $data_locality  non-local jobs (DL other than 1) are only highlighted
#                        when this is greater than 1
# @return the HTML fragment. For 'master' the outer thread div is left open
#         (only the inner coloured div is closed), so the caller is expected
#         to emit the matching closing tag.
#
sub renderLine
{
  my ($table_width, $start, $end, $class, $tname, $tstart, $tend, $jobs, $data_locality) = @_;
  &debugPrint("renderLine($table_width, $start, $end, $class, $tname, $tstart, $tend, <jobs>, $data_locality)");
  # All timings need to be relative to 0 (relative start)
  my $duration = $end - $start;
  my $rtstart = $tstart - $start;
  my $rtend = $tend - $start;
  # We need to scale these depending on the timing of this thread relative to
  # the master thread. A zero-length chart has nothing to scale against, so
  # the bar then simply spans the full width.
  my $width = $table_width;
  my $left = 0;
  if ($duration != 0 && $start != $tstart)
  {
    my $left_offset_percent = $rtstart / $duration;
    $left = $left_offset_percent * $table_width;
  }
  # - subtract any left offset from width
  $width = $width - $left;
  # - right offset directly subtracted from width
  if ($duration != 0 && $end != $tend)
  {
    my $right_offset_percent = ($duration - $rtend) / $duration;
    my $right = $right_offset_percent * $table_width;
    $width = $width - $right;
  }
  # Round things off (round half up)
  $left = int($left + 0.5);
  $width = int($width + 0.5);
  # Output the bar for this master/worker
  my $html = '<div class="thread ' . $class . '" style="left:' . $left . 'px;width:' . $width . 'px;">';
  if ($class eq 'master')
  {
    $html .= '<div style="background-color:' . $color_master . ';margin-bottom:1px">';
  }
  $html .= '<div class="time" style="display:table-cell">' . &renderTime($rtstart) . '</div><div style="display:table-cell;padding-left:20px;width:100%;">' . ucfirst($class) . ': ' . $tname . '</div><div class="time" style="display:table-cell">' . &renderTime($rtend) . '</div></div>';
  # Job keys are timestamps, so they must be ordered numerically
  foreach my $jstart (sort { $a <=> $b } keys %{$jobs})
  {
    my $job = $jobs->{$jstart};
    my $filename = (defined $job->{'FN'}) ? $job->{'FN'} : '';
    my $rjstart = $jstart - $start;
    my $rpstart = $job->{'PS'} - $start;
    my $rpend = $job->{'PE'} - $start;
    my $rjend = $job->{'E'} - $start;
    my $jduration = $job->{'E'} - $jstart;
    my $io_duration = $rpstart - $rjstart;
    my $cpu_duration = $rpend - $rpstart;
    # Scale Job co-ordinates
    my $jleft_percent = ($duration != 0) ? $rjstart / $duration : 0;
    my $jleft = int(($jleft_percent * $table_width) + 0.5);
    my $jwidth_percent = ($duration != 0) ? $jduration / $duration : 0;
    # -2 for left and right 1 pixel border
    my $jwidth = int(($jwidth_percent * $table_width) + 0.5) - 2;
    if ($jleft + $jwidth > $left + $width)
    {
      $jwidth = ($left + $width) - $jleft;
    }
    # Very short jobs can scale to nothing (or less) once the borders are
    # subtracted; a negative width is not valid CSS
    if ($jwidth < 0)
    {
      $jwidth = 0;
    }
    # Then scale process timings within that!
    my $rpleft_percent = ($duration != 0) ? ($rpstart - $rjstart) / $duration : 0;
    my $rpleft = int(($rpleft_percent * $table_width) + 0.5);
    my $rpwidth = $jwidth - $rpleft;
    if ($rpwidth < 0)
    {
      $rpwidth = 0;
    }
    my $cpu_percent = 0;
    if ($jwidth > 0)
    {
      $cpu_percent = int((($rpwidth / $jwidth) * 100) + 0.5);
    }
    elsif ($jduration > 0)
    {
      # No pixels to measure, so fall back to the timings themselves
      $cpu_percent = int((($cpu_duration / $jduration) * 100) + 0.5);
    }
    # Non-local jobs are drawn with a lighter border, a different text colour
    # and an [NL] marker
    my $non_local = ($data_locality > 1 && (!defined $job->{'DL'} || $job->{'DL'} != 1)) ? 1 : 0;
    $html .= '<div class="job" style="left:' . $jleft . 'px;width:' . $jwidth . 'px;';
    if ($non_local)
    {
      $html .= 'border:1px solid #C7C7C7;';
    }
    $html .= '" title="FN:' . $filename . ', S:' . &renderTime($rjstart) . ', E:' . &renderTime($rjend) . ', CPU: ' . $cpu_percent . '% [' . &renderTime($io_duration) . ', ' . &renderTime($cpu_duration) . ', PC: ' . $job->{'PC'} . '%]"><span class="process" style="left:' . $rpleft . 'px;width:' . $rpwidth . 'px">&nbsp;</span><div class="label" style="width:' . $jwidth;
    if ($non_local)
    {
      $html .= ';color:' . $color_nlocal;
    }
    $html .=  '">' . $filename;
    if ($job->{'PC'} ne 'NA')
    {
      $html .= ' <b>[Incomplete! ' . $job->{'PC'} . '%]</b>';
    }
    if ($non_local)
    {
      $html .= ' [NL]';
    }
    $html .= '</div></div>';
  }
  return $html;
}
## renderLine() ##
548
549
## @function renderTime()
#
# Formats a duration in seconds as '<m>m<ss>s', or '<h>h<mm>m<ss>s' once it
# reaches an hour. Fractions of a second are dropped.
#
# @param  $seconds  the duration; undef is treated as 0. A negative duration
#                   is rendered as its magnitude with a leading '-'.
# @return the formatted string
#
sub renderTime
{
  my ($seconds) = @_;
  if (!defined $seconds)
  {
    $seconds = 0;
  }
  # floor() on a negative value would borrow from the next unit up (-5 would
  # come out as 59m55s), so format the magnitude and add the sign back
  my $sign = '';
  if ($seconds < 0)
  {
    $seconds = -$seconds;
    # anything under a second renders as zero, which needs no sign
    if ($seconds >= 1)
    {
      $sign = '-';
    }
  }
  my $time_str = '';
  # determine how many hours
  my $an_hour = 60 * 60;
  my $hours = floor($seconds / $an_hour);
  $seconds = $seconds - ($hours * $an_hour);
  # then how many minutes remain
  my $a_minute = 60;
  my $minutes = floor($seconds / $a_minute);
  $seconds = $seconds - ($minutes * $a_minute);
  if ($hours > 0)
  {
    $time_str = sprintf('%dh%02dm%02ds', $hours, $minutes, $seconds);
  }
  else
  {
    $time_str = sprintf('%dm%02ds', $minutes, $seconds);
  }
  return $sign . $time_str;
}
Note: See TracBrowser for help on using the browser.