root/gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl @ 27543

Revision 27543, 12.4 KB (checked in by jmt12, 7 years ago)

Adding generate_gantt.pl script in its original form - i.e. directly reads OpenMPI Greenstone logs and generates CSV of information and Gantt chart. I intended to split this script in half into a openmpi_report.pl and generate_gantt.pl

  • Property svn:executable set to *
  • Property svn:mime-type set to application/x-perl
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6use Sort::Naturally;
7use POSIX qw(floor strftime);
8
9print "\n===== Generate Timing (GANTT) =====\n";
10
11# 1. Initialization
12#my $chart_width = 1024;
13my $chart_width = 1536;
14#my $chart_width = 2048;
15print "Chart Width: " . $chart_width . "px\n";
16# 1.1 Store all information extracted in a cool data structure
17# - N = hostname, S = thread start, E = thread end
18my $timing_data = {'M' => {'N'=>'', 'S'=>0, 'E'=>0}};
19# 1.2 Check the file exists
20if (!-f $ARGV[0])
21{
22  die("Error! File can't be read: " . $ARGV[0]);
23}
24my $main_log_filename = $ARGV[0];
25# 1.2 From the filename we can parse in some information like the number of worker threads
26my $number_of_workers = 0;
27if ($main_log_filename =~ /\-W(\d+)E/)
28{
29  $number_of_workers = $1;
30}
31else
32{
33  die("Error! Malformed filename (expecting number of workers): " . $main_log_filename);
34}
35# 1.3 Initialize the data structure with the number of workers too (we don't
36#     know the number of files yet, so they'll have to be adding on the fly)
37for (my $i = 1; $i <= $number_of_workers; $i++)
38{
39  $timing_data->{'W' . $i} = {'N'=>'', 'S'=>0, 'E'=>0, 'F'=>{}};
40}
41
42# 2. Read in main log file
43print " * Reading main log: " . $main_log_filename . "\n";
44open(LOGIN, '<:utf8', $main_log_filename) or die("Error! Failed to open file for reading: " . $main_log_filename);
45my $line = '';
46my $currently_processing = {};
47while ($line = <LOGIN>)
48{
49  # 2.1 Parse in the Master thread start time
50  if ($line =~ /\[M\d?:(\d+)\] Starting on (.+)/)
51  {
52    $timing_data->{'M'}->{'S'} = $1;
53    $timing_data->{'M'}->{'N'} = $2;
54  }
55  elsif ($line =~ /\[(W\d+):(\d+)\] Starting on (.+)/)
56  {
57    my $worker_id = $1;
58    $timing_data->{$worker_id}->{'S'} = $2;
59    $timing_data->{$worker_id}->{'N'} = $3;
60  }
61  elsif ($line =~ /\[(W\d+):(\d+)\] Processing/)
62  {
63    my $worker_id = $1;
64    my $job_start_time = $2;
65    $timing_data->{$worker_id}->{'F'}->{$job_start_time} = {'FN'=>'', 'PS'=>0, 'PE'=>0, 'E'=>0}; 
66    $currently_processing->{$worker_id} = $job_start_time;
67  }
68  # 2.3 Or we may parse in the starting times for each working thread
69  # 2.4 Or we may also parse (the last encountered) completion time for each
70  #     working thread
71  elsif ($line =~ /\[(W\d+):(\d+)\] Process complete/)
72  {
73    my $worker_id = $1;
74    my $job_end_time = $2;
75    $timing_data->{$worker_id}->{'E'} = $job_end_time;
76    my $job_start_time = $currently_processing->{$worker_id};
77    $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'E'} = $job_end_time;
78    delete($currently_processing->{$worker_id});
79  }
80  # 2.5 Finally, we may parse in the Master thread end time
81  elsif ($line =~ /\[M\d?:(\d+)\] Master will exit when workers complete/)
82  {
83    $timing_data->{'M'}->{'E'} = $1;
84  }
85}
86close(LOGIN);
87
88# 3. Read each of worker logs parsing in information about the files processed
89#    - each will be stored (in an associative array) against its start time
90print " * Reading worker logs";
91foreach my $worker_id (nsort keys %{$timing_data})
92{
93  my $jobs = $timing_data->{$worker_id}->{'F'};
94  my $counter = 1;
95  foreach my $job_start_time (sort keys %{$jobs})
96  {
97    my $log_filename = 'gsimport-' . $worker_id . '-' . $counter . '.log';
98    print ".";
99    open(WLOGIN, '<:utf8', $log_filename) or die("Error! Failed to open for reading: " . $log_filename);
100    my $wline = '';
101    while ($wline = <WLOGIN>)
102    {
103      if ($wline =~ /\[A:\d+\] SimpleVideoPlugin processing: (.+)/)
104      {
105        $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'FN'} = $1;
106      }
107      # Start of video processing (excluding as much IO as possible)
108      elsif ($wline =~ /\[C1:(\d+)\]/)
109      {
110        $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PS'} = $1;
111      }
112      # Immediately after processing video
113      elsif ($wline =~ /\[E2:(\d+)\]/)
114      {
115        $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} = $1;
116      }
117    }
118
119    if ($timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} <= 0)
120    {
121      print "\n[Warning - bogus log: $log_filename]";
122    }
123
124    close(WLOGIN);
125    $counter++;
126  }
127}
128print " Done!\n";
129
130# 4. Produce CSV of information
131print " * Generating timing information as CSV... ";
132open(CSVOUT, '>:utf8', 'timing.csv') or die('Error! Failed to open file for writing: timing.csv');
133print CSVOUT "number,id,hostname,start,end,hierarchy\n";
134my $thread_counter = 1;
135foreach my $thread (nsort keys %{$timing_data})
136{
137  my $data = $timing_data->{$thread};
138  print CSVOUT $thread_counter . ',' . $thread . ',' . $data->{'N'} . ',' . strftime("%H:%M:%S", localtime($data->{'S'})) . ',' . strftime("%H:%M:%S", localtime($data->{'E'})) . ',';
139  if ($thread eq 'M')
140  {
141    print CSVOUT '0';
142  }
143  else
144  {
145    print CSVOUT '1';
146  }
147  print CSVOUT "\n";
148  $thread_counter++;
149}
150close(CSVOUT);
151print "Done!\n";
152
153# 5. Produce pretty HTML chart of timing information including jobs
154print " * Generating timing information as HTML... ";
155open(HTMLOUT, '>:utf8', 'gantt.html') or die('Error! Failed to open file for writing: gantt.html');
156print HTMLOUT "<html>\n";
157print HTMLOUT '<head>' . "\n";
158print HTMLOUT '<style type="text/css">' . "\n";
159print HTMLOUT 'div.thread {position:relative}' . "\n";
160print HTMLOUT 'div.master {border:1px solid gray;color:white;font-weight:bold}' . "\n";
161print HTMLOUT 'div.worker {background-color:green;color:white;font-weight:bold}' . "\n";
162print HTMLOUT 'div.time {font-size:smaller;font-weight:normal}' . "\n";
163print HTMLOUT 'div.job {background-color:transparent;color:black;border:1px solid black;display:block;font-size:smaller;position:relative;text-align:center}' . "\n";
164print HTMLOUT 'span.process {z-index:-1;background-color:#C7C7C7;position:absolute}' . "\n";
165print HTMLOUT "th {text-align:left}\n";
166print HTMLOUT '</style>' . "\n";
167print HTMLOUT '</head>' . "\n";
168print HTMLOUT "<body>\n";
169print HTMLOUT "<h2>Statistics</h2>\n";
170print HTMLOUT "<table>\n";
171
172my $total_duration = $timing_data->{'M'}->{'E'} - $timing_data->{'M'}->{'S'};
173my $file_count = 0;
174my $total_io_time = 0;
175my $total_process_time = 0;
176my $fastest_file = 0;
177my $slowest_file = 0;
178my $problem_files = 0;
179foreach my $worker_id (keys %{$timing_data})
180{
181  if ($worker_id ne 'M')
182  {
183    foreach my $job_start ( keys %{$timing_data->{$worker_id}->{'F'}} )
184    {
185      my $process_start = $timing_data->{$worker_id}->{'F'}->{$job_start}->{'PS'};
186      my $process_end = $timing_data->{$worker_id}->{'F'}->{$job_start}->{'PE'};
187      my $job_end = $timing_data->{$worker_id}->{'F'}->{$job_start}->{'E'};
188      if ($process_start == 0 || $process_end == 0 || $job_end == 0)
189      {
190        $problem_files++;
191      }
192      else
193      {
194        my $io_duration = ($process_start - $job_start) + ($job_end - $process_end);
195        my $process_duration = $process_end - $process_start;
196        my $total_duration = $io_duration + $process_duration;
197        ###rint "[DEBUG] filename: " . $timing_data->{$worker_id}->{'F'}->{$job_start}->{'FN'} . "\n";
198        ###rint "[DEBUG] start: $job_start ps: $process_start pe: $process_end end: $job_end\n";
199        ###rint "[DEBUG] io: $io_duration process: $process_duration duration: $total_duration\n";
200        # Running stats
201        $total_io_time += $io_duration;
202        $total_process_time += $process_duration;
203        if ($fastest_file == 0 || $total_duration < $fastest_file)
204        {
205          $fastest_file = $total_duration;
206        }
207        if ($slowest_file == 0 || $total_duration > $slowest_file)
208        {
209          $slowest_file = $total_duration;
210        }
211      }
212      $file_count++;
213    }
214  }
215}
216my $avg_processing_time = floor(($total_io_time + $total_process_time) / $file_count);
217
218print HTMLOUT "<tr><th>Processing Time:</th><td>" . &renderTime($total_duration) . "</td></tr>\n";
219print HTMLOUT "<tr><th>Processing Threads:</th><td>" . $number_of_workers . "</td></tr>\n";
220print HTMLOUT "<tr><th>Files Processed:</th><td>" . $file_count . "</td></tr>\n";
221print HTMLOUT "<tr><th>Problem Files:</th><td>" . $problem_files . "</td></tr>\n";
222print HTMLOUT "<tr><th>Serial Processing Time:</th><td>" . &renderTime($total_process_time) . "</td></tr>\n";
223print HTMLOUT "<tr><th>Serial IO Time:</th><td>" . &renderTime($total_io_time) . "</td></tr>\n";
224print HTMLOUT "<tr><th>Average File Processing Time:</th><td>" . &renderTime($avg_processing_time) . "</td></tr>\n";
225print HTMLOUT "<tr><th>Fastest File:</th><td>" . &renderTime($fastest_file) . "</td></tr>\n";
226print HTMLOUT "<tr><th>Slowest File:</th><td>" . &renderTime($slowest_file) . "</td></tr>\n";
227
228print HTMLOUT "</table>\n";
229print HTMLOUT "<hr />\n";
230print HTMLOUT "<h2>Timing Chart (Gannt)</h2>\n";
231print HTMLOUT renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'master', $timing_data->{'M'}->{'N'}, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, {});
232foreach my $worker_id (nsort keys %{$timing_data})
233{
234  if ($worker_id ne 'M')
235  {
236    my $data = $timing_data->{$worker_id};
237    print HTMLOUT renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'worker', $worker_id . ' [' . $data->{'N'} . ']', $data->{'S'}, $data->{'E'}, $data->{'F'});
238  }
239}
240print HTMLOUT '<div>' . "\n";
241print HTMLOUT "</body>\n";
242print HTMLOUT "</html>";
243close(HTMLOUT);
244
245print "Done!\n";
246print "Complete!\n\n";
247exit;
248
249# /**
250#  */
251sub renderLine
252{
253  my ($table_width, $start, $end, $class, $tname, $tstart, $tend, $jobs) = @_;
254  # All timings need to be relative to 0 (relative start)
255  my $duration = $end - $start;
256  my $rtstart = $tstart - $start;
257  my $rtend = $tend - $start;
258  # We need to scale these depending on the timing of this thread relative to
259  # the master thread
260  my $width = $chart_width;
261  my $left = 0;
262  if ($start != $tstart)
263  {
264    my $left_offset_percent = $rtstart / $duration;
265    $left = $left_offset_percent * $table_width;
266  }
267  # - subtract any left offset from width
268  $width = $width - $left;
269  # - right offset directly subtracted from width
270  if ($end != $tend)
271  {
272    my $right_offset_percent = ($duration - $rtend) / $duration;
273    my $right = $right_offset_percent * $table_width;
274    $width = $width - $right;
275  }
276  my $html = '<div class="thread ' . $class . '" style="left:' . $left . 'px;width:' . $width . 'px;">';
277  if ($class eq 'master')
278  {
279    $html .= '<div style="background-color:blue">';
280  }
281  $html .= '<div class="time" style="display:table-cell">' . &renderTime($rtstart) . '</div><div style="display:table-cell;padding-left:20px;width:100%;">' . ucfirst($class) . ': ' . $tname . '</div><div class="time" style="display:table-cell">' . renderTime($rtend) . '</div></div>';
282  my $previous_jright = 0;
283  foreach my $jstart (sort keys %{$jobs})
284  {
285    my $rjstart = $jstart - $start;
286    my $rpstart = $jobs->{$jstart}->{'PS'} - $start;
287    my $rpend = $jobs->{$jstart}->{'PE'} - $start;
288    my $rjend = $jobs->{$jstart}->{'E'} - $start;
289    my $jduration = $jobs->{$jstart}->{'E'} - $jstart;
290    # Scale Job co-ordinates
291    my $jleft_percent = $rjstart / $duration;
292    my $jleft = floor($jleft_percent * $table_width);
293    my $jwidth_percent = $jduration / $duration;
294    my $jwidth = floor($jwidth_percent * $table_width);
295    if ($jleft < $previous_jright)
296    {
297      $jleft = $previous_jright;
298    }
299    if ($jleft + $jwidth > $left + $width)
300    {
301      $jwidth = ($left + $width) - $jleft;
302    }
303    # Then scale process timings within that!
304    my $rpleft_percent = ($rpstart - $rjstart) / $duration;
305    my $rpleft = floor($rpleft_percent * $table_width);
306    my $rpwidth_percent = ($rpend - $rpstart) / $duration;
307    my $rpwidth = floor($rpwidth_percent * $table_width);
308    $html .= '<div class="job" style="left:' . $jleft . 'px;width:' . $jwidth . 'px" title="S:' . renderTime($rjstart) . ', PS:' . renderTime($rpstart) . ', PE:' . renderTime($rpend) . ', E:' . renderTime($rjend) . '"><span class="process" style="left:' . $rpleft . 'px;width:' . $rpwidth . 'px">&nbsp;</span><span style="background-color:transparent;z-index:1">' . $jobs->{$jstart}->{'FN'} . '</span></div>';
309    $previous_jright = $jleft + $jwidth;
310  }
311  return $html;
312}
313# /** renderLine() **/
314
315sub renderTime
316{
317  my ($seconds) = @_;
318  my $time_str = '';
319  # determine how many hours
320  my $an_hour = 60 * 60;
321  my $hours = floor($seconds / $an_hour);
322  $seconds = $seconds - ($hours * $an_hour);
323  my $a_minute = 60;
324  my $minutes = floor($seconds / $a_minute);
325  $seconds = $seconds - ($minutes * $a_minute);
326  if ($hours > 0)
327  {
328    $time_str = sprintf('%dh%02dm%02ds', $hours, $minutes, $seconds);
329  }
330  elsif ($minutes > 0)
331  {
332    $time_str = sprintf('%dm%02ds', $minutes, $seconds);
333  }
334  else
335  {
336    $time_str = $seconds . 's';
337  }
338  return $time_str;
339}
Note: See TracBrowser for help on using the browser.