source: gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl@ 27543

Last change on this file since 27543 was 27543, checked in by jmt12, 11 years ago

Adding generate_gantt.pl script in its original form - i.e. directly reads OpenMPI Greenstone logs and generates CSV of information and Gantt chart. I intended to split this script in half into a openmpi_report.pl and generate_gantt.pl

  • Property svn:executable set to *
  • Property svn:mime-type set to application/x-perl
File size: 12.4 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6use Sort::Naturally;
7use POSIX qw(floor strftime);
8
9print "\n===== Generate Timing (GANTT) =====\n";
10
11# 1. Initialization
12#my $chart_width = 1024;
13my $chart_width = 1536;
14#my $chart_width = 2048;
15print "Chart Width: " . $chart_width . "px\n";
16# 1.1 Store all information extracted in a cool data structure
17# - N = hostname, S = thread start, E = thread end
18my $timing_data = {'M' => {'N'=>'', 'S'=>0, 'E'=>0}};
19# 1.2 Check the file exists
20if (!-f $ARGV[0])
21{
22 die("Error! File can't be read: " . $ARGV[0]);
23}
24my $main_log_filename = $ARGV[0];
25# 1.2 From the filename we can parse in some information like the number of worker threads
26my $number_of_workers = 0;
27if ($main_log_filename =~ /\-W(\d+)E/)
28{
29 $number_of_workers = $1;
30}
31else
32{
33 die("Error! Malformed filename (expecting number of workers): " . $main_log_filename);
34}
35# 1.3 Initialize the data structure with the number of workers too (we don't
36# know the number of files yet, so they'll have to be adding on the fly)
37for (my $i = 1; $i <= $number_of_workers; $i++)
38{
39 $timing_data->{'W' . $i} = {'N'=>'', 'S'=>0, 'E'=>0, 'F'=>{}};
40}
41
42# 2. Read in main log file
43print " * Reading main log: " . $main_log_filename . "\n";
44open(LOGIN, '<:utf8', $main_log_filename) or die("Error! Failed to open file for reading: " . $main_log_filename);
45my $line = '';
46my $currently_processing = {};
47while ($line = <LOGIN>)
48{
49 # 2.1 Parse in the Master thread start time
50 if ($line =~ /\[M\d?:(\d+)\] Starting on (.+)/)
51 {
52 $timing_data->{'M'}->{'S'} = $1;
53 $timing_data->{'M'}->{'N'} = $2;
54 }
55 elsif ($line =~ /\[(W\d+):(\d+)\] Starting on (.+)/)
56 {
57 my $worker_id = $1;
58 $timing_data->{$worker_id}->{'S'} = $2;
59 $timing_data->{$worker_id}->{'N'} = $3;
60 }
61 elsif ($line =~ /\[(W\d+):(\d+)\] Processing/)
62 {
63 my $worker_id = $1;
64 my $job_start_time = $2;
65 $timing_data->{$worker_id}->{'F'}->{$job_start_time} = {'FN'=>'', 'PS'=>0, 'PE'=>0, 'E'=>0};
66 $currently_processing->{$worker_id} = $job_start_time;
67 }
68 # 2.3 Or we may parse in the starting times for each working thread
69 # 2.4 Or we may also parse (the last encountered) completion time for each
70 # working thread
71 elsif ($line =~ /\[(W\d+):(\d+)\] Process complete/)
72 {
73 my $worker_id = $1;
74 my $job_end_time = $2;
75 $timing_data->{$worker_id}->{'E'} = $job_end_time;
76 my $job_start_time = $currently_processing->{$worker_id};
77 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'E'} = $job_end_time;
78 delete($currently_processing->{$worker_id});
79 }
80 # 2.5 Finally, we may parse in the Master thread end time
81 elsif ($line =~ /\[M\d?:(\d+)\] Master will exit when workers complete/)
82 {
83 $timing_data->{'M'}->{'E'} = $1;
84 }
85}
86close(LOGIN);
87
88# 3. Read each of worker logs parsing in information about the files processed
89# - each will be stored (in an associative array) against its start time
90print " * Reading worker logs";
91foreach my $worker_id (nsort keys %{$timing_data})
92{
93 my $jobs = $timing_data->{$worker_id}->{'F'};
94 my $counter = 1;
95 foreach my $job_start_time (sort keys %{$jobs})
96 {
97 my $log_filename = 'gsimport-' . $worker_id . '-' . $counter . '.log';
98 print ".";
99 open(WLOGIN, '<:utf8', $log_filename) or die("Error! Failed to open for reading: " . $log_filename);
100 my $wline = '';
101 while ($wline = <WLOGIN>)
102 {
103 if ($wline =~ /\[A:\d+\] SimpleVideoPlugin processing: (.+)/)
104 {
105 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'FN'} = $1;
106 }
107 # Start of video processing (excluding as much IO as possible)
108 elsif ($wline =~ /\[C1:(\d+)\]/)
109 {
110 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PS'} = $1;
111 }
112 # Immediately after processing video
113 elsif ($wline =~ /\[E2:(\d+)\]/)
114 {
115 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} = $1;
116 }
117 }
118
119 if ($timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} <= 0)
120 {
121 print "\n[Warning - bogus log: $log_filename]";
122 }
123
124 close(WLOGIN);
125 $counter++;
126 }
127}
128print " Done!\n";
129
130# 4. Produce CSV of information
131print " * Generating timing information as CSV... ";
132open(CSVOUT, '>:utf8', 'timing.csv') or die('Error! Failed to open file for writing: timing.csv');
133print CSVOUT "number,id,hostname,start,end,hierarchy\n";
134my $thread_counter = 1;
135foreach my $thread (nsort keys %{$timing_data})
136{
137 my $data = $timing_data->{$thread};
138 print CSVOUT $thread_counter . ',' . $thread . ',' . $data->{'N'} . ',' . strftime("%H:%M:%S", localtime($data->{'S'})) . ',' . strftime("%H:%M:%S", localtime($data->{'E'})) . ',';
139 if ($thread eq 'M')
140 {
141 print CSVOUT '0';
142 }
143 else
144 {
145 print CSVOUT '1';
146 }
147 print CSVOUT "\n";
148 $thread_counter++;
149}
150close(CSVOUT);
151print "Done!\n";
152
153# 5. Produce pretty HTML chart of timing information including jobs
154print " * Generating timing information as HTML... ";
155open(HTMLOUT, '>:utf8', 'gantt.html') or die('Error! Failed to open file for writing: gantt.html');
156print HTMLOUT "<html>\n";
157print HTMLOUT '<head>' . "\n";
158print HTMLOUT '<style type="text/css">' . "\n";
159print HTMLOUT 'div.thread {position:relative}' . "\n";
160print HTMLOUT 'div.master {border:1px solid gray;color:white;font-weight:bold}' . "\n";
161print HTMLOUT 'div.worker {background-color:green;color:white;font-weight:bold}' . "\n";
162print HTMLOUT 'div.time {font-size:smaller;font-weight:normal}' . "\n";
163print HTMLOUT 'div.job {background-color:transparent;color:black;border:1px solid black;display:block;font-size:smaller;position:relative;text-align:center}' . "\n";
164print HTMLOUT 'span.process {z-index:-1;background-color:#C7C7C7;position:absolute}' . "\n";
165print HTMLOUT "th {text-align:left}\n";
166print HTMLOUT '</style>' . "\n";
167print HTMLOUT '</head>' . "\n";
168print HTMLOUT "<body>\n";
169print HTMLOUT "<h2>Statistics</h2>\n";
170print HTMLOUT "<table>\n";
171
172my $total_duration = $timing_data->{'M'}->{'E'} - $timing_data->{'M'}->{'S'};
173my $file_count = 0;
174my $total_io_time = 0;
175my $total_process_time = 0;
176my $fastest_file = 0;
177my $slowest_file = 0;
178my $problem_files = 0;
179foreach my $worker_id (keys %{$timing_data})
180{
181 if ($worker_id ne 'M')
182 {
183 foreach my $job_start ( keys %{$timing_data->{$worker_id}->{'F'}} )
184 {
185 my $process_start = $timing_data->{$worker_id}->{'F'}->{$job_start}->{'PS'};
186 my $process_end = $timing_data->{$worker_id}->{'F'}->{$job_start}->{'PE'};
187 my $job_end = $timing_data->{$worker_id}->{'F'}->{$job_start}->{'E'};
188 if ($process_start == 0 || $process_end == 0 || $job_end == 0)
189 {
190 $problem_files++;
191 }
192 else
193 {
194 my $io_duration = ($process_start - $job_start) + ($job_end - $process_end);
195 my $process_duration = $process_end - $process_start;
196 my $total_duration = $io_duration + $process_duration;
197 ###rint "[DEBUG] filename: " . $timing_data->{$worker_id}->{'F'}->{$job_start}->{'FN'} . "\n";
198 ###rint "[DEBUG] start: $job_start ps: $process_start pe: $process_end end: $job_end\n";
199 ###rint "[DEBUG] io: $io_duration process: $process_duration duration: $total_duration\n";
200 # Running stats
201 $total_io_time += $io_duration;
202 $total_process_time += $process_duration;
203 if ($fastest_file == 0 || $total_duration < $fastest_file)
204 {
205 $fastest_file = $total_duration;
206 }
207 if ($slowest_file == 0 || $total_duration > $slowest_file)
208 {
209 $slowest_file = $total_duration;
210 }
211 }
212 $file_count++;
213 }
214 }
215}
216my $avg_processing_time = floor(($total_io_time + $total_process_time) / $file_count);
217
218print HTMLOUT "<tr><th>Processing Time:</th><td>" . &renderTime($total_duration) . "</td></tr>\n";
219print HTMLOUT "<tr><th>Processing Threads:</th><td>" . $number_of_workers . "</td></tr>\n";
220print HTMLOUT "<tr><th>Files Processed:</th><td>" . $file_count . "</td></tr>\n";
221print HTMLOUT "<tr><th>Problem Files:</th><td>" . $problem_files . "</td></tr>\n";
222print HTMLOUT "<tr><th>Serial Processing Time:</th><td>" . &renderTime($total_process_time) . "</td></tr>\n";
223print HTMLOUT "<tr><th>Serial IO Time:</th><td>" . &renderTime($total_io_time) . "</td></tr>\n";
224print HTMLOUT "<tr><th>Average File Processing Time:</th><td>" . &renderTime($avg_processing_time) . "</td></tr>\n";
225print HTMLOUT "<tr><th>Fastest File:</th><td>" . &renderTime($fastest_file) . "</td></tr>\n";
226print HTMLOUT "<tr><th>Slowest File:</th><td>" . &renderTime($slowest_file) . "</td></tr>\n";
227
228print HTMLOUT "</table>\n";
229print HTMLOUT "<hr />\n";
230print HTMLOUT "<h2>Timing Chart (Gannt)</h2>\n";
231print HTMLOUT renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'master', $timing_data->{'M'}->{'N'}, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, {});
232foreach my $worker_id (nsort keys %{$timing_data})
233{
234 if ($worker_id ne 'M')
235 {
236 my $data = $timing_data->{$worker_id};
237 print HTMLOUT renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'worker', $worker_id . ' [' . $data->{'N'} . ']', $data->{'S'}, $data->{'E'}, $data->{'F'});
238 }
239}
240print HTMLOUT '<div>' . "\n";
241print HTMLOUT "</body>\n";
242print HTMLOUT "</html>";
243close(HTMLOUT);
244
245print "Done!\n";
246print "Complete!\n\n";
247exit;
248
249# /**
250# */
251sub renderLine
252{
253 my ($table_width, $start, $end, $class, $tname, $tstart, $tend, $jobs) = @_;
254 # All timings need to be relative to 0 (relative start)
255 my $duration = $end - $start;
256 my $rtstart = $tstart - $start;
257 my $rtend = $tend - $start;
258 # We need to scale these depending on the timing of this thread relative to
259 # the master thread
260 my $width = $chart_width;
261 my $left = 0;
262 if ($start != $tstart)
263 {
264 my $left_offset_percent = $rtstart / $duration;
265 $left = $left_offset_percent * $table_width;
266 }
267 # - subtract any left offset from width
268 $width = $width - $left;
269 # - right offset directly subtracted from width
270 if ($end != $tend)
271 {
272 my $right_offset_percent = ($duration - $rtend) / $duration;
273 my $right = $right_offset_percent * $table_width;
274 $width = $width - $right;
275 }
276 my $html = '<div class="thread ' . $class . '" style="left:' . $left . 'px;width:' . $width . 'px;">';
277 if ($class eq 'master')
278 {
279 $html .= '<div style="background-color:blue">';
280 }
281 $html .= '<div class="time" style="display:table-cell">' . &renderTime($rtstart) . '</div><div style="display:table-cell;padding-left:20px;width:100%;">' . ucfirst($class) . ': ' . $tname . '</div><div class="time" style="display:table-cell">' . renderTime($rtend) . '</div></div>';
282 my $previous_jright = 0;
283 foreach my $jstart (sort keys %{$jobs})
284 {
285 my $rjstart = $jstart - $start;
286 my $rpstart = $jobs->{$jstart}->{'PS'} - $start;
287 my $rpend = $jobs->{$jstart}->{'PE'} - $start;
288 my $rjend = $jobs->{$jstart}->{'E'} - $start;
289 my $jduration = $jobs->{$jstart}->{'E'} - $jstart;
290 # Scale Job co-ordinates
291 my $jleft_percent = $rjstart / $duration;
292 my $jleft = floor($jleft_percent * $table_width);
293 my $jwidth_percent = $jduration / $duration;
294 my $jwidth = floor($jwidth_percent * $table_width);
295 if ($jleft < $previous_jright)
296 {
297 $jleft = $previous_jright;
298 }
299 if ($jleft + $jwidth > $left + $width)
300 {
301 $jwidth = ($left + $width) - $jleft;
302 }
303 # Then scale process timings within that!
304 my $rpleft_percent = ($rpstart - $rjstart) / $duration;
305 my $rpleft = floor($rpleft_percent * $table_width);
306 my $rpwidth_percent = ($rpend - $rpstart) / $duration;
307 my $rpwidth = floor($rpwidth_percent * $table_width);
308 $html .= '<div class="job" style="left:' . $jleft . 'px;width:' . $jwidth . 'px" title="S:' . renderTime($rjstart) . ', PS:' . renderTime($rpstart) . ', PE:' . renderTime($rpend) . ', E:' . renderTime($rjend) . '"><span class="process" style="left:' . $rpleft . 'px;width:' . $rpwidth . 'px">&nbsp;</span><span style="background-color:transparent;z-index:1">' . $jobs->{$jstart}->{'FN'} . '</span></div>';
309 $previous_jright = $jleft + $jwidth;
310 }
311 return $html;
312}
313# /** renderLine() **/
314
315sub renderTime
316{
317 my ($seconds) = @_;
318 my $time_str = '';
319 # determine how many hours
320 my $an_hour = 60 * 60;
321 my $hours = floor($seconds / $an_hour);
322 $seconds = $seconds - ($hours * $an_hour);
323 my $a_minute = 60;
324 my $minutes = floor($seconds / $a_minute);
325 $seconds = $seconds - ($minutes * $a_minute);
326 if ($hours > 0)
327 {
328 $time_str = sprintf('%dh%02dm%02ds', $hours, $minutes, $seconds);
329 }
330 elsif ($minutes > 0)
331 {
332 $time_str = sprintf('%dm%02ds', $minutes, $seconds);
333 }
334 else
335 {
336 $time_str = $seconds . 's';
337 }
338 return $time_str;
339}
Note: See TracBrowser for help on using the repository browser.