source: gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl@ 27551

Last change on this file since 27551 was 27551, checked in by jmt12, 11 years ago

Altered so that it expects to be given a CSV containing parallel processing information (which can come from OpenMPI or Hadoop Greenstone imports). Also now determine the import dir by finding the longest common directory path in the files processed

  • Property svn:executable set to *
  • Property svn:mime-type set to application/x-perl
File size: 11.1 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6use Sort::Naturally;
7use POSIX qw(floor strftime);
8
print "\n===== Generate Timing (GANTT) =====\n";

# 0. Configuration
# - when true, debugPrint() writes its messages to STDERR
my $debug = 0;
# - filled in while timing.csv is parsed (common path of the processed files)
my $import_dir;

# 1. Initialization
# - first argument: the results directory, which must exist and must contain
#   a timing.csv file
my $dir = $ARGV[0];
if (!(defined $dir && -d $dir))
{
  printUsage('Directory not provided or doesn\'t exist');
}
my $timing_csv_path = filenameCat($dir, 'timing.csv');
if (!-e $timing_csv_path)
{
  printUsage('Directory doesn\'t contain timing.csv: ' . $dir);
}
print 'Timing File: ' . $timing_csv_path . "\n";
# - second (optional) argument: the chart width in pixels, 1024 when omitted
my $chart_width = 1024;
if (defined $ARGV[1])
{
  printUsage('Chart width not a number') if ($ARGV[1] !~ /^\d+$/);
  $chart_width = $ARGV[1];
}
print "Chart Width: " . $chart_width . "px\n";
print "===================================\n\n";
38
# 2. Read in timing.csv and parse information into data structure
#
# Columns consumed below (0-based; any other column is ignored):
#   'M0' rows:   [2] name, [3] start, [4] end            -> $timing_data->{'M'}
#   'W<n>' rows: [0] row id, [1] worker id, [2] name,
#                [3] start, [4] end                      -> $timing_data->{W<n>}
#   'T<n>' rows: [3] job start, [4] job end, [5] amount subtracted from the job
#                end to obtain the processing start, [7] row id of the owning
#                worker, [8] file path      -> $timing_data->{W<n>}->{'F'}
#
# A task row can only be attached to a worker whose row appeared earlier in
# the file, as the row id -> worker id map is built while reading.
my $timing_data = {};
my $id_2_worker_id = {};
open(my $timing_in, '<:utf8', $timing_csv_path)
  or die('Error! Failed to open file for reading: ' . $timing_csv_path . ' (' . $! . ')');
while (my $line = <$timing_in>)
{
  # Strip the line terminator (LF or CRLF) so it can't leak into the last field
  $line =~ s/\r?\n\z//;
  my @parts = split(/,/, $line);
  # Blank or truncated rows have no record type to dispatch on
  next if (!defined $parts[1]);
  if ($parts[1] eq 'M0')
  {
    $timing_data->{'M'} = {'N'=>$parts[2], 'S'=>$parts[3], 'E'=>$parts[4]};
  }
  elsif ($parts[1] =~ /W\d+/)
  {
    $timing_data->{$parts[1]} = {'N'=>$parts[2], 'S'=>$parts[3], 'E'=>$parts[4], 'F'=>{}};
    $id_2_worker_id->{$parts[0]} = $parts[1];
  }
  elsif ($parts[1] =~ /T\d+/)
  {
    my ($job_start, $stop, $process_time, $parent_id, $filepath) = @parts[3, 4, 5, 7, 8];
    my $worker_id = (defined $parent_id) ? $id_2_worker_id->{$parent_id} : undef;
    # Without a known worker or a file path the task can't be charted; filing
    # it anyway would create a bogus worker entry keyed on the empty string
    if (!defined $worker_id || !defined $filepath || !defined $process_time)
    {
      print STDERR 'Warning! Skipping incomplete task record at line ' . $. . ' of ' . $timing_csv_path . "\n";
      next;
    }
    $import_dir = longestCommonPath($filepath, $import_dir);
    # The process end and the job end are both taken from the job end column
    $timing_data->{$worker_id}->{'F'}->{$job_start} = {'FN'=>$filepath, 'PS'=>($stop - $process_time), 'PE'=>$stop, 'E'=>$stop};
  }
}
close($timing_in);
my $number_of_workers = scalar(keys(%{$id_2_worker_id}));
73
# 3. Produce pretty HTML chart of timing information including jobs
#
# Writes gantt.html into the results directory. The page has two parts: a
# table of summary statistics gathered from every worker's jobs, followed by
# one chart line for the master and one for each worker (in natural order).

# Every chart line is scaled against the master's start and end times, so
# nothing sensible can be drawn without the master record
if (!defined $timing_data->{'M'})
{
  die('Error! No master (M0) record found in: ' . $timing_csv_path . "\n");
}

print " * Generating timing information as HTML... ";
my $gantt_html_path = filenameCat($dir, 'gantt.html');
open(my $html_out, '>:utf8', $gantt_html_path)
  or die('Error! Failed to open file for writing: gantt.html (' . $! . ')');
foreach my $header_line ('<html>',
                         '<head>',
                         '<style type="text/css">',
                         'div.thread {position:relative}',
                         'div.master {border:1px solid gray;color:white;font-weight:bold}',
                         'div.worker {border:1px solid black;background-color:green;color:white;font-weight:bold}',
                         'div.time {font-size:smaller;font-weight:normal}',
                         'div.job {background-color:transparent;color:black;border:1px solid black;display:block;font-size:smaller;position:relative;text-align:center}',
                         'span.process {z-index:-1;background-color:#C7C7C7;position:absolute}',
                         'th {text-align:left}',
                         '</style>',
                         '</head>',
                         '<body>',
                         '<h2>Statistics</h2>',
                         '<table>')
{
  print {$html_out} $header_line . "\n";
}

my $total_duration = $timing_data->{'M'}->{'E'} - $timing_data->{'M'}->{'S'};
my $file_count = 0;
my $total_io_time = 0;
my $total_process_time = 0;
# - left undefined until the first valid file is seen, so that a file which
#   genuinely took 0 seconds isn't mistaken for "no value yet"
my $fastest_file;
my $slowest_file;
my $problem_files = 0;
# - number of leading characters (import dir plus separator) to drop from
#   each file path for display
my $import_prefix_length = (defined $import_dir) ? length($import_dir) + 1 : 0;
foreach my $worker_id (keys %{$timing_data})
{
  next if ($worker_id eq 'M');
  my $jobs = $timing_data->{$worker_id}->{'F'};
  foreach my $job_start (keys %{$jobs})
  {
    my $job = $jobs->{$job_start};
    my $process_start = $job->{'PS'};
    my $process_end = $job->{'PE'};
    my $job_end = $job->{'E'};
    # A zero timestamp marks a file whose timing is unusable
    if ($process_start == 0 || $process_end == 0 || $job_end == 0)
    {
      $problem_files++;
    }
    else
    {
      my $io_duration = ($process_start - $job_start) + ($job_end - $process_end);
      my $process_duration = $process_end - $process_start;
      my $file_duration = $io_duration + $process_duration;
      debugPrint("filename: " . $job->{'FN'});
      debugPrint("start: $job_start ps: $process_start pe: $process_end end: $job_end");
      debugPrint("io: $io_duration process: $process_duration duration: $file_duration");
      # Running stats
      $total_io_time += $io_duration;
      $total_process_time += $process_duration;
      if (!defined $fastest_file || $file_duration < $fastest_file)
      {
        $fastest_file = $file_duration;
      }
      if (!defined $slowest_file || $file_duration > $slowest_file)
      {
        $slowest_file = $file_duration;
      }
    }
    # Shorten filename - but only when there is something left afterwards.
    # With a single file the import dir is the whole path, and the offset
    # would lie outside the string.
    if ($import_prefix_length > 0 && length($job->{'FN'}) >= $import_prefix_length)
    {
      $job->{'FN'} = substr($job->{'FN'}, $import_prefix_length);
    }
    $file_count++;
  }
}
# Guard against an empty run: there is nothing to average over
my $avg_processing_time = 0;
if ($file_count > 0)
{
  $avg_processing_time = floor(($total_io_time + $total_process_time) / $file_count);
}

my @statistics = (
  ['Import Directory:', ((defined $import_dir) ? $import_dir : '')],
  ['Processing Time:', renderTime($total_duration)],
  ['Processing Threads:', $number_of_workers],
  ['Files Processed:', $file_count],
  ['Problem Files:', $problem_files],
  ['Serial Processing Time:', renderTime($total_process_time)],
  ['Serial IO Time:', renderTime($total_io_time)],
  ['Average File Processing Time:', renderTime($avg_processing_time)],
  ['Fastest File:', renderTime((defined $fastest_file) ? $fastest_file : 0)],
  ['Slowest File:', renderTime((defined $slowest_file) ? $slowest_file : 0)],
);
foreach my $statistic (@statistics)
{
  print {$html_out} '<tr><th>' . $statistic->[0] . '</th><td>' . $statistic->[1] . "</td></tr>\n";
}

print {$html_out} "</table>\n";
print {$html_out} "<hr />\n";
print {$html_out} "<h2>Timing Chart (Gannt)</h2>\n";
my $master = $timing_data->{'M'};
print {$html_out} renderLine($chart_width, $master->{'S'}, $master->{'E'}, 'master', $master->{'N'}, $master->{'S'}, $master->{'E'}, {});
foreach my $worker_id (nsort keys %{$timing_data})
{
  next if ($worker_id eq 'M');
  my $data = $timing_data->{$worker_id};
  print {$html_out} renderLine($chart_width, $master->{'S'}, $master->{'E'}, 'worker', $worker_id . ' [' . $data->{'N'} . ']', $data->{'S'}, $data->{'E'}, $data->{'F'});
}
# The master line leaves its outer div open so that the worker lines nest
# inside it; close it here
print {$html_out} '</div>' . "\n";
print {$html_out} "</body>\n";
print {$html_out} "</html>";
# Buffered write errors only surface on close
close($html_out) or die('Error! Failed to finish writing: gantt.html (' . $! . ')');

print "Done!\n";
print "Complete!\n\n";
exit;
172
173
## @function debugPrint()
#
# Writes a message to STDERR, prefixed with '[DEBUG] ' and terminated by a
# newline, but only while the file-level $debug flag is true.
#
# @param $msg the text to report
#
sub debugPrint
{
  my ($msg) = @_;
  print STDERR '[DEBUG] ' . $msg . "\n" if ($debug);
}
## debugPrint() ##
185
186
## @function filenameCat
#
# Builds a single path out of the given fragments. The fragments are joined
# with '/', every run of forward and/or back slashes is collapsed into one
# forward slash, and the '//' that follows a leading HDFS, HDFSShell or
# HDThriftFS protocol prefix is put back afterwards.
#
# @param @_ the path fragments, in order
# @return the normalised path (an empty string when no fragments are given)
#
sub filenameCat
{
  my @fragments = @_;
  my $joined = join('/', @fragments);
  for ($joined)
  {
    s/[\/\\]+/\//g;
    # protocols
    s/^(HDFS|HDFSShell|HDThriftFS):\//$1:\/\//;
  }
  return $joined;
}
## filenameCat() ##
198
## @function printUsage()
#
# Reports an optional error message on STDOUT (prefixed with 'Error! ') and
# then dies with the usage text. This function never returns.
#
# @param $msg (optional) description of what was wrong with the arguments
#
sub printUsage
{
  my ($msg) = @_;
  print 'Error! ' . $msg . "\n" if (defined $msg);
  die("Usage: generate_gantt.pl <results dir> [<width in pixels>]\n\n");
}
## printUsage() ##
211
212
## @function longestCommonPath
#
# Narrows a running "common path" by comparing it, component by component
# (split on '/'), with a newly seen path. The leading components that both
# paths share are reassembled with filenameCat().
#
# @param $path_new     the path just encountered
# @param $path_current the common path so far, or undef when $path_new is the
#                      first path seen
# @return $path_new itself when $path_current is undefined, otherwise the
#         shared leading part of the two paths (an empty string when they
#         share nothing)
#
sub longestCommonPath
{
  my ($path_new, $path_current) = @_;
  # Nothing to compare against yet
  return $path_new if (!defined $path_current);

  my @path_new_parts = split(/\//, $path_new);
  my @path_current_parts = split(/\//, $path_current);
  # Only compare positions that exist in both paths. Reading past the end of
  # the shorter one yields undef, which not only warns but also compares equal
  # to an empty component.
  my $comparable = scalar(@path_current_parts);
  if (scalar(@path_new_parts) < $comparable)
  {
    $comparable = scalar(@path_new_parts);
  }
  my @path_parts;
  for (my $i = 0; $i < $comparable; $i++)
  {
    last if ($path_current_parts[$i] ne $path_new_parts[$i]);
    push(@path_parts, $path_new_parts[$i]);
  }
  return filenameCat(@path_parts);
}
## longestCommonPath() ##
244
245
## @function renderLine()
#
# Renders one line of the chart - the bar for a thread followed by a box for
# each of its jobs - as a string of HTML.
#
# @param $table_width width, in pixels, that the span $start..$end maps onto
# @param $start  start time of the whole chart
# @param $end    end time of the whole chart
# @param $class  'master' or 'worker'; used as a CSS class and, capitalised,
#                as the label prefix. For 'master' an extra wrapping div is
#                opened and the outer thread div is left unclosed.
# @param $tname  label shown for this thread
# @param $tstart start time of this thread
# @param $tend   end time of this thread
# @param $jobs   hash reference keyed on job start time; each value is a hash
#                reference holding 'FN' (label), 'PS' (process start), 'PE'
#                (process end) and 'E' (job end)
# @return the HTML fragment
#
sub renderLine
{
  my ($table_width, $start, $end, $class, $tname, $tstart, $tend, $jobs) = @_;
  debugPrint("renderLine($table_width, $start, $end, $class, $tname, $tstart, $tend, <jobs>)");
  # All timings need to be relative to 0 (relative start)
  my $duration = $end - $start;
  my $rtstart = $tstart - $start;
  my $rtend = $tend - $start;
  # Scales a length of time to pixels. A chart spanning no time at all has no
  # scale, so everything maps to 0 rather than dividing by zero.
  my $to_pixels = sub
  {
    my ($span) = @_;
    return 0 if ($duration == 0);
    return ($span / $duration) * $table_width;
  };
  # We need to scale these depending on the timing of this thread relative to
  # the master thread
  my $left = 0;
  if ($start != $tstart)
  {
    $left = $to_pixels->($rtstart);
  }
  # - the width comes from the caller's table width (not from any file-level
  #   setting), less any left offset
  my $width = $table_width - $left;
  # - right offset directly subtracted from width
  if ($end != $tend)
  {
    $width = $width - $to_pixels->($duration - $rtend);
  }
  my $html = '<div class="thread ' . $class . '" style="left:' . $left . 'px;width:' . $width . 'px;">';
  if ($class eq 'master')
  {
    $html .= '<div style="background-color:blue">';
  }
  $html .= '<div class="time" style="display:table-cell">' . renderTime($rtstart) . '</div><div style="display:table-cell;padding-left:20px;width:100%;">' . ucfirst($class) . ': ' . $tname . '</div><div class="time" style="display:table-cell">' . renderTime($rtend) . '</div></div>';
  my $previous_jright = 0;
  # Job start times are numbers, so order them numerically; a plain sort would
  # compare them as strings
  foreach my $jstart (sort { $a <=> $b } keys %{$jobs})
  {
    my $job = $jobs->{$jstart};
    my $rjstart = $jstart - $start;
    my $rpstart = $job->{'PS'} - $start;
    my $rpend = $job->{'PE'} - $start;
    my $rjend = $job->{'E'} - $start;
    my $jduration = $job->{'E'} - $jstart;
    # Scale Job co-ordinates
    my $jleft = floor($to_pixels->($rjstart));
    my $jwidth = floor($to_pixels->($jduration));
    # - never start before the previous job's right edge
    if ($jleft < $previous_jright)
    {
      $jleft = $previous_jright;
    }
    # - never extend past the right edge of the thread's bar
    if ($jleft + $jwidth > $left + $width)
    {
      $jwidth = ($left + $width) - $jleft;
    }
    # Then scale process timings within that!
    my $rpleft = floor($to_pixels->($rpstart - $rjstart));
    my $rpwidth = floor($to_pixels->($rpend - $rpstart));
    $html .= '<div class="job" style="left:' . $jleft . 'px;width:' . $jwidth . 'px" title="S:' . renderTime($rjstart) . ', PS:' . renderTime($rpstart) . ', PE:' . renderTime($rpend) . ', E:' . renderTime($rjend) . '"><span class="process" style="left:' . $rpleft . 'px;width:' . $rpwidth . 'px">&nbsp;</span><span style="background-color:transparent;z-index:1">' . $job->{'FN'} . '</span></div>';
    $previous_jright = $jleft + $jwidth;
  }
  return $html;
}
## renderLine() ##
312
313
## @function renderTime()
#
# Formats a number of seconds for display, using the shortest of three forms:
# '<h>h<mm>m<ss>s', '<m>m<ss>s' or '<s>s'. A negative value is rendered as
# its magnitude with a leading '-'.
#
# @param $seconds the length of time, in seconds
# @return the formatted string, e.g. '1h01m01s', '1m01s', '59s' or '-1s'
#
sub renderTime
{
  my ($seconds) = @_;
  # Work on the magnitude and put the sign back at the end. Flooring a
  # negative value would borrow from the hours and minutes, so that -1 second
  # came out as 59m59s.
  my $sign = '';
  if ($seconds < 0)
  {
    $sign = '-';
    $seconds = abs($seconds);
  }
  # determine how many hours
  my $an_hour = 60 * 60;
  my $hours = floor($seconds / $an_hour);
  $seconds = $seconds - ($hours * $an_hour);
  # determine how many minutes remain
  my $a_minute = 60;
  my $minutes = floor($seconds / $a_minute);
  $seconds = $seconds - ($minutes * $a_minute);
  my $time_str = '';
  if ($hours > 0)
  {
    $time_str = sprintf('%dh%02dm%02ds', $hours, $minutes, $seconds);
  }
  elsif ($minutes > 0)
  {
    $time_str = sprintf('%dm%02ds', $minutes, $seconds);
  }
  else
  {
    $time_str = $seconds . 's';
  }
  return $sign . $time_str;
}
Note: See TracBrowser for help on using the repository browser.