Changeset 27551 for gs2-extensions/parallel-building/trunk
- Timestamp:
- 2013-06-05T13:07:43+12:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl
r27543 r27551 9 9 print "\n===== Generate Timing (GANTT) =====\n"; 10 10 11 # 0. Configuration 12 my $debug = 0; 13 my $import_dir; 14 11 15 # 1. Initialization 12 #my $chart_width = 1024; 13 my $chart_width = 1536; 14 #my $chart_width = 2048; 16 if (!defined $ARGV[0] || !-d $ARGV[0]) 17 { 18 &printUsage('Directory not provided or doesn\'t exist'); 19 } 20 my $dir = $ARGV[0]; 21 my $timing_csv_path = &filenameCat($dir, 'timing.csv'); 22 if (!-e $timing_csv_path) 23 { 24 &printUsage('Directory doesn\'t contain timing.csv: ' . $dir); 25 } 26 print 'Timing File: ' . $timing_csv_path . "\n"; 27 my $chart_width = 1024; 28 if (defined $ARGV[1]) 29 { 30 if ($ARGV[1] !~ /^\d+$/) 31 { 32 &printUsage('Chart width not a number'); 33 } 34 $chart_width = $ARGV[1]; 35 } 15 36 print "Chart Width: " . $chart_width . "px\n"; 16 # 1.1 Store all information extracted in a cool data structure 17 # - N = hostname, S = thread start, E = thread end 18 my $timing_data = {'M' => {'N'=>'', 'S'=>0, 'E'=>0}}; 19 # 1.2 Check the file exists 20 if (!-f $ARGV[0]) 21 { 22 die("Error! File can't be read: " . $ARGV[0]); 23 } 24 my $main_log_filename = $ARGV[0]; 25 # 1.2 From the filename we can parse in some information like the number of worker threads 26 my $number_of_workers = 0; 27 if ($main_log_filename =~ /\-W(\d+)E/) 28 { 29 $number_of_workers = $1; 37 print "===================================\n\n"; 38 39 # Read in timing.csv and parse information into data structure 40 my $timing_data = {}; 41 my $id_2_worker_id = {}; 42 if (open(TIN, '<:utf8', $timing_csv_path)) 43 { 44 my $line; 45 while ($line = <TIN>) 46 { 47 my @parts = split(/,/, $line); 48 if ($parts[1] eq 'M0') 49 { 50 $timing_data->{'M'} = {'N'=>$parts[2], 'S'=>$parts[3], 'E'=>$parts[4]}; 51 } 52 elsif ($parts[1] =~ /W\d+/) 53 { 54 $timing_data->{$parts[1]} = {'N'=>$parts[2], 'S'=>$parts[3], 'E'=>$parts[4], 'F'=>{}}; 55 $id_2_worker_id->{$parts[0]} = $parts[1]; 56 } 57 elsif ($parts[1] =~ /T\d+/) 58 { 59 my $worker_id = $id_2_worker_id->{$parts[7]}; 60 my $stop = $parts[4]; 61 my $filepath = $parts[8]; 62 $import_dir = &longestCommonPath($filepath, $import_dir); 63 $timing_data->{$worker_id}->{'F'}->{$parts[3]} = {'FN'=>$filepath, 'PS'=>($stop - $parts[5]), 'PE'=>$stop, 'E'=>$stop}; 64 } 65 } 66 close(TIN); 30 67 } 31 68 else 32 69 { 33 die("Error! Malformed filename (expecting number of workers): " . $main_log_filename); 34 } 35 # 1.3 Initialize the data structure with the number of workers too (we don't 36 # know the number of files yet, so they'll have to be adding on the fly) 37 for (my $i = 1; $i <= $number_of_workers; $i++) 38 { 39 $timing_data->{'W' . $i} = {'N'=>'', 'S'=>0, 'E'=>0, 'F'=>{}}; 40 } 41 42 # 2. Read in main log file 43 print " * Reading main log: " . $main_log_filename . "\n"; 44 open(LOGIN, '<:utf8', $main_log_filename) or die("Error! Failed to open file for reading: " . $main_log_filename); 45 my $line = ''; 46 my $currently_processing = {}; 47 while ($line = <LOGIN>) 48 { 49 # 2.1 Parse in the Master thread start time 50 if ($line =~ /\[M\d?:(\d+)\] Starting on (.+)/) 51 { 52 $timing_data->{'M'}->{'S'} = $1; 53 $timing_data->{'M'}->{'N'} = $2; 54 } 55 elsif ($line =~ /\[(W\d+):(\d+)\] Starting on (.+)/) 56 { 57 my $worker_id = $1; 58 $timing_data->{$worker_id}->{'S'} = $2; 59 $timing_data->{$worker_id}->{'N'} = $3; 60 } 61 elsif ($line =~ /\[(W\d+):(\d+)\] Processing/) 62 { 63 my $worker_id = $1; 64 my $job_start_time = $2; 65 $timing_data->{$worker_id}->{'F'}->{$job_start_time} = {'FN'=>'', 'PS'=>0, 'PE'=>0, 'E'=>0}; 66 $currently_processing->{$worker_id} = $job_start_time; 67 } 68 # 2.3 Or we may parse in the starting times for each working thread 69 # 2.4 Or we may also parse (the last encountered) completion time for each 70 # working thread 71 elsif ($line =~ /\[(W\d+):(\d+)\] Process complete/) 72 { 73 my $worker_id = $1; 74 my $job_end_time = $2; 75 $timing_data->{$worker_id}->{'E'} = $job_end_time; 76 my $job_start_time = $currently_processing->{$worker_id}; 77 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'E'} = $job_end_time; 78 delete($currently_processing->{$worker_id}); 79 } 80 # 2.5 Finally, we may parse in the Master thread end time 81 elsif ($line =~ /\[M\d?:(\d+)\] Master will exit when workers complete/) 82 { 83 $timing_data->{'M'}->{'E'} = $1; 84 } 85 } 86 close(LOGIN); 87 88 # 3. Read each of worker logs parsing in information about the files processed 89 # - each will be stored (in an associative array) against its start time 90 print " * Reading worker logs"; 91 foreach my $worker_id (nsort keys %{$timing_data}) 92 { 93 my $jobs = $timing_data->{$worker_id}->{'F'}; 94 my $counter = 1; 95 foreach my $job_start_time (sort keys %{$jobs}) 96 { 97 my $log_filename = 'gsimport-' . $worker_id . '-' . $counter . '.log'; 98 print "."; 99 open(WLOGIN, '<:utf8', $log_filename) or die("Error! Failed to open for reading: " . $log_filename); 100 my $wline = ''; 101 while ($wline = <WLOGIN>) 102 { 103 if ($wline =~ /\[A:\d+\] SimpleVideoPlugin processing: (.+)/) 104 { 105 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'FN'} = $1; 106 } 107 # Start of video processing (excluding as much IO as possible) 108 elsif ($wline =~ /\[C1:(\d+)\]/) 109 { 110 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PS'} = $1; 111 } 112 # Immediately after processing video 113 elsif ($wline =~ /\[E2:(\d+)\]/) 114 { 115 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} = $1; 116 } 117 } 118 119 if ($timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} <= 0) 120 { 121 print "\n[Warning - bogus log: $log_filename]"; 122 } 123 124 close(WLOGIN); 125 $counter++; 126 } 127 } 128 print " Done!\n"; 129 130 # 4. Produce CSV of information 131 print " * Generating timing information as CSV... "; 132 open(CSVOUT, '>:utf8', 'timing.csv') or die('Error! Failed to open file for writing: timing.csv'); 133 print CSVOUT "number,id,hostname,start,end,hierarchy\n"; 134 my $thread_counter = 1; 135 foreach my $thread (nsort keys %{$timing_data}) 136 { 137 my $data = $timing_data->{$thread}; 138 print CSVOUT $thread_counter . ',' . $thread . ',' . $data->{'N'} . ',' . strftime("%H:%M:%S", localtime($data->{'S'})) . ',' . strftime("%H:%M:%S", localtime($data->{'E'})) . ','; 139 if ($thread eq 'M') 140 { 141 print CSVOUT '0'; 142 } 143 else 144 { 145 print CSVOUT '1'; 146 } 147 print CSVOUT "\n"; 148 $thread_counter++; 149 } 150 close(CSVOUT); 151 print "Done!\n"; 152 153 # 5. Produce pretty HTML chart of timing information including jobs 70 die('Error! Failed to open file for reading: ' . $timing_csv_path); 71 } 72 my $number_of_workers = scalar(keys(%{$id_2_worker_id}));; 73 74 # 3. Produce pretty HTML chart of timing information including jobs 154 75 print " * Generating timing information as HTML... "; 155 open(HTMLOUT, '>:utf8', 'gantt.html') or die('Error! Failed to open file for writing: gantt.html');76 open(HTMLOUT, '>:utf8', $dir . '/gantt.html') or die('Error! Failed to open file for writing: gantt.html'); 156 77 print HTMLOUT "<html>\n"; 157 78 print HTMLOUT '<head>' . "\n"; … … 159 80 print HTMLOUT 'div.thread {position:relative}' . "\n"; 160 81 print HTMLOUT 'div.master {border:1px solid gray;color:white;font-weight:bold}' . "\n"; 161 print HTMLOUT 'div.worker {b ackground-color:green;color:white;font-weight:bold}' . "\n";82 print HTMLOUT 'div.worker {border:1px solid black;background-color:green;color:white;font-weight:bold}' . "\n"; 162 83 print HTMLOUT 'div.time {font-size:smaller;font-weight:normal}' . "\n"; 163 84 print HTMLOUT 'div.job {background-color:transparent;color:black;border:1px solid black;display:block;font-size:smaller;position:relative;text-align:center}' . "\n"; … … 195 116 my $process_duration = $process_end - $process_start; 196 117 my $total_duration = $io_duration + $process_duration; 197 ###rint "[DEBUG] filename: " . $timing_data->{$worker_id}->{'F'}->{$job_start}->{'FN'} . "\n";198 ###rint "[DEBUG] start: $job_start ps: $process_start pe: $process_end end: $job_end\n";199 ###rint "[DEBUG] io: $io_duration process: $process_duration duration: $total_duration\n";118 &debugPrint("filename: " . $timing_data->{$worker_id}->{'F'}->{$job_start}->{'FN'}); 119 &debugPrint("start: $job_start ps: $process_start pe: $process_end end: $job_end"); 120 &debugPrint("io: $io_duration process: $process_duration duration: $total_duration"); 200 121 # Running stats 201 122 $total_io_time += $io_duration; … … 210 131 } 211 132 } 133 # Shorten filename 134 $timing_data->{$worker_id}->{'F'}->{$job_start}->{'FN'} = substr($timing_data->{$worker_id}->{'F'}->{$job_start}->{'FN'}, length($import_dir) + 1); 212 135 $file_count++; 213 136 } … … 216 139 my $avg_processing_time = floor(($total_io_time + $total_process_time) / $file_count); 217 140 141 print HTMLOUT "<tr><th>Import Directory:</th><td>" . $import_dir . "</td></tr>\n"; 218 142 print HTMLOUT "<tr><th>Processing Time:</th><td>" . &renderTime($total_duration) . "</td></tr>\n"; 219 143 print HTMLOUT "<tr><th>Processing Threads:</th><td>" . $number_of_workers . "</td></tr>\n"; … … 247 171 exit; 248 172 249 # /** 250 # */ 173 174 ## @function debugPrint() 175 # 176 sub debugPrint 177 { 178 my $msg = shift(@_); 179 if ($debug) 180 { 181 print STDERR '[DEBUG] ' . $msg . "\n"; 182 } 183 } 184 ## debugPrint() ## 185 186 187 ## @function filenameCat 188 # 189 sub filenameCat 190 { 191 my $path = join('/', @_); 192 $path =~ s/[\/\\]+/\//g; 193 # protocols 194 $path =~ s/^(HDFS|HDFSShell|HDThriftFS):\//$1:\/\//; 195 return $path; 196 } 197 ## filenameCat() ## 198 199 ## @function printUsage() 200 # 201 sub printUsage 202 { 203 my $msg = shift(@_); 204 if (defined $msg) 205 { 206 print 'Error! ' . $msg . "\n"; 207 } 208 die("Usage: generate_gantt.pl <results dir> [<width in pixels>]\n\n"); 209 } 210 ## printUsage() ## 211 212 213 ## @function longestCommonPath 214 # 215 sub longestCommonPath 216 { 217 my ($path_new, $path_current) = @_; 218 my $result = ''; 219 if (defined $path_current) 220 { 221 my @path_new_parts = split(/\//, $path_new); 222 my @path_current_parts = split(/\//, $path_current); 223 my @path_parts; 224 for (my $i = 0; $i < scalar(@path_current_parts); $i++) 225 { 226 if ($path_current_parts[$i] eq $path_new_parts[$i]) 227 { 228 push(@path_parts, $path_new_parts[$i]); 229 } 230 else 231 { 232 last; 233 } 234 } 235 $result = &filenameCat(@path_parts); 236 } 237 else 238 { 239 $result = $path_new; 240 } 241 return $result; 242 } 243 ## longestCommonPath() ## 244 245 246 ## @function renderLine() 247 # 251 248 sub renderLine 252 249 { 253 250 my ($table_width, $start, $end, $class, $tname, $tstart, $tend, $jobs) = @_; 251 &debugPrint("renderLine($table_width, $start, $end, $class, $tname, $tstart, $tend, <jobs>)"); 254 252 # All timings need to be relative to 0 (relative start) 255 253 my $duration = $end - $start; … … 311 309 return $html; 312 310 } 313 # /** renderLine() **/ 314 311 ## renderLine() ## 312 313 314 ## @function renderTime() 315 # 315 316 sub renderTime 316 317 {
Note:
See TracChangeset
for help on using the changeset viewer.