#!/usr/bin/perl use strict; use warnings; use Sort::Naturally; use POSIX qw(floor strftime); print "\n===== Generate Timing (GANTT) =====\n"; # 1. Initialization #my $chart_width = 1024; my $chart_width = 1536; #my $chart_width = 2048; print "Chart Width: " . $chart_width . "px\n"; # 1.1 Store all information extracted in a cool data structure # - N = hostname, S = thread start, E = thread end my $timing_data = {'M' => {'N'=>'', 'S'=>0, 'E'=>0}}; # 1.2 Check the file exists if (!-f $ARGV[0]) { die("Error! File can't be read: " . $ARGV[0]); } my $main_log_filename = $ARGV[0]; # 1.2 From the filename we can parse in some information like the number of worker threads my $number_of_workers = 0; if ($main_log_filename =~ /\-W(\d+)E/) { $number_of_workers = $1; } else { die("Error! Malformed filename (expecting number of workers): " . $main_log_filename); } # 1.3 Initialize the data structure with the number of workers too (we don't # know the number of files yet, so they'll have to be adding on the fly) for (my $i = 1; $i <= $number_of_workers; $i++) { $timing_data->{'W' . $i} = {'N'=>'', 'S'=>0, 'E'=>0, 'F'=>{}}; } # 2. Read in main log file print " * Reading main log: " . $main_log_filename . "\n"; open(LOGIN, '<:utf8', $main_log_filename) or die("Error! Failed to open file for reading: " . $main_log_filename); my $line = ''; my $currently_processing = {}; while ($line = ) { # 2.1 Parse in the Master thread start time if ($line =~ /\[M\d?:(\d+)\] Starting on (.+)/) { $timing_data->{'M'}->{'S'} = $1; $timing_data->{'M'}->{'N'} = $2; } elsif ($line =~ /\[(W\d+):(\d+)\] Starting on (.+)/) { my $worker_id = $1; $timing_data->{$worker_id}->{'S'} = $2; $timing_data->{$worker_id}->{'N'} = $3; } elsif ($line =~ /\[(W\d+):(\d+)\] Processing/) { my $worker_id = $1; my $job_start_time = $2; $timing_data->{$worker_id}->{'F'}->{$job_start_time} = {'FN'=>'', 'PS'=>0, 'PE'=>0, 'E'=>0}; $currently_processing->{$worker_id} = $job_start_time; } # 2.3 Or we may parse in the starting times for each working thread # 2.4 Or we may also parse (the last encountered) completion time for each # working thread elsif ($line =~ /\[(W\d+):(\d+)\] Process complete/) { my $worker_id = $1; my $job_end_time = $2; $timing_data->{$worker_id}->{'E'} = $job_end_time; my $job_start_time = $currently_processing->{$worker_id}; $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'E'} = $job_end_time; delete($currently_processing->{$worker_id}); } # 2.5 Finally, we may parse in the Master thread end time elsif ($line =~ /\[M\d?:(\d+)\] Master will exit when workers complete/) { $timing_data->{'M'}->{'E'} = $1; } } close(LOGIN); # 3. Read each of worker logs parsing in information about the files processed # - each will be stored (in an associative array) against its start time print " * Reading worker logs"; foreach my $worker_id (nsort keys %{$timing_data}) { my $jobs = $timing_data->{$worker_id}->{'F'}; my $counter = 1; foreach my $job_start_time (sort keys %{$jobs}) { my $log_filename = 'gsimport-' . $worker_id . '-' . $counter . '.log'; print "."; open(WLOGIN, '<:utf8', $log_filename) or die("Error! Failed to open for reading: " . $log_filename); my $wline = ''; while ($wline = ) { if ($wline =~ /\[A:\d+\] SimpleVideoPlugin processing: (.+)/) { $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'FN'} = $1; } # Start of video processing (excluding as much IO as possible) elsif ($wline =~ /\[C1:(\d+)\]/) { $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PS'} = $1; } # Immediately after processing video elsif ($wline =~ /\[E2:(\d+)\]/) { $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} = $1; } } if ($timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} <= 0) { print "\n[Warning - bogus log: $log_filename]"; } close(WLOGIN); $counter++; } } print " Done!\n"; # 4. Produce CSV of information print " * Generating timing information as CSV... "; open(CSVOUT, '>:utf8', 'timing.csv') or die('Error! Failed to open file for writing: timing.csv'); print CSVOUT "number,id,hostname,start,end,hierarchy\n"; my $thread_counter = 1; foreach my $thread (nsort keys %{$timing_data}) { my $data = $timing_data->{$thread}; print CSVOUT $thread_counter . ',' . $thread . ',' . $data->{'N'} . ',' . strftime("%H:%M:%S", localtime($data->{'S'})) . ',' . strftime("%H:%M:%S", localtime($data->{'E'})) . ','; if ($thread eq 'M') { print CSVOUT '0'; } else { print CSVOUT '1'; } print CSVOUT "\n"; $thread_counter++; } close(CSVOUT); print "Done!\n"; # 5. Produce pretty HTML chart of timing information including jobs print " * Generating timing information as HTML... "; open(HTMLOUT, '>:utf8', 'gantt.html') or die('Error! Failed to open file for writing: gantt.html'); print HTMLOUT "\n"; print HTMLOUT '' . "\n"; print HTMLOUT '' . "\n"; print HTMLOUT '' . "\n"; print HTMLOUT "\n"; print HTMLOUT "

Statistics

\n"; print HTMLOUT "\n"; my $total_duration = $timing_data->{'M'}->{'E'} - $timing_data->{'M'}->{'S'}; my $file_count = 0; my $total_io_time = 0; my $total_process_time = 0; my $fastest_file = 0; my $slowest_file = 0; my $problem_files = 0; foreach my $worker_id (keys %{$timing_data}) { if ($worker_id ne 'M') { foreach my $job_start ( keys %{$timing_data->{$worker_id}->{'F'}} ) { my $process_start = $timing_data->{$worker_id}->{'F'}->{$job_start}->{'PS'}; my $process_end = $timing_data->{$worker_id}->{'F'}->{$job_start}->{'PE'}; my $job_end = $timing_data->{$worker_id}->{'F'}->{$job_start}->{'E'}; if ($process_start == 0 || $process_end == 0 || $job_end == 0) { $problem_files++; } else { my $io_duration = ($process_start - $job_start) + ($job_end - $process_end); my $process_duration = $process_end - $process_start; my $total_duration = $io_duration + $process_duration; ###rint "[DEBUG] filename: " . $timing_data->{$worker_id}->{'F'}->{$job_start}->{'FN'} . "\n"; ###rint "[DEBUG] start: $job_start ps: $process_start pe: $process_end end: $job_end\n"; ###rint "[DEBUG] io: $io_duration process: $process_duration duration: $total_duration\n"; # Running stats $total_io_time += $io_duration; $total_process_time += $process_duration; if ($fastest_file == 0 || $total_duration < $fastest_file) { $fastest_file = $total_duration; } if ($slowest_file == 0 || $total_duration > $slowest_file) { $slowest_file = $total_duration; } } $file_count++; } } } my $avg_processing_time = floor(($total_io_time + $total_process_time) / $file_count); print HTMLOUT "\n"; print HTMLOUT "\n"; print HTMLOUT "\n"; print HTMLOUT "\n"; print HTMLOUT "\n"; print HTMLOUT "\n"; print HTMLOUT "\n"; print HTMLOUT "\n"; print HTMLOUT "\n"; print HTMLOUT "
Processing Time:" . &renderTime($total_duration) . "
Processing Threads:" . $number_of_workers . "
Files Processed:" . $file_count . "
Problem Files:" . $problem_files . "
Serial Processing Time:" . &renderTime($total_process_time) . "
Serial IO Time:" . &renderTime($total_io_time) . "
Average File Processing Time:" . &renderTime($avg_processing_time) . "
Fastest File:" . &renderTime($fastest_file) . "
Slowest File:" . &renderTime($slowest_file) . "
\n"; print HTMLOUT "
\n"; print HTMLOUT "

Timing Chart (Gannt)

\n"; print HTMLOUT renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'master', $timing_data->{'M'}->{'N'}, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, {}); foreach my $worker_id (nsort keys %{$timing_data}) { if ($worker_id ne 'M') { my $data = $timing_data->{$worker_id}; print HTMLOUT renderLine($chart_width, $timing_data->{'M'}->{'S'}, $timing_data->{'M'}->{'E'}, 'worker', $worker_id . ' [' . $data->{'N'} . ']', $data->{'S'}, $data->{'E'}, $data->{'F'}); } } print HTMLOUT '
' . "\n"; print HTMLOUT "\n"; print HTMLOUT ""; close(HTMLOUT); print "Done!\n"; print "Complete!\n\n"; exit; # /** # */ sub renderLine { my ($table_width, $start, $end, $class, $tname, $tstart, $tend, $jobs) = @_; # All timings need to be relative to 0 (relative start) my $duration = $end - $start; my $rtstart = $tstart - $start; my $rtend = $tend - $start; # We need to scale these depending on the timing of this thread relative to # the master thread my $width = $chart_width; my $left = 0; if ($start != $tstart) { my $left_offset_percent = $rtstart / $duration; $left = $left_offset_percent * $table_width; } # - subtract any left offset from width $width = $width - $left; # - right offset directly subtracted from width if ($end != $tend) { my $right_offset_percent = ($duration - $rtend) / $duration; my $right = $right_offset_percent * $table_width; $width = $width - $right; } my $html = '
'; if ($class eq 'master') { $html .= '
'; } $html .= '
' . &renderTime($rtstart) . '
' . ucfirst($class) . ': ' . $tname . '
' . renderTime($rtend) . '
'; my $previous_jright = 0; foreach my $jstart (sort keys %{$jobs}) { my $rjstart = $jstart - $start; my $rpstart = $jobs->{$jstart}->{'PS'} - $start; my $rpend = $jobs->{$jstart}->{'PE'} - $start; my $rjend = $jobs->{$jstart}->{'E'} - $start; my $jduration = $jobs->{$jstart}->{'E'} - $jstart; # Scale Job co-ordinates my $jleft_percent = $rjstart / $duration; my $jleft = floor($jleft_percent * $table_width); my $jwidth_percent = $jduration / $duration; my $jwidth = floor($jwidth_percent * $table_width); if ($jleft < $previous_jright) { $jleft = $previous_jright; } if ($jleft + $jwidth > $left + $width) { $jwidth = ($left + $width) - $jleft; } # Then scale process timings within that! my $rpleft_percent = ($rpstart - $rjstart) / $duration; my $rpleft = floor($rpleft_percent * $table_width); my $rpwidth_percent = ($rpend - $rpstart) / $duration; my $rpwidth = floor($rpwidth_percent * $table_width); $html .= '
 ' . $jobs->{$jstart}->{'FN'} . '
'; $previous_jright = $jleft + $jwidth; } return $html; } # /** renderLine() **/ sub renderTime { my ($seconds) = @_; my $time_str = ''; # determine how many hours my $an_hour = 60 * 60; my $hours = floor($seconds / $an_hour); $seconds = $seconds - ($hours * $an_hour); my $a_minute = 60; my $minutes = floor($seconds / $a_minute); $seconds = $seconds - ($minutes * $a_minute); if ($hours > 0) { $time_str = sprintf('%dh%02dm%02ds', $hours, $minutes, $seconds); } elsif ($minutes > 0) { $time_str = sprintf('%dm%02ds', $minutes, $seconds); } else { $time_str = $seconds . 's'; } return $time_str; }