#!/usr/bin/perl use strict; use warnings; # 1.1 Store all information extracted in a cool data structure # - N = hostname, S = thread start, E = thread end my $timing_data = {'M' => {'N'=>'', 'S'=>0, 'E'=>0}}; # 1.2 Check the file exists if (!-f $ARGV[0]) { die("Error! File can't be read: " . $ARGV[0]); } my $main_log_filename = $ARGV[0]; # 1.2 From the filename we can parse in some information like the number of worker threads my $number_of_workers = 0; if ($main_log_filename =~ /\-W(\d+)E/) { $number_of_workers = $1; } else { die("Error! Malformed filename (expecting number of workers): " . $main_log_filename); } # 1.3 Initialize the data structure with the number of workers too (we don't # know the number of files yet, so they'll have to be adding on the fly) for (my $i = 1; $i <= $number_of_workers; $i++) { $timing_data->{'W' . $i} = {'N'=>'', 'S'=>0, 'E'=>0, 'F'=>{}}; } # 2. Read in main log file print " * Reading main log: " . $main_log_filename . "\n"; open(LOGIN, '<:utf8', $main_log_filename) or die("Error! Failed to open file for reading: " . $main_log_filename); my $line = ''; my $currently_processing = {}; while ($line = ) { # 2.1 Parse in the Master thread start time if ($line =~ /\[M\d?:(\d+)\] Starting on (.+)/) { $timing_data->{'M'}->{'S'} = $1; $timing_data->{'M'}->{'N'} = $2; } elsif ($line =~ /\[(W\d+):(\d+)\] Starting on (.+)/) { my $worker_id = $1; $timing_data->{$worker_id}->{'S'} = $2; $timing_data->{$worker_id}->{'N'} = $3; } elsif ($line =~ /\[(W\d+):(\d+)\] Processing/) { my $worker_id = $1; my $job_start_time = $2; $timing_data->{$worker_id}->{'F'}->{$job_start_time} = {'FN'=>'', 'PS'=>0, 'PE'=>0, 'E'=>0}; $currently_processing->{$worker_id} = $job_start_time; } # 2.3 Or we may parse in the starting times for each working thread # 2.4 Or we may also parse (the last encountered) completion time for each # working thread elsif ($line =~ /\[(W\d+):(\d+)\] Process complete/) { my $worker_id = $1; my $job_end_time = $2; $timing_data->{$worker_id}->{'E'} = $job_end_time; my $job_start_time = $currently_processing->{$worker_id}; $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'E'} = $job_end_time; delete($currently_processing->{$worker_id}); } # 2.5 Finally, we may parse in the Master thread end time elsif ($line =~ /\[M\d?:(\d+)\] Master will exit when workers complete/) { $timing_data->{'M'}->{'E'} = $1; } } close(LOGIN); # 3. Read each of worker logs parsing in information about the files processed # - each will be stored (in an associative array) against its start time print " * Reading worker logs"; foreach my $worker_id (nsort keys %{$timing_data}) { my $jobs = $timing_data->{$worker_id}->{'F'}; my $counter = 1; foreach my $job_start_time (sort keys %{$jobs}) { my $log_filename = 'gsimport-' . $worker_id . '-' . $counter . '.log'; print "."; open(WLOGIN, '<:utf8', $log_filename) or die("Error! Failed to open for reading: " . $log_filename); my $wline = ''; while ($wline = ) { if ($wline =~ /\[A:\d+\] SimpleVideoPlugin processing: (.+)/) { $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'FN'} = $1; } # Start of video processing (excluding as much IO as possible) elsif ($wline =~ /\[C1:(\d+)\]/) { $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PS'} = $1; } # Immediately after processing video elsif ($wline =~ /\[E2:(\d+)\]/) { $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} = $1; } } if ($timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} <= 0) { print "\n[Warning - bogus log: $log_filename]"; } close(WLOGIN); $counter++; } } print " Done!\n"; # 4. Produce CSV of information print " * Generating timing information as CSV... "; open(CSVOUT, '>:utf8', 'timing.csv') or die('Error! Failed to open file for writing: timing.csv'); print CSVOUT "number,id,hostname,start,end,hierarchy\n"; my $thread_counter = 1; foreach my $thread (nsort keys %{$timing_data}) { my $data = $timing_data->{$thread}; print CSVOUT $thread_counter . ',' . $thread . ',' . $data->{'N'} . ',' . strftime("%H:%M:%S", localtime($data->{'S'})) . ',' . strftime("%H:%M:%S", localtime($data->{'E'})) . ','; if ($thread eq 'M') { print CSVOUT '0'; } else { print CSVOUT '1'; } print CSVOUT "\n"; $thread_counter++; } close(CSVOUT); print "Done!\n";