source: gs2-extensions/parallel-building/trunk/src/bin/script/openmpi_report.pl@ 30354

Last change on this file since 30354 was 27548, checked in by jmt12, 11 years ago

Extract information from the logs generated by parallel Greenstone using OpenMPI - and generate a CSV suitable for handing to generate_gantt.pl

File size: 4.5 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4use warnings;
5
6# 1.1 Store all information extracted in a cool data structure
7# - N = hostname, S = thread start, E = thread end
8my $timing_data = {'M' => {'N'=>'', 'S'=>0, 'E'=>0}};
9# 1.2 Check the file exists
10if (!-f $ARGV[0])
11{
12 die("Error! File can't be read: " . $ARGV[0]);
13}
14my $main_log_filename = $ARGV[0];
15# 1.2 From the filename we can parse in some information like the number of worker threads
16my $number_of_workers = 0;
17if ($main_log_filename =~ /\-W(\d+)E/)
18{
19 $number_of_workers = $1;
20}
21else
22{
23 die("Error! Malformed filename (expecting number of workers): " . $main_log_filename);
24}
25# 1.3 Initialize the data structure with the number of workers too (we don't
26# know the number of files yet, so they'll have to be adding on the fly)
27for (my $i = 1; $i <= $number_of_workers; $i++)
28{
29 $timing_data->{'W' . $i} = {'N'=>'', 'S'=>0, 'E'=>0, 'F'=>{}};
30}
31
32# 2. Read in main log file
33print " * Reading main log: " . $main_log_filename . "\n";
34open(LOGIN, '<:utf8', $main_log_filename) or die("Error! Failed to open file for reading: " . $main_log_filename);
35my $line = '';
36my $currently_processing = {};
37while ($line = <LOGIN>)
38{
39 # 2.1 Parse in the Master thread start time
40 if ($line =~ /\[M\d?:(\d+)\] Starting on (.+)/)
41 {
42 $timing_data->{'M'}->{'S'} = $1;
43 $timing_data->{'M'}->{'N'} = $2;
44 }
45 elsif ($line =~ /\[(W\d+):(\d+)\] Starting on (.+)/)
46 {
47 my $worker_id = $1;
48 $timing_data->{$worker_id}->{'S'} = $2;
49 $timing_data->{$worker_id}->{'N'} = $3;
50 }
51 elsif ($line =~ /\[(W\d+):(\d+)\] Processing/)
52 {
53 my $worker_id = $1;
54 my $job_start_time = $2;
55 $timing_data->{$worker_id}->{'F'}->{$job_start_time} = {'FN'=>'', 'PS'=>0, 'PE'=>0, 'E'=>0};
56 $currently_processing->{$worker_id} = $job_start_time;
57 }
58 # 2.3 Or we may parse in the starting times for each working thread
59 # 2.4 Or we may also parse (the last encountered) completion time for each
60 # working thread
61 elsif ($line =~ /\[(W\d+):(\d+)\] Process complete/)
62 {
63 my $worker_id = $1;
64 my $job_end_time = $2;
65 $timing_data->{$worker_id}->{'E'} = $job_end_time;
66 my $job_start_time = $currently_processing->{$worker_id};
67 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'E'} = $job_end_time;
68 delete($currently_processing->{$worker_id});
69 }
70 # 2.5 Finally, we may parse in the Master thread end time
71 elsif ($line =~ /\[M\d?:(\d+)\] Master will exit when workers complete/)
72 {
73 $timing_data->{'M'}->{'E'} = $1;
74 }
75}
76close(LOGIN);
77
78# 3. Read each of worker logs parsing in information about the files processed
79# - each will be stored (in an associative array) against its start time
80print " * Reading worker logs";
81foreach my $worker_id (nsort keys %{$timing_data})
82{
83 my $jobs = $timing_data->{$worker_id}->{'F'};
84 my $counter = 1;
85 foreach my $job_start_time (sort keys %{$jobs})
86 {
87 my $log_filename = 'gsimport-' . $worker_id . '-' . $counter . '.log';
88 print ".";
89 open(WLOGIN, '<:utf8', $log_filename) or die("Error! Failed to open for reading: " . $log_filename);
90 my $wline = '';
91 while ($wline = <WLOGIN>)
92 {
93 if ($wline =~ /\[A:\d+\] SimpleVideoPlugin processing: (.+)/)
94 {
95 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'FN'} = $1;
96 }
97 # Start of video processing (excluding as much IO as possible)
98 elsif ($wline =~ /\[C1:(\d+)\]/)
99 {
100 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PS'} = $1;
101 }
102 # Immediately after processing video
103 elsif ($wline =~ /\[E2:(\d+)\]/)
104 {
105 $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} = $1;
106 }
107 }
108
109 if ($timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} <= 0)
110 {
111 print "\n[Warning - bogus log: $log_filename]";
112 }
113
114 close(WLOGIN);
115 $counter++;
116 }
117}
118print " Done!\n";
119
120# 4. Produce CSV of information
121print " * Generating timing information as CSV... ";
122open(CSVOUT, '>:utf8', 'timing.csv') or die('Error! Failed to open file for writing: timing.csv');
123print CSVOUT "number,id,hostname,start,end,hierarchy\n";
124my $thread_counter = 1;
125foreach my $thread (nsort keys %{$timing_data})
126{
127 my $data = $timing_data->{$thread};
128 print CSVOUT $thread_counter . ',' . $thread . ',' . $data->{'N'} . ',' . strftime("%H:%M:%S", localtime($data->{'S'})) . ',' . strftime("%H:%M:%S", localtime($data->{'E'})) . ',';
129 if ($thread eq 'M')
130 {
131 print CSVOUT '0';
132 }
133 else
134 {
135 print CSVOUT '1';
136 }
137 print CSVOUT "\n";
138 $thread_counter++;
139}
140close(CSVOUT);
141print "Done!\n";
Note: See TracBrowser for help on using the repository browser.