source: gs2-extensions/parallel-building/trunk/src/bin/script/import_with_io_metric.pl@ 28646

Last change on this file since 28646 was 28646, checked in by jmt12, 10 years ago

A script that uses strace to produce IO metrics of a Greenstone import

  • Property svn:executable set to *
File size: 14.8 KB
Line 
1#!/usr/bin/perl
2
3# Pragma
4use strict;
5use warnings;
6# Libraries
7use File::Path qw( remove_tree );
8use File::Temp qw( tempdir );
9use Getopt::Long;
10use Time::HiRes qw( gettimeofday tv_interval );
11
12BEGIN
13{
14 print "======================= Greenstone Import + I/O Metrics ======================\n\n";
15 if (!defined $ENV{'GSDLHOME'})
16 {
17 print "Error! GSDLHOME not set\n\n";
18 exit;
19 }
20}
21
22# 1. Parse and sanitize arguments - the listing of syscalls whose duration we
23# accumulate as IO time. Unless otherwise noted, these are taken from what
24# strace_analyzer.pl would use
25print " * Initializing... Done\n";
26our $io_function_list = {
27 'access'=>1,
28 'chmod'=>1,
29 'close'=>1,
30 'creat'=>1,
31 'fclose'=>1,
32 'fcntl'=>1,
33 'fgetpos'=>1,
34 'flock'=>1,
35 'fseek'=>1,
36 'fsetpos'=>1,
37 'fstat'=>1,
38 'fsync'=>1,
39 'ftell'=>1,
40 'getdents'=>1,
41 #'ioctl'=>1, # John added
42 'llseek'=>1,
43 'lockf'=>1,
44 'lseek'=>1,
45 'lseek64'=>1,
46 'mkdir'=>1,
47 'open'=>1,
48 'read'=>1,
49 'readdir'=>1,
50 'rename'=>1,
51 'rewind'=>1,
52 'rewinddir'=>1,
53 'scandir'=>1,
54 'stat'=>1,
55 'stat64'=>1,
56 'telldir'=>1,
57 'unlink'=>1,
58 'write'=>1
59 };
60
61my $start_time = [gettimeofday()];
62my $collection = '';
63my $debug = 0;
64my $flush_delay = 3;
65GetOptions ('collection=s' => \$collection,
66 'debug' => \$debug,
67 'flushdelay=i' => \$flush_delay)
68or &printUsage('Invalid or missing argument');
69if (!defined $collection || $collection eq '')
70{
71 &printUsage('Missing collection name');
72}
73my $collection_path = $ENV{'GSDLHOME'} . '/collect/' . $collection;
74if (!-d $collection_path)
75{
76 &printUsage('Collection not found: ' . $collection_path);
77}
78print ' - Greenstone: ' . $ENV{'GSDLHOME'} . "\n";
79print ' - Collection: ' . $collection . "\n";
80print ' - Flush Delay: ' . $flush_delay . "\n";
81print ' - Debug? ' . ($debug ? 'Yes' : 'No') . "\n";
82print "\n";
83
84# 2. Run dummy import command (empty import directory) within trace to
85# determine baseline
86&clearArchives();
87&flushDiskCache($flush_delay);
88print " * Running baseline collection import... ";
89my $dummy_dir = tempdir( CLEANUP => 1);
90my $command1 = 'time -p strace -f -q -T -ttt -s 4096 import.pl -removeold -importdir "' . $dummy_dir .'" "' . $collection . '" 2>&1 1>/dev/null';
91my $debug_path1 = '';
92if ($debug)
93{
94 $debug_path1 = $collection_path . '/debug-dummy.csv';
95}
96my ($baseline_duration, $baseline_io, $baseline_cpu) = &parseStrace($command1, $collection_path, $debug_path1);
97print "Done\n";
98print ' - Duration: ' . sprintf('%0.6f', $baseline_duration) . " seconds\n";
99print ' - I/O Duration: ' . sprintf('%0.6f', $baseline_io) . " seconds\n";
100print ' - I/O Percent: ' . sprintf('%d', ($baseline_io / $baseline_duration) * 100) . "%\n";
101if ($debug)
102{
103 print " - See 'debug-dummy.csv' for raw data\n";
104}
105print "\n";
106exit;
107
108# 3. Run normal import command within strace reading output line by line
109&clearArchives();
110&flushDiskCache($flush_delay);
111print " * Running import and tracking I/O metrics... ";
112my $command2 = 'strace -f -q -T -ttt -s 256 import.pl -removeold "' . $collection . '" 2>&1';
113my $debug_path2 = '';
114if ($debug)
115{
116 $debug_path2 = $collection_path . '/debug-import.csv';
117}
118my ($duration_import, $duration_io, $duration_cpu, $import_complete, $docs_found, $docs_processed) = &parseStrace($command2, $collection_path, $debug_path2);
119print "Done\n";
120print ' - Import? ' . ( $import_complete ? 'Completed' : 'Failed') . "\n";
121print ' - Found: ' . $docs_found . ' document' . (($docs_processed > 1) ? 's' : '') . "\n";
122print ' - Processed: ' . $docs_processed . ' document' . (($docs_processed > 1) ? 's' : '') . "\n";
123print ' - Duration: ' . sprintf('%0.6f', $duration_import) . " seconds\n";
124print ' - I/O Duration: ' . sprintf('%0.6f', $duration_io) . " seconds\n";
125print ' - I/O Percent: ' . sprintf('%d', ($duration_io / $duration_import) * 100) . "%\n";
126print " - See 'import.log' for Greenstone Import details\n";
127print " - See 'strace.out' for STrace details\n";
128if ($debug)
129{
130 print " - See 'debug-import.csv' for raw data\n";
131}
132print "\n";
133
134# 4. Results
135print &makeHeader('Import Results', 78) . "\n\n";
136print " Import Duration: " . sprintf('%0.6f', ($duration_import - $baseline_duration)) . " seconds\n";
137print " I/O Duration: " . sprintf('%0.6f', ($duration_io - $baseline_io)) . " seconds\n";
138print " I/O Percentage: " . sprintf('%d', ((($duration_io - $baseline_io)/($duration_import - $baseline_duration)) * 100)) . "%\n";
139print "\n";
140
141# Complete!
142my $end_time = [gettimeofday()];
143my $duration = tv_interval($start_time, $end_time);
144print &makeHeader(sprintf('Complete in %0.2f seconds', $duration), 78) . "\n\n";
145
146exit;
147
148
149## @function
150#
151sub makeHeader
152{
153 my ($msg, $length) = @_;
154 if (length($msg) % 2 == 1)
155 {
156 $msg = ' ' . $msg . ' ';
157 }
158 else
159 {
160 $msg = ' ' . $msg . ' ';
161 }
162 while (length($msg) < $length)
163 {
164 $msg = '=' . $msg . '=';
165 }
166 return $msg;
167}
168## makeHeader() ##
169
170
171## @function
172#
173sub clearArchives
174{
175 # 2. Remove any existing archives directory, so it doesn't get factored in the
176 # IO costs
177 my $archives_dir = $collection_path . '/archives';
178 if (-d $archives_dir)
179 {
180 print " * Deleting existing archives directory... ";
181 remove_tree($archives_dir);
182 print "Done\n";
183 }
184}
185## clearArchives()
186
187
188## @function
189#
190sub flushDiskCache
191{
192 my ($flush_delay) = @_;
193 # The Premise: use sudoedit and other black magic to clear out the memory-
194 # based disk cache (which is done by writing the number 3 to a certain
195 # system file)
196 &printFlush(" * Synching file system... ");
197 `sync`;
198 print "Done\n";
199 &printFlush(" * Dropping memory disk cache... ");
200 # - save our current default editor
201 my $current_editor = $ENV{'EDITOR'};
202 # - replace default editor with a script that simply clobbers the contents
203 # of any file it's handed with the number "3"
204 $ENV{'EDITOR'} = 'reset_memcache_editor.sh';
205 # - we now call sudoedit on the system file. How sudoedit works is that it
206 # starts by making a temp copy of the system file with appropriate
207 # permissions allowing the user to edit. It then passes the path to the
208 # temp file to the default editor - typically this would be an interactive
209 # editor like 'vi'. However, we've just replaced the editor with a custom
210 # script that just writes '3' as the content of the tmp file. Finally, when
211 # the editor exits, sudoedit copies the tmp file over the top of the system
212 # file, restoring appropriate root-level permissions
213 `sudoedit /proc/sys/vm/drop_caches`;
214 # - restore the default editor, just in case something in Greenstone
215 # depends on this being a reasonably value
216 $ENV{'EDITOR'} = $current_editor;
217 print "Done\n";
218 # - short break to give OS time to actually notice the drop_caches command
219 &printFlush(" * Waiting for drop_caches to complete... ");
220 while ($flush_delay > 0)
221 {
222 &printFlush($flush_delay . ' ');
223 sleep(1);
224 $flush_delay--;
225 }
226 print "Done\n\n";
227}
228## flushDiskCache()
229
230
231## @function parseStrace
232#
233sub parseStrace
234{
235 my ($command, $logs_path, $debug_path) = @_;
236 my $start_timestamp = 0;
237 my $end_timestamp = 0;
238 my $io_duration = 0;
239 my $cpu_duration = 0;
240 my $import_complete = 0;
241 my $documents_found = 0;
242 my $documents_processed = 0;
243 # hash from PIDs to array (FIFO stack) of syscalls and arguments
244 my $interrupted_stacks = {};
245
246 # open the logs if we have been given somewhere to log too
247 my $logging_enabled = 0;
248 if ($logs_path ne '')
249 {
250 my $strace_path = $logs_path . '/strace.out';
251 open(STRACEOUT, '>:utf8', $strace_path) or die("Error! Failed to open file for writing: " . $strace_path);
252 my $gslog_path = $logs_path . '/import.log';
253 open(GSLOGOUT, '>:utf8', $gslog_path) or die("Error! Failed to open file for writing: " . $gslog_path);
254 $logging_enabled = 1;
255 }
256
257 # debug mode means writing a whole bunch of information to a CSV
258 if ($debug_path)
259 {
260 open(DEBUGOUT, '>:utf8', $debug_path) or die('Error! Failed to open file for writing: ' . $logs_path . '/' . $debug);
261 print DEBUGOUT "TIMESTAMP,PID,SYSCALL,DURATION,TOTALIO,TOTALCPU,ARGS,RESULT\n";
262 }
263
264 if (open(GSI, $command . '|'))
265 {
266 my $pid = 0;
267 my $line;
268 while ($line = <GSI>)
269 {
270 if ($logging_enabled)
271 {
272 print STRACEOUT $line;
273 }
274 # we may have a line that unfortunately includes newlines in its arguments list
275 # - watch out for unfinished syscalls that will be resumed later
276 # - oh! and signals, cause they are way different anyhow
277 while ($line !~ /<unfinished \.\.\.>/ && $line !~ /SIG/ && $line =~ /\(/ && $line !~ /\)\s+=/)
278 {
279 my $new_line = '';
280 if ($new_line = <GSI>)
281 {
282 $line .= ' ' . $new_line;
283 }
284 else
285 {
286 print "Error! Failed to find complete arguments list: " . $line . "\n";
287 exit;
288 }
289 }
290 # detect and remove any PID information to make the rest of parsing easier
291 if ($line =~ /^\[pid (\d+)\]\s+(.*)$/s)
292 {
293 my $new_pid = $1;
294 $line = $2;
295 if ($new_pid != $pid)
296 {
297 $pid = $new_pid;
298 }
299 }
300 # detect resume lines, and rewrite so they look like other lines
301 if ($line =~ /^(\d+\.\d+)\s+<\.\.\.\s+([a-z0-9_]+)\s+resumed>\s+(.*)\)\s+=\s+(.+)\s+<(\d+\.\d+)>$/s)
302 {
303 my $timestamp = $1;
304 my $prev_syscall = '';
305 my $syscall = $2;
306 my $args_prefix = '';
307 my $args_suffix = $3;
308 my $result = $4;
309 my $duration = $5;
310 if ($syscall eq 'write' && $args_prefix =~ /^[12],/)
311 {
312 chomp($line);
313 print "\n\nResuming write pid:$pid line:|" . $line . "|\n";
314 # pop the last interrupted syscall off the stack for this pid
315 if (!defined $interrupted_stacks->{$pid} || scalar(@{$interrupted_stacks->{$pid}}) == 0)
316 {
317 print "Error! Resume when stack is empty: " . $pid . "\n";
318 exit;
319 }
320 ($prev_syscall, $args_prefix) = @{pop(@{$interrupted_stacks->{$pid}})};
321 if ($syscall ne $prev_syscall)
322 {
323 print "Error! Resume syscall does not match unfinished syscall: " . $syscall . " != " . $prev_syscall . "\n";
324 exit;
325 }
326 }
327 $line = $timestamp . ' ' . $syscall . '(' . $args_prefix . ' ' . $args_suffix . ') = ' . $result . ' <' . $duration . '>';
328 }
329
330 # SPECIAL CASES
331 # - unfinished syscalls... the only ones we care about are writes to standard out or error as these are echoed to the shell. Add them to the appropriate stack waiting for the 'resume' rewrite above
332 if ($line =~ /^(\d+\.\d+)\s+([a-z0-9_]+)\((.*?)\s+<unfinished \.\.\.>$/)
333 {
334 my $timestamp = $1;
335 my $syscall = $2;
336 my $args_prefix = $3;
337 if ($syscall eq 'write' && $args_prefix =~ /^[12],/)
338 {
339 print "\n\nPending write pid:$pid line:|" . $line . "|\n";
340 if (!defined $interrupted_stacks->{$pid})
341 {
342 $interrupted_stacks->{$pid} = [];
343 }
344 push(@{$interrupted_stacks->{$pid}}, [$syscall, $args_prefix]);
345 }
346 }
347 # - processes announce detachments and resumptions - maybe this explains
348 # why wait4's keep chaning
349 elsif ($line =~ /^Process (\d+) (detached|resumed)/)
350 {
351 my $local_pid = $1;
352 my $action = $2;
353 }
354 # - exit_group never has a duration
355 elsif ($line =~ /^(\d+\.\d+)\s+exit_group\(0\)\s+=\s+\?$/)
356 {
357 my $timestamp = $1;
358 $end_timestamp = $timestamp;
359 if ($debug)
360 {
361 print DEBUGOUT $timestamp . ',' . $pid . ',"exit_group",0,' . $io_duration . ',' . $cpu_duration . ",\"0\",\"\"\n";
362 }
363 }
364 # - signals
365 elsif ($line =~ /^(\d+\.\d+)\s+---\s+([A-Z]+)\s+\((.*)\)\s+\@\s+0\s+\(0\)\s+---$/)
366 {
367 my $timestamp = $1;
368 my $signal = $2;
369 my $message = $3;
370 $end_timestamp = $timestamp;
371 }
372 # NORMAL CASE
373 elsif ($line =~ /^(\d+\.\d+)\s+([a-z0-9_]+)\((.*)\)\s+=\s+(.+)\s+<(\d+\.\d+)>$/s)
374 {
375 my $timestamp = $1;
376 my $syscall = $2;
377 my $args = $3;
378 my $result = $4;
379 my $duration = $5;
380 if ($start_timestamp == 0)
381 {
382 $start_timestamp = $timestamp;
383 }
384 $end_timestamp = $timestamp;
385 if (defined $io_function_list->{$syscall})
386 {
387 $io_duration += $duration;
388 }
389 else
390 {
391 $cpu_duration += $duration;
392 ##print "[$syscall]";
393 }
394 if ($syscall eq 'write' && $args =~ /[12],\s+"(.*)"/s)
395 {
396 my $payload = $1;
397 $payload =~ s/\\n/\n/g;
398 # parse up some collection import statistics
399 if ($payload =~ /Import complete/)
400 {
401 $import_complete = 1;
402 }
403 elsif ($payload =~ /^\* (\d+) documents were considered/)
404 {
405 $documents_found = $1;
406 }
407 elsif ($payload =~ /^\* (\d+) were processed/)
408 {
409 $documents_processed = $1;
410 }
411 # finally, write these lines out to log if a path has been provided
412 if ($logging_enabled)
413 {
414 print GSLOGOUT $payload;
415 }
416 }
417 if ($debug)
418 {
419 $args =~ s/\"/&quot;/g;
420 $args =~ s/\r?\n//g;
421 print DEBUGOUT $timestamp . ',' . $pid . ',"' . $syscall . '",' . $duration . ',' . $io_duration . ',' . $cpu_duration . ',"' . $args . '","' . $result . "\"\n";
422 }
423 }
424 else
425 {
426 chomp($line);
427 print "\n\nUnparsed: |" . $line . "|\n\n";
428 }
429 }
430 }
431 else
432 {
433 print "Error! Failed to open import process for reading: " . $command . "\n";
434 }
435 if ($logging_enabled)
436 {
437 close(STRACEOUT);
438 close(GSLOGOUT);
439 }
440 if ($debug_path)
441 {
442 close(DEBUGOUT);
443 }
444 my $cmd_duration = $end_timestamp - $start_timestamp;
445 return ($cmd_duration, $io_duration, $cpu_duration, $import_complete, $documents_found, $documents_processed);
446}
447# parseStrace()
448
449
450sub printFlush
451{
452 my ($msg) = @_;
453 $| = 1;
454 print $msg;
455 $| = 0;
456}
457
458
459sub printUsage
460{
461 my ($msg) = @_;
462 if (defined $msg)
463 {
464 print 'Error! ' . $msg . "\n";
465 }
466 print "Usage: import_with_io_metric.pl -collection <collection name> [-debug]\n\n";
467 exit;
468}
469
4701;
Note: See TracBrowser for help on using the repository browser.