source: gs2-extensions/parallel-building/trunk/src/bin/script/import_with_io_metric.pl@ 28767

Last change on this file since 28767 was 28767, checked in by jmt12, 10 years ago

Drastically increased the script to allow 1) battery of imports backed by database of tests, 2) printing of reported in dokuwiki format, and 3) calculation of statistical information (STDDEV) and outliers (although I don't do anything with that information... yet)

  • Property svn:executable set to *
File size: 32.3 KB
RevLine 
[28646]1#!/usr/bin/perl
2
3# Pragma
4use strict;
5use warnings;
6# Libraries
[28767]7use File::Path qw( remove_tree );
8use File::Temp qw( tempdir );
[28646]9use Getopt::Long;
[28767]10use POSIX qw( strftime );
11use Time::HiRes qw( gettimeofday tv_interval );
[28646]12
13BEGIN
14{
[28767]15 print "\n======================= Greenstone Import + I/O Metrics ======================\n\n";
[28646]16 if (!defined $ENV{'GSDLHOME'})
17 {
18 print "Error! GSDLHOME not set\n\n";
19 exit;
20 }
21}
22
[28767]23print "Prepare and run a number of collection imports while recording I/O metrics.\n\n";
24
[28646]25# 1. Parse and sanitize arguments - the listing of syscalls whose duration we
26# accumulate as IO time. Unless otherwise noted, these are taken from what
27# strace_analyzer.pl would use
28print " * Initializing... Done\n";
[28767]29my $machine_name = `hostname -s`;
30chomp($machine_name);
31$machine_name = ucfirst($machine_name);
32my $os_name = `lsb_release -i`;
33$os_name =~ s/^Distributor ID:\s+(.*)\r?\n$/$1/i;
34my $fs_name = `df -T $ENV{'GSDLHOME'}`;
35$fs_name =~ s/^.*(ext2|ext3|ext4|xfs|zfs).*$/$1/is;
36$fs_name = uc($fs_name);
37my $start_time = [gettimeofday()];
38my @collections;
39my $print_report = 0;
[28665]40our $strace_flags = '-f -q -s 256 -T -ttt';
[28646]41our $io_function_list = {
42 'access'=>1,
43 'chmod'=>1,
44 'close'=>1,
45 'creat'=>1,
46 'fclose'=>1,
47 'fcntl'=>1,
48 'fgetpos'=>1,
49 'flock'=>1,
50 'fseek'=>1,
51 'fsetpos'=>1,
52 'fstat'=>1,
53 'fsync'=>1,
54 'ftell'=>1,
55 'getdents'=>1,
[28767]56 'ioctl'=>1, # John added
[28646]57 'llseek'=>1,
58 'lockf'=>1,
59 'lseek'=>1,
60 'lseek64'=>1,
61 'mkdir'=>1,
62 'open'=>1,
63 'read'=>1,
64 'readdir'=>1,
65 'rename'=>1,
66 'rewind'=>1,
67 'rewinddir'=>1,
68 'scandir'=>1,
69 'stat'=>1,
70 'stat64'=>1,
71 'telldir'=>1,
72 'unlink'=>1,
73 'write'=>1
74 };
[28767]75our $debug = 0;
76our $flush_delay = 3;
77our $test_runs = 9;
[28665]78
[28767]79GetOptions ('collection=s' => \@collections,
[28646]80 'debug' => \$debug,
[28767]81 'flushdelay=i' => \$flush_delay,
82 'runs=i' => \$test_runs,
83 'report' => \$print_report)
[28646]84or &printUsage('Invalid or missing argument');
[28665]85
[28767]86if ($test_runs < 1)
[28646]87{
[28767]88 &printUsage('Test runs must be non-zeo');
[28646]89}
[28767]90my $db_name = 'strace_' . $machine_name . '_' . $os_name . '_' . $fs_name . '.sqlite3db';
91my $db_path = $ENV{'GSDLHOME'} . '/collect/' . $db_name;
92
93if (0 < $print_report)
[28646]94{
[28767]95 &printReport($db_path);
96 exit;
[28646]97}
[28767]98
99if (0 == scalar(@collections))
100{
101 &printUsage('Missing collection or collections');
102}
103
104print ' - Greenstone: ' . $ENV{'GSDLHOME'} . "\n";
105print ' - Collections: ' . join(',', @collections) . "\n";
106print ' - Test Runs: ' . $test_runs . "\n";
107print ' - Flush Delay: ' . $flush_delay . "\n";
108print ' - Debug? ' . ($debug ? 'Yes' : 'No') . "\n";
[28646]109print "\n";
110
[28767]111# 0. Create database a populate with tests (if necessary)
112if (!-f $db_path)
[28646]113{
[28767]114 print STDOUT ' * Creating database tables... ';
115 execSQL($db_path, 'CREATE TABLE IF NOT EXISTS tests (collection TEXT, test_run INTEGER, test_started INTEGER DEFAULT 0, docs_found INTEGER, docs_processed INTEGER, b_elapsed REAL DEFAULT 0, b_io REAL DEFAULT 0, b_other REAL DEFAULT 0, i_elapsed REAL DEFAULT 0, i_io REAL DEFAULT 0, i_other REAL DEFAULT 0, PRIMARY KEY (collection, test_run))');
116 print STDOUT "Done\n";
[28646]117}
[28767]118print ' * Populating database with test runs as necessary... ';
119my $new_test_count = 0;
120foreach my $collection (@collections)
[28646]121{
[28767]122 for (my $i = 1; $i <= $test_runs; $i++)
123 {
124 my $search_sql = "SELECT COUNT(*) FROM tests WHERE collection='" . $collection . "' AND test_run=" . $i;
125 my $test_count = &getValueSQL($db_path, $search_sql);
126 if (0 == $test_count)
127 {
128 $new_test_count++;
129 my $insert_sql = "INSERT INTO tests (collection, test_run) VALUES ('" . $collection . "'," . $i . ")";
130 &execSQL($db_path, $insert_sql);
131 }
132 }
[28646]133}
[28767]134print $new_test_count . " tests added\n";
[28646]135print "\n";
136
[28767]137# 1. Continue picking random tests and running them until there are none left
138my $total_test_count = &getValueSQL($db_path, 'SELECT COUNT(*) FROM tests');
139my $remaining_test_sql = 'SELECT COUNT(*) FROM tests WHERE test_started=0';
140my $remaining_test_count = getValueSQL($db_path, $remaining_test_sql);
141my $exit_file_path = $ENV{'GSDLHOME'} . '/collect/exit.now';
142while ($remaining_test_count ne "0" && !-f $exit_file_path)
[28646]143{
[28767]144 my $test_started = time;
145 my $now_string = strftime "%a %b %e %H:%M:%S %Y", localtime($test_started);
146 my $random_test_sql = 'SELECT collection, test_run FROM tests WHERE test_started=0 ORDER BY RANDOM() LIMIT 1';
147 my ($collection, $test_run) = &getRecordSQL($db_path, $random_test_sql);
148
149 print &makeHeader('Running test: ' . $collection . '#' . $test_run, 78) . "\n\n";
150 print " * Note: to gracefully exit create a file: <gsdlhome>/collect/exit.now\n";
151 print ' * Started: ' . $now_string . "\n";
152
153 my $found_result = 0;
154 do
155 {
156 my @results = &importWithStrace($collection);
157 # We don't allow negative results... just try again
158 if ($results[0] != $results[1])
159 {
160 print "Warning! Failed to process all the documents found - rerunning test...\n";
161 }
162 elsif (0 >= $results[1])
163 {
164 print "Warning! Failed to process any documents - rerunning test...\n";
165 }
166 # I'm not sure how this happens, but it seems on really fast imports they
167 # sometimes happen faster than the baseline import?!?
168 elsif ($results[5] < $results[2])
169 {
170 print "Warning! Baseline import took longer than actual import - rerunning test...\n";
171 }
172 elsif ($results[6] < $results[3])
173 {
174 print "Warning! Spent more time on I/O syscalls in baseline than during import - rerunning test...\n";
175 }
176 elsif ($results[7] < $results[4])
177 {
178 print "Warning! Spent more time on non-I/O syscalls in baseline than during import - rerunning test...\n";
179 }
180 # I'm also seeing a lot of negative percentages caused by the total delta
181 # time spend on io and other syscalls being more than the delta time
182 # between import and baseline durations
183 elsif (($results[5] - $results[2]) < (($results[6] - $results[3]) + ($results[7] - $results[4])))
184 {
185 print "Warning! Time difference between baseline and import durations less than total time differences between io and misc system calls - rerunning test...\n";
186 }
187 else
188 {
189 my $update_sql = 'UPDATE tests SET test_started=' . $test_started . ', ';
190 $update_sql .= 'docs_found=' . $results[0] . ', docs_processed=' . $results[1] . ', ';
191 $update_sql .= 'b_elapsed=' . sprintf('%0.6f', $results[2]) . ', ';
192 $update_sql .= 'b_io=' . sprintf('%0.6f', $results[3]) . ', ';
193 $update_sql .= 'b_other=' . sprintf('%0.6f', $results[4]) . ', ';
194 $update_sql .= 'i_elapsed=' . sprintf('%0.6f', $results[5]) . ', ';
195 $update_sql .= 'i_io=' . sprintf('%0.6f', $results[6]) . ', ';
196 $update_sql .= 'i_other=' . sprintf('%0.6f', $results[7]) . ' ';
197 $update_sql .= 'WHERE collection=\'' . $collection . '\' AND test_run=' . $test_run;
198 execSQL($db_path, $update_sql);
199 $found_result = 1;
200 }
201 }
202 while(0 == $found_result);
203
204 # Repeat until we have exhausted pending tests
205 $remaining_test_count = &getValueSQL($db_path, $remaining_test_sql);
206
207 my $x = $total_test_count - $remaining_test_count;
208 print &makeHeader('Test Complete: ' . $collection . '#' . $test_run, 78) . "\n\n";
209 print ' * ' . sprintf("%.0f",(($x/$total_test_count)*100)) . "% of all tests complete!\n";
210 print ' * ' . $remaining_test_count . " tests remaining\n";
211 print "\n";
[28646]212}
[28767]213
214if (-f $exit_file_path)
[28646]215{
[28767]216 unlink($exit_file_path);
[28646]217}
218
[28767]219# 2. Complete!
[28646]220my $end_time = [gettimeofday()];
221my $duration = tv_interval($start_time, $end_time);
222print &makeHeader(sprintf('Complete in %0.2f seconds', $duration), 78) . "\n\n";
223
224exit;
225
[28767]226################################################################################
[28646]227
[28767]228## @function importWithStrace()
[28646]229#
[28767]230sub importWithStrace
231{
232 my ($collection) = @_;
233 my $collection_path = $ENV{'GSDLHOME'} . '/collect/' . $collection;
234 if (!-d $collection_path)
235 {
236 &printError('Collection not found: ' . $collection_path);
237 }
238
239 # 2. Run dummy import command (empty import directory) within trace to
240 # determine baseline
241 &clearExistingDirectory($collection_path, 'archives', 'cached', 'logs', 'tmp');
242 &flushDiskCache($flush_delay);
243 print " * Running baseline collection import... ";
244 my $dummy_dir = tempdir( CLEANUP => 1);
245 my $command1 = 'strace ' . $strace_flags . ' import.pl -removeold -importdir "' . $dummy_dir .'" "' . $collection . '" 2>&1';
246 my $debug_path1 = '';
247 if ($debug)
248 {
249 $debug_path1 = $collection_path . '/debug-baseline.tsv';
250 }
251 my ($baseline_duration, $baseline_io, $baseline_misc) = &parseStrace($command1, $collection_path, $debug_path1);
252 print "Done\n";
253 print ' - Duration: ' . sprintf('%0.6f', $baseline_duration) . " seconds\n";
254 print " - System Calls Breakdown:\n";
255 print ' - I/O Duration: ' . sprintf('%0.6f', $baseline_io) . " seconds\n";
256 print ' - I/O Percent: ' . sprintf('%0.1f', ($baseline_io / $baseline_duration) * 100) . "%\n";
257 print ' - Other Duratn: ' . sprintf('%0.6f', $baseline_misc) . " seconds\n";
258 print ' - Other Percnt: ' . sprintf('%0.1f', ($baseline_misc / $baseline_duration) * 100) . "%\n";
259 if ($debug)
260 {
261 print " - See 'debug-baseline.tsv' for raw data\n";
262 }
263 if (-d $dummy_dir)
264 {
265 rmdir($dummy_dir);
266 }
267
268 # 3. Run normal import command within strace reading output line by line
269 &clearExistingDirectory($collection_path, 'archives', 'cached', 'logs', 'tmp');
270 &flushDiskCache($flush_delay);
271 print " * Running import and tracking I/O metrics... ";
272 my $command2 = 'strace ' . $strace_flags . ' import.pl -removeold "' . $collection . '" 2>&1';
273 my $debug_path2 = '';
274 if ($debug)
275 {
276 $debug_path2 = $collection_path . '/debug-import.tsv';
277 }
278 my ($import_duration, $import_io, $import_misc, $import_complete, $docs_found, $docs_processed) = &parseStrace($command2, $collection_path, $debug_path2);
279 print "Done\n";
280 print ' - Import? ' . ( $import_complete ? 'Completed' : 'Failed') . "\n";
281 print ' - Found: ' . $docs_found . ' document' . (($docs_processed > 1) ? 's' : '') . "\n";
282 print ' - Processed: ' . $docs_processed . ' document' . (($docs_processed > 1) ? 's' : '') . "\n";
283 print ' - Duration: ' . sprintf('%0.6f', $import_duration) . " seconds\n";
284 print " - System Calls Breakdown:\n";
285 print ' - I/O Duration: ' . sprintf('%0.6f', $import_io) . " seconds\n";
286 print ' - I/O Percent: ' . sprintf('%0.1f', ($import_io / $import_duration) * 100) . "%\n";
287 print ' - Other Duratn: ' . sprintf('%0.6f', $import_misc) . " seconds\n";
288 print ' - Other Percnt: ' . sprintf('%0.1f', ($import_misc / $import_duration) * 100) . "%\n";
289 print " - See 'import.log' for Greenstone Import details\n";
290 print " - See 'strace.out' for STrace details\n";
291 if ($debug)
292 {
293 print " - See 'debug-import.tsv' for raw data\n";
294 }
295 print "\n";
296
297 # 4. Results
298 my $delta_duration = $import_duration - $baseline_duration;
299 my $delta_io = $import_io - $baseline_io;
300 my $delta_io_percent = ( $delta_io / $delta_duration ) * 100;
301 my $delta_misc = $import_misc - $baseline_misc;
302 my $delta_misc_percent = ( $delta_misc / $delta_duration ) * 100;
303 print &makeHeader('Import Results', 78) . "\n\n";
304 print ' Import Duration: ' . sprintf('%0.6f', $delta_duration) . " seconds\n";
305 print " System Calls Breakdown:\n";
306 print ' - I/O Duration: ' . sprintf('%0.6f', $delta_io) . " seconds\n";
307 print ' - I/O Percent: ' . sprintf('%0.1f', $delta_io_percent) . "%\n";
308 print ' - Other Duratn: ' . sprintf('%0.6f', $delta_misc) . " seconds\n";
309 print ' - Other Percnt: ' . sprintf('%0.1f', $delta_misc_percent) . "%\n";
310 print "\n";
311
312 return ($docs_found, $docs_processed, $baseline_duration, $baseline_io, $baseline_misc, $import_duration, $import_io, $import_misc);
313}
314## importWithStrace()
315
316## @function makeHeader($msg, [$length])
317#
318# Create a centered header string given a certain message padded with '=' characters.
319#
320# @param $msg The message to center as a string
321# @param $length The desired length of string - defaults to 79
322# @return A string centered with '=' as padding
323#
[28646]324sub makeHeader
325{
326 my ($msg, $length) = @_;
[28767]327 if (!defined $length)
[28646]328 {
[28767]329 $length = 79; # 80 with newline
[28646]330 }
[28767]331 if (length($msg) % 2 == 0)
[28646]332 {
[28767]333 $msg .= ' ';
[28646]334 }
[28767]335 my $filler_length = ($length - length($msg)) / 2;
336 my $filler = '=' x $filler_length;
337 $msg = $filler . ' ' . $msg . ' ' . $filler;
[28646]338 return $msg;
339}
340## makeHeader() ##
341
342
343## @function
[28767]344# Remove named Greenstone directory so it doesn't get factored in I/O costs
[28646]345#
[28767]346sub clearExistingDirectory
[28646]347{
[28767]348 my $collection_path = shift(@_);
349 for my $dirname (@_)
[28646]350 {
[28767]351 my $dir = $collection_path . '/' . $dirname;
352 if (-d $dir)
353 {
354 print ' * Deleting existing ' . $dirname . ' directory... ';
355 remove_tree($dir);
356 print "Done\n";
357 }
[28646]358 }
359}
[28767]360## clearExistingDirectory()
[28646]361
362## @function
363#
364sub flushDiskCache
365{
366 my ($flush_delay) = @_;
367 # The Premise: use sudoedit and other black magic to clear out the memory-
368 # based disk cache (which is done by writing the number 3 to a certain
369 # system file)
370 &printFlush(" * Synching file system... ");
371 `sync`;
372 print "Done\n";
373 &printFlush(" * Dropping memory disk cache... ");
374 # - save our current default editor
375 my $current_editor = $ENV{'EDITOR'};
376 # - replace default editor with a script that simply clobbers the contents
377 # of any file it's handed with the number "3"
378 $ENV{'EDITOR'} = 'reset_memcache_editor.sh';
379 # - we now call sudoedit on the system file. How sudoedit works is that it
380 # starts by making a temp copy of the system file with appropriate
381 # permissions allowing the user to edit. It then passes the path to the
382 # temp file to the default editor - typically this would be an interactive
383 # editor like 'vi'. However, we've just replaced the editor with a custom
384 # script that just writes '3' as the content of the tmp file. Finally, when
385 # the editor exits, sudoedit copies the tmp file over the top of the system
386 # file, restoring appropriate root-level permissions
387 `sudoedit /proc/sys/vm/drop_caches`;
388 # - restore the default editor, just in case something in Greenstone
389 # depends on this being a reasonably value
390 $ENV{'EDITOR'} = $current_editor;
391 print "Done\n";
392 # - short break to give OS time to actually notice the drop_caches command
393 &printFlush(" * Waiting for drop_caches to complete... ");
394 while ($flush_delay > 0)
395 {
396 &printFlush($flush_delay . ' ');
397 sleep(1);
398 $flush_delay--;
399 }
[28767]400 print "Done\n";
[28646]401}
402## flushDiskCache()
403
404
405## @function parseStrace
406#
407sub parseStrace
408{
409 my ($command, $logs_path, $debug_path) = @_;
410 my $start_timestamp = 0;
411 my $end_timestamp = 0;
412 my $io_duration = 0;
[28665]413 my $misc_duration = 0;
[28646]414 my $import_complete = 0;
415 my $documents_found = 0;
416 my $documents_processed = 0;
417 # hash from PIDs to array (FIFO stack) of syscalls and arguments
418 my $interrupted_stacks = {};
419
420 # open the logs if we have been given somewhere to log too
421 my $logging_enabled = 0;
422 if ($logs_path ne '')
423 {
424 my $strace_path = $logs_path . '/strace.out';
425 open(STRACEOUT, '>:utf8', $strace_path) or die("Error! Failed to open file for writing: " . $strace_path);
426 my $gslog_path = $logs_path . '/import.log';
427 open(GSLOGOUT, '>:utf8', $gslog_path) or die("Error! Failed to open file for writing: " . $gslog_path);
[28767]428 #$logging_enabled = 1;
[28646]429 }
430
431 # debug mode means writing a whole bunch of information to a CSV
432 if ($debug_path)
433 {
[28665]434 open(TSVOUT, '>:utf8', $debug_path) or die('Error! Failed to open file for writing: ' . $logs_path . '/' . $debug);
435 print TSVOUT "TIMESTAMP\tPID\tSYSCALL\tELAPSED\tTOTALIO\tTOTALMISC\tARGS\tRESULT\n";
[28646]436 }
437
438 if (open(GSI, $command . '|'))
439 {
440 my $pid = 0;
[28665]441 my $line = <GSI>;
442 while ($line)
[28646]443 {
[28665]444 my $next_line = <GSI>;
[28646]445 if ($logging_enabled)
446 {
447 print STRACEOUT $line;
448 }
[28767]449
450 if ($line =~ /^[^\d\[]+(.*)/)
451 {
452 $line = $1;
453 }
454
[28646]455 # we may have a line that unfortunately includes newlines in its arguments list
456 # - watch out for unfinished syscalls that will be resumed later
457 # - oh! and signals, cause they are way different anyhow
458 while ($line !~ /<unfinished \.\.\.>/ && $line !~ /SIG/ && $line =~ /\(/ && $line !~ /\)\s+=/)
459 {
[28665]460 $line .= ' ' . $next_line;
461 if ($next_line = <GSI>)
[28646]462 {
463 }
464 else
465 {
466 print "Error! Failed to find complete arguments list: " . $line . "\n";
467 exit;
468 }
469 }
470 # detect and remove any PID information to make the rest of parsing easier
[28665]471 if ($line =~ /^\[pid\s+(\d+)\]\s+(.*)$/s)
[28646]472 {
473 my $new_pid = $1;
474 $line = $2;
475 if ($new_pid != $pid)
476 {
477 $pid = $new_pid;
478 }
479 }
480 # detect resume lines, and rewrite so they look like other lines
481 if ($line =~ /^(\d+\.\d+)\s+<\.\.\.\s+([a-z0-9_]+)\s+resumed>\s+(.*)\)\s+=\s+(.+)\s+<(\d+\.\d+)>$/s)
482 {
483 my $timestamp = $1;
484 my $prev_syscall = '';
485 my $syscall = $2;
486 my $args_prefix = '';
487 my $args_suffix = $3;
488 my $result = $4;
489 my $duration = $5;
490 if ($syscall eq 'write' && $args_prefix =~ /^[12],/)
491 {
492 chomp($line);
493 print "\n\nResuming write pid:$pid line:|" . $line . "|\n";
494 # pop the last interrupted syscall off the stack for this pid
495 if (!defined $interrupted_stacks->{$pid} || scalar(@{$interrupted_stacks->{$pid}}) == 0)
496 {
497 print "Error! Resume when stack is empty: " . $pid . "\n";
498 exit;
499 }
500 ($prev_syscall, $args_prefix) = @{pop(@{$interrupted_stacks->{$pid}})};
501 if ($syscall ne $prev_syscall)
502 {
503 print "Error! Resume syscall does not match unfinished syscall: " . $syscall . " != " . $prev_syscall . "\n";
504 exit;
505 }
506 }
[28665]507
508 # Time for a hack - duration is highly dubious for suspended
509 # syscalls. It typically includes the total walltime spent doing
510 # anything between when the 'unfinished' and the 'resumed'... time that
511 # is already accounted for in other parsed syscalls. What I'll do is
512 # limit the resumed calls duration to, at most, the time difference
513 # between this syscall and the next. This is highly inaccurate, of
[28767]514 # course, as it excludes time spent before the suspension but then
515 # includes time spent in userspace but is significantly better than a
516 # elasped duration several times longer than the syscall actually took.
[28665]517 if ($next_line =~ /^.*?(\d+\.\d+)/)
518 {
519 my $next_timestamp = $1;
520 my $timestamp_duration = $next_timestamp - $timestamp;
521 if ($duration > $timestamp_duration)
522 {
523 #rint "[resume: elapsed longer than duration... fudging] ";
524 $duration = $timestamp_duration;
525 }
526 }
527
528 $line = $timestamp . ' ' . $syscall . '(' . $args_prefix . ' ' . $args_suffix . ') = ' . $result . ' <' . sprintf('%0.6f', $duration) . '>';
[28646]529 }
530
531 # SPECIAL CASES
532 # - unfinished syscalls... the only ones we care about are writes to standard out or error as these are echoed to the shell. Add them to the appropriate stack waiting for the 'resume' rewrite above
533 if ($line =~ /^(\d+\.\d+)\s+([a-z0-9_]+)\((.*?)\s+<unfinished \.\.\.>$/)
534 {
535 my $timestamp = $1;
536 my $syscall = $2;
537 my $args_prefix = $3;
538 if ($syscall eq 'write' && $args_prefix =~ /^[12],/)
539 {
[28767]540 #print "\n\nPending write pid:$pid line:|" . $line . "|\n";
[28646]541 if (!defined $interrupted_stacks->{$pid})
542 {
543 $interrupted_stacks->{$pid} = [];
544 }
545 push(@{$interrupted_stacks->{$pid}}, [$syscall, $args_prefix]);
546 }
547 }
548 # - processes announce detachments and resumptions - maybe this explains
[28665]549 # why wait4's keep chaning (-q should shut these up)
[28646]550 elsif ($line =~ /^Process (\d+) (detached|resumed)/)
551 {
552 my $local_pid = $1;
553 my $action = $2;
554 }
555 # - exit_group never has a duration
[28767]556 elsif ($line =~ /^(\d+\.\d+)\s+(_exit|exit_group)\((\d+)\)\s+=\s+\?$/)
[28646]557 {
558 my $timestamp = $1;
[28767]559 my $exit_function = $2;
560 my $exit_value = $3;
[28646]561 $end_timestamp = $timestamp;
562 if ($debug)
563 {
[28665]564 print TSVOUT sprintf("%0.6f", ($timestamp - $start_timestamp)) . "\t";
565 print TSVOUT $pid . "\t";
[28767]566 print TSVOUT $exit_function . "\t";
[28665]567 print TSVOUT "0.000000\t";
568 print TSVOUT sprintf("%0.6f", $misc_duration) . "\t";
569 print TSVOUT sprintf("%0.6f", $io_duration) . "\t";
570 print TSVOUT $exit_value . "\t";
571 print TSVOUT "?\n";
[28646]572 }
573 }
574 # - signals
575 elsif ($line =~ /^(\d+\.\d+)\s+---\s+([A-Z]+)\s+\((.*)\)\s+\@\s+0\s+\(0\)\s+---$/)
576 {
577 my $timestamp = $1;
578 my $signal = $2;
579 my $message = $3;
580 $end_timestamp = $timestamp;
581 }
582 # NORMAL CASE
583 elsif ($line =~ /^(\d+\.\d+)\s+([a-z0-9_]+)\((.*)\)\s+=\s+(.+)\s+<(\d+\.\d+)>$/s)
584 {
585 my $timestamp = $1;
586 my $syscall = $2;
587 my $args = $3;
588 my $result = $4;
589 my $duration = $5;
590 if ($start_timestamp == 0)
591 {
592 $start_timestamp = $timestamp;
593 }
594 $end_timestamp = $timestamp;
595 if (defined $io_function_list->{$syscall})
596 {
597 $io_duration += $duration;
598 }
599 else
600 {
[28665]601 $misc_duration += $duration;
[28646]602 ##print "[$syscall]";
603 }
604 if ($syscall eq 'write' && $args =~ /[12],\s+"(.*)"/s)
605 {
606 my $payload = $1;
607 $payload =~ s/\\n/\n/g;
608 # parse up some collection import statistics
609 if ($payload =~ /Import complete/)
610 {
611 $import_complete = 1;
612 }
613 elsif ($payload =~ /^\* (\d+) documents were considered/)
614 {
615 $documents_found = $1;
616 }
617 elsif ($payload =~ /^\* (\d+) were processed/)
618 {
619 $documents_processed = $1;
620 }
621 # finally, write these lines out to log if a path has been provided
622 if ($logging_enabled)
623 {
624 print GSLOGOUT $payload;
625 }
626 }
627 if ($debug)
628 {
629 $args =~ s/\r?\n//g;
[28665]630 print TSVOUT sprintf("%0.6f", ($timestamp - $start_timestamp)) . "\t";
631 print TSVOUT $pid . "\t";
632 print TSVOUT $syscall . "\t";
633 print TSVOUT sprintf("%0.6f", $duration) . "\t";
634 print TSVOUT sprintf("%0.6f", $misc_duration) . "\t";
635 print TSVOUT sprintf("%0.6f", $io_duration) . "\t";
636 print TSVOUT $args . "\t";
637 print TSVOUT $result . "\n";
[28646]638 }
639 }
640 else
641 {
642 chomp($line);
643 print "\n\nUnparsed: |" . $line . "|\n\n";
644 }
[28665]645 # Move on to the next line... (may be undef)
646 $line = $next_line;
[28646]647 }
648 }
649 else
650 {
651 print "Error! Failed to open import process for reading: " . $command . "\n";
652 }
653 if ($logging_enabled)
654 {
655 close(STRACEOUT);
656 close(GSLOGOUT);
657 }
658 if ($debug_path)
659 {
[28665]660 close(TSVOUT);
[28646]661 }
662 my $cmd_duration = $end_timestamp - $start_timestamp;
[28665]663 return ($cmd_duration, $io_duration, $misc_duration, $import_complete, $documents_found, $documents_processed);
[28646]664}
665# parseStrace()
666
667
668sub printFlush
669{
670 my ($msg) = @_;
671 $| = 1;
672 print $msg;
673 $| = 0;
674}
675
676
677sub printUsage
678{
679 my ($msg) = @_;
680 if (defined $msg)
681 {
682 print 'Error! ' . $msg . "\n";
683 }
684 print "Usage: import_with_io_metric.pl -collection <collection name> [-debug]\n\n";
685 exit;
686}
687
[28767]688## @function
689#
690sub execSQL
691{
692 my ($db_path, $sql) = @_;
693 # call getValueSQL but don't care about result
694 getValueSQL($db_path, $sql);
695}
696# /** execSQL() **/
697
698
699## @function
700#
701sub getRecordSQL
702{
703 my ($db_path, $sql) = @_;
704 if ($sql !~ /LIMIT 1/i)
705 {
706 $sql .= ' LIMIT 1';
707 }
708 my $value = getValueSQL($db_path, $sql);
709 return split(/\|/,$value);
710}
711# /** getRecordSQL() **/
712
713
714## @function
715#
716sub getRecordsSQL
717{
718 my ($db_path, $sql) = @_;
719 my @records;
720 my $raw_values = getValueSQL($db_path, $sql);
721 foreach my $raw_record (split(/\r?\n/, $raw_values))
722 {
723 my @record = split(/\|/, $raw_record);
724 push(@records, \@record);
725 }
726 return @records;
727}
728## getRecordsSQL()
729
730
731## @function
732#
733sub getValueSQL
734{
735 my ($db_path, $sql) = @_;
736 my $result = `sqlite3 "$db_path" "$sql" 2>&1`;
737 if ($result =~ /Error:/)
738 {
739 die("Fatal Error!\nSQL:" . $sql . "\nMsg:" . $result);
740 }
741 # trim
742 $result =~ s/^\s*|\s*$//g;
743 return $result;
744}
745# /** getValueSQL() **/
746
747
748## @function printReport()
749#
750sub printReport
751{
752 my ($db_path) = @_;
753 # get listing of unique collection names sorted alphabetically. While we are
754 # at it, grab the number of documents processed from pretty much any entry
755 # for this collection (they should all be the same, otherwise they would've
756 # been repeated back in testing)
757 my $collection_sql = 'SELECT collection, AVG(docs_processed) FROM tests GROUP BY collection';
758 my @collection_records = &getRecordsSQL($db_path, $collection_sql);
759 foreach my $collection_record (@collection_records)
760 {
761 my ($collection, $docs_processed) = @{$collection_record};
762 # determine the size in bytes of the import directory
763 my $collection_import_directory = $ENV{'GSDLHOME'} . '/collect/' . $collection . '/import';
764 my $du_command = 'du -bsL "' . $collection_import_directory . '"';
765 my $du_result = `$du_command`;
766 my $size_in_bytes = 0;
767 if ($du_result =~ /^(\d+)\s+/)
768 {
769 $size_in_bytes = $1;
770 }
771 # pretty print header block for dokuwiki
772 &printReportHeader($collection, $docs_processed, $size_in_bytes);
773 my @test_records = &getRecordsSQL($db_path, 'SELECT * FROM tests WHERE collection=\'' . $collection . '\' AND test_started > 0 ORDER BY test_started');
774 my $number_of_tests = scalar(@test_records);
775 my $total_b_elapsed = 0;
776 my @b_elapsed_values;
777 my $total_b_io_percent = 0;
778 my @b_io_percent_values;
779 my $total_b_other_percent = 0;
780 my @b_other_percent_values;
781 my $total_d_elapsed = 0;
782 my @d_elapsed_values;
783 my $total_d_io_percent = 0;
784 my @d_io_percent_values;
785 my $total_d_other_percent = 0;
786 my @d_other_percent_values;
787 foreach my $test_record (@test_records)
788 {
789 my ($the_collection, $the_test_run, $test_started, $docs_found, $docs_processed, $b_elapsed, $b_io, $b_other, $i_elapsed, $i_io, $i_other) = @{$test_record};
790 my $b_io_percent = ($b_io / $b_elapsed) * 100;
791 my $b_other_percent = ($b_other / $b_elapsed) * 100;
792 my $d_elapsed = $i_elapsed - $b_elapsed;
793 my $d_io = $i_io - $b_io;
794 my $d_io_percent = ($d_io / $d_elapsed) * 100;
795 my $d_other = $i_other - $b_other;
796 my $d_other_percent = ($d_other / $d_elapsed) * 100;
797 # add to running totals
798 $total_b_elapsed += $b_elapsed;
799 push(@b_elapsed_values, $b_elapsed);
800 $total_b_io_percent += $b_io_percent;
801 push(@b_io_percent_values, $b_io_percent);
802 $total_b_other_percent += $b_other_percent;
803 push(@b_other_percent_values, $b_other_percent);
804 $total_d_elapsed += $d_elapsed;
805 push(@d_elapsed_values, $d_elapsed);
806 $total_d_io_percent += $d_io_percent;
807 push(@d_io_percent_values, $d_io_percent);
808 $total_d_other_percent += $d_other_percent;
809 push(@d_other_percent_values, $d_other_percent);
810 }
811 # Averages
812 my $b_elapsed_avg = $total_b_elapsed / $number_of_tests;
813 my $b_io_percent_avg = $total_b_io_percent / $number_of_tests;
814 my $b_other_percent_avg = $total_b_other_percent / $number_of_tests;
815 my $d_elapsed_avg = $total_d_elapsed / $number_of_tests;
816 my $d_io_percent_avg = $total_d_io_percent / $number_of_tests;
817 my $d_other_percent_avg = $total_d_other_percent / $number_of_tests;
818 # Standard Deviations
819 my $b_elapsed_stddev = &calculateStandardDeviation($number_of_tests, $b_elapsed_avg, @b_elapsed_values);
820 my $b_io_percent_stddev = &calculateStandardDeviation($number_of_tests, $b_io_percent_avg, @b_io_percent_values);
821 my $b_other_percent_stddev = &calculateStandardDeviation($number_of_tests, $b_other_percent_avg, @b_other_percent_values);
822 my $d_elapsed_stddev = &calculateStandardDeviation($number_of_tests, $d_elapsed_avg, @d_elapsed_values);
823 my $d_io_percent_stddev = &calculateStandardDeviation($number_of_tests, $d_io_percent_avg, @d_io_percent_values);
824 my $d_other_percent_stddev = &calculateStandardDeviation($number_of_tests, $d_other_percent_avg, @d_other_percent_values);
825
826 my $counter = 0;
827 my $outlier_count = 0;
828 foreach my $test_record (@test_records)
829 {
830 my ($the_collection, $the_test_run, $test_started, $docs_found, $docs_processed, $b_elapsed, $b_io, $b_other, $i_elapsed, $i_io, $i_other) = @{$test_record};
831 $counter++;
832 my $b_io_percent = ($b_io / $b_elapsed) * 100;
833 my $b_other_percent = ($b_other / $b_elapsed) * 100;
834 my $d_elapsed = $i_elapsed - $b_elapsed;
835 my $d_io = $i_io - $b_io;
836 my $d_io_percent = ($d_io / $d_elapsed) * 100;
837 my $d_other = $i_other - $b_other;
838 my $d_other_percent = ($d_other / $d_elapsed) * 100;
839 print '| ' . $counter . ' | ' . $the_collection . ' | ' . $docs_processed . ' | ' . $size_in_bytes . ' | ' . $test_started . ' | ' . sprintf('%0.6f', $b_elapsed) . ' | ' . sprintf('%0.1f', $b_io_percent) . '% | ' . sprintf('%0.1f', $b_other_percent) . '% | ' . sprintf('%0.6f', $d_elapsed) . ' | ' . sprintf('%0.1f', $d_io_percent) . '% | ' . sprintf('%0.1f', $d_other_percent) . '% | ';
840 if (&isOutlier($d_io_percent, $d_io_percent_avg, $d_io_percent_stddev))
841 {
842 print 'Yes';
843 $outlier_count++;
844 }
845 else
846 {
847 print 'No';
848 }
849 print " |\n";
850 }
851 print '^ Average ||||| ' . sprintf('%0.6f', $b_elapsed_avg) . ' | ' . sprintf('%0.1f', $b_io_percent_avg) . '% | ' . sprintf('%0.1f', $b_other_percent_avg) . '% | ' . sprintf('%0.6f', $d_elapsed_avg) . ' | ' . sprintf('%0.1f', $d_io_percent_avg) . '% | ' . sprintf('%0.1f', $d_other_percent_avg) . "% |\n";
852 print '^ Standard Deviation ||||| ' . sprintf('%0.6f', $b_elapsed_stddev) . ' | ' . sprintf('%0.1f', $b_io_percent_stddev) . '% | ' . sprintf('%0.1f', $b_other_percent_stddev) . '% | ' . sprintf('%0.6f', $d_elapsed_stddev) . ' | ' . sprintf('%0.1f', $d_io_percent_stddev) . '% | ' . sprintf('%0.1f', $d_other_percent_stddev) . '% | ' . $outlier_count . " |\n";
853 print "\n";
854 }
855}
856## printReport()
857
858
859## @function printReportHeader($collection)
860#
861sub printReportHeader
862{
863 my ($collection) = @_;
864 print '==== "' . $collection . "\" Collection ====\n\n";
865 print "^ Count ^ Collection ^ NumDocs ^ Size ^ Timestamp ^ Base ^^^ Import ^^^ Outlier? ^\n";
866 print "^ ::: ^ ::: ^ ::: ^ ::: ^ ::: ^ Elapsed ^ Syscalls ^^ Elapsed ^ Syscalls ^^ ::: ^\n";
867 print "^ ::: ^ ::: ^ ::: ^ ::: ^ ::: ^ ::: ^ I/O% ^ Other% ^ ::: ^ I/O% ^ Other ^ ::: ^\n";
868}
869## printReportHeader()
870
871
872## @function calculateStandardDeviation($mean, $value1 ... $valueN)
873# Our sample size is the population so we can use the simplier standard deviation equation for an entire population (now, theres some rap lyrics)
874# stddev = squareroot ( sum ( square( value - mean ) ) / count )
875sub calculateStandardDeviation
876{
877 my $population = shift(@_);
878 my $mean = shift(@_);
879 my $count = scalar(@_);
880 my $total = 0;
881 # sum of squares
882 foreach my $value (@_)
883 {
884 my $subtotal = $value - $mean;
885 $total += $subtotal * $subtotal;
886 }
887 if ($population == $count)
888 {
889 $total = $total / $count;
890 }
891 else
892 {
893 $total = $total / ($count - 1);
894 }
895 my $standard_deviation = sqrt($total);
896 return $standard_deviation;
897}
898## calculateStandardDeviation()
899
900
901## @function isOutlier($value, $mean, $stddev)
902#
903sub isOutlier
904{
905 my ($value, $mean, $stddev) = @_;
906 my $min = $mean - (2 * $stddev);
907 my $max = $mean + (2 * $stddev);
908 return ($value <= $min || $max <= $value);
909}
910## isOutlier()
911
[28646]9121;
Note: See TracBrowser for help on using the repository browser.