Changeset 28665 for gs2-extensions
- Timestamp:
- 2013-11-21T13:35:52+13:00 (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/parallel-building/trunk/src/bin/script/import_with_io_metric.pl
r28646 r28665 24 24 # strace_analyzer.pl would use 25 25 print " * Initializing... Done\n"; 26 our $strace_flags = '-f -q -s 256 -T -ttt'; 26 27 our $io_function_list = { 27 28 'access'=>1, … … 58 59 'write'=>1 59 60 }; 60 61 61 my $start_time = [gettimeofday()]; 62 62 my $collection = ''; 63 63 my $debug = 0; 64 64 my $flush_delay = 3; 65 65 66 GetOptions ('collection=s' => \$collection, 66 67 'debug' => \$debug, 67 68 'flushdelay=i' => \$flush_delay) 68 69 or &printUsage('Invalid or missing argument'); 70 69 71 if (!defined $collection || $collection eq '') 70 72 { … … 88 90 print " * Running baseline collection import... "; 89 91 my $dummy_dir = tempdir( CLEANUP => 1); 90 my $command1 = ' time -p strace -f -q -T -ttt -s 4096 import.pl -removeold -importdir "' . $dummy_dir .'" "' . $collection . '" 2>&1 1>/dev/null';92 my $command1 = 'strace ' . $strace_flags . ' import.pl -removeold -importdir "' . $dummy_dir .'" "' . $collection . '" 2>&1'; 91 93 my $debug_path1 = ''; 92 94 if ($debug) 93 95 { 94 $debug_path1 = $collection_path . '/debug- dummy.csv';95 } 96 my ($baseline_duration, $baseline_io, $baseline_ cpu) = &parseStrace($command1, $collection_path, $debug_path1);96 $debug_path1 = $collection_path . '/debug-baseline.tsv'; 97 } 98 my ($baseline_duration, $baseline_io, $baseline_misc) = &parseStrace($command1, $collection_path, $debug_path1); 97 99 print "Done\n"; 98 100 print ' - Duration: ' . sprintf('%0.6f', $baseline_duration) . " seconds\n"; 99 print ' - I/O Duration: ' . sprintf('%0.6f', $baseline_io) . " seconds\n"; 101 print ' - SysCall Time: ' . sprintf('%0.6f', $baseline_io + $baseline_misc) . " seconds\n"; 102 print ' - SysCall Prct: ' . sprintf('%d', (($baseline_io + $baseline_misc) / $baseline_duration) * 100) . "%\n"; 103 print ' - I/O Time: ' . sprintf('%0.6f', $baseline_io) . " seconds\n"; 100 104 print ' - I/O Percent: ' . sprintf('%d', ($baseline_io / $baseline_duration) * 100) . "%\n"; 101 105 if ($debug) 102 106 { 103 print " - See 'debug- dummy.csv' for raw data\n";107 print " - See 'debug-baseline.tsv' for raw data\n"; 104 108 } 105 109 print "\n"; 106 exit;107 110 108 111 # 3. Run normal import command within strace reading output line by line … … 110 113 &flushDiskCache($flush_delay); 111 114 print " * Running import and tracking I/O metrics... "; 112 my $command2 = 'strace -f -q -T -ttt -s 256import.pl -removeold "' . $collection . '" 2>&1';115 my $command2 = 'strace ' . $strace_flags . ' import.pl -removeold "' . $collection . '" 2>&1'; 113 116 my $debug_path2 = ''; 114 117 if ($debug) 115 118 { 116 $debug_path2 = $collection_path . '/debug-import. csv';117 } 118 my ($duration_import, $duration_io, $duration_ cpu, $import_complete, $docs_found, $docs_processed) = &parseStrace($command2, $collection_path, $debug_path2);119 $debug_path2 = $collection_path . '/debug-import.tsv'; 120 } 121 my ($duration_import, $duration_io, $duration_misc, $import_complete, $docs_found, $docs_processed) = &parseStrace($command2, $collection_path, $debug_path2); 119 122 print "Done\n"; 120 123 print ' - Import? ' . ( $import_complete ? 'Completed' : 'Failed') . "\n"; … … 122 125 print ' - Processed: ' . $docs_processed . ' document' . (($docs_processed > 1) ? 's' : '') . "\n"; 123 126 print ' - Duration: ' . sprintf('%0.6f', $duration_import) . " seconds\n"; 127 print ' - SysCall Time: ' . sprintf('%0.6f', $duration_io + $duration_misc) . " seconds\n"; 128 print ' - SysCall Prct: ' . sprintf('%d', (($duration_io + $duration_misc) / $duration_import) * 100) . "%\n"; 124 129 print ' - I/O Duration: ' . sprintf('%0.6f', $duration_io) . " seconds\n"; 125 130 print ' - I/O Percent: ' . sprintf('%d', ($duration_io / $duration_import) * 100) . "%\n"; … … 128 133 if ($debug) 129 134 { 130 print " - See 'debug-import. csv' for raw data\n";135 print " - See 'debug-import.tsv' for raw data\n"; 131 136 } 132 137 print "\n"; … … 237 242 my $end_timestamp = 0; 238 243 my $io_duration = 0; 239 my $ cpu_duration = 0;244 my $misc_duration = 0; 240 245 my $import_complete = 0; 241 246 my $documents_found = 0; … … 258 263 if ($debug_path) 259 264 { 260 open( DEBUGOUT, '>:utf8', $debug_path) or die('Error! Failed to open file for writing: ' . $logs_path . '/' . $debug);261 print DEBUGOUT "TIMESTAMP,PID,SYSCALL,DURATION,TOTALIO,TOTALCPU,ARGS,RESULT\n";265 open(TSVOUT, '>:utf8', $debug_path) or die('Error! Failed to open file for writing: ' . $logs_path . '/' . $debug); 266 print TSVOUT "TIMESTAMP\tPID\tSYSCALL\tELAPSED\tTOTALIO\tTOTALMISC\tARGS\tRESULT\n"; 262 267 } 263 268 … … 265 270 { 266 271 my $pid = 0; 267 my $line ;268 while ($line = <GSI>)272 my $line = <GSI>; 273 while ($line) 269 274 { 275 my $next_line = <GSI>; 270 276 if ($logging_enabled) 271 277 { … … 277 283 while ($line !~ /<unfinished \.\.\.>/ && $line !~ /SIG/ && $line =~ /\(/ && $line !~ /\)\s+=/) 278 284 { 279 my $new_line = ''; 280 if ($new_line = <GSI>) 281 { 282 $line .= ' ' . $new_line; 285 $line .= ' ' . $next_line; 286 if ($next_line = <GSI>) 287 { 283 288 } 284 289 else … … 289 294 } 290 295 # detect and remove any PID information to make the rest of parsing easier 291 if ($line =~ /^\[pid 296 if ($line =~ /^\[pid\s+(\d+)\]\s+(.*)$/s) 292 297 { 293 298 my $new_pid = $1; … … 325 330 } 326 331 } 327 $line = $timestamp . ' ' . $syscall . '(' . $args_prefix . ' ' . $args_suffix . ') = ' . $result . ' <' . $duration . '>'; 332 333 # Time for a hack - duration is highly dubious for suspended 334 # syscalls. It typically includes the total walltime spent doing 335 # anything between when the 'unfinished' and the 'resumed'... time that 336 # is already accounted for in other parsed syscalls. What I'll do is 337 # limit the resumed calls duration to, at most, the time difference 338 # between this syscall and the next. This is highly inaccurate, of 339 # course, as it includes time spent in userspace but is significantly 340 # better than a elasped duration several times longer than the syscall 341 # actually took. 342 if ($next_line =~ /^.*?(\d+\.\d+)/) 343 { 344 my $next_timestamp = $1; 345 my $timestamp_duration = $next_timestamp - $timestamp; 346 if ($duration > $timestamp_duration) 347 { 348 #rint "[resume: elapsed longer than duration... fudging] "; 349 $duration = $timestamp_duration; 350 } 351 } 352 353 $line = $timestamp . ' ' . $syscall . '(' . $args_prefix . ' ' . $args_suffix . ') = ' . $result . ' <' . sprintf('%0.6f', $duration) . '>'; 328 354 } 329 355 … … 346 372 } 347 373 # - processes announce detachments and resumptions - maybe this explains 348 # why wait4's keep chaning 374 # why wait4's keep chaning (-q should shut these up) 349 375 elsif ($line =~ /^Process (\d+) (detached|resumed)/) 350 376 { … … 353 379 } 354 380 # - exit_group never has a duration 355 elsif ($line =~ /^(\d+\.\d+)\s+exit_group\( 0\)\s+=\s+\?$/)381 elsif ($line =~ /^(\d+\.\d+)\s+exit_group\((\d+)\)\s+=\s+\?$/) 356 382 { 357 383 my $timestamp = $1; 384 my $exit_value = $2; 358 385 $end_timestamp = $timestamp; 359 386 if ($debug) 360 387 { 361 print DEBUGOUT $timestamp . ',' . $pid . ',"exit_group",0,' . $io_duration . ',' . $cpu_duration . ",\"0\",\"\"\n"; 388 print TSVOUT sprintf("%0.6f", ($timestamp - $start_timestamp)) . "\t"; 389 print TSVOUT $pid . "\t"; 390 print TSVOUT "exit_group\t"; 391 print TSVOUT "0.000000\t"; 392 print TSVOUT sprintf("%0.6f", $misc_duration) . "\t"; 393 print TSVOUT sprintf("%0.6f", $io_duration) . "\t"; 394 print TSVOUT $exit_value . "\t"; 395 print TSVOUT "?\n"; 362 396 } 363 397 } … … 389 423 else 390 424 { 391 $ cpu_duration += $duration;425 $misc_duration += $duration; 392 426 ##print "[$syscall]"; 393 427 } … … 417 451 if ($debug) 418 452 { 419 $args =~ s/\"/"/g;420 453 $args =~ s/\r?\n//g; 421 print DEBUGOUT $timestamp . ',' . $pid . ',"' . $syscall . '",' . $duration . ',' . $io_duration . ',' . $cpu_duration . ',"' . $args . '","' . $result . "\"\n"; 454 print TSVOUT sprintf("%0.6f", ($timestamp - $start_timestamp)) . "\t"; 455 print TSVOUT $pid . "\t"; 456 print TSVOUT $syscall . "\t"; 457 print TSVOUT sprintf("%0.6f", $duration) . "\t"; 458 print TSVOUT sprintf("%0.6f", $misc_duration) . "\t"; 459 print TSVOUT sprintf("%0.6f", $io_duration) . "\t"; 460 print TSVOUT $args . "\t"; 461 print TSVOUT $result . "\n"; 422 462 } 423 463 } … … 427 467 print "\n\nUnparsed: |" . $line . "|\n\n"; 428 468 } 469 # Move on to the next line... (may be undef) 470 $line = $next_line; 429 471 } 430 472 } … … 440 482 if ($debug_path) 441 483 { 442 close( DEBUGOUT);484 close(TSVOUT); 443 485 } 444 486 my $cmd_duration = $end_timestamp - $start_timestamp; 445 return ($cmd_duration, $io_duration, $ cpu_duration, $import_complete, $documents_found, $documents_processed);487 return ($cmd_duration, $io_duration, $misc_duration, $import_complete, $documents_found, $documents_processed); 446 488 } 447 489 # parseStrace()
Note:
See TracChangeset
for help on using the changeset viewer.