Context Navigation

import_with_io_metric.pl@ 28767

Last change on this file since 28767 was 28767, checked in by jmt12, 10 years ago
Drastically increased the script to allow 1) battery of imports backed by database of tests, 2) printing of reported in dokuwiki format, and 3) calculation of statistical information (STDDEV) and outliers (although I don't do anything with that information... yet)
Property svn:executable set to ``*
File size: 32.3 KB

Rev	Line
[28646]	1	#!/usr/bin/perl
	2
	3	# Pragma
	4	use strict;
	5	use warnings;
	6	# Libraries
[28767]	7	use File::Path qw( remove_tree );
	8	use File::Temp qw( tempdir );
[28646]	9	use Getopt::Long;
[28767]	10	use POSIX qw( strftime );
	11	use Time::HiRes qw( gettimeofday tv_interval );
[28646]	12
	13	BEGIN
	14	{
[28767]	15	print "\n======================= Greenstone Import + I/O Metrics ======================\n\n";
[28646]	16	if (!defined $ENV{'GSDLHOME'})
	17	{
	18	print "Error! GSDLHOME not set\n\n";
	19	exit;
	20	}
	21	}
	22
[28767]	23	print "Prepare and run a number of collection imports while recording I/O metrics.\n\n";
	24
[28646]	25	# 1. Parse and sanitize arguments - the listing of syscalls whose duration we
	26	# accumulate as IO time. Unless otherwise noted, these are taken from what
	27	# strace_analyzer.pl would use
	28	print " * Initializing... Done\n";
[28767]	29	my $machine_name = `hostname -s`;
	30	chomp($machine_name);
	31	$machine_name = ucfirst($machine_name);
	32	my $os_name = `lsb_release -i`;
	33	$os_name =~ s/^Distributor ID:\s+(.*)\r?\n$/$1/i;
	34	my $fs_name = `df -T $ENV{'GSDLHOME'}`;
	35	$fs_name =~ s/^.(ext2\|ext3\|ext4\|xfs\|zfs).$/$1/is;
	36	$fs_name = uc($fs_name);
	37	my $start_time = [gettimeofday()];
	38	my @collections;
	39	my $print_report = 0;
[28665]	40	our $strace_flags = '-f -q -s 256 -T -ttt';
[28646]	41	our $io_function_list = {
	42	'access'=>1,
	43	'chmod'=>1,
	44	'close'=>1,
	45	'creat'=>1,
	46	'fclose'=>1,
	47	'fcntl'=>1,
	48	'fgetpos'=>1,
	49	'flock'=>1,
	50	'fseek'=>1,
	51	'fsetpos'=>1,
	52	'fstat'=>1,
	53	'fsync'=>1,
	54	'ftell'=>1,
	55	'getdents'=>1,
[28767]	56	'ioctl'=>1, # John added
[28646]	57	'llseek'=>1,
	58	'lockf'=>1,
	59	'lseek'=>1,
	60	'lseek64'=>1,
	61	'mkdir'=>1,
	62	'open'=>1,
	63	'read'=>1,
	64	'readdir'=>1,
	65	'rename'=>1,
	66	'rewind'=>1,
	67	'rewinddir'=>1,
	68	'scandir'=>1,
	69	'stat'=>1,
	70	'stat64'=>1,
	71	'telldir'=>1,
	72	'unlink'=>1,
	73	'write'=>1
	74	};
[28767]	75	our $debug = 0;
	76	our $flush_delay = 3;
	77	our $test_runs = 9;
[28665]	78
[28767]	79	GetOptions ('collection=s' => \@collections,
[28646]	80	'debug' => \$debug,
[28767]	81	'flushdelay=i' => \$flush_delay,
	82	'runs=i' => \$test_runs,
	83	'report' => \$print_report)
[28646]	84	or &printUsage('Invalid or missing argument');
[28665]	85
[28767]	86	if ($test_runs < 1)
[28646]	87	{
[28767]	88	&printUsage('Test runs must be non-zeo');
[28646]	89	}
[28767]	90	my $db_name = 'strace_' . $machine_name . '_' . $os_name . '_' . $fs_name . '.sqlite3db';
	91	my $db_path = $ENV{'GSDLHOME'} . '/collect/' . $db_name;
	92
	93	if (0 < $print_report)
[28646]	94	{
[28767]	95	&printReport($db_path);
	96	exit;
[28646]	97	}
[28767]	98
	99	if (0 == scalar(@collections))
	100	{
	101	&printUsage('Missing collection or collections');
	102	}
	103
	104	print ' - Greenstone: ' . $ENV{'GSDLHOME'} . "\n";
	105	print ' - Collections: ' . join(',', @collections) . "\n";
	106	print ' - Test Runs: ' . $test_runs . "\n";
	107	print ' - Flush Delay: ' . $flush_delay . "\n";
	108	print ' - Debug? ' . ($debug ? 'Yes' : 'No') . "\n";
[28646]	109	print "\n";
	110
[28767]	111	# 0. Create database a populate with tests (if necessary)
	112	if (!-f $db_path)
[28646]	113	{
[28767]	114	print STDOUT ' * Creating database tables... ';
	115	execSQL($db_path, 'CREATE TABLE IF NOT EXISTS tests (collection TEXT, test_run INTEGER, test_started INTEGER DEFAULT 0, docs_found INTEGER, docs_processed INTEGER, b_elapsed REAL DEFAULT 0, b_io REAL DEFAULT 0, b_other REAL DEFAULT 0, i_elapsed REAL DEFAULT 0, i_io REAL DEFAULT 0, i_other REAL DEFAULT 0, PRIMARY KEY (collection, test_run))');
	116	print STDOUT "Done\n";
[28646]	117	}
[28767]	118	print ' * Populating database with test runs as necessary... ';
	119	my $new_test_count = 0;
	120	foreach my $collection (@collections)
[28646]	121	{
[28767]	122	for (my $i = 1; $i <= $test_runs; $i++)
	123	{
	124	my $search_sql = "SELECT COUNT(*) FROM tests WHERE collection='" . $collection . "' AND test_run=" . $i;
	125	my $test_count = &getValueSQL($db_path, $search_sql);
	126	if (0 == $test_count)
	127	{
	128	$new_test_count++;
	129	my $insert_sql = "INSERT INTO tests (collection, test_run) VALUES ('" . $collection . "'," . $i . ")";
	130	&execSQL($db_path, $insert_sql);
	131	}
	132	}
[28646]	133	}
[28767]	134	print $new_test_count . " tests added\n";
[28646]	135	print "\n";
	136
[28767]	137	# 1. Continue picking random tests and running them until there are none left
	138	my $total_test_count = &getValueSQL($db_path, 'SELECT COUNT(*) FROM tests');
	139	my $remaining_test_sql = 'SELECT COUNT(*) FROM tests WHERE test_started=0';
	140	my $remaining_test_count = getValueSQL($db_path, $remaining_test_sql);
	141	my $exit_file_path = $ENV{'GSDLHOME'} . '/collect/exit.now';
	142	while ($remaining_test_count ne "0" && !-f $exit_file_path)
[28646]	143	{
[28767]	144	my $test_started = time;
	145	my $now_string = strftime "%a %b %e %H:%M:%S %Y", localtime($test_started);
	146	my $random_test_sql = 'SELECT collection, test_run FROM tests WHERE test_started=0 ORDER BY RANDOM() LIMIT 1';
	147	my ($collection, $test_run) = &getRecordSQL($db_path, $random_test_sql);
	148
	149	print &makeHeader('Running test: ' . $collection . '#' . $test_run, 78) . "\n\n";
	150	print " * Note: to gracefully exit create a file: <gsdlhome>/collect/exit.now\n";
	151	print ' * Started: ' . $now_string . "\n";
	152
	153	my $found_result = 0;
	154	do
	155	{
	156	my @results = &importWithStrace($collection);
	157	# We don't allow negative results... just try again
	158	if ($results[0] != $results[1])
	159	{
	160	print "Warning! Failed to process all the documents found - rerunning test...\n";
	161	}
	162	elsif (0 >= $results[1])
	163	{
	164	print "Warning! Failed to process any documents - rerunning test...\n";
	165	}
	166	# I'm not sure how this happens, but it seems on really fast imports they
	167	# sometimes happen faster than the baseline import?!?
	168	elsif ($results[5] < $results[2])
	169	{
	170	print "Warning! Baseline import took longer than actual import - rerunning test...\n";
	171	}
	172	elsif ($results[6] < $results[3])
	173	{
	174	print "Warning! Spent more time on I/O syscalls in baseline than during import - rerunning test...\n";
	175	}
	176	elsif ($results[7] < $results[4])
	177	{
	178	print "Warning! Spent more time on non-I/O syscalls in baseline than during import - rerunning test...\n";
	179	}
	180	# I'm also seeing a lot of negative percentages caused by the total delta
	181	# time spend on io and other syscalls being more than the delta time
	182	# between import and baseline durations
	183	elsif (($results[5] - $results[2]) < (($results[6] - $results[3]) + ($results[7] - $results[4])))
	184	{
	185	print "Warning! Time difference between baseline and import durations less than total time differences between io and misc system calls - rerunning test...\n";
	186	}
	187	else
	188	{
	189	my $update_sql = 'UPDATE tests SET test_started=' . $test_started . ', ';
	190	$update_sql .= 'docs_found=' . $results[0] . ', docs_processed=' . $results[1] . ', ';
	191	$update_sql .= 'b_elapsed=' . sprintf('%0.6f', $results[2]) . ', ';
	192	$update_sql .= 'b_io=' . sprintf('%0.6f', $results[3]) . ', ';
	193	$update_sql .= 'b_other=' . sprintf('%0.6f', $results[4]) . ', ';
	194	$update_sql .= 'i_elapsed=' . sprintf('%0.6f', $results[5]) . ', ';
	195	$update_sql .= 'i_io=' . sprintf('%0.6f', $results[6]) . ', ';
	196	$update_sql .= 'i_other=' . sprintf('%0.6f', $results[7]) . ' ';
	197	$update_sql .= 'WHERE collection=\'' . $collection . '\' AND test_run=' . $test_run;
	198	execSQL($db_path, $update_sql);
	199	$found_result = 1;
	200	}
	201	}
	202	while(0 == $found_result);
	203
	204	# Repeat until we have exhausted pending tests
	205	$remaining_test_count = &getValueSQL($db_path, $remaining_test_sql);
	206
	207	my $x = $total_test_count - $remaining_test_count;
	208	print &makeHeader('Test Complete: ' . $collection . '#' . $test_run, 78) . "\n\n";
	209	print ' * ' . sprintf("%.0f",(($x/$total_test_count)*100)) . "% of all tests complete!\n";
	210	print ' * ' . $remaining_test_count . " tests remaining\n";
	211	print "\n";
[28646]	212	}
[28767]	213
	214	if (-f $exit_file_path)
[28646]	215	{
[28767]	216	unlink($exit_file_path);
[28646]	217	}
	218
[28767]	219	# 2. Complete!
[28646]	220	my $end_time = [gettimeofday()];
	221	my $duration = tv_interval($start_time, $end_time);
	222	print &makeHeader(sprintf('Complete in %0.2f seconds', $duration), 78) . "\n\n";
	223
	224	exit;
	225
[28767]	226	################################################################################
[28646]	227
[28767]	228	## @function importWithStrace()
[28646]	229	#
[28767]	230	sub importWithStrace
	231	{
	232	my ($collection) = @_;
	233	my $collection_path = $ENV{'GSDLHOME'} . '/collect/' . $collection;
	234	if (!-d $collection_path)
	235	{
	236	&printError('Collection not found: ' . $collection_path);
	237	}
	238
	239	# 2. Run dummy import command (empty import directory) within trace to
	240	# determine baseline
	241	&clearExistingDirectory($collection_path, 'archives', 'cached', 'logs', 'tmp');
	242	&flushDiskCache($flush_delay);
	243	print " * Running baseline collection import... ";
	244	my $dummy_dir = tempdir( CLEANUP => 1);
	245	my $command1 = 'strace ' . $strace_flags . ' import.pl -removeold -importdir "' . $dummy_dir .'" "' . $collection . '" 2>&1';
	246	my $debug_path1 = '';
	247	if ($debug)
	248	{
	249	$debug_path1 = $collection_path . '/debug-baseline.tsv';
	250	}
	251	my ($baseline_duration, $baseline_io, $baseline_misc) = &parseStrace($command1, $collection_path, $debug_path1);
	252	print "Done\n";
	253	print ' - Duration: ' . sprintf('%0.6f', $baseline_duration) . " seconds\n";
	254	print " - System Calls Breakdown:\n";
	255	print ' - I/O Duration: ' . sprintf('%0.6f', $baseline_io) . " seconds\n";
	256	print ' - I/O Percent: ' . sprintf('%0.1f', ($baseline_io / $baseline_duration) * 100) . "%\n";
	257	print ' - Other Duratn: ' . sprintf('%0.6f', $baseline_misc) . " seconds\n";
	258	print ' - Other Percnt: ' . sprintf('%0.1f', ($baseline_misc / $baseline_duration) * 100) . "%\n";
	259	if ($debug)
	260	{
	261	print " - See 'debug-baseline.tsv' for raw data\n";
	262	}
	263	if (-d $dummy_dir)
	264	{
	265	rmdir($dummy_dir);
	266	}
	267
	268	# 3. Run normal import command within strace reading output line by line
	269	&clearExistingDirectory($collection_path, 'archives', 'cached', 'logs', 'tmp');
	270	&flushDiskCache($flush_delay);
	271	print " * Running import and tracking I/O metrics... ";
	272	my $command2 = 'strace ' . $strace_flags . ' import.pl -removeold "' . $collection . '" 2>&1';
	273	my $debug_path2 = '';
	274	if ($debug)
	275	{
	276	$debug_path2 = $collection_path . '/debug-import.tsv';
	277	}
	278	my ($import_duration, $import_io, $import_misc, $import_complete, $docs_found, $docs_processed) = &parseStrace($command2, $collection_path, $debug_path2);
	279	print "Done\n";
	280	print ' - Import? ' . ( $import_complete ? 'Completed' : 'Failed') . "\n";
	281	print ' - Found: ' . $docs_found . ' document' . (($docs_processed > 1) ? 's' : '') . "\n";
	282	print ' - Processed: ' . $docs_processed . ' document' . (($docs_processed > 1) ? 's' : '') . "\n";
	283	print ' - Duration: ' . sprintf('%0.6f', $import_duration) . " seconds\n";
	284	print " - System Calls Breakdown:\n";
	285	print ' - I/O Duration: ' . sprintf('%0.6f', $import_io) . " seconds\n";
	286	print ' - I/O Percent: ' . sprintf('%0.1f', ($import_io / $import_duration) * 100) . "%\n";
	287	print ' - Other Duratn: ' . sprintf('%0.6f', $import_misc) . " seconds\n";
	288	print ' - Other Percnt: ' . sprintf('%0.1f', ($import_misc / $import_duration) * 100) . "%\n";
	289	print " - See 'import.log' for Greenstone Import details\n";
	290	print " - See 'strace.out' for STrace details\n";
	291	if ($debug)
	292	{
	293	print " - See 'debug-import.tsv' for raw data\n";
	294	}
	295	print "\n";
	296
	297	# 4. Results
	298	my $delta_duration = $import_duration - $baseline_duration;
	299	my $delta_io = $import_io - $baseline_io;
	300	my $delta_io_percent = ( $delta_io / $delta_duration ) * 100;
	301	my $delta_misc = $import_misc - $baseline_misc;
	302	my $delta_misc_percent = ( $delta_misc / $delta_duration ) * 100;
	303	print &makeHeader('Import Results', 78) . "\n\n";
	304	print ' Import Duration: ' . sprintf('%0.6f', $delta_duration) . " seconds\n";
	305	print " System Calls Breakdown:\n";
	306	print ' - I/O Duration: ' . sprintf('%0.6f', $delta_io) . " seconds\n";
	307	print ' - I/O Percent: ' . sprintf('%0.1f', $delta_io_percent) . "%\n";
	308	print ' - Other Duratn: ' . sprintf('%0.6f', $delta_misc) . " seconds\n";
	309	print ' - Other Percnt: ' . sprintf('%0.1f', $delta_misc_percent) . "%\n";
	310	print "\n";
	311
	312	return ($docs_found, $docs_processed, $baseline_duration, $baseline_io, $baseline_misc, $import_duration, $import_io, $import_misc);
	313	}
	314	## importWithStrace()
	315
	316	## @function makeHeader($msg, [$length])
	317	#
	318	# Create a centered header string given a certain message padded with '=' characters.
	319	#
	320	# @param $msg The message to center as a string
	321	# @param $length The desired length of string - defaults to 79
	322	# @return A string centered with '=' as padding
	323	#
[28646]	324	sub makeHeader
	325	{
	326	my ($msg, $length) = @_;
[28767]	327	if (!defined $length)
[28646]	328	{
[28767]	329	$length = 79; # 80 with newline
[28646]	330	}
[28767]	331	if (length($msg) % 2 == 0)
[28646]	332	{
[28767]	333	$msg .= ' ';
[28646]	334	}
[28767]	335	my $filler_length = ($length - length($msg)) / 2;
	336	my $filler = '=' x $filler_length;
	337	$msg = $filler . ' ' . $msg . ' ' . $filler;
[28646]	338	return $msg;
	339	}
	340	## makeHeader() ##
	341
	342
	343	## @function
[28767]	344	# Remove named Greenstone directory so it doesn't get factored in I/O costs
[28646]	345	#
[28767]	346	sub clearExistingDirectory
[28646]	347	{
[28767]	348	my $collection_path = shift(@_);
	349	for my $dirname (@_)
[28646]	350	{
[28767]	351	my $dir = $collection_path . '/' . $dirname;
	352	if (-d $dir)
	353	{
	354	print ' * Deleting existing ' . $dirname . ' directory... ';
	355	remove_tree($dir);
	356	print "Done\n";
	357	}
[28646]	358	}
	359	}
[28767]	360	## clearExistingDirectory()
[28646]	361
	362	## @function
	363	#
	364	sub flushDiskCache
	365	{
	366	my ($flush_delay) = @_;
	367	# The Premise: use sudoedit and other black magic to clear out the memory-
	368	# based disk cache (which is done by writing the number 3 to a certain
	369	# system file)
	370	&printFlush(" * Synching file system... ");
	371	`sync`;
	372	print "Done\n";
	373	&printFlush(" * Dropping memory disk cache... ");
	374	# - save our current default editor
	375	my $current_editor = $ENV{'EDITOR'};
	376	# - replace default editor with a script that simply clobbers the contents
	377	# of any file it's handed with the number "3"
	378	$ENV{'EDITOR'} = 'reset_memcache_editor.sh';
	379	# - we now call sudoedit on the system file. How sudoedit works is that it
	380	# starts by making a temp copy of the system file with appropriate
	381	# permissions allowing the user to edit. It then passes the path to the
	382	# temp file to the default editor - typically this would be an interactive
	383	# editor like 'vi'. However, we've just replaced the editor with a custom
	384	# script that just writes '3' as the content of the tmp file. Finally, when
	385	# the editor exits, sudoedit copies the tmp file over the top of the system
	386	# file, restoring appropriate root-level permissions
	387	`sudoedit /proc/sys/vm/drop_caches`;
	388	# - restore the default editor, just in case something in Greenstone
	389	# depends on this being a reasonably value
	390	$ENV{'EDITOR'} = $current_editor;
	391	print "Done\n";
	392	# - short break to give OS time to actually notice the drop_caches command
	393	&printFlush(" * Waiting for drop_caches to complete... ");
	394	while ($flush_delay > 0)
	395	{
	396	&printFlush($flush_delay . ' ');
	397	sleep(1);
	398	$flush_delay--;
	399	}
[28767]	400	print "Done\n";
[28646]	401	}
	402	## flushDiskCache()
	403
	404
	405	## @function parseStrace
	406	#
	407	sub parseStrace
	408	{
	409	my ($command, $logs_path, $debug_path) = @_;
	410	my $start_timestamp = 0;
	411	my $end_timestamp = 0;
	412	my $io_duration = 0;
[28665]	413	my $misc_duration = 0;
[28646]	414	my $import_complete = 0;
	415	my $documents_found = 0;
	416	my $documents_processed = 0;
	417	# hash from PIDs to array (FIFO stack) of syscalls and arguments
	418	my $interrupted_stacks = {};
	419
	420	# open the logs if we have been given somewhere to log too
	421	my $logging_enabled = 0;
	422	if ($logs_path ne '')
	423	{
	424	my $strace_path = $logs_path . '/strace.out';
	425	open(STRACEOUT, '>:utf8', $strace_path) or die("Error! Failed to open file for writing: " . $strace_path);
	426	my $gslog_path = $logs_path . '/import.log';
	427	open(GSLOGOUT, '>:utf8', $gslog_path) or die("Error! Failed to open file for writing: " . $gslog_path);
[28767]	428	#$logging_enabled = 1;
[28646]	429	}
	430
	431	# debug mode means writing a whole bunch of information to a CSV
	432	if ($debug_path)
	433	{
[28665]	434	open(TSVOUT, '>:utf8', $debug_path) or die('Error! Failed to open file for writing: ' . $logs_path . '/' . $debug);
	435	print TSVOUT "TIMESTAMP\tPID\tSYSCALL\tELAPSED\tTOTALIO\tTOTALMISC\tARGS\tRESULT\n";
[28646]	436	}
	437
	438	if (open(GSI, $command . '\|'))
	439	{
	440	my $pid = 0;
[28665]	441	my $line = <GSI>;
	442	while ($line)
[28646]	443	{
[28665]	444	my $next_line = <GSI>;
[28646]	445	if ($logging_enabled)
	446	{
	447	print STRACEOUT $line;
	448	}
[28767]	449
	450	if ($line =~ /^[^\d\[]+(.*)/)
	451	{
	452	$line = $1;
	453	}
	454
[28646]	455	# we may have a line that unfortunately includes newlines in its arguments list
	456	# - watch out for unfinished syscalls that will be resumed later
	457	# - oh! and signals, cause they are way different anyhow
	458	while ($line !~ /<unfinished \.\.\.>/ && $line !~ /SIG/ && $line =~ /$/ && $line !~ /$\s+=/)
	459	{
[28665]	460	$line .= ' ' . $next_line;
	461	if ($next_line = <GSI>)
[28646]	462	{
	463	}
	464	else
	465	{
	466	print "Error! Failed to find complete arguments list: " . $line . "\n";
	467	exit;
	468	}
	469	}
	470	# detect and remove any PID information to make the rest of parsing easier
[28665]	471	if ($line =~ /^\[pid\s+(\d+)\]\s+(.*)$/s)
[28646]	472	{
	473	my $new_pid = $1;
	474	$line = $2;
	475	if ($new_pid != $pid)
	476	{
	477	$pid = $new_pid;
	478	}
	479	}
	480	# detect resume lines, and rewrite so they look like other lines
	481	if ($line =~ /^(\d+\.\d+)\s+<\.\.\.\s+([a-z0-9_]+)\s+resumed>\s+(.*)\)\s+=\s+(.+)\s+<(\d+\.\d+)>$/s)
	482	{
	483	my $timestamp = $1;
	484	my $prev_syscall = '';
	485	my $syscall = $2;
	486	my $args_prefix = '';
	487	my $args_suffix = $3;
	488	my $result = $4;
	489	my $duration = $5;
	490	if ($syscall eq 'write' && $args_prefix =~ /^[12],/)
	491	{
	492	chomp($line);
	493	print "\n\nResuming write pid:$pid line:\|" . $line . "\|\n";
	494	# pop the last interrupted syscall off the stack for this pid
	495	if (!defined $interrupted_stacks->{$pid} \|\| scalar(@{$interrupted_stacks->{$pid}}) == 0)
	496	{
	497	print "Error! Resume when stack is empty: " . $pid . "\n";
	498	exit;
	499	}
	500	($prev_syscall, $args_prefix) = @{pop(@{$interrupted_stacks->{$pid}})};
	501	if ($syscall ne $prev_syscall)
	502	{
	503	print "Error! Resume syscall does not match unfinished syscall: " . $syscall . " != " . $prev_syscall . "\n";
	504	exit;
	505	}
	506	}
[28665]	507
	508	# Time for a hack - duration is highly dubious for suspended
	509	# syscalls. It typically includes the total walltime spent doing
	510	# anything between when the 'unfinished' and the 'resumed'... time that
	511	# is already accounted for in other parsed syscalls. What I'll do is
	512	# limit the resumed calls duration to, at most, the time difference
	513	# between this syscall and the next. This is highly inaccurate, of
[28767]	514	# course, as it excludes time spent before the suspension but then
	515	# includes time spent in userspace but is significantly better than a
	516	# elasped duration several times longer than the syscall actually took.
[28665]	517	if ($next_line =~ /^.*?(\d+\.\d+)/)
	518	{
	519	my $next_timestamp = $1;
	520	my $timestamp_duration = $next_timestamp - $timestamp;
	521	if ($duration > $timestamp_duration)
	522	{
	523	#rint "[resume: elapsed longer than duration... fudging] ";
	524	$duration = $timestamp_duration;
	525	}
	526	}
	527
	528	$line = $timestamp . ' ' . $syscall . '(' . $args_prefix . ' ' . $args_suffix . ') = ' . $result . ' <' . sprintf('%0.6f', $duration) . '>';
[28646]	529	}
	530
	531	# SPECIAL CASES
	532	# - unfinished syscalls... the only ones we care about are writes to standard out or error as these are echoed to the shell. Add them to the appropriate stack waiting for the 'resume' rewrite above
	533	if ($line =~ /^(\d+\.\d+)\s+([a-z0-9_]+)\((.*?)\s+<unfinished \.\.\.>$/)
	534	{
	535	my $timestamp = $1;
	536	my $syscall = $2;
	537	my $args_prefix = $3;
	538	if ($syscall eq 'write' && $args_prefix =~ /^[12],/)
	539	{
[28767]	540	#print "\n\nPending write pid:$pid line:\|" . $line . "\|\n";
[28646]	541	if (!defined $interrupted_stacks->{$pid})
	542	{
	543	$interrupted_stacks->{$pid} = [];
	544	}
	545	push(@{$interrupted_stacks->{$pid}}, [$syscall, $args_prefix]);
	546	}
	547	}
	548	# - processes announce detachments and resumptions - maybe this explains
[28665]	549	# why wait4's keep chaning (-q should shut these up)
[28646]	550	elsif ($line =~ /^Process (\d+) (detached\|resumed)/)
	551	{
	552	my $local_pid = $1;
	553	my $action = $2;
	554	}
	555	# - exit_group never has a duration
[28767]	556	elsif ($line =~ /^(\d+\.\d+)\s+(_exit\|exit_group)$(\d+)$\s+=\s+\?$/)
[28646]	557	{
	558	my $timestamp = $1;
[28767]	559	my $exit_function = $2;
	560	my $exit_value = $3;
[28646]	561	$end_timestamp = $timestamp;
	562	if ($debug)
	563	{
[28665]	564	print TSVOUT sprintf("%0.6f", ($timestamp - $start_timestamp)) . "\t";
	565	print TSVOUT $pid . "\t";
[28767]	566	print TSVOUT $exit_function . "\t";
[28665]	567	print TSVOUT "0.000000\t";
	568	print TSVOUT sprintf("%0.6f", $misc_duration) . "\t";
	569	print TSVOUT sprintf("%0.6f", $io_duration) . "\t";
	570	print TSVOUT $exit_value . "\t";
	571	print TSVOUT "?\n";
[28646]	572	}
	573	}
	574	# - signals
	575	elsif ($line =~ /^(\d+\.\d+)\s+---\s+([A-Z]+)\s+$(.*)$\s+\@\s+0\s+$0$\s+---$/)
	576	{
	577	my $timestamp = $1;
	578	my $signal = $2;
	579	my $message = $3;
	580	$end_timestamp = $timestamp;
	581	}
	582	# NORMAL CASE
	583	elsif ($line =~ /^(\d+\.\d+)\s+([a-z0-9_]+)$(.*)$\s+=\s+(.+)\s+<(\d+\.\d+)>$/s)
	584	{
	585	my $timestamp = $1;
	586	my $syscall = $2;
	587	my $args = $3;
	588	my $result = $4;
	589	my $duration = $5;
	590	if ($start_timestamp == 0)
	591	{
	592	$start_timestamp = $timestamp;
	593	}
	594	$end_timestamp = $timestamp;
	595	if (defined $io_function_list->{$syscall})
	596	{
	597	$io_duration += $duration;
	598	}
	599	else
	600	{
[28665]	601	$misc_duration += $duration;
[28646]	602	##print "[$syscall]";
	603	}
	604	if ($syscall eq 'write' && $args =~ /[12],\s+"(.*)"/s)
	605	{
	606	my $payload = $1;
	607	$payload =~ s/\\n/\n/g;
	608	# parse up some collection import statistics
	609	if ($payload =~ /Import complete/)
	610	{
	611	$import_complete = 1;
	612	}
	613	elsif ($payload =~ /^\* (\d+) documents were considered/)
	614	{
	615	$documents_found = $1;
	616	}
	617	elsif ($payload =~ /^\* (\d+) were processed/)
	618	{
	619	$documents_processed = $1;
	620	}
	621	# finally, write these lines out to log if a path has been provided
	622	if ($logging_enabled)
	623	{
	624	print GSLOGOUT $payload;
	625	}
	626	}
	627	if ($debug)
	628	{
	629	$args =~ s/\r?\n//g;
[28665]	630	print TSVOUT sprintf("%0.6f", ($timestamp - $start_timestamp)) . "\t";
	631	print TSVOUT $pid . "\t";
	632	print TSVOUT $syscall . "\t";
	633	print TSVOUT sprintf("%0.6f", $duration) . "\t";
	634	print TSVOUT sprintf("%0.6f", $misc_duration) . "\t";
	635	print TSVOUT sprintf("%0.6f", $io_duration) . "\t";
	636	print TSVOUT $args . "\t";
	637	print TSVOUT $result . "\n";
[28646]	638	}
	639	}
	640	else
	641	{
	642	chomp($line);
	643	print "\n\nUnparsed: \|" . $line . "\|\n\n";
	644	}
[28665]	645	# Move on to the next line... (may be undef)
	646	$line = $next_line;
[28646]	647	}
	648	}
	649	else
	650	{
	651	print "Error! Failed to open import process for reading: " . $command . "\n";
	652	}
	653	if ($logging_enabled)
	654	{
	655	close(STRACEOUT);
	656	close(GSLOGOUT);
	657	}
	658	if ($debug_path)
	659	{
[28665]	660	close(TSVOUT);
[28646]	661	}
	662	my $cmd_duration = $end_timestamp - $start_timestamp;
[28665]	663	return ($cmd_duration, $io_duration, $misc_duration, $import_complete, $documents_found, $documents_processed);
[28646]	664	}
	665	# parseStrace()
	666
	667
	668	sub printFlush
	669	{
	670	my ($msg) = @_;
	671	$\| = 1;
	672	print $msg;
	673	$\| = 0;
	674	}
	675
	676
	677	sub printUsage
	678	{
	679	my ($msg) = @_;
	680	if (defined $msg)
	681	{
	682	print 'Error! ' . $msg . "\n";
	683	}
	684	print "Usage: import_with_io_metric.pl -collection <collection name> [-debug]\n\n";
	685	exit;
	686	}
	687
[28767]	688	## @function
	689	#
	690	sub execSQL
	691	{
	692	my ($db_path, $sql) = @_;
	693	# call getValueSQL but don't care about result
	694	getValueSQL($db_path, $sql);
	695	}
	696	# / execSQL() /
	697
	698
	699	## @function
	700	#
	701	sub getRecordSQL
	702	{
	703	my ($db_path, $sql) = @_;
	704	if ($sql !~ /LIMIT 1/i)
	705	{
	706	$sql .= ' LIMIT 1';
	707	}
	708	my $value = getValueSQL($db_path, $sql);
	709	return split(/\\|/,$value);
	710	}
	711	# / getRecordSQL() /
	712
	713
	714	## @function
	715	#
	716	sub getRecordsSQL
	717	{
	718	my ($db_path, $sql) = @_;
	719	my @records;
	720	my $raw_values = getValueSQL($db_path, $sql);
	721	foreach my $raw_record (split(/\r?\n/, $raw_values))
	722	{
	723	my @record = split(/\\|/, $raw_record);
	724	push(@records, \@record);
	725	}
	726	return @records;
	727	}
	728	## getRecordsSQL()
	729
	730
	731	## @function
	732	#
	733	sub getValueSQL
	734	{
	735	my ($db_path, $sql) = @_;
	736	my $result = `sqlite3 "$db_path" "$sql" 2>&1`;
	737	if ($result =~ /Error:/)
	738	{
	739	die("Fatal Error!\nSQL:" . $sql . "\nMsg:" . $result);
	740	}
	741	# trim
	742	$result =~ s/^\s\|\s$//g;
	743	return $result;
	744	}
	745	# / getValueSQL() /
	746
	747
	748	## @function printReport()
	749	#
	750	sub printReport
	751	{
	752	my ($db_path) = @_;
	753	# get listing of unique collection names sorted alphabetically. While we are
	754	# at it, grab the number of documents processed from pretty much any entry
	755	# for this collection (they should all be the same, otherwise they would've
	756	# been repeated back in testing)
	757	my $collection_sql = 'SELECT collection, AVG(docs_processed) FROM tests GROUP BY collection';
	758	my @collection_records = &getRecordsSQL($db_path, $collection_sql);
	759	foreach my $collection_record (@collection_records)
	760	{
	761	my ($collection, $docs_processed) = @{$collection_record};
	762	# determine the size in bytes of the import directory
	763	my $collection_import_directory = $ENV{'GSDLHOME'} . '/collect/' . $collection . '/import';
	764	my $du_command = 'du -bsL "' . $collection_import_directory . '"';
	765	my $du_result = `$du_command`;
	766	my $size_in_bytes = 0;
	767	if ($du_result =~ /^(\d+)\s+/)
	768	{
	769	$size_in_bytes = $1;
	770	}
	771	# pretty print header block for dokuwiki
	772	&printReportHeader($collection, $docs_processed, $size_in_bytes);
	773	my @test_records = &getRecordsSQL($db_path, 'SELECT * FROM tests WHERE collection=\'' . $collection . '\' AND test_started > 0 ORDER BY test_started');
	774	my $number_of_tests = scalar(@test_records);
	775	my $total_b_elapsed = 0;
	776	my @b_elapsed_values;
	777	my $total_b_io_percent = 0;
	778	my @b_io_percent_values;
	779	my $total_b_other_percent = 0;
	780	my @b_other_percent_values;
	781	my $total_d_elapsed = 0;
	782	my @d_elapsed_values;
	783	my $total_d_io_percent = 0;
	784	my @d_io_percent_values;
	785	my $total_d_other_percent = 0;
	786	my @d_other_percent_values;
	787	foreach my $test_record (@test_records)
	788	{
	789	my ($the_collection, $the_test_run, $test_started, $docs_found, $docs_processed, $b_elapsed, $b_io, $b_other, $i_elapsed, $i_io, $i_other) = @{$test_record};
	790	my $b_io_percent = ($b_io / $b_elapsed) * 100;
	791	my $b_other_percent = ($b_other / $b_elapsed) * 100;
	792	my $d_elapsed = $i_elapsed - $b_elapsed;
	793	my $d_io = $i_io - $b_io;
	794	my $d_io_percent = ($d_io / $d_elapsed) * 100;
	795	my $d_other = $i_other - $b_other;
	796	my $d_other_percent = ($d_other / $d_elapsed) * 100;
	797	# add to running totals
	798	$total_b_elapsed += $b_elapsed;
	799	push(@b_elapsed_values, $b_elapsed);
	800	$total_b_io_percent += $b_io_percent;
	801	push(@b_io_percent_values, $b_io_percent);
	802	$total_b_other_percent += $b_other_percent;
	803	push(@b_other_percent_values, $b_other_percent);
	804	$total_d_elapsed += $d_elapsed;
	805	push(@d_elapsed_values, $d_elapsed);
	806	$total_d_io_percent += $d_io_percent;
	807	push(@d_io_percent_values, $d_io_percent);
	808	$total_d_other_percent += $d_other_percent;
	809	push(@d_other_percent_values, $d_other_percent);
	810	}
	811	# Averages
	812	my $b_elapsed_avg = $total_b_elapsed / $number_of_tests;
	813	my $b_io_percent_avg = $total_b_io_percent / $number_of_tests;
	814	my $b_other_percent_avg = $total_b_other_percent / $number_of_tests;
	815	my $d_elapsed_avg = $total_d_elapsed / $number_of_tests;
	816	my $d_io_percent_avg = $total_d_io_percent / $number_of_tests;
	817	my $d_other_percent_avg = $total_d_other_percent / $number_of_tests;
	818	# Standard Deviations
	819	my $b_elapsed_stddev = &calculateStandardDeviation($number_of_tests, $b_elapsed_avg, @b_elapsed_values);
	820	my $b_io_percent_stddev = &calculateStandardDeviation($number_of_tests, $b_io_percent_avg, @b_io_percent_values);
	821	my $b_other_percent_stddev = &calculateStandardDeviation($number_of_tests, $b_other_percent_avg, @b_other_percent_values);
	822	my $d_elapsed_stddev = &calculateStandardDeviation($number_of_tests, $d_elapsed_avg, @d_elapsed_values);
	823	my $d_io_percent_stddev = &calculateStandardDeviation($number_of_tests, $d_io_percent_avg, @d_io_percent_values);
	824	my $d_other_percent_stddev = &calculateStandardDeviation($number_of_tests, $d_other_percent_avg, @d_other_percent_values);
	825
	826	my $counter = 0;
	827	my $outlier_count = 0;
	828	foreach my $test_record (@test_records)
	829	{
	830	my ($the_collection, $the_test_run, $test_started, $docs_found, $docs_processed, $b_elapsed, $b_io, $b_other, $i_elapsed, $i_io, $i_other) = @{$test_record};
	831	$counter++;
	832	my $b_io_percent = ($b_io / $b_elapsed) * 100;
	833	my $b_other_percent = ($b_other / $b_elapsed) * 100;
	834	my $d_elapsed = $i_elapsed - $b_elapsed;
	835	my $d_io = $i_io - $b_io;
	836	my $d_io_percent = ($d_io / $d_elapsed) * 100;
	837	my $d_other = $i_other - $b_other;
	838	my $d_other_percent = ($d_other / $d_elapsed) * 100;
	839	print '\| ' . $counter . ' \| ' . $the_collection . ' \| ' . $docs_processed . ' \| ' . $size_in_bytes . ' \| ' . $test_started . ' \| ' . sprintf('%0.6f', $b_elapsed) . ' \| ' . sprintf('%0.1f', $b_io_percent) . '% \| ' . sprintf('%0.1f', $b_other_percent) . '% \| ' . sprintf('%0.6f', $d_elapsed) . ' \| ' . sprintf('%0.1f', $d_io_percent) . '% \| ' . sprintf('%0.1f', $d_other_percent) . '% \| ';
	840	if (&isOutlier($d_io_percent, $d_io_percent_avg, $d_io_percent_stddev))
	841	{
	842	print 'Yes';
	843	$outlier_count++;
	844	}
	845	else
	846	{
	847	print 'No';
	848	}
	849	print " \|\n";
	850	}
	851	print '^ Average \|\|\|\|\| ' . sprintf('%0.6f', $b_elapsed_avg) . ' \| ' . sprintf('%0.1f', $b_io_percent_avg) . '% \| ' . sprintf('%0.1f', $b_other_percent_avg) . '% \| ' . sprintf('%0.6f', $d_elapsed_avg) . ' \| ' . sprintf('%0.1f', $d_io_percent_avg) . '% \| ' . sprintf('%0.1f', $d_other_percent_avg) . "% \|\n";
	852	print '^ Standard Deviation \|\|\|\|\| ' . sprintf('%0.6f', $b_elapsed_stddev) . ' \| ' . sprintf('%0.1f', $b_io_percent_stddev) . '% \| ' . sprintf('%0.1f', $b_other_percent_stddev) . '% \| ' . sprintf('%0.6f', $d_elapsed_stddev) . ' \| ' . sprintf('%0.1f', $d_io_percent_stddev) . '% \| ' . sprintf('%0.1f', $d_other_percent_stddev) . '% \| ' . $outlier_count . " \|\n";
	853	print "\n";
	854	}
	855	}
	856	## printReport()
	857
	858
	859	## @function printReportHeader($collection)
	860	#
	861	sub printReportHeader
	862	{
	863	my ($collection) = @_;
	864	print '==== "' . $collection . "\" Collection ====\n\n";
	865	print "^ Count ^ Collection ^ NumDocs ^ Size ^ Timestamp ^ Base ^^^ Import ^^^ Outlier? ^\n";
	866	print "^ ::: ^ ::: ^ ::: ^ ::: ^ ::: ^ Elapsed ^ Syscalls ^^ Elapsed ^ Syscalls ^^ ::: ^\n";
	867	print "^ ::: ^ ::: ^ ::: ^ ::: ^ ::: ^ ::: ^ I/O% ^ Other% ^ ::: ^ I/O% ^ Other ^ ::: ^\n";
	868	}
	869	## printReportHeader()
	870
	871
	872	## @function calculateStandardDeviation($mean, $value1 ... $valueN)
	873	# Our sample size is the population so we can use the simplier standard deviation equation for an entire population (now, theres some rap lyrics)
	874	# stddev = squareroot ( sum ( square( value - mean ) ) / count )
	875	sub calculateStandardDeviation
	876	{
	877	my $population = shift(@_);
	878	my $mean = shift(@_);
	879	my $count = scalar(@_);
	880	my $total = 0;
	881	# sum of squares
	882	foreach my $value (@_)
	883	{
	884	my $subtotal = $value - $mean;
	885	$total += $subtotal * $subtotal;
	886	}
	887	if ($population == $count)
	888	{
	889	$total = $total / $count;
	890	}
	891	else
	892	{
	893	$total = $total / ($count - 1);
	894	}
	895	my $standard_deviation = sqrt($total);
	896	return $standard_deviation;
	897	}
	898	## calculateStandardDeviation()
	899
	900
	901	## @function isOutlier($value, $mean, $stddev)
	902	#
	903	sub isOutlier
	904	{
	905	my ($value, $mean, $stddev) = @_;
	906	my $min = $mean - (2 * $stddev);
	907	my $max = $mean + (2 * $stddev);
	908	return ($value <= $min \|\| $max <= $value);
	909	}
	910	## isOutlier()
	911
[28646]	912	1;

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: gs2-extensions/parallel-building/trunk/src/bin/script/import_with_io_metric.pl@ 28767

Download in other formats: