source: gs2-extensions/parallel-building/trunk/src/bin/script/batch-testing.pl@ 30354

Last change on this file since 30354 was 25118, checked in by jmt12, 12 years ago

Significant changes to support the latest round of batch tests (for instance, allowing a varying number of threads)

  • Property svn:executable set to *
File size: 11.2 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4
5use POSIX qw(strftime);
6
# Refuse to even compile the rest of the script unless the GSDLHOME
# environment variable is available, as every path used below is built
# from it.
BEGIN
{
  if (!defined $ENV{'GSDLHOME'})
  {
    die "GSDLHOME not set\n";
  }
}
11
# Announce what is about to happen (the heredoc ends with a blank line so
# that later output is visually separated from the banner)
print STDOUT <<'END_BANNER';
===== Batch Testing =====
Runs a multitude of tests against a 'lorem' collection and making
use of parallel importing.

END_BANNER
15
# 0. Configuration
# Exactly one assignment per setting should be active; the commented-out
# lines are alternative values that can be swapped in by hand.

# - infodb backend under test (lowercased to form the collection name and
#   embedded in the name of the results database)
#my $infodb_type = 'GDBMServer';
#my $infodb_type = 'SQLite';
#my $infodb_type = 'TDB';
my $infodb_type = 'GDBM';

# - collection sizes to test (zero padded to seven digits when forming the
#   collection name)
#my @sizes = (100);
#my @sizes = (500);
#my @sizes = (1000);
#my @sizes = (5000);
#my @sizes = (10000);
#my @sizes = (50000);
#my @sizes = (100, 500, 1000, 5000, 10000, 50000, 100000);
#my @sizes = (1000000);
#my @sizes = (500000, 1000000);
#my @sizes = (100, 500, 1000, 5000, 10000, 50000, 100000, 500000, 1000000);
my @sizes = (500_000);

# - values handed to parallel_import.pl as -jobs
#my @threads = (1,9);
#my @threads = (1,2,3,4,5,6,7,8,9,10,11,12,13);
my @threads = (9);

# - values handed to parallel_import.pl as -epoch; a batch size of 0
#   selects the ordinary (non-parallel) import.pl instead
#my @batchsizes = (0); # Non-parallel
#my @batchsizes = (100);
my @batchsizes = (1_000);

# - how many times each combination of the above is repeated
#my $test_iterations = 3;
#my $test_iterations = 9;
my $test_iterations = 1;
41
# 1. Initialization
# Work out the name of the results database. The name records the machine,
# operating system, file system and test configuration so that results from
# different set-ups end up in different files.

# - short host name with a leading capital
my $machine_name = `hostname -s`;
chomp($machine_name);
$machine_name = ucfirst($machine_name);

# - distributor id as reported by lsb_release. Only the captured value is
#   ever used, so a missing or unexpected report falls back to 'Unknown'
#   rather than leaking raw command output (newlines included) into the
#   database file name
my $os_name = 'Unknown';
my $lsb_report = `lsb_release -i`;
if (defined $lsb_report && $lsb_report =~ /^Distributor ID:\s+(\S.*?)\s*$/im)
{
  $os_name = $1;
}

# - type of the file system holding GSDLHOME. df is run without a shell (so
#   the path needs no quoting) and -P keeps each file system on one line;
#   the type is then the second column of the last line. This reports any
#   file system type rather than just a fixed list of known ones
my $fs_name = 'UNKNOWN';
if (open(my $df_fh, '-|', 'df', '-PT', $ENV{'GSDLHOME'}))
{
  my @df_lines = <$df_fh>;
  close($df_fh);
  # expect a header line followed by at least one file system line
  if (scalar(@df_lines) > 1 && $df_lines[-1] =~ /^\S+\s+(\S+)\s/)
  {
    $fs_name = uc($1);
  }
}

# - turns a list of values into '<prefix><first>' or, when there are several
#   values, '<prefix><first>-<last>'
my $range_label = sub
{
  my ($prefix, @values) = @_;
  my $label = $prefix . $values[0];
  if (scalar(@values) > 1)
  {
    $label .= '-' . $values[-1];
  }
  return $label;
};
my $sizes_str = $range_label->('d', @sizes);
my $threads_str = $range_label->('t', @threads);
my $batch_str = $range_label->('b', @batchsizes);

# - and put it all together
my $db_name = join('_', $machine_name, $os_name, $fs_name, $infodb_type, $sizes_str, $threads_str, $batch_str, 'i' . $test_iterations) . '.db';
my $db_path = $ENV{'GSDLHOME'} . '/collect/' . $db_name;
68
# 2. If we haven't already, initialize the database by creating the tables and
#    populating the pending tests queue. That is needed when the database
#    file does not exist yet, or when its tests table contains no rows.
my $init_database = 0;
if (!-f $db_path || '0' eq getValueSQL($db_path, 'SELECT COUNT(*) FROM tests'))
{
  $init_database = 1;
}

if ($init_database > 0)
{
  print STDOUT " * Creating database tables\n";
  # - timings for each test
  execSQL($db_path, 'CREATE TABLE IF NOT EXISTS tests (walltime INTEGER DEFAULT 0, collection TEXT, type TEXT, threads INTEGER, batchsize INTEGER, testrun INTEGER, realtime REAL DEFAULT 0, systime REAL DEFAULT 0, usertime REAL DEFAULT 0, PRIMARY KEY (collection, type, testrun, threads, batchsize))');
  # - captured command output for each test
  execSQL($db_path, 'CREATE TABLE IF NOT EXISTS testoutput (walltime INTEGER DEFAULT 0, collection TEXT, type TEXT, testrun INTEGER, threads INTEGER, batchsize INTEGER, output TEXT, PRIMARY KEY (collection, type, testrun, threads, batchsize))');

  # Populate with the anticipated tests: every combination of collection
  # size, iteration, thread count and batch size gets an 'rm' and an
  # 'import' row in both tables
  print STDOUT " * Populating tests table\n";
  foreach my $size (@sizes)
  {
    my $collection = lc($infodb_type) . sprintf("%07d", $size);
    print STDOUT ' - generating tests for collection=' . $collection . "\n";
    foreach my $test_run (1 .. $test_iterations)
    {
      foreach my $threads_run (@threads)
      {
        foreach my $batchsize_run (@batchsizes)
        {
          # the values shared by all four rows of this combination
          my $row_values = $test_run . "," . $threads_run . "," . $batchsize_run;
          foreach my $table ('tests', 'testoutput')
          {
            foreach my $type ('rm', 'import')
            {
              my $sql = "INSERT INTO " . $table . " (collection, type, testrun, threads, batchsize) VALUES ('" . $collection . "','" . $type . "'," . $row_values . ")";
              print STDERR ' - sql: ' . $sql . "\n";
              execSQL($db_path, $sql);
            }
          }
        }
      }
    }
  }
}
119
# 3. While there are still pending tests in the queue
#    - a test counts as pending while its realtime column is still 0
#    - creating the file collect/exit.now stops the loop before the next
#      test is started
my $total_count = getValueSQL($db_path, 'SELECT COUNT(*) FROM tests');
my $test_count = getValueSQL($db_path, 'SELECT COUNT(*) FROM tests WHERE realtime=0');
my $exit_file_path = $ENV{'GSDLHOME'} . '/collect/exit.now';

# Extracts the (real, user, sys) seconds from captured command output. The
# 'time -p' report is written once the command has finished, so the last
# occurrence of each label is used (earlier occurrences could be part of the
# command's own output). A value that cannot be found is returned as 0.
my $parse_times = sub
{
  my ($output) = @_;
  my @times = ();
  foreach my $label ('real', 'user', 'sys')
  {
    my @matches = ($output =~ /$label\s+(\d+\.\d+)/g);
    push(@times, (scalar(@matches) > 0) ? $matches[-1] : 0);
  }
  return @times;
};

# Stores the timings and the captured output of one command against the
# matching rows of the tests and testoutput tables.
#  $type is 'rm' or 'import'; the remaining arguments identify the test
my $record_result = sub
{
  my ($type, $collection, $test_run, $threads_run, $batchsize_run, $output) = @_;
  my ($rtime, $utime, $stime) = $parse_times->($output);
  # the output is embedded in a single-quoted SQL string, so drop any quotes
  $output =~ s/['"]//g;
  my $walltime = time();
  my $where = ' WHERE collection=\'' . $collection . '\' AND type=\'' . $type . '\' AND testrun=' . $test_run . ' AND threads=' . $threads_run . ' AND batchsize=' . $batchsize_run;
  execSQL($db_path, 'UPDATE tests SET walltime=' . $walltime . ', realtime=' . $rtime . ', usertime=' . $utime . ', systime=' . $stime . $where);
  execSQL($db_path, 'UPDATE testoutput SET walltime=' . $walltime . ', output=\'' . $output . '\'' . $where);
};

while ($test_count ne "0" && !-f $exit_file_path)
{
  my $x = $total_count - $test_count;
  my $now_string = strftime "%a %b %e %H:%M:%S %Y", localtime;
  print STDERR ' * [' . $now_string . '] Progress: ' . sprintf("%.0f",(($x/$total_count)*100)) . '% complete! [' . $test_count . " tests remaining]\n";

  # 4. Pick the first pending test (lowest rowid) and run and time it.
  # NOTE(review): a row whose recorded realtime is still 0 afterwards (for
  # instance because no timing report was found in the output) stays pending
  # and will be picked again on the next pass
  my (undef, $collection, $test_run, $threads_run, $batchsize_run) = getRecordSQL($db_path, 'SELECT _rowid_, collection, testrun, threads, batchsize FROM tests WHERE realtime=0 ORDER BY _rowid_ LIMIT 1');
  print STDOUT ' - running test import for collection=' . $collection . ', test_run=' . $test_run . ", and threads=" . $threads_run . "\n";

  # Command one: run rm_archives.pl and record information in database
  my $cmd = 'time -p rm_archives.pl ' . $collection . ' 2>&1';
  print STDOUT ' - command: ' . $cmd . "\n";
  my $result = `$cmd`;
  $result = '' unless defined $result;
  $record_result->('rm', $collection, $test_run, $threads_run, $batchsize_run, $result);

  # Have a sleep to try and prevent any headaches caused by 'rm' influencing
  # later processes
  #sleep(5);

  # Command two: use sudoedit and other black magic to clear out the memory-
  # based disk cache (which is done by writing the number 3 to a certain
  # system file)
  print STDOUT " - Synching file system... ";
  `sync`;
  print STDOUT "Done!\n";
  print STDOUT " - Dropping memory disk cache... ";
  {
    # - replace the default editor with a script that simply clobbers the
    #   contents of any file it's handed with the number "3". Using local
    #   means the previous value (or the absence of one) is put back when
    #   this block ends, just in case something in Greenstone depends on
    #   EDITOR being a reasonable value
    local $ENV{'EDITOR'} = 'reset_memcache_editor.sh';
    # - we now call sudoedit on the system file. How sudoedit works is that
    #   it starts by making a temp copy of the system file with appropriate
    #   permissions allowing the user to edit. It then passes the path to the
    #   temp file to the default editor - typically this would be an
    #   interactive editor like 'vi'. However, we've just replaced the editor
    #   with a custom script that just writes '3' as the content of the tmp
    #   file. Finally, when the editor exits, sudoedit copies the tmp file
    #   over the top of the system file, restoring appropriate root-level
    #   permissions
    `sudoedit /proc/sys/vm/drop_caches`;
  }
  print STDOUT "Done!\n";

  # Command three: run import.pl and record information in database
  if ($batchsize_run == 0)
  {
    $cmd = 'time -p import.pl -removeold -verbosity 0 ' . $collection . ' 2>&1';
  }
  # Very super special case: if parallel processing, but number of threads is
  # one (i.e. non-parallel) we call import but specify a manifest file for the
  # entire collection (otherwise the import would do a pre-scan, drastically
  # altering the timing)
  elsif ($threads_run == 1)
  {
    $cmd = 'time -p import.pl -keepold -verbosity 0 -manifest manifest.xml ' . $collection . ' 2>&1';
  }
  else
  {
    # (use '-verbosity 0' here for quiet runs)
    $cmd = 'time -p parallel_import.pl -removeold -verbosity 42 -epoch ' . $batchsize_run . ' -jobs ' . $threads_run . ' ' . $collection . ' 2>&1';
  }

  print STDOUT ' - command: ' . $cmd . "\n";
  # Echo the output line by line while the import runs, but also keep a copy
  # of it: the timing report at the end of that copy is what has to be
  # recorded against the import (not the leftovers of the rm command above)
  open(my $cmd_fh, '-|', $cmd) or die("Failed to execute command: " . $cmd);
  $result = '';
  my $line_counter = 0;
  while (defined(my $line = <$cmd_fh>))
  {
    $result .= $line;
    chomp($line);
    print STDOUT "[" . $line_counter . "] " . $line . "\n";
    $line_counter++;
  }
  close($cmd_fh);
  $record_result->('import', $collection, $test_run, $threads_run, $batchsize_run, $result);

  # Repeat until we have exhausted pending tests
  $test_count = getValueSQL($db_path, 'SELECT COUNT(*) FROM tests WHERE realtime=0');
}
245
# Remove any exit file. The file is what stops the test loop early, so a
# leftover one would also stop the loop of the next run straight away.
my $exit_file_found = (-f $exit_file_path) ? 1 : 0;
if ($exit_file_found)
{
  print STDOUT " - Removing exit file... ";
  unlink($exit_file_path);
  print STDOUT "Done!\n";
}

# All finished - leave before perl reaches the subroutine definitions below
print STDOUT "Complete!" . "\n\n";

exit(0);
258
# Runs a SQL statement that is wanted for its effect on the database rather
# than for any value it produces (CREATE, INSERT, UPDATE and the like).
# @param $db_path the path to the database file
# @param $sql     the SQL statement to run
# The work is delegated to getValueSQL(), so failures are fatal in exactly
# the same way; whatever getValueSQL() returns is passed back but callers
# are not expected to look at it.
sub execSQL
{
  my $db_path = shift(@_);
  my $sql = shift(@_);
  return getValueSQL($db_path, $sql);
}
# /** execSQL() **/
266
# Fetches a single row from the database as a list of column values.
# @param $db_path the path to the database file
# @param $sql     the SELECT statement to run; ' LIMIT 1' is appended
#                 unless the statement already contains 'LIMIT 1'
# @return the text returned by getValueSQL() split on '|' (presumably the
#         column separator of sqlite3's default output mode)
sub getRecordSQL
{
  my ($db_path, $sql) = @_;
  # never ask for more than one row
  $sql .= ' LIMIT 1' unless ($sql =~ /LIMIT 1/i);
  my $row = getValueSQL($db_path, $sql);
  return split(/\|/, $row);
}
# /** getRecordSQL() **/
278
# Runs a SQL statement through the sqlite3 command line tool and returns
# whatever it printed, with leading and trailing whitespace removed.
# @param $db_path the path to the database file
# @param $sql     the SQL statement to run
# @return the trimmed output of sqlite3 (standard output and standard error
#         combined)
# Dies if sqlite3 cannot be started, exits with a non-zero status, or prints
# a message containing 'Error:'.
#
# sqlite3 is started directly rather than through a shell. The statement is
# therefore handed over exactly as given: characters such as $, backticks and
# backslashes (which do turn up in the captured command output that gets
# stored) are no longer expanded or executed by a shell on the way.
sub getValueSQL
{
  my ($db_path, $sql) = @_;
  # Fork, with the child's STDOUT connected to a pipe the parent reads from
  my $pid = open(my $sqlite_fh, '-|');
  if (!defined $pid)
  {
    die("Fatal Error!\nSQL:" . $sql . "\nMsg:Unable to fork: " . $!);
  }
  if ($pid == 0)
  {
    # Child: send error messages down the same pipe, then become sqlite3.
    # _exit is used on failure so that none of the parent's END blocks or
    # buffered output get replayed by the child
    require POSIX;
    open(STDERR, '>&', \*STDOUT) or POSIX::_exit(126);
    exec('sqlite3', $db_path, $sql) or do
    {
      syswrite(STDERR, "Error: unable to run sqlite3: " . $! . "\n");
      POSIX::_exit(127);
    };
  }
  # Parent: slurp everything the child prints
  my $result = do { local $/; <$sqlite_fh> };
  $result = '' unless defined $result;
  # closing a piped handle waits for the child and sets $?
  close($sqlite_fh);
  my $exit_status = $?;
  # The exit status is checked as well as the text, as not every failure is
  # reported with the exact wording 'Error:'
  if ($result =~ /Error:/ || $exit_status != 0)
  {
    my $reason = $result;
    if ($reason !~ /\S/)
    {
      $reason = 'sqlite3 exited with status ' . ($exit_status >> 8);
    }
    die("Fatal Error!\nSQL:" . $sql . "\nMsg:" . $reason);
  }
  # trim
  $result =~ s/^\s*|\s*$//g;
  return $result;
}
# /** getValueSQL() **/
292
# Prints an optional error message followed by the usage of this script to
# STDERR, then exits.
# @param $msg an optional description of what went wrong
# Exits with status 1 when an error message was given, 0 otherwise. Never
# returns.
sub printUsage
{
  my ($msg) = @_;
  # flush STDOUT so anything already printed appears before the text below
  require IO::Handle;
  STDOUT->flush();
  # output any error message
  if (defined $msg)
  {
    print STDERR 'Error! ' . $msg . "\n";
  }
  # and finally the usage. This script reads no command line arguments - the
  # tests to run are chosen by editing its configuration section
  print STDERR "Usage: batch-testing.pl\n";
  print STDERR "\n";
  exit(defined $msg ? 1 : 0);
}

1;
Note: See TracBrowser for help on using the repository browser.