1 | #!/usr/bin/perl
|
---|
2 |
|
---|
3 | use strict;
|
---|
4 |
|
---|
5 | use POSIX qw(strftime);
|
---|
6 |
|
---|
# Refuse to go any further (at compile time) unless the Greenstone environment
# has been set up: every path used by this script hangs off GSDLHOME.
BEGIN
{
  if (!defined $ENV{'GSDLHOME'})
  {
    die "GSDLHOME not set\n";
  }
}
11 |
|
---|
# Banner
print STDOUT "===== Batch Testing =====\n";
print STDOUT "Runs a multitude of tests against a 'lorem' collection and making\n"
  . "use of parallel importing.\n\n";

# 0. Configuration
# Exactly one assignment per setting should be active; the commented-out lines
# are alternative configurations kept for convenience.

# - the infodb type; its lower-cased form is the prefix of the collection name
my $infodb_type = 'GDBM';
#my $infodb_type = 'GDBMServer';
#my $infodb_type = 'SQLite';
#my $infodb_type = 'TDB';

# - collection sizes; each one is zero-padded to seven digits and appended to
#   the infodb type to form the collection name
#my @sizes = (100);
#my @sizes = (500);
#my @sizes = (1000);
#my @sizes = (5000);
#my @sizes = (10000);
#my @sizes = (50000);
#my @sizes = (100, 500, 1000, 5000, 10000, 50000, 100000);
my @sizes = (500_000);
#my @sizes = (1000000);
#my @sizes = (500000, 1000000);
#my @sizes = (100, 500, 1000, 5000, 10000, 50000, 100000, 500000, 1000000);

# - thread counts (handed to parallel_import.pl as -jobs)
#my @threads = (1,9);
my @threads = (9);
#my @threads = (1,2,3,4,5,6,7,8,9,10,11,12,13);

# - batch sizes (handed to parallel_import.pl as -epoch); a batch size of 0
#   selects the plain, non-parallel import.pl
#my @batchsizes = (0); # Non-parallel
#my @batchsizes = (100);
my @batchsizes = (1_000);

# - how many times every size/threads/batchsize combination is run
my $test_iterations = 1;
#my $test_iterations = 3;
#my $test_iterations = 9;
41 |
|
---|
# 1. Initialization
# Build the name of the results database so that it encodes where and how the
# tests were run:
#   <Machine>_<OS>_<FS>_<infodb>_d<sizes>_t<threads>_b<batchsizes>_i<iterations>.db
# Whenever one of the probes below fails we fall back to 'Unknown' rather than
# letting raw (possibly multi-line) command output leak into the file name.

# - short host name, first letter capitalised
my $machine_name = `hostname -s`;
if (!defined $machine_name || $machine_name !~ /\S/)
{
  $machine_name = 'Unknown';
}
chomp($machine_name);
$machine_name = ucfirst($machine_name);

# - distribution name as reported by lsb_release ("Distributor ID:<ws>Name")
my $os_name = 'Unknown';
my $lsb_output = `lsb_release -i`;
if (defined $lsb_output && $lsb_output =~ /^Distributor ID:\s+(.*)\r?\n$/i)
{
  $os_name = $1;
}

# - type of the file system holding GSDLHOME. df is run without a shell so
#   that spaces or shell metacharacters in the path cannot break the command.
my $fs_name = 'UNKNOWN';
my $df_output = '';
if (open(my $df_handle, '-|', 'df', '-T', $ENV{'GSDLHOME'}))
{
  local $/ = undef;
  my $slurped = <$df_handle>;
  $df_output = $slurped if (defined $slurped);
  close($df_handle);
}
# (the greedy prefix means the last recognised type in the output wins, exactly
# as the original substitution behaved)
if ($df_output =~ /^.*(ext2|ext3|ext4|xfs|zfs).*$/is)
{
  $fs_name = uc($1);
}

# - each configured list is summarised as "<first>" or "<first>-<last>"
my $sizes_str = 'd' . $sizes[0];
if (scalar(@sizes) > 1)
{
  $sizes_str .= '-' . $sizes[-1];
}
my $threads_str = 't' . $threads[0];
if (scalar(@threads) > 1)
{
  $threads_str .= '-' . $threads[-1];
}
my $batch_str = 'b' . $batchsizes[0];
if (scalar(@batchsizes) > 1)
{
  $batch_str .= '-' . $batchsizes[-1];
}

my $db_name = join('_', $machine_name, $os_name, $fs_name, $infodb_type, $sizes_str, $threads_str, $batch_str, 'i' . $test_iterations) . '.db';
my $db_path = $ENV{'GSDLHOME'} . '/collect/' . $db_name;
68 |
|
---|
# 2. If we haven't already, initialize the database by creating the tables and
#    populating the pending tests queue. Initialization is needed when the
#    database file does not exist yet, or exists but holds no tests.
my $init_database = 0;
if (!-f $db_path || '0' eq getValueSQL($db_path, 'SELECT COUNT(*) FROM tests'))
{
  $init_database = 1;
}

if ($init_database > 0)
{
  print STDOUT " * Creating database tables\n";
  # one table for the timings, one for the captured command output; both are
  # keyed on the same (collection, type, testrun, threads, batchsize) tuple
  execSQL($db_path, 'CREATE TABLE IF NOT EXISTS tests (walltime INTEGER DEFAULT 0, collection TEXT, type TEXT, threads INTEGER, batchsize INTEGER, testrun INTEGER, realtime REAL DEFAULT 0, systime REAL DEFAULT 0, usertime REAL DEFAULT 0, PRIMARY KEY (collection, type, testrun, threads, batchsize))');
  execSQL($db_path, 'CREATE TABLE IF NOT EXISTS testoutput (walltime INTEGER DEFAULT 0, collection TEXT, type TEXT, testrun INTEGER, threads INTEGER, batchsize INTEGER, output TEXT, PRIMARY KEY (collection, type, testrun, threads, batchsize))');

  # populate with anticipated tests
  print STDOUT " * Populating tests table\n";
  foreach my $size (@sizes)
  {
    my $collection = lc($infodb_type) . sprintf("%07d", $size);
    print STDOUT ' - generating tests for collection=' . $collection . "\n";
    for my $test_run (1 .. $test_iterations)
    {
      foreach my $threads_run (@threads)
      {
        foreach my $batchsize_run (@batchsizes)
        {
          # every combination gets an 'rm' and an 'import' row, first in the
          # tests table and then in the testoutput table
          foreach my $table ('tests', 'testoutput')
          {
            foreach my $type ('rm', 'import')
            {
              my $sql = "INSERT INTO $table (collection, type, testrun, threads, batchsize) VALUES ('$collection','$type',$test_run,$threads_run,$batchsize_run)";
              print STDERR ' - sql: ' . $sql . "\n";
              execSQL($db_path, $sql);
            }
          }
        }
      }
    }
  }
}
119 |
|
---|
# 3. While there are still pending tests in the queue (a test is pending while
#    its realtime is still 0) and nobody has asked us to stop by creating the
#    exit file
my $total_count = getValueSQL($db_path, 'SELECT COUNT(*) FROM tests');
my $test_count = getValueSQL($db_path, 'SELECT COUNT(*) FROM tests WHERE realtime=0');
my $exit_file_path = $ENV{'GSDLHOME'} . '/collect/exit.now';

# The captured output travels to sqlite3 as part of a single command line
# argument, so only the tail (which holds the `time -p` summary) is stored once
# the output grows beyond this many characters.
my $max_output_chars = 100000;

# Helper: pull the `time -p` figures out of a command's output and record both
# the timings and the output against the matching rows of the two tables.
#   $type   - 'rm' or 'import'
#   $output - everything the command wrote to STDOUT and STDERR
#   the remaining arguments identify the test row
my $record_result = sub
{
  my ($type, $output, $collection, $test_run, $threads_run, $batchsize_run) = @_;
  $output = '' unless defined $output;

  # `time -p` prints its summary last, so the final occurrence of each label
  # is the one we want even when the command's own output mentions timings
  my %seconds_for = ('real' => 0, 'user' => 0, 'sys' => 0);
  foreach my $label (keys %seconds_for)
  {
    my @figures = ($output =~ /$label\s+(\d+\.\d+)/g);
    $seconds_for{$label} = $figures[-1] if (@figures);
  }

  # strip quotes (they would terminate the SQL string literal) as well as the
  # characters a shell expands inside double quotes, since the statement may
  # be handed to sqlite3 through a shell command line
  $output =~ s/['"`\$\\]//g;
  if (length($output) > $max_output_chars)
  {
    $output = substr($output, -$max_output_chars);
  }

  my $walltime = time();
  my $row_filter = " WHERE collection='" . $collection . "' AND type='" . $type . "' AND testrun=" . $test_run . ' AND threads=' . $threads_run . ' AND batchsize=' . $batchsize_run;
  execSQL($db_path, 'UPDATE tests SET walltime=' . $walltime . ', realtime=' . $seconds_for{'real'} . ', usertime=' . $seconds_for{'user'} . ', systime=' . $seconds_for{'sys'} . $row_filter);
  execSQL($db_path, 'UPDATE testoutput SET walltime=' . $walltime . ", output='" . $output . "'" . $row_filter);
};

while ($test_count ne "0" && !-f $exit_file_path)
{
  my $x = $total_count - $test_count;
  my $now_string = strftime "%a %b %e %H:%M:%S %Y", localtime;
  print STDERR ' * [' . $now_string . '] Progress: ' . sprintf("%.0f",(($x/$total_count)*100)) . '% complete! [' . $test_count . " tests remaining]\n";
  my $cmd;
  my $result;

  # 4. Pick the next pending test (lowest rowid first) and run and time it
  # NOTE(review): a command whose measured real time is exactly 0.00 leaves
  # realtime=0, so its row would be selected again - confirm this cannot
  # happen in practice.
  my ($counter, $collection, $test_run, $threads_run, $batchsize_run) = getRecordSQL($db_path, 'SELECT _rowid_, collection, testrun, threads, batchsize FROM tests WHERE realtime=0 ORDER BY _rowid_ LIMIT 1');
  print STDOUT ' - running test import for collection=' . $collection . ', test_run=' . $test_run . ", and threads=" . $threads_run . "\n";

  # Command one: run rm_archives.pl and record information in database
  $cmd = 'time -p rm_archives.pl ' . $collection . ' 2>&1';
  print STDOUT ' - command: ' . $cmd . "\n";
  $result = `$cmd`;
  $record_result->('rm', $result, $collection, $test_run, $threads_run, $batchsize_run);

  # Have a sleep to try and prevent any headaches caused by 'rm' influencing
  # later processes
  #sleep(5);

  # Command two: use sudoedit and other black magic to clear out the memory-
  # based disk cache (which is done by writing the number 3 to a certain
  # system file)
  print STDOUT " - Synching file system... ";
  `sync`;
  print STDOUT "Done!\n";
  print STDOUT " - Dropping memory disk cache... ";
  {
    # - temporarily replace the default editor with a script that simply
    #   clobbers the contents of any file it's handed with the number "3".
    #   'local' puts the previous value back when this block is left, and
    #   leaves EDITOR unset again if it was not set before, just in case
    #   something in Greenstone depends on this being a reasonable value
    local $ENV{'EDITOR'} = 'reset_memcache_editor.sh';
    # - we now call sudoedit on the system file. How sudoedit works is that it
    #   starts by making a temp copy of the system file with appropriate
    #   permissions allowing the user to edit. It then passes the path to the
    #   temp file to the default editor - typically this would be an
    #   interactive editor like 'vi'. However, we've just replaced the editor
    #   with a custom script that just writes '3' as the content of the tmp
    #   file. Finally, when the editor exits, sudoedit copies the tmp file over
    #   the top of the system file, restoring appropriate root-level
    #   permissions
    `sudoedit /proc/sys/vm/drop_caches`;
  }
  print STDOUT "Done!\n";

  # Command three: run import.pl and record information in database
  if ($batchsize_run == 0)
  {
    $cmd = 'time -p import.pl -removeold -verbosity 0 ' . $collection . ' 2>&1';
  }
  # Very super special case: if parallel processing, but number of threads is
  # one (i.e. non-parallel) we call import but specify a manifest file for the
  # entire collection (otherwise the import would do a pre-scan, drastically
  # altering the timing)
  elsif ($threads_run == 1)
  {
    $cmd = 'time -p import.pl -keepold -verbosity 0 -manifest manifest.xml ' . $collection . ' 2>&1';
  }
  else
  {
    # $cmd = 'time -p parallel_import.pl -removeold -verbosity 0 -epoch ' . $batchsize_run . ' -jobs ' . $threads_run . ' ' . $collection . ' 2>&1';
    $cmd = 'time -p parallel_import.pl -removeold -verbosity 42 -epoch ' . $batchsize_run . ' -jobs ' . $threads_run . ' ' . $collection . ' 2>&1';
  }

  print STDOUT ' - command: ' . $cmd . "\n";
  # Stream the output so progress is visible while the import runs, but also
  # keep a copy: without it the timings recorded for the import would be
  # parsed from the output of the earlier rm command.
  open(my $cmd_output, '-|', $cmd) or die("Failed to execute command: " . $cmd);
  $result = '';
  my $line_counter = 0;
  while (defined(my $line = <$cmd_output>))
  {
    $result .= $line;
    chomp($line);
    print STDOUT "[" . $line_counter . "] " . $line . "\n";
    $line_counter++;
  }
  close($cmd_output);
  $record_result->('import', $result, $collection, $test_run, $threads_run, $batchsize_run);

  # Repeat until we have exhausted pending tests
  $test_count = getValueSQL($db_path, 'SELECT COUNT(*) FROM tests WHERE realtime=0');
}
245 |
|
---|
# Remove any exit file - if it were left behind, the test loop of the next
# invocation would stop before running a single test
my $exit_was_requested = (-f $exit_file_path);
if ($exit_was_requested)
{
  print STDOUT " - Removing exit file... ";
  unlink($exit_file_path);
  print STDOUT "Done!\n";
}

print STDOUT "Complete!\n\n";

exit;
258 |
|
---|
# Runs a SQL statement that is executed for its effect on the database (CREATE,
# INSERT, UPDATE...). This is a thin wrapper around getValueSQL(): whatever
# that returns is passed straight back, although callers are expected to
# ignore it.
#   $database_file - path to the SQLite database
#   $statement     - the SQL to run
sub execSQL
{
  my ($database_file, $statement) = @_;
  return getValueSQL($database_file, $statement);
}
# /** execSQL() **/
266 |
|
---|
# Fetches a single record and returns its fields as a list.
#   $database_file - path to the SQLite database
#   $query         - the SELECT to run; ' LIMIT 1' is appended unless the query
#                    already contains it
# The fields are obtained by splitting the text returned by getValueSQL() on
# the '|' character.
sub getRecordSQL
{
  my ($database_file, $query) = @_;
  $query .= ' LIMIT 1' unless ($query =~ /LIMIT 1/i);
  my $record = getValueSQL($database_file, $query);
  return split(/\|/, $record);
}
# /** getRecordSQL() **/
278 |
|
---|
# Runs a SQL statement through the sqlite3 command line tool and returns its
# output with leading and trailing whitespace removed.
#   $db_path - path to the SQLite database
#   $sql     - the SQL to run
# Dies when sqlite3 cannot be started, exits with a non-zero status, or
# reports an 'Error:'.
#
# sqlite3 is started directly rather than through a shell command line, so
# double quotes, dollar signs, backticks and backslashes inside the statement
# reach sqlite3 untouched instead of being interpreted by the shell.
sub getValueSQL
{
  my ($db_path, $sql) = @_;

  # forking open: the child's STDOUT becomes the pipe we read from
  my $pid = open(my $sqlite_output, '-|');
  if (!defined $pid)
  {
    die("Fatal Error!\nSQL:" . $sql . "\nMsg:Unable to fork for sqlite3: " . $! . "\n");
  }
  if ($pid == 0)
  {
    # child: send STDERR down the same pipe (the equivalent of the shell's
    # 2>&1) and become sqlite3; the list form of exec never involves a shell
    open(STDERR, '>&', \*STDOUT);
    exec('sqlite3', $db_path, $sql) or do
    {
      print STDOUT 'Error: unable to run sqlite3: ' . $! . "\n";
      exit(127);
    };
  }

  # parent: slurp everything sqlite3 had to say
  my $result = do { local $/ = undef; <$sqlite_output> };
  $result = '' unless defined $result;
  # closing a piped handle waits for the child and leaves its status in $?
  close($sqlite_output);
  my $exit_status = $?;

  if ($result =~ /Error:/ || $exit_status != 0)
  {
    die("Fatal Error!\nSQL:" . $sql . "\nMsg:" . $result);
  }
  # trim
  $result =~ s/^\s*|\s*$//g;
  return $result;
}
# /** getValueSQL() **/
292 |
|
---|
# Prints an optional error message followed by the usage line to STDERR, then
# ends the script.
#   $msg - (optional) description of what went wrong
sub printUsage
{
  my ($msg) = @_;

  # flush STDOUT: switching autoflush on for STDOUT pushes out anything still
  # buffered; afterwards autoflush is switched back off. The previously
  # selected output handle is reinstated after each change.
  my $selected_handle = select(STDOUT);
  $| = 1;
  select($selected_handle);
  print STDOUT '';
  $selected_handle = select(STDOUT);
  $| = 0;
  select($selected_handle);

  # output any error message
  print STDERR 'Error! ' . $msg . "\n" if (defined $msg);

  # and finally the usage
  print STDERR "Usage: mpitesting.pl <path to database> <number docs> <worker threads>\n" . "\n";
  exit;
}
310 |
|
---|
311 | 1;
|
---|