1 | #!/usr/bin/perl
|
---|
2 |
|
---|
3 | # Pragma
|
---|
4 | use strict;
|
---|
5 | use warnings;
|
---|
6 |
|
---|
7 | # Modules
|
---|
8 | use File::Path qw(make_path);
|
---|
9 | use POSIX qw(strftime);
|
---|
10 |
|
---|
# Environment sanity check, performed at compile time. The variables below are
# expected to have been exported by sourcing setup.bash before running this
# script; the script dies on the first one found missing.
BEGIN
{
  # GSDLHOME has to be non-empty as well as defined
  if (!defined $ENV{'GSDLHOME'} || $ENV{'GSDLHOME'} eq '')
  {
    die "GSDLHOME not set\n";
  }
  # The remaining variables only need to be defined. Each entry pairs the
  # variable name with the message to die with; they are checked in order.
  my @required_variables = (
    ['GSDLOS', "GSDLOS not set\n"],
    ['GEXTPARALLELBUILDING', "GEXTPARALLELBUILDING not set\n"],
    ['GEXTPARALLELBUILDING_INSTALLED', "GEXTPARALLELBUILDING_INSTALLED not set\n"],
    ['HDFSHOST', "HDFS HOST not set (set in <gsdl>/ext/parallel_processing/setup.bash)\n"],
    ['HDFSPORT', "HDFS PORT not set (set in <gsdl>/ext/parallel_processing/setup.bash)\n"],
  );
  foreach my $requirement (@required_variables)
  {
    my ($variable_name, $failure_message) = @{$requirement};
    if (!defined $ENV{$variable_name})
    {
      die $failure_message;
    }
  }
}
21 |
|
---|
# Command line: <collection> <max replication factor> <iterations>
# printUsage() ends the script, so nothing after a failed guard is executed.
my $collection = $ARGV[0];
unless (defined $collection)
{
  printUsage('Missing collection name');
}

# both numeric arguments must consist solely of digits
my $max_replication_factor = $ARGV[1];
unless (defined $max_replication_factor && $max_replication_factor =~ /^\d+$/)
{
  printUsage('Missing max replication factor or NAN');
}

my $iterations = $ARGV[2];
unless (defined $iterations && $iterations =~ /^\d+$/)
{
  printUsage('Missing iterations or NAN');
}
37 |
|
---|
# 1. Initialization
# Works out who and where we are, then derives the results directory and the
# path of the SQLite database that tracks the tests.
my $dry_run = 0;
my $debug = 0;
# A backtick call yields undef when the command cannot be started, so fall
# back to an empty string before touching each value
my $user_name = `id -u -n`;
$user_name = '' unless defined $user_name;
chomp($user_name);
my $machine_name = `hostname -s`;
$machine_name = '' unless defined $machine_name;
chomp($machine_name);
$machine_name = ucfirst($machine_name);
# lsb_release -i is expected to print a line starting "Distributor ID:"; keep
# only what follows that label. Whatever was printed, strip the surrounding
# whitespace (newline, carriage return) so that none of it ends up in the
# directory name built below.
my $os_name = `lsb_release -i`;
$os_name = '' unless defined $os_name;
$os_name =~ s/^Distributor ID:\s+//i;
$os_name =~ s/^\s+//;
$os_name =~ s/\s+$//;
my $db_path_suffix = $machine_name . '_' . $os_name . '_hadoop_hdfsshell_54_15_1_i' . $iterations;
my $test_dir = $ENV{'GSDLHOME'} . '/collect/' . $collection . '/results/' . $db_path_suffix;
if (!-d $test_dir)
{
  make_path($test_dir);
}
my $db_path = $test_dir . '/test.db';
55 |
|
---|
# 2. Create and populate testing database as necessary
# Initialisation is needed when the database file does not exist yet, or when
# it exists but its tests table holds no rows. The row count is only queried
# when the file is present.
my $init_database = (!-f $db_path || '0' eq sqliteGetValue($db_path, 'SELECT COUNT(*) FROM tests')) ? 1 : 0;
if ($init_database)
{
  print STDOUT " * Creating database tables\n";
  # one table for the timings, one for the captured output of each test
  sqliteExec($db_path, 'CREATE TABLE IF NOT EXISTS tests (replication INTEGER, iteration INTEGER, timestamp INTEGER DEFAULT 0, realtime REAL DEFAULT 0, systime REAL DEFAULT 0, usertime REAL DEFAULT 0, PRIMARY KEY (replication, iteration))');
  sqliteExec($db_path, 'CREATE TABLE IF NOT EXISTS testoutput (replication INTEGER, iteration INTEGER, output TEXT, PRIMARY KEY (replication, iteration))');
  # one row per (replication, iteration) pair in each table
  print STDOUT " * Populating tests table\n";
  foreach my $replication_factor (1 .. $max_replication_factor)
  {
    foreach my $iteration_number (1 .. $iterations)
    {
      my $key_values = ' (replication, iteration) VALUES (' . $replication_factor . ',' . $iteration_number . ')';
      sqliteExec($db_path, 'INSERT INTO tests' . $key_values);
      sqliteExec($db_path, 'INSERT INTO testoutput' . $key_values);
    }
  }
}
84 |
|
---|
# 3. Load random test and run it
# A test counts as pending while its realtime column is still 0. Keep going
# until no pending tests are left or the exit file appears.
# NOTE(review): a run whose output contains no parsable "real" time leaves
# realtime at 0, so that test stays pending and will be picked again - confirm
# that this retry behaviour is intended.
my $total_count = sqliteGetValue($db_path, 'SELECT COUNT(*) FROM tests');
my $test_count = sqliteGetValue($db_path, 'SELECT COUNT(*) FROM tests WHERE realtime=0');
my $exit_file_path = $ENV{'GSDLHOME'} . '/collect/exit.now';
while ($total_count > 0 && $test_count > 0 && !-f $exit_file_path)
{
  my $x = $total_count - $test_count;
  my $timestamp = time();
  my $now_string = strftime("%a %b %e %H:%M:%S %Y", localtime($timestamp));
  print STDOUT ' * [' . $now_string . '] Progress: ' . sprintf("%.0f",(($x/$total_count)*100)) . '% complete! [' . $test_count . " tests remaining]\n";

  # 4. Pick a random pending test (replication factor and iteration) and run
  #    and time it
  my ($replication, $iteration) = sqliteGetValues($db_path, 'SELECT replication, iteration FROM tests WHERE realtime=0 ORDER BY RANDOM() LIMIT 1');
  print STDOUT ' - running test hadoop import for collection=' . $collection . ', replication=' . $replication . ', iteration=' . $iteration . "\n";

  # 5. Change the HDFS replication to match
  print STDOUT ' - rebalance HDFS with replication: ' . $replication . "\n";
  my $hdfs_cmd = 'hadoop fs -setrep -w ' . $replication . ' -R /user/' . $user_name . '/gsdl/collect/' . $collection . '/import 2>&1';
  print STDOUT '[DEBUG] command: |' . $hdfs_cmd . "|\n" if $debug;
  if (!$dry_run)
  {
    my $result = `$hdfs_cmd`;
    $result = '' unless defined $result;
    print STDOUT '[DEBUG] result: |' . $result . "|\n" if $debug;
  }

  # 6. Now call hadoop_import.pl but pass in some extra options to control
  #    where logs get written
  my $import_cmd = 'time -p hadoop_import.pl "' . $collection . '" -logdir "' . $test_dir . '/' . $timestamp . '" 2>&1';
  print STDOUT '[DEBUG] command: |' . $import_cmd . "|\n" if $debug;
  if ($dry_run)
  {
    # mark the test as done without running anything
    sqliteExec($db_path, 'UPDATE tests SET realtime=1 WHERE replication=' . $replication . ' AND iteration=' . $iteration);
  }
  else
  {
    my $result = `$import_cmd`;
    $result = '' unless defined $result;
    # pull the three timings reported by "time -p" out of the captured
    # output; a timing that cannot be found is recorded as 0
    my $rtime = 0;
    if ($result =~ /real\s+(\d+\.\d+)/)
    {
      $rtime = $1;
    }
    my $utime = 0;
    if ($result =~ /user\s+(\d+\.\d+)/)
    {
      $utime = $1;
    }
    my $stime = 0;
    if ($result =~ /sys\s+(\d+\.\d+)/)
    {
      $stime = $1;
    }
    # The output is embedded in a single-quoted SQL literal which is in turn
    # handed to the shell, so a quote, backtick, dollar sign or backslash in
    # it could end the literal or be interpreted by the shell. Rewrite each
    # of those characters as a numeric character reference built from its
    # code point (ampersand, hash, number, semicolon).
    $result =~ s/(['"`\$\\])/'&#' . ord($1) . ';'/ge;
    print STDOUT '[DEBUG] result: |' . $result . "|\n" if $debug;
    # 7. Write results to database
    sqliteExec($db_path, 'UPDATE tests SET timestamp=' . $timestamp . ', realtime=' . $rtime . ', usertime=' . $utime . ', systime=' . $stime . ' WHERE replication=' . $replication . ' AND iteration=' . $iteration);
    sqliteExec($db_path, "UPDATE testoutput SET output='" . $result . "' WHERE replication=" . $replication . " AND iteration=" . $iteration);
  }

  # Repeat until we have exhausted pending tests
  $test_count = sqliteGetValue($db_path, 'SELECT COUNT(*) FROM tests WHERE realtime=0');
}
148 |
|
---|
# 8. Done.
# The presence of the exit file stops the test loop; delete it on the way out
# so that it does not also stop the next run.
my $exit_was_requested = (-f $exit_file_path) ? 1 : 0;
if ($exit_was_requested)
{
  print STDOUT " - Removing exit file... ";
  unlink($exit_file_path);
  print STDOUT "Done!\n";
}
print STDOUT "Complete!\n\n";
exit(0);
158 |
|
---|
159 |
|
---|
## @function sqliteExec()
#
# Execute an SQL statement for its effect on the database.
#
# @param  $database_file  path to the SQLite database file
# @param  $statement      the SQL statement to run
# @return whatever sqliteGetValue() returns; the callers in this file do not
#         use it
#
sub sqliteExec
{
  my ($database_file, $statement) = @_;
  # all of the work, including dying on errors, happens in sqliteGetValue()
  return sqliteGetValue($database_file, $statement);
}
## sqliteExec() ##
169 |
|
---|
170 |
|
---|
## @function sqliteGetValues()
#
# Fetch the columns of a single result row as a list.
#
# @param  $db_path  path to the SQLite database file
# @param  $sql      a query; ' LIMIT 1' is appended unless the text already
#                   contains 'LIMIT 1' (case-insensitive)
# @return the text returned by sqliteGetValue() split on '|', one element per
#         column; an empty list when nothing was returned
#
sub sqliteGetValues
{
  my ($db_path, $sql) = @_;
  # only one row can be unpacked, so cap the query unless it already is
  if ($sql !~ /LIMIT 1/i)
  {
    $sql .= ' LIMIT 1';
  }
  my $value = sqliteGetValue($db_path, $sql);
  # A negative limit stops split() from discarding trailing empty fields, so
  # an empty final column still occupies its position in the returned list
  return split(/\|/, $value, -1);
}
## sqliteGetValues() ##
184 |
|
---|
185 |
|
---|
## @function sqliteGetValue()
#
# Run one SQL statement against a SQLite database through the sqlite3 command
# line client and return what it printed.
#
# @param  $db_path  path to the SQLite database file
# @param  $sql      the SQL statement to execute
# @return the combined stdout/stderr of sqlite3 with leading and trailing
#         whitespace removed
# Dies when sqlite3 exits with a non-zero status or its output contains
# 'Error:'.
#
sub sqliteGetValue
{
  my ($db_path, $sql) = @_;
  # Wrap both arguments in single quotes for the shell, closing and reopening
  # the quoted string around every embedded single quote, so that nothing in
  # the path or the statement is expanded or executed by the shell
  my @shell_args = ($db_path, $sql);
  foreach my $shell_arg (@shell_args)
  {
    $shell_arg =~ s/'/'\\''/g;
    $shell_arg = "'" . $shell_arg . "'";
  }
  my $command = 'sqlite3 ' . join(' ', @shell_args) . ' 2>&1';
  my $result = `$command`;
  # remember the exit status straight away, before anything can overwrite $?
  my $exit_status = $?;
  # undef means the command could not be run at all
  $result = '' unless defined $result;
  if ($exit_status != 0 || $result =~ /Error:/)
  {
    my $message = $result;
    if ($message eq '')
    {
      $message = 'sqlite3 failed with wait status ' . $exit_status . "\n";
    }
    die("Fatal Error!\nSQL:" . $sql . "\nMsg:" . $message);
  }
  # trim
  $result =~ s/^\s+//;
  $result =~ s/\s+$//;
  return $result;
}
## sqliteGetValue() ##
201 |
|
---|
202 |
|
---|
## @function printUsage()
#
# Print an optional error message followed by the usage line to STDERR, then
# end the script.
#
# @param  $msg  optional description of what was wrong with the arguments
# Does not return. The exit status is 1 when an error message was given and
# 0 when only the usage text was requested.
#
sub printUsage
{
  my ($msg) = @_;
  # flush STDOUT so that anything already printed appears before the text
  # written to STDERR below; autoflush is switched off again afterwards
  my $previous_handle = select(STDOUT);
  $| = 1;
  print STDOUT '';
  $| = 0;
  select($previous_handle);
  # output any error message
  my $exit_status = 0;
  if (defined $msg)
  {
    print STDERR 'Error! ' . $msg . "\n";
    # being called with a message means the arguments were rejected
    $exit_status = 1;
  }
  # and finally the usage
  print STDERR "Usage: replication_tests.pl <str:collection> <int:max replication> <int:iterations>\n";
  print STDERR "\n";
  exit($exit_status);
}
## printUsage() ##
223 |
|
---|
224 | 1;
|
---|