root/gs2-extensions/parallel-building/trunk/src/bin/script/replication_tests.pl @ 28017

Revision 28017, 7.0 KB (checked in by jmt12, 6 years ago)

Forgot to add processing comment before call to hadoop_import.pl

  • Property svn:executable set to *
Line 
1#!/usr/bin/perl
2
3# Pragma
4use strict;
5use warnings;
6
7# Modules
8use File::Path qw(make_path);
9use POSIX qw(strftime);
10
# Environment sanity check, performed at compile time so that nothing else
# runs in a half-configured shell. setup.bash must have been sourced first.
BEGIN
{
  # GSDLHOME has to be non-empty as well as defined
  if (!defined $ENV{'GSDLHOME'} || $ENV{'GSDLHOME'} eq '')
  {
    die "GSDLHOME not set\n";
  }
  # The remaining variables only need to be defined. They are checked in
  # this order and the first missing one aborts with its own message.
  my @required = (
    ['GSDLOS', "GSDLOS not set\n"],
    ['GEXTPARALLELBUILDING', "GEXTPARALLELBUILDING not set\n"],
    ['GEXTPARALLELBUILDING_INSTALLED', "GEXTPARALLELBUILDING_INSTALLED not set\n"],
    ['HDFSHOST', "HDFS HOST not set (set in <gsdl>/ext/parallel_processing/setup.bash)\n"],
    ['HDFSPORT', "HDFS PORT not set (set in <gsdl>/ext/parallel_processing/setup.bash)\n"],
  );
  foreach my $check (@required)
  {
    my ($variable, $complaint) = @{$check};
    die $complaint if (!defined $ENV{$variable});
  }
}
21
# Positional arguments: <collection> <max replication factor> <iterations>.
# printUsage() terminates the script, so execution only continues past each
# check when the corresponding argument is acceptable.
my ($collection_arg, $replication_arg, $iterations_arg) = @ARGV;

if (!defined $collection_arg)
{
  printUsage('Missing collection name');
}
my $collection = $collection_arg;

# Both counts must be present and consist solely of digits
if (!(defined $replication_arg && $replication_arg =~ /^\d+$/))
{
  printUsage('Missing max replication factor or NAN');
}
my $max_replication_factor = $replication_arg;

if (!(defined $iterations_arg && $iterations_arg =~ /^\d+$/))
{
  printUsage('Missing iterations or NAN');
}
my $iterations = $iterations_arg;
37
# 1. Initialization
# Switches for the main loop: with $dry_run set no hadoop command is run and
# each selected test is simply marked as done; with $debug set every command
# and its captured output are echoed.
my $dry_run = 0;
my $debug = 0;

# Identify the user (needed for the HDFS path) and the machine / distribution
# (both become part of the results directory name)
chomp(my $user_name = `id -u -n`);
chomp(my $machine_name = `hostname -s`);
$machine_name = ucfirst($machine_name);
my $os_name = `lsb_release -i`;
# - keep only the value following the 'Distributor ID:' label
$os_name =~ s/^Distributor ID:\s+(.*)\r?\n$/$1/i;

# Results live in <GSDLHOME>/collect/<collection>/results/<suffix>/, which is
# created on first use
my $db_path_suffix = join('', $machine_name, '_', $os_name, '_hadoop_hdfsshell_54_15_1_i', $iterations);
my $test_dir = join('', $ENV{'GSDLHOME'}, '/collect/', $collection, '/results/', $db_path_suffix);
make_path($test_dir) unless (-d $test_dir);
my $db_path = $test_dir . '/test.db';
55
# 2. Create and populate testing database as necessary
# Initialisation is needed when the database file does not exist yet, or when
# it exists but its tests table holds no rows.
my $init_database = 0;
if (!-f $db_path || '0' eq sqliteGetValue($db_path, 'SELECT COUNT(*) FROM tests'))
{
  $init_database = 1;
}
# - do we need to create database?
if ($init_database)
{
  print STDOUT " * Creating database tables\n";
  # tests holds the timings, testoutput the captured command output; both are
  # keyed on (replication, iteration)
  my @create_statements = (
    'CREATE TABLE IF NOT EXISTS tests (replication INTEGER, iteration INTEGER, timestamp INTEGER DEFAULT 0, realtime REAL DEFAULT 0, systime REAL DEFAULT 0, usertime REAL DEFAULT 0, PRIMARY KEY (replication, iteration))',
    'CREATE TABLE IF NOT EXISTS testoutput (replication INTEGER, iteration INTEGER,  output TEXT, PRIMARY KEY (replication, iteration))',
  );
  foreach my $create_sql (@create_statements)
  {
    sqliteExec($db_path, $create_sql);
  }
  # one row per (replication, iteration) combination in each table
  print STDOUT " * Populating tests table\n";
  foreach my $replication (1 .. $max_replication_factor)
  {
    foreach my $iteration (1 .. $iterations)
    {
      my $key_values = ' (replication, iteration) VALUES (' . $replication . ',' . $iteration . ')';
      sqliteExec($db_path, 'INSERT INTO tests' . $key_values);
      sqliteExec($db_path, 'INSERT INTO testoutput' . $key_values);
    }
  }
}
84
# 3. Load random test and run it
# A test counts as pending while its realtime column is still 0. The loop also
# stops, before starting another test, as soon as the file
# <GSDLHOME>/collect/exit.now exists.
my $total_count = sqliteGetValue($db_path, 'SELECT COUNT(*) FROM tests');
my $test_count = sqliteGetValue($db_path, 'SELECT COUNT(*) FROM tests WHERE realtime=0');
my $exit_file_path = $ENV{'GSDLHOME'} . '/collect/exit.now';
while ($total_count > 0 && $test_count > 0 && !-f $exit_file_path)
{
  # number of tests already completed
  my $x = $total_count - $test_count;
  my $timestamp = time();
  my $now_string = strftime("%a %b %e %H:%M:%S %Y", localtime($timestamp));
  print STDOUT ' * [' . $now_string . '] Progress: ' . sprintf("%.0f",(($x/$total_count)*100)) . '% complete! [' . $test_count . " tests remaining]\n";

  # 4. Pick a random pending test (replication factor and iteration) and run
  # and time it
  my ($replication, $iteration) = sqliteGetValues($db_path, 'SELECT replication, iteration FROM tests WHERE realtime=0 ORDER BY RANDOM() LIMIT 1');
  print STDOUT '   - running test hadoop import for collection=' . $collection . ', replication=' . $replication . ', iteration=' . $iteration . "\n";

  # 5. Change the HDFS replication to match
  # - the path is quoted, as the arguments of the import command below are, so
  #   that a collection name containing spaces stays a single argument
  print STDOUT '   - rebalance HDFS with replication: ' . $replication . "\n";
  my $hdfs_cmd = 'hadoop fs -setrep -w ' . $replication . ' -R "/user/' . $user_name . '/gsdl/collect/' . $collection . '/import" 2>&1';
  print STDOUT '[DEBUG] command: |' . $hdfs_cmd . "|\n" if ($debug);
  if (!$dry_run)
  {
    my $result = `$hdfs_cmd`;
    # backticks yield undef when the command could not be started at all
    $result = '' unless (defined $result);
    print STDOUT '[DEBUG] result: |' . $result . "|\n" if ($debug);
  }

  # 6. Now call hadoop_import.pl but pass in some extra options to control
  # where logs get written
  # - the timed command sits in a subshell with the redirection outside it.
  #   Where /bin/sh treats 'time' as a shell keyword (bash) the timing report
  #   is written to the shell's own stderr, which a redirection attached to
  #   the timed command itself does not capture
  print STDOUT "   - ingest using Hadoop\n";
  my $import_cmd = '( time -p hadoop_import.pl "' . $collection . '" -logdir "' . $test_dir . '/' . $timestamp . '" ) 2>&1';
  print STDOUT '[DEBUG] command: |' . $import_cmd . "|\n" if ($debug);
  if ($dry_run)
  {
    # mark the test as done without running anything
    sqliteExec($db_path, 'UPDATE tests SET realtime=1 WHERE replication=' . $replication . ' AND iteration=' . $iteration);
  }
  else
  {
    my $result = `$import_cmd`;
    $result = '' unless (defined $result);
    # The 'time -p' report (real/user/sys lines) follows everything the import
    # itself printed, so take the LAST occurrence of each label. Taking the
    # first would pick up look-alike text in the import's own output.
    my %seconds_for = ('real' => 0, 'user' => 0, 'sys' => 0);
    foreach my $label (keys %seconds_for)
    {
      my @reported = ($result =~ /$label\s+(\d+\.\d+)/g);
      $seconds_for{$label} = $reported[-1] if (@reported);
    }
    my ($rtime, $utime, $stime) = @seconds_for{'real', 'user', 'sys'};
    if ($rtime == 0)
    {
      # NOTE(review): realtime=0 is also the 'pending' marker, so this test
      # will be selected again; if the import keeps failing to report a
      # non-zero time the loop only ends via the exit.now file
      print STDERR 'Warning! No elapsed time recorded for replication=' . $replication . ', iteration=' . $iteration . " - test remains pending\n";
    }
    # encode quote characters so the output can sit inside an SQL string
    # literal
    $result =~ s/'/&apos;/g;
    $result =~ s/"/&quot;/g;
    $result =~ s/`/&#96;/g;
    print STDOUT '[DEBUG] result: |' . $result . "|\n" if ($debug);
    # 7. Write results to database
    sqliteExec($db_path, 'UPDATE tests SET timestamp=' . $timestamp . ', realtime=' . $rtime . ', usertime=' . $utime . ', systime=' . $stime . ' WHERE replication=' . $replication . ' AND iteration=' . $iteration);
    sqliteExec($db_path, "UPDATE testoutput SET output='" . $result . "' WHERE replication=" . $replication . " AND iteration=" . $iteration);
  }

  # Repeat until we have exhausted pending tests
  $test_count = sqliteGetValue($db_path, 'SELECT COUNT(*) FROM tests WHERE realtime=0');
}
149
# 8. Done.
# Remove the stop-request file, if present. The main loop refuses to start a
# test while it exists, so a leftover file would end the next run at once -
# hence a failed removal is reported rather than announced as done.
if (-f $exit_file_path)
{
  print STDOUT "   - Removing exit file... ";
  if (unlink($exit_file_path))
  {
    print STDOUT "Done!\n";
  }
  else
  {
    print STDOUT 'Failed! (' . $! . ")\n";
  }
}
print STDOUT "Complete!\n\n";
exit 0;
159
160
## @function sqliteExec()
#
# Execute an SQL statement for its effect on the database rather than for
# its result.
#
# @param $db_path path to the SQLite database file
# @param $sql     the statement to execute
# @return whatever sqliteGetValue() returns; callers are expected to ignore it
#
sub sqliteExec
{
  my $db_path = shift(@_);
  my $sql = shift(@_);
  # sqliteGetValue() does the actual work, and dies if sqlite3 reports an
  # error
  return sqliteGetValue($db_path, $sql);
}
## sqliteExec() ##
170
171
## @function sqliteGetValues()
#
# Fetch the column values of the first row produced by a query.
#
# @param $db_path path to the SQLite database file
# @param $sql     the query; ' LIMIT 1' is appended when it carries no LIMIT
#                 clause of its own
# @return the values of the first row as a list (empty when the query
#         produced no output). Values are separated on '|', so a value that
#         itself contains '|' or a newline cannot be recovered intact.
#
sub sqliteGetValues
{
  my ($db_path, $sql) = @_;
  # Only add a limit when the query has none. The earlier test, /LIMIT 1/,
  # also matched 'LIMIT 10' and appended a second LIMIT clause to queries
  # such as '... LIMIT 2', which is an SQL error.
  if ($sql !~ /\bLIMIT\s+\d+/i)
  {
    $sql .= ' LIMIT 1';
  }
  my $value = sqliteGetValue($db_path, $sql);
  # A caller-supplied limit may allow several rows (one per line); only the
  # first is wanted
  my ($first_row) = split(/\r?\n/, $value);
  $first_row = '' unless (defined $first_row);
  return split(/\|/, $first_row);
}
## sqliteGetValues() ##
185
186
## @function sqliteGetValue()
#
# Run one SQL statement through the sqlite3 command line client and return
# what it printed (standard output and standard error combined) with leading
# and trailing whitespace removed.
#
# sqlite3 is started directly, without a shell command line in between, so
# that characters such as $, backslash, backtick and double quote in $sql
# reach SQLite exactly as given instead of being interpreted by the shell.
#
# @param $db_path path to the SQLite database file
# @param $sql     the statement to run
# @return the trimmed output of sqlite3
# Dies when sqlite3 cannot be started, exits with a non-zero status, or
# prints text containing 'Error:'.
#
sub sqliteGetValue
{
  my ($db_path, $sql) = @_;
  # Forking open: the parent reads whatever the child writes to its STDOUT
  my $pid = open(my $sqlite_fh, '-|');
  if (!defined $pid)
  {
    die("Fatal Error!\nSQL:" . $sql . "\nMsg:Cannot fork for sqlite3: " . $! . "\n");
  }
  if ($pid == 0)
  {
    # Child: merge STDERR into the pipe (the equivalent of 2>&1), then
    # become sqlite3. Failures are reported through the pipe, prefixed with
    # 'Error:' so that the parent treats them as fatal.
    open(STDERR, '>&', \*STDOUT) or syswrite(STDOUT, 'Error: cannot redirect STDERR: ' . $! . "\n");
    exec('sqlite3', $db_path, $sql) or syswrite(STDOUT, 'Error: cannot execute sqlite3: ' . $! . "\n");
    # only reached when exec failed; leave without running the parent's
    # END blocks or flushing its buffers a second time
    require POSIX;
    POSIX::_exit(127);
  }
  my $result = do { local $/; <$sqlite_fh> };
  $result = '' unless (defined $result);
  # closing a piped handle waits for the child and sets $?
  close($sqlite_fh);
  my $wait_status = $?;
  # The exit status is checked as well as the text: NOTE(review) newer
  # sqlite3 shells appear to word failures as 'Parse error'/'Runtime error',
  # which the 'Error:' test alone would let through - confirm against the
  # installed version
  if ($wait_status != 0 || $result =~ /Error:/)
  {
    if ($result eq '')
    {
      $result = 'sqlite3 failed (wait status ' . $wait_status . ")\n";
    }
    die("Fatal Error!\nSQL:" . $sql . "\nMsg:" . $result);
  }
  # trim
  $result =~ s/^\s*|\s*$//g;
  return $result;
}
## sqliteGetValue() ##
202
203
## @function printUsage()
#
# Print an optional error message followed by the usage line on STDERR, then
# terminate the script. Does not return.
#
# @param $msg (optional) description of what was wrong with the arguments
#
# Exit status: 1 when an error message was given, so that calling scripts can
# detect the failure; 0 when only the usage text was requested.
#
sub printUsage
{
  my ($msg) = @_;
  # flush STDOUT so that anything already printed appears before the text
  # written to STDERR: autoflush is switched on for STDOUT (restoring the
  # previously selected handle each time), an empty print forces the flush,
  # then autoflush is switched off again
  select((select(STDOUT), $|=1)[0]);
  print STDOUT '';
  select((select(STDOUT), $|=0)[0]);
  # output any error message
  my $exit_status = 0;
  if (defined $msg)
  {
    print STDERR 'Error! ' . $msg . "\n";
    $exit_status = 1;
  }
  # and finally the usage
  print STDERR "Usage: replication_tests.pl <str:collection> <int:max replication> <int:iterations>\n";
  print STDERR "\n";
  exit($exit_status);
}
## printUsage() ##
224
2251;
Note: See TracBrowser for help on using the browser.