[28645] | 1 | #!/usr/bin/perl
|
---|
| 2 |
|
---|
| 3 | use strict;
|
---|
| 4 | use warnings;
|
---|
| 5 |
|
---|
| 6 | use List::Util qw(sum);
|
---|
| 7 | use Sort::Key::Natural qw(natsort);
|
---|
| 8 |
|
---|
# Per-replication results, keyed by the value of the first CSV column. Each
# entry holds a run count plus four parallel arrays (epochs, IO times,
# processing times and data locality percentages), one element per test run.
my $data = {};
my $base_dir = '/research/jmt12/temp';
# Base name shared by the CSV file and the directory holding the per-epoch
# gantt charts; the first command line argument overrides the default.
my $filename = 'replication';
if (defined $ARGV[0])
{
  $filename = $ARGV[0];
}
my $path = $base_dir . '/' . $filename . '.csv';
if (!-f $path)
{
  die('File not found: ' . $path);
}

# Lexical filehandles are used so the handles are scoped to the blocks that
# read from them and cannot collide with other package-level names.
if (open(my $fin, '<:utf8', $path))
{
  while (my $line = <$fin>)
  {
    # Strip the line terminator so the debug output is one line per record
    chomp($line);
    print STDERR '[debug] line: ' . $line . "\n";
    # Only rows starting "<replication>,<test run>,<epoch>" are processed;
    # the test run column is matched but not needed afterwards.
    if ($line =~ /^(\d+),(\d+),(\d+)/)
    {
      my $replication = $1;
      my $epoch = $3;
      # '???' marks a value that has not (yet) been found in the gantt chart
      my $avgtime = '???';
      my $avgiotime = '???';
      my $dl = '???';
      # Locate gantt chart
      my $gantt_path = $base_dir . '/' . $filename . '/' . $epoch . '/' . $epoch . '-gantt.html';
      print STDERR ' * Searching for: ' . $gantt_path . "\n";
      if (open(my $gin, '<:utf8', $gantt_path))
      {
        # A separate loop variable keeps the CSV line above intact
        while (my $gantt_line = <$gin>)
        {
          if ($gantt_line =~ /<th>Average Processing Time:<\/th><td>([0-9hms]+)<\/td>/)
          {
            $avgtime = parseTime($1);
          }
          if ($gantt_line =~ /<th>Average File IO Time:<\/th><td>([0-9hms]+)<\/td>/)
          {
            $avgiotime = parseTime($1);
          }
          if ($gantt_line =~ /<th>Data Locality:<\/th><td>(\d+)%/)
          {
            $dl = $1;
          }
        }
        close($gin);
      }
      else
      {
        print STDERR 'Warning! Failed to find chart: ' . $gantt_path . "\n";
      }

      # NOTE(review): only the processing time is verified here; an IO time
      # or data locality value that was not found stays as '???' and is
      # stored as-is - confirm whether that is intended.
      if ($avgtime eq '???')
      {
        die("Failed to parse timing information from: " . $gantt_path);
      }

      # Store for averaging
      if (!defined $data->{$replication})
      {
        $data->{$replication} = {'count' => 0,
                                 'epochs' => [],
                                 'ios' => [],
                                 'times' => [],
                                 'dls' => []
                                };
      }
      $data->{$replication}->{'count'}++;
      push(@{$data->{$replication}->{'epochs'}}, $epoch);
      push(@{$data->{$replication}->{'ios'}}, $avgiotime);
      push(@{$data->{$replication}->{'times'}}, $avgtime);
      push(@{$data->{$replication}->{'dls'}}, $dl);
    }
  }
  close($fin);
}
else
{
  # Report the file that was actually attempted, and why the open failed
  die('Error! Failed to open file for reading: ' . $path . ' (' . $! . ')');
}
| 92 |
|
---|
# Derive the summary statistics for each replication. For processing time
# (pt), IO time (io) and data locality (dl) this stores the mean, median and
# standard deviation of the collected values, plus lower/upper bounds placed
# two standard deviations either side of the mean.
foreach my $replication (natsort keys %{$data})
{
  my $results = $data->{$replication};
  # Maps the statistic prefix onto the name of the array holding its samples
  my $variables = {'pt' => 'times', 'io' => 'ios', 'dl' => 'dls'};
  foreach my $prefix (keys %{$variables})
  {
    my $samples = $results->{$variables->{$prefix}};
    my $mean = calculateMean($samples);
    $results->{$prefix . '_mean'} = $mean;
    $results->{$prefix . '_median'} = calculateMedian($samples);
    my $stddev = calculateStandardDeviation($samples, $mean);
    $results->{$prefix . '_stddev'} = $stddev;
    my $radius = 2 * $stddev;
    my $lower = $mean - $radius;
    my $upper = $mean + $radius;
    # Special cases for percentages, which can't be less than 0 nor greater than 100
    if ($prefix eq 'dl')
    {
      $lower = 0 if ($lower < 0);
      $upper = 100 if ($upper > 100);
    }
    $results->{$prefix . '_lbound'} = $lower;
    $results->{$prefix . '_ubound'} = $upper;
  }
}
| 120 |
|
---|
# Write the report to standard output as a single HTML page: a stylesheet,
# a table of the raw per-run values, then a table of the derived statistics.
print '<html>
<head>
<style>
table {
border:1px solid black;
border-collapse:collapse;
margin-left:auto;
margin-right:auto;
width:80%;
}
td {
border:1px solid black;
padding:2px;
text-align:right;
}
th {
border:1px solid black;
background-color:#C7C7C7;
}
</style>
</head>
<body>';

print '<h1>Data Locality Report</h1>';

print '<ul><li><a href="#raw">Raw Data</a></li><li><a href="#averaged">Averaged</a></li></ul>';

print '<h2><a name="raw"></a>Raw Data</h2>
<table>
<tr>
<th rowspan="2">Replication</th>
<th rowspan="2">Epoch</th>
<th colspan="3">Avg Per File</th>
<th rowspan="2">DataLocality</th>
</tr>
<tr>
<th>IO</th><th>CPU</th><th>Total</th>
</tr>
';
# One row per recorded test run. The CPU column is the average processing
# time minus the average IO time; the epoch links to that run's gantt chart.
foreach my $replication (natsort keys %{$data})
{
  my $results = $data->{$replication};
  for (my $test_run = 0; $test_run < $results->{'count'}; $test_run++)
  {
    my $epoch = $results->{'epochs'}->[$test_run];
    my $avgiotime = $results->{'ios'}->[$test_run];
    my $avgtime = $results->{'times'}->[$test_run];
    my $dl = $results->{'dls'}->[$test_run];
    printf('<tr><th><a name="result%d.%d" href="#avg%d">%2d</a></th><td><a href="%s/%d/%d-gantt.html">%d</a></td><td>%4d</td><td>%4d</td><td>%4d</td><td>%3d%%</td></tr>' . "\n", $replication, $test_run, $replication, $replication, $filename, $epoch, $epoch, $epoch, $avgiotime, ($avgtime - $avgiotime), $avgtime, $dl);
  }
}
print '</table>';
print '<a href="#">back to top</a><br />';


print '<h2><a name="averaged"></a>Averaged</h2>';
print '<table><tr><th rowspan="2">Replication</th><th rowspan="2">Count</th><th colspan="5">Processing Time (s)</th><th colspan="5">IO Time (s)</th><th colspan="5">Data Locality (%)</th></tr>
<tr>';
# The same five sub-headings repeat under each of the three column groups
for (my $i = 0; $i < 3; $i++)
{
  print '<th>Median</th><th>Mean</th><th>StdDev</th><th>LBound</th><th>UBound</th>';
}
print '</tr>';
# One row per replication, linking back to its first row in the raw table
foreach my $replication (natsort keys %{$data})
{
  my $count = $data->{$replication}->{'count'};
  print '<tr><th><a name="avg' . $replication . '" href="#result' . $replication . '.0">' . $replication . '</a></th><td>' . $count . '</td>';
  # Processing Time (pt)
  print renderStatisticsAsHTML($data->{$replication}, 'pt');
  # IO Time (io)
  print renderStatisticsAsHTML($data->{$replication}, 'io');
  # Data Locality Percentages (dl)
  print renderStatisticsAsHTML($data->{$replication}, 'dl', '%');
  print "</tr>\n";
}
print '</table>';
print '<a href="#">back to top</a>';
# Close the body element opened with the page header before closing the page
print '</body>';
print '</html>';

exit;
| 202 |
|
---|
## @function calculateMean()
#
# Returns the arithmetic mean of a list of numbers.
#
# @param  $data  reference to an array of numeric values
# @return the sum of the values divided by the number of values
# Dies with "Empty array\n" when the array contains no elements.
sub calculateMean
{
  my ($values) = @_;
  my $how_many = scalar(@{$values});
  die("Empty array\n") if ($how_many == 0);
  my $running_total = 0;
  $running_total += $_ for @{$values};
  return $running_total / $how_many;
}
## calculateMean() ##
| 221 |
|
---|
## @function calculateMedian()
#
# Returns the median of a list of numbers: the middle value of the
# numerically sorted list, or the mean of the two middle values when the
# list has an even number of elements. The caller's array is not modified.
#
# @param  $data  reference to an array of numeric values
# @return the median value
# Dies with "Empty array\n" when the array contains no elements (the same
# behaviour as calculateMean), rather than doing arithmetic on undefined
# values.
sub calculateMedian
{
  my ($data) = @_;
  my @vals = sort {$a <=> $b} @{$data};
  my $len = scalar(@vals);
  if ($len == 0)
  {
    die("Empty array\n");
  }
  my $middle = int($len / 2);
  if ($len % 2) #odd?
  {
    return $vals[$middle];
  }
  else #even
  {
    return ($vals[$middle - 1] + $vals[$middle]) / 2;
  }
}
## calculateMedian() ##
| 238 |
|
---|
## @function calculateStandardDeviation()
#
# Returns the sample standard deviation of a list of numbers, i.e. the
# square root of the summed squared differences from the mean divided by
# (count - 1).
#
# @param  $data     reference to an array of numeric values
# @param  $average  (optional) the mean of the values; when not supplied it
#                   is obtained from calculateMean()
# @return the standard deviation, or 0 when there is exactly one value
sub calculateStandardDeviation
{
  my ($values, $mean) = @_;
  my $how_many = scalar(@{$values});
  # A single sample has no spread (and would otherwise divide by zero)
  return 0 if ($how_many == 1);
  $mean = calculateMean($values) unless (defined $mean);
  my $squared_differences = 0;
  foreach my $value (@{$values})
  {
    $squared_differences += ($mean - $value) ** 2;
  }
  return ($squared_differences / ($how_many - 1)) ** 0.5;
}
## calculateStandardDeviation() ##
| 261 |
|
---|
## @function parseTime()
#
# Converts a duration string built from hour, minute and second components
# (digits followed by 'h', 'm' or 's', e.g. "1h2m3s") into seconds. Each
# component is optional; a component that is absent contributes nothing.
#
# @param  $raw_time_str  the duration string
# @return the duration in seconds
sub parseTime
{
  my ($raw_time_str) = @_;
  # Each match is independent; a capture is undefined when the unit is absent
  my ($hours) = $raw_time_str =~ /(\d+)h/;
  my ($minutes) = $raw_time_str =~ /(\d+)m/;
  my ($seconds) = $raw_time_str =~ /(\d+)s/;
  my $total = 0;
  $total += $hours * 60 * 60 if (defined $hours);
  $total += $minutes * 60 if (defined $minutes);
  $total += $seconds if (defined $seconds);
  return $total;
}
## parseTime() ##
| 280 |
|
---|
## @function renderStatisticsAsHTML
#
# Builds the five table cells (median, mean, standard deviation, lower
# bound, upper bound) for one group of statistics. The median is output as
# stored; the other four values are formatted to two decimal places.
#
# @param  $data    reference to a hash holding '<prefix>_median',
#                  '<prefix>_mean', '<prefix>_stddev', '<prefix>_lbound' and
#                  '<prefix>_ubound' entries
# @param  $prefix  the statistic prefix used to build the hash keys
# @param  $suffix  (optional) text appended to every value; defaults to ''
# @return a string of five consecutive <td> elements
sub renderStatisticsAsHTML
{
  my ($data, $prefix, $suffix) = @_;
  $suffix = '' unless (defined $suffix);
  my @cells = ($data->{$prefix . '_median'});
  foreach my $statistic ('mean', 'stddev', 'lbound', 'ubound')
  {
    push(@cells, sprintf('%0.2f', $data->{$prefix . '_' . $statistic}));
  }
  return join('', map { '<td>' . $_ . $suffix . '</td>' } @cells);
}
## renderStatisticsAsHTML() ##