1 | #!/usr/bin/perl
|
---|
2 |
|
---|
3 | use strict;
|
---|
4 | use warnings;
|
---|
5 |
|
---|
6 | use List::Util qw(sum);
|
---|
7 | use Sort::Key::Natural qw(natsort);
|
---|
8 |
|
---|
# Accumulator filled while reading the CSV: keyed by the value of the
# first CSV column ("replication"), each entry holds the per-run samples
# and, later, the statistics derived from them.
my $data = {};

# Directory that holds both the CSV and the per-epoch chart directories.
my $base_dir = '/research/jmt12/temp';

# Base name of the CSV (without extension); the first command-line
# argument overrides the default.
my $filename = defined $ARGV[0] ? $ARGV[0] : 'replication';

my $path = join('/', $base_dir, $filename . '.csv');

# Stop early when the CSV is not a plain file.
die('File not found: ' . $path) unless -f $path;
|
---|
21 |
|
---|
# Load the run index and scrape the per-run statistics.
#
# Every CSV line that starts with three comma-separated integers is read
# as (replication, test run, epoch). For each such line the matching chart
# at <base_dir>/<filename>/<epoch>/<epoch>-gantt.html is scanned for the
# average processing time, average file IO time and data locality, and the
# values are appended to the per-replication sample lists in $data.
#
# Dies when the CSV cannot be opened or when a chart yields no processing
# time (which includes the chart being unreadable).
open(my $csv_fh, '<:utf8', $path)
    or die('Error! Failed to open file for reading: ' . $path . ' (' . $! . ')');
while (my $line = <$csv_fh>)
{
    # Drop the trailing newline so the debug trace is not double-spaced.
    chomp($line);
    print STDERR '[debug] line: ' . $line . "\n";
    next unless $line =~ /^(\d+),(\d+),(\d+)/;

    # The second field (test run) is not stored: the report numbers runs
    # by their position within each replication instead.
    my ($replication, $epoch) = ($1, $3);

    # '???' marks "not found in the chart".
    my $avgtime = '???';
    my $avgiotime = '???';
    my $dl = '???';

    # Locate gantt chart
    my $gantt_path = $base_dir . '/' . $filename . '/' . $epoch . '/' . $epoch . '-gantt.html';
    print STDERR ' * Searching for: ' . $gantt_path . "\n";
    if (open(my $gantt_fh, '<:utf8', $gantt_path))
    {
        # A dedicated variable keeps the chart scan from overwriting the
        # CSV line being processed.
        while (my $chart_line = <$gantt_fh>)
        {
            if ($chart_line =~ /<th>Average Processing Time:<\/th><td>([0-9hms]+)<\/td>/)
            {
                $avgtime = parseTime($1);
            }
            if ($chart_line =~ /<th>Average File IO Time:<\/th><td>([0-9hms]+)<\/td>/)
            {
                $avgiotime = parseTime($1);
            }
            if ($chart_line =~ /<th>Data Locality:<\/th><td>(\d+)%/)
            {
                $dl = $1;
            }
        }
        close($gantt_fh);
    }
    else
    {
        print STDERR 'Warning! Failed to find chart: ' . $gantt_path . "\n";
    }

    # The processing time is mandatory.
    if ($avgtime eq '???')
    {
        die("Failed to parse timing information from: " . $gantt_path);
    }

    # The run is still recorded without an IO time or locality value, but
    # say so: the '???' placeholder is pushed as-is and the numeric
    # statistics computed later will treat it as 0.
    if ($avgiotime eq '???')
    {
        print STDERR 'Warning! No average file IO time found in: ' . $gantt_path . "\n";
    }
    if ($dl eq '???')
    {
        print STDERR 'Warning! No data locality found in: ' . $gantt_path . "\n";
    }

    # Store for averaging
    $data->{$replication} ||= {'count' => 0,
                               'epochs' => [],
                               'ios' => [],
                               'times' => [],
                               'dls' => []
                              };
    my $samples = $data->{$replication};
    $samples->{'count'}++;
    push(@{$samples->{'epochs'}}, $epoch);
    push(@{$samples->{'ios'}}, $avgiotime);
    push(@{$samples->{'times'}}, $avgtime);
    push(@{$samples->{'dls'}}, $dl);
}
close($csv_fh);
|
---|
92 |
|
---|
# Derive summary statistics for each replication.
#
# For every sample list (times => 'pt', ios => 'io', dls => 'dl') the
# mean, median and sample standard deviation are stored back into the
# replication's entry under <prefix>_mean, <prefix>_median and
# <prefix>_stddev, together with <prefix>_lbound / <prefix>_ubound, which
# are the mean minus / plus two standard deviations.
for my $replication (natsort keys %{$data})
{
    my $entry = $data->{$replication};
    my %samples_key_for = ('pt' => 'times', 'io' => 'ios', 'dl' => 'dls');
    for my $prefix (keys %samples_key_for)
    {
        my $samples = $entry->{$samples_key_for{$prefix}};

        my $mean = calculateMean($samples);
        my $median = calculateMedian($samples);
        my $stddev = calculateStandardDeviation($samples, $mean);

        my $radius = 2 * $stddev;
        my $lbound = $mean - $radius;
        my $ubound = $mean + $radius;

        # Special cases for percentages, which can't be less than 0 nor greater than 100
        if ($prefix eq 'dl')
        {
            $lbound = 0 if $lbound < 0;
            $ubound = 100 if $ubound > 100;
        }

        $entry->{$prefix . '_mean'} = $mean;
        $entry->{$prefix . '_median'} = $median;
        $entry->{$prefix . '_stddev'} = $stddev;
        $entry->{$prefix . '_lbound'} = $lbound;
        $entry->{$prefix . '_ubound'} = $ubound;
    }
}
|
---|
120 |
|
---|
# Write the HTML report to STDOUT: a page header with inline CSS, a
# "Raw Data" table with one row per recorded run, and an "Averaged" table
# with one row of statistics per replication.
print '<html>
<head>
<style>
table {
border:1px solid black;
border-collapse:collapse;
margin-left:auto;
margin-right:auto;
width:80%;
}
td {
border:1px solid black;
padding:2px;
text-align:right;
}
th {
border:1px solid black;
background-color:#C7C7C7;
}
</style>
</head>
<body>';

print '<h1>Data Locality Report</h1>';

print '<ul><li><a href="#raw">Raw Data</a></li><li><a href="#averaged">Averaged</a></li></ul>';

# Raw data: the CPU column is the total average time minus the IO time.
print '<h2><a name="raw"></a>Raw Data</h2>
<table>
<tr>
<th rowspan="2">Replication</th>
<th rowspan="2">Epoch</th>
<th colspan="3">Avg Per File</th>
<th rowspan="2">DataLocality</th>
</tr>
<tr>
<th>IO</th><th>CPU</th><th>Total</th>
</tr>
';
foreach my $replication (natsort keys %{$data})
{
    my $runs = $data->{$replication};
    # Runs are numbered by their position in the sample lists; that index
    # also forms the "resultR.N" anchor the averaged table links back to.
    foreach my $test_run (0 .. $runs->{'count'} - 1)
    {
        my $epoch = $runs->{'epochs'}->[$test_run];
        my $avgiotime = $runs->{'ios'}->[$test_run];
        my $avgtime = $runs->{'times'}->[$test_run];
        my $dl = $runs->{'dls'}->[$test_run];
        printf('<tr><th><a name="result%d.%d" href="#avg%d">%2d</a></th><td><a href="%s/%d/%d-gantt.html">%d</a></td><td>%4d</td><td>%4d</td><td>%4d</td><td>%3d%%</td></tr>' . "\n",
               $replication, $test_run, $replication, $replication,
               $filename, $epoch, $epoch, $epoch,
               $avgiotime, ($avgtime - $avgiotime), $avgtime, $dl);
    }
}
print '</table>';
print '<a href="#">back to top</a><br />';


# Averaged: the same five statistics for each of the three measures.
print '<h2><a name="averaged"></a>Averaged</h2>';
print '<table><tr><th rowspan="2">Replication</th><th rowspan="2">Count</th><th colspan="5">Processing Time (s)</th><th colspan="5">IO Time (s)</th><th colspan="5">Data Locality (%)</th></tr>
<tr>';
foreach (1 .. 3)
{
    print '<th>Median</th><th>Mean</th><th>StdDev</th><th>LBound</th><th>UBound</th>';
}
print '</tr>';
foreach my $replication (natsort keys %{$data})
{
    my $count = $data->{$replication}->{'count'};
    print '<tr><th><a name="avg' . $replication . '" href="#result' . $replication . '.0">' . $replication . '</a></th><td>' . $count . '</td>';
    # Processing Time (pt)
    print renderStatisticsAsHTML($data->{$replication}, 'pt');
    # IO Time (io)
    print renderStatisticsAsHTML($data->{$replication}, 'io');
    # Data Locality Percentages (dl)
    print renderStatisticsAsHTML($data->{$replication}, 'dl', '%');
    print "</tr>\n";
}
print '</table>';
print '<a href="#">back to top</a>';
# Close the body element opened in the page header before ending the document.
print '</body></html>';

exit;
|
---|
202 |
|
---|
## @function calculateMean()
# Arithmetic mean of a list of numbers.
# @param $data reference to an array of numeric values
# @return the sum of the values divided by the number of values
# Dies with "Empty array\n" when the array holds no elements.
sub calculateMean
{
    my ($data) = @_;
    my $how_many = scalar(@{$data});
    die("Empty array\n") unless $how_many != 0;

    my $running_sum = 0;
    for my $value (@{$data})
    {
        $running_sum += $value;
    }
    return $running_sum / $how_many;
}
|
---|
220 | ## calculateMean() ##
|
---|
221 |
|
---|
## @function calculateMedian()
# Median of a list of numbers.
# @param $data reference to an array of numeric values (left unmodified;
#              a sorted copy is used)
# @return the middle value for an odd number of values, or the average of
#         the two middle values for an even number
# Dies with "Empty array\n" when the array holds no elements, matching
# calculateMean(), instead of returning 0 computed from undefined values.
sub calculateMedian
{
    my ($data) = @_;
    my @vals = sort {$a <=> $b} @{$data};
    my $len = scalar(@vals);
    if ($len == 0)
    {
        die("Empty array\n");
    }
    my $middle = int($len / 2);
    if ($len % 2) # odd: a single middle element
    {
        return $vals[$middle];
    }
    # even: average the two elements around the midpoint
    return ($vals[$middle - 1] + $vals[$middle]) / 2;
}
|
---|
237 | ## calculateMedian() ##
|
---|
238 |
|
---|
## @function calculateStandardDeviation()
# Sample standard deviation (divisor is count - 1) of a list of numbers.
# @param $data    reference to an array of numeric values
# @param $average optional precomputed mean of the values; when undefined
#                 it is obtained from calculateMean()
# @return 0 for a single-element list, otherwise the square root of the
#         summed squared deviations divided by (count - 1)
sub calculateStandardDeviation
{
    my ($data, $average) = @_;
    my $count = scalar(@{$data});

    # One sample has no spread (and would divide by zero below).
    return 0 if $count == 1;

    $average = calculateMean($data) unless defined $average;

    my $sqtotal = 0;
    $sqtotal += ($average - $_) ** 2 for @{$data};

    return ($sqtotal / ($count - 1)) ** 0.5;
}
|
---|
260 | ## calculateStandardDeviation() ##
|
---|
261 |
|
---|
## @function parseTime()
# Convert a duration string into a number of seconds.
# @param $raw_time_str text in which a number followed by 'h', 'm' or 's'
#                      gives hours, minutes or seconds; each unit is
#                      looked up independently and only its first
#                      occurrence is used
# @return the total in seconds (0 when no unit is present)
sub parseTime
{
    my ($raw_time_str) = @_;

    # A failed match in list context leaves the variable undefined.
    my ($hours) = $raw_time_str =~ /(\d+)h/;
    my ($minutes) = $raw_time_str =~ /(\d+)m/;
    my ($seconds) = $raw_time_str =~ /(\d+)s/;

    my $time_in_seconds = 0;
    $time_in_seconds += $hours * 60 * 60 if defined $hours;
    $time_in_seconds += $minutes * 60 if defined $minutes;
    $time_in_seconds += $seconds if defined $seconds;
    return $time_in_seconds;
}
|
---|
280 |
|
---|
## @function renderStatisticsAsHTML
# Build the five table cells for one measure of one replication.
# @param $data   reference to a hash holding <prefix>_median, <prefix>_mean,
#                <prefix>_stddev, <prefix>_lbound and <prefix>_ubound
# @param $prefix which measure's keys to read (e.g. 'pt', 'io', 'dl')
# @param $suffix optional text appended inside every cell; defaults to ''
# @return a string of five <td> elements in the order median, mean,
#         stddev, lbound, ubound; the median is printed as stored, the
#         others with two decimal places
sub renderStatisticsAsHTML
{
    my ($data, $prefix, $suffix) = @_;
    $suffix = '' unless defined $suffix;

    my @cell_texts = ($data->{$prefix . '_median'});
    foreach my $statistic ('_mean', '_stddev', '_lbound', '_ubound')
    {
        push(@cell_texts, sprintf('%0.2f', $data->{$prefix . $statistic}));
    }

    return join('', map { '<td>' . $_ . $suffix . '</td>' } @cell_texts);
}
|
---|
297 | ## renderStatisticsAsHTML() ##
|
---|