#!/usr/bin/perl

use strict;
use warnings;

use List::Util qw(sum);
use Sort::Key::Natural qw(natsort);

# Builds a data-locality report.
#
# Reads <base_dir>/<filename>.csv (filename defaults to 'replication' and may
# be overridden by the first command-line argument). Every row that starts
# with three comma-separated integers (replication, test run, epoch) points
# at a gantt chart, <base_dir>/<filename>/<epoch>/<epoch>-gantt.html, from
# which the average processing time, average file IO time and data locality
# percentage are extracted. Results are grouped by replication value and
# summarised (mean, median, standard deviation, mean +/- 2 standard
# deviations) before the report is printed.

# Results keyed by replication value; each entry holds a run count plus
# parallel lists of epochs, IO times, processing times and localities.
my $data = {};
my $base_dir = '/research/jmt12/temp';
my $filename = 'replication';
if (defined $ARGV[0]) {
    $filename = $ARGV[0];
}
my $path = $base_dir . '/' . $filename . '.csv';
if (!-f $path) {
    die('File not found: ' . $path);
}

# Report the path that was actually attempted, and the OS error, rather than
# a hard-coded file name.
open(my $fin, '<:utf8', $path)
    or die('Error! Failed to open file for reading: ' . $path . ' (' . $! . ')');

while (my $line = <$fin>) {
    print STDERR '[debug] line: ' . $line . "\n";

    # Rows that do not start with three integers (e.g. a header) are skipped.
    # The second column (test run) is matched but not needed here.
    next unless $line =~ /^(\d+),(\d+),(\d+)/;
    my ($replication, $epoch) = ($1, $3);

    my $avgtime = '???';
    my $avgiotime = '???';
    my $dl = '???';

    # Locate gantt chart
    my $gantt_path = $base_dir . '/' . $filename . '/' . $epoch . '/' . $epoch . '-gantt.html';
    print STDERR ' * Searching for: ' . $gantt_path . "\n";
    if (open(my $gin, '<:utf8', $gantt_path)) {
        # The chart is scanned with its own loop variable so the CSV line
        # held by the outer loop is not overwritten.
        # NOTE(review): the optional "<td ...>" group lets each pattern match
        # whether or not a cell-opening tag sits between the header cell and
        # the value - confirm against a real gantt chart file.
        while (my $chart_line = <$gin>) {
            if ($chart_line =~ m{Average Processing Time:</th>(?:<td[^>]*>)?([0-9hms]+)</td>}) {
                $avgtime = parseTime($1);
            }
            if ($chart_line =~ m{Average File IO Time:</th>(?:<td[^>]*>)?([0-9hms]+)</td>}) {
                $avgiotime = parseTime($1);
            }
            if ($chart_line =~ m{Data Locality:</th>(?:<td[^>]*>)?(\d+)%}) {
                $dl = $1;
            }
        }
        close($gin);
    }
    else {
        print STDERR 'Warning! Failed to find chart: ' . $gantt_path . "\n";
    }

    # Only the processing time is mandatory; a chart lacking the IO time or
    # locality rows still contributes its '???' placeholders below.
    if ($avgtime eq '???') {
        die("Failed to parse timing information from: " . $gantt_path);
    }

    # Store for averaging
    if (!defined $data->{$replication}) {
        $data->{$replication} = {
            'count'  => 0,
            'epochs' => [],
            'ios'    => [],
            'times'  => [],
            'dls'    => [],
        };
    }
    my $entry = $data->{$replication};
    $entry->{'count'}++;
    push(@{$entry->{'epochs'}}, $epoch);
    push(@{$entry->{'ios'}}, $avgiotime);
    push(@{$entry->{'times'}}, $avgtime);
    push(@{$entry->{'dls'}}, $dl);
}
close($fin);

# Perform some calculations
# Maps the statistic prefix used in the result keys to the list it summarises.
my %values_for = ('pt' => 'times', 'io' => 'ios', 'dl' => 'dls');
foreach my $replication (natsort keys %{$data}) {
    my $stats = $data->{$replication};
    foreach my $prefix (keys %values_for) {
        my $values = $stats->{$values_for{$prefix}};
        my $mean = calculateMean($values);
        my $stddev = calculateStandardDeviation($values, $mean);

        # Bounds are two standard deviations either side of the mean.
        my $radius = 2 * $stddev;
        my $lbound = $mean - $radius;
        my $ubound = $mean + $radius;

        # Special cases for percentages, which can't be less than 0 nor
        # greater than 100
        if ($prefix eq 'dl') {
            if ($lbound < 0) {
                $lbound = 0;
            }
            if ($ubound > 100) {
                $ubound = 100;
            }
        }

        $stats->{$prefix . '_mean'} = $mean;
        $stats->{$prefix . '_median'} = calculateMedian($values);
        $stats->{$prefix . '_stddev'} = $stddev;
        $stats->{$prefix . '_lbound'} = $lbound;
        $stats->{$prefix . '_ubound'} = $ubound;
    }
}

print ' ';
print '

Data Locality Report

'; print ''; print '

Raw Data

';
# Emit one raw-data row per recorded test run, replication by replication in
# natural sort order. The per-run lists are parallel, so the same index
# selects the epoch, IO time, processing time and locality of a run.
# NOTE(review): the sprintf format string is empty although twelve values are
# passed; the row markup appears to have been lost from this copy of the
# script - restore it from the original before relying on this output.
foreach my $replication (natsort keys %{$data}) {
    my $runs = $data->{$replication};
    foreach my $test_run (0 .. $runs->{'count'} - 1) {
        my $epoch = $runs->{'epochs'}->[$test_run];
        my $avgiotime = $runs->{'ios'}->[$test_run];
        my $avgtime = $runs->{'times'}->[$test_run];
        my $dl = $runs->{'dls'}->[$test_run];
        # The tenth argument is the processing time minus the IO time.
        print sprintf('', $replication, $test_run, $replication, $replication, $filename, $epoch, $epoch, $epoch, $avgiotime, ($avgtime - $avgiotime), $avgtime, $dl) . "\n";
    }
}
print '
Replication Epoch Avg Per File DataLocality
IOCPUTotal
%2d%d%4d%4d%4d%3d%%
'; print 'back to top
'; print '

Averaged

';
print '';
# NOTE(review): the string literals printed in this section are empty; they
# look like table markup that was lost from this copy of the script. They are
# kept as-is so the original tags can be put back in place.
# One (currently empty) fragment is printed for each of the three statistic
# groups rendered below.
foreach my $group (1 .. 3) {
    print '';
}
print '';
foreach my $replication (natsort keys %{$data}) {
    # NOTE(review): $count is not referenced again inside this loop as the
    # code stands - presumably it belongs in the row's leading cells; confirm
    # against the original script.
    my $count = $data->{$replication}->{'count'};
    print '';
    # Processing Time (pt)
    print renderStatisticsAsHTML($data->{$replication}, 'pt');
    # IO Time (io)
    print renderStatisticsAsHTML($data->{$replication}, 'io');
    # Data Locality Percentages (dl)
    print renderStatisticsAsHTML($data->{$replication}, 'dl', '%');
    print "\n";
}
print '
ReplicationCountProcessing Time (s)IO Time (s)Data Locality (%)
MedianMeanStdDevLBoundUBound
' . $replication . '' . $count . '
'; print 'back to top'; print ''; exit;

## @function calculateMean()
#
# Returns the arithmetic mean of the numbers in the referenced array.
#
# @param  $data  reference to an array of numbers
# @return the sum of the values divided by their count
# Dies with "Empty array\n" when the array has no elements.
#
sub calculateMean
{
    my ($data) = @_;
    my $count = scalar(@{$data});
    if ($count == 0) {
        die("Empty array\n");
    }
    my $total = 0;
    $total += $_ foreach @{$data};
    return $total / $count;
}
## calculateMean() ##

## @function calculateMedian()
#
# Returns the median of the numbers in the referenced array: the middle
# value of the numerically sorted list, or the mean of the two middle
# values when the list has an even number of elements.
#
# @param  $data  reference to an array of numbers (left unmodified)
# @return the median value
# Dies with "Empty array\n" when the array has no elements, matching
# calculateMean(), instead of reading undefined elements.
#
sub calculateMedian
{
    my ($data) = @_;
    my @vals = sort { $a <=> $b } @{$data};
    my $len = scalar(@vals);
    if ($len == 0) {
        die("Empty array\n");
    }
    my $middle = int($len / 2);
    # Odd length: a single middle element exists.
    if ($len % 2) {
        return $vals[$middle];
    }
    # Even length: average the two elements either side of the midpoint.
    return ($vals[$middle - 1] + $vals[$middle]) / 2;
}
## calculateMedian() ##

## @function calculateStandardDeviation()
#
# Returns the sample standard deviation (divisor count - 1) of the numbers
# in the referenced array.
#
# @param  $data     reference to an array of numbers
# @param  $average  optional precomputed mean; calculated when undefined
# @return the standard deviation, or 0 for a single-element array
# Dies with "Empty array\n" when the array has no elements, so the
# (count - 1) divisor can never be negative.
#
sub calculateStandardDeviation
{
    my ($data, $average) = @_;
    my $count = scalar(@{$data});
    if ($count == 0) {
        die("Empty array\n");
    }
    if ($count == 1) {
        return 0;
    }
    if (!defined $average) {
        $average = calculateMean($data);
    }
    my $sqtotal = 0;
    foreach my $value (@{$data}) {
        $sqtotal += ($average - $value) ** 2;
    }
    return ($sqtotal / ($count - 1)) ** 0.5;
}
## calculateStandardDeviation() ##

## @function parseTime()
#
# Converts a duration string such as '1h2m3s' into a number of seconds.
# Each unit is optional; for every unit the first run of digits directly
# followed by that unit letter is used. A string with no recognised unit
# yields 0.
#
# @param  $raw_time_str  duration string built from digits and h/m/s
# @return the duration in seconds
#
sub parseTime
{
    my ($raw_time_str) = @_;
    my %seconds_per = ('h' => 60 * 60, 'm' => 60, 's' => 1);
    my $time_in_seconds = 0;
    foreach my $unit ('h', 'm', 's') {
        if ($raw_time_str =~ /(\d+)$unit/) {
            $time_in_seconds += $1 * $seconds_per{$unit};
        }
    }
    return $time_in_seconds;
}
## parseTime() ##

## @function renderStatisticsAsHTML
#
# Renders the five summary statistics stored under "<prefix>_median",
# "<prefix>_mean", "<prefix>_stddev", "<prefix>_lbound" and
# "<prefix>_ubound" as one string, in that order. The median is emitted
# as stored; the other four are formatted with two decimal places. The
# suffix is appended to every value.
#
# @param  $data    hash reference holding the statistics
# @param  $prefix  key prefix selecting which statistics to render
# @param  $suffix  optional text appended to each value (defaults to '')
# @return the rendered string
#
# NOTE(review): every value is wrapped in empty string literals, which
# looks like table-cell markup lost from this copy of the script. The
# literals are kept so the tags can be restored in place - confirm
# against the original.
#
sub renderStatisticsAsHTML
{
    my ($data, $prefix, $suffix) = @_;
    if (!defined $suffix) {
        $suffix = '';
    }
    my $html = '';
    $html .= '' . $data->{$prefix . '_median'} . $suffix . '';
    foreach my $statistic ('_mean', '_stddev', '_lbound', '_ubound') {
        $html .= '' . sprintf('%0.2f', $data->{$prefix . $statistic}) . $suffix . '';
    }
    return $html;
}
## renderStatisticsAsHTML() ##