Ignore:
Timestamp:
2013-07-04T13:44:22+12:00 (11 years ago)
Author:
jmt12
Message:

A missing data locality file is no longer fatal (HDFS-NFS-Proxy doesn't do locality very well). Also added code to determine the percentage of each file that HandbrakeCLI thinks it has parsed.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl

    r27684 r27752  
    8989
    9090# Read in data_locality.csv (will be matched to task logs)
    91 print ' * Reading and parsing "data_locality.csv"... ';
    9291my $data_was_local = {};
    9392my $data_locality_csv_path = $results_path . '/data_locality.csv';
    94 if (open(DLIN, '<:utf8', $data_locality_csv_path))
    95 {
    96   while (my $line = <DLIN>)
    97   {
    98     if ($line =~ /(\d+),\d,(\d)/)
    99     {
    100       $data_was_local->{$1} = $2;
    101     }
    102   }
    103   close(DLIN);
     93if (-f $data_locality_csv_path)
     94{
     95  print ' * Reading and parsing "data_locality.csv"... ';
     96  if (open(DLIN, '<:utf8', $data_locality_csv_path))
     97  {
     98    while (my $line = <DLIN>)
     99    {
     100      if ($line =~ /(\d+),\d,(\d)/)
     101      {
     102        $data_was_local->{$1} = $2;
     103      }
     104    }
     105    close(DLIN);
     106  }
     107  else
     108  {
     109    die('Error! Failed to open file for reading: ' . $data_locality_csv_path);
     110  }
     111  print "Done!\n";
    104112}
    105113else
    106114{
    107   die('Error! Failed to open file for reading: ' . $data_locality_csv_path);
    108 }
    109 print "Done!\n";
     115  print " * Data locality not available or not applicable\n";
     116}
    110117
    111118# Read in all task logs and parse task records
     
    121128      my $job_no = $1;
    122129      my $task_no = $2;
    123       my $is_data_local = $data_was_local->{$task_no};
    124       my $task_record = {'host'=>'', 'cpu'=>0, 'job' => $job_no, 'task' => $task_no, 'start'=>0, 'end'=>0, 'cpu_time'=>0, 'data_locality'=>$is_data_local, 'file'=>''};
     130      my $is_data_local = 0;
     131      if (defined ($data_was_local->{$task_no}))
     132      {
     133        $is_data_local = $data_was_local->{$task_no};
     134      }
     135      my $task_record = {'host'=>'', 'cpu'=>0, 'job' => $job_no, 'task' => $task_no, 'start'=>0, 'end'=>0, 'cpu_time'=>0, 'data_locality'=>$is_data_local, 'file'=>'', 'percom'=>'NA'};
    125136      print ' - Reading and parsing "' . $file . '"... ';
    126137      my $task_log_path = $results_path . '/' . $file;
     
    169180        # Calculate CPU time (total time - IO time)
    170181        $task_record->{'cpu_time'} = $task_record->{'end'} - $task_record->{'start'} - $io_time;
     182
     183        # We should now have the filename - use this and try and locate a
     184        # convert log for this item (assuming it is multimedia, which it may
     185        # not be)
     186        if (defined $task_record->{'file'} && $task_record->{'file'} =~ /\/([^\/]+)\.ts/)
     187        {
     188          my $filename_sans_extension = $1;
     189          my $convert_log = $results_path . '/convert-' . $filename_sans_extension . '.log';
     190          if (-f $convert_log)
     191          {
     192            print '[Reading and parsing convert log]... ';
     193            if (open(CLIN, '<:utf8', $convert_log))
     194            {
     195              my $max_percent = 0.00;
     196              while (my $line = <CLIN>)
     197              {
     198                if ($line =~ /.*Encoding: task 1 of 1, (\d+\.\d\d) \%/)
     199                {
     200                  my $percent = $1;
     201                  if ($percent > $max_percent)
     202                  {
     203                    $max_percent = $percent;
     204                  }
     205                }
     206              }
     207              close(CLIN);
     208              $task_record->{'percom'} = $max_percent;
     209            }
     210            else
     211            {
     212              print STDERR "Warning! Failed to open log file for reading: " . $convert_log . "\n";
     213            }
     214          }
     215        }
     216
    171217        # Store this record
    172218        $task_records->{$task_no} = $task_record;
     
    232278  my $row_counter = 1;
    233279  # Header
    234   print CSVOUT "id,name,hostname,start,end,cputime,dl,pid,filename\n";
     280  print CSVOUT "id,name,hostname,start,end,cputime,dl,pid,filename,percom\n";
    235281  # Master Record
    236   print CSVOUT $row_counter . ',M0,' . $job_record->{'host'} . ',' . $job_record->{'start'} . ',' . $job_record->{'end'} . ',' . ($job_record->{'cpu_time'} / 1000) . ",0,0,NA\n";
     282  print CSVOUT $row_counter . ',M0,' . $job_record->{'host'} . ',' . $job_record->{'start'} . ',' . $job_record->{'end'} . ',' . ($job_record->{'cpu_time'} / 1000) . ",0,0,NA,NA\n";
    237283  $row_counter++;
    238284  # For each compute node record
     
    259305    }
    260306    $known_workers->{$csv_worker_id} = 1;
    261     print CSVOUT $node_id . ',' . $csv_worker_id . ',' . $node_record->{'host'} . ',' . $node_record->{'start'} . ',' . $node_record->{'end'} . ',' . $node_record->{'cpu_time'} . ",0,1,NA\n";
     307    print CSVOUT $node_id . ',' . $csv_worker_id . ',' . $node_record->{'host'} . ',' . $node_record->{'start'} . ',' . $node_record->{'end'} . ',' . $node_record->{'cpu_time'} . ",0,1,NA,NA\n";
    262308    # List the child task records
    263309    foreach my $taskno (sort keys %{$task_records})
     
    266312      if ($task_record->{'host'} . '#' . $task_record->{'cpu'} eq $worker_id)
    267313      {
    268         print CSVOUT $row_counter . ',T' . ($task_record->{'task'} + 0) . ',' . $task_record->{'host'} . ',' . $task_record->{'start'} . ',' . $task_record->{'end'} . ',' . $task_record->{'cpu_time'} . ',' . $task_record->{'data_locality'} . ',' . $node_id . ',' . $task_record->{'file'} . "\n";
     314        print CSVOUT $row_counter . ',T' . ($task_record->{'task'} + 0) . ',' . $task_record->{'host'} . ',' . $task_record->{'start'} . ',' . $task_record->{'end'} . ',' . $task_record->{'cpu_time'} . ',' . $task_record->{'data_locality'} . ',' . $node_id . ',' . $task_record->{'file'} . ',' . $task_record->{'percom'} . "\n";
    269315        $row_counter++;
    270316      }
Note: See TracChangeset for help on using the changeset viewer.