Changeset 27752

Show
Ignore:
Timestamp:
04.07.2013 13:44:22 (6 years ago)
Author:
jmt12
Message:

A missing data locality file is no longer fatal (HDFS-NFS-Proxy doesn't do locality very well). Also added code to determine the percentage of each file that HandBrakeCLI thinks it has parsed.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl

    r27684 r27752  
    8989 
    9090# Read in data_locality.csv (will be matched to task logs) 
    91 print ' * Reading and parsing "data_locality.csv"... '; 
    9291my $data_was_local = {}; 
    9392my $data_locality_csv_path = $results_path . '/data_locality.csv'; 
    94 if (open(DLIN, '<:utf8', $data_locality_csv_path)) 
    95 { 
    96   while (my $line = <DLIN>) 
    97   { 
    98     if ($line =~ /(\d+),\d,(\d)/) 
    99     { 
    100       $data_was_local->{$1} = $2; 
    101     } 
    102   } 
    103   close(DLIN); 
     93if (-f $data_locality_csv_path) 
     94{ 
     95  print ' * Reading and parsing "data_locality.csv"... '; 
     96  if (open(DLIN, '<:utf8', $data_locality_csv_path)) 
     97  { 
     98    while (my $line = <DLIN>) 
     99    { 
     100      if ($line =~ /(\d+),\d,(\d)/) 
     101      { 
     102        $data_was_local->{$1} = $2; 
     103      } 
     104    } 
     105    close(DLIN); 
     106  } 
     107  else 
     108  { 
     109    die('Error! Failed to open file for reading: ' . $data_locality_csv_path); 
     110  } 
     111  print "Done!\n"; 
    104112} 
    105113else 
    106114{ 
    107   die('Error! Failed to open file for reading: ' . $data_locality_csv_path); 
    108 } 
    109 print "Done!\n"; 
     115  print " * Data locality not available or not applicable\n"; 
     116} 
    110117 
    111118# Read in all task logs and parse task records 
     
    121128      my $job_no = $1; 
    122129      my $task_no = $2; 
    123       my $is_data_local = $data_was_local->{$task_no}; 
    124       my $task_record = {'host'=>'', 'cpu'=>0, 'job' => $job_no, 'task' => $task_no, 'start'=>0, 'end'=>0, 'cpu_time'=>0, 'data_locality'=>$is_data_local, 'file'=>''}; 
     130      my $is_data_local = 0; 
     131      if (defined ($data_was_local->{$task_no})) 
     132      { 
     133        $is_data_local = $data_was_local->{$task_no}; 
     134      } 
     135      my $task_record = {'host'=>'', 'cpu'=>0, 'job' => $job_no, 'task' => $task_no, 'start'=>0, 'end'=>0, 'cpu_time'=>0, 'data_locality'=>$is_data_local, 'file'=>'', 'percom'=>'NA'}; 
    125136      print ' - Reading and parsing "' . $file . '"... '; 
    126137      my $task_log_path = $results_path . '/' . $file; 
     
    169180        # Calculate CPU time (total time - IO time) 
    170181        $task_record->{'cpu_time'} = $task_record->{'end'} - $task_record->{'start'} - $io_time; 
     182 
     183        # We should now have the filename - use this and try and locate a 
     184        # convert log for this item (assuming it is multimedia, which it may 
     185        # not be) 
     186        if (defined $task_record->{'file'} && $task_record->{'file'} =~ /\/([^\/]+)\.ts/) 
     187        { 
     188          my $filename_sans_extension = $1; 
     189          my $convert_log = $results_path . '/convert-' . $filename_sans_extension . '.log'; 
     190          if (-f $convert_log) 
     191          { 
     192            print '[Reading and parsing convert log]... '; 
     193            if (open(CLIN, '<:utf8', $convert_log)) 
     194            { 
     195              my $max_percent = 0.00; 
     196              while (my $line = <CLIN>) 
     197              { 
     198                if ($line =~ /.*Encoding: task 1 of 1, (\d+\.\d\d) \%/) 
     199                { 
     200                  my $percent = $1; 
     201                  if ($percent > $max_percent) 
     202                  { 
     203                    $max_percent = $percent; 
     204                  } 
     205                } 
     206              } 
     207              close(CLIN); 
     208              $task_record->{'percom'} = $max_percent; 
     209            } 
     210            else 
     211            { 
     212              print STDERR "Warning! Failed to open log file for reading: " . $convert_log . "\n"; 
     213            } 
     214          } 
     215        } 
     216 
    171217        # Store this record 
    172218        $task_records->{$task_no} = $task_record; 
     
    232278  my $row_counter = 1; 
    233279  # Header 
    234   print CSVOUT "id,name,hostname,start,end,cputime,dl,pid,filename\n"; 
     280  print CSVOUT "id,name,hostname,start,end,cputime,dl,pid,filename,percom\n"; 
    235281  # Master Record 
    236   print CSVOUT $row_counter . ',M0,' . $job_record->{'host'} . ',' . $job_record->{'start'} . ',' . $job_record->{'end'} . ',' . ($job_record->{'cpu_time'} / 1000) . ",0,0,NA\n"; 
     282  print CSVOUT $row_counter . ',M0,' . $job_record->{'host'} . ',' . $job_record->{'start'} . ',' . $job_record->{'end'} . ',' . ($job_record->{'cpu_time'} / 1000) . ",0,0,NA,NA\n"; 
    237283  $row_counter++; 
    238284  # For each compute node record 
     
    259305    } 
    260306    $known_workers->{$csv_worker_id} = 1; 
    261     print CSVOUT $node_id . ',' . $csv_worker_id . ',' . $node_record->{'host'} . ',' . $node_record->{'start'} . ',' . $node_record->{'end'} . ',' . $node_record->{'cpu_time'} . ",0,1,NA\n"; 
     307    print CSVOUT $node_id . ',' . $csv_worker_id . ',' . $node_record->{'host'} . ',' . $node_record->{'start'} . ',' . $node_record->{'end'} . ',' . $node_record->{'cpu_time'} . ",0,1,NA,NA\n"; 
    262308    # List the child task records 
    263309    foreach my $taskno (sort keys %{$task_records}) 
     
    266312      if ($task_record->{'host'} . '#' . $task_record->{'cpu'} eq $worker_id) 
    267313      { 
    268         print CSVOUT $row_counter . ',T' . ($task_record->{'task'} + 0) . ',' . $task_record->{'host'} . ',' . $task_record->{'start'} . ',' . $task_record->{'end'} . ',' . $task_record->{'cpu_time'} . ',' . $task_record->{'data_locality'} . ',' . $node_id . ',' . $task_record->{'file'} . "\n"; 
     314        print CSVOUT $row_counter . ',T' . ($task_record->{'task'} + 0) . ',' . $task_record->{'host'} . ',' . $task_record->{'start'} . ',' . $task_record->{'end'} . ',' . $task_record->{'cpu_time'} . ',' . $task_record->{'data_locality'} . ',' . $node_id . ',' . $task_record->{'file'} . ',' . $task_record->{'percom'} . "\n"; 
    269315        $row_counter++; 
    270316      }