Changeset 26242


Ignore:
Timestamp:
2012-09-25T12:41:51+12:00 (12 years ago)
Author:
jmt12
Message:

Modified the progress messages so that information can be extracted from the logs more easily by automated tools

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/bin/script/parallel_terrier_fileindex.pl

    r26187 r26242  
    6666  if ($debug)
    6767  {
    68     print STDERR '[debug] ' . $message;
     68    print STDERR '[SDEBUG] ' . $message;
    6969  }
    7070}
     
    100100sub main
    101101{
    102     print '[' . time() . '] Parallel FileIndexer Started: ' . localtime() . "\n";
    103 
    104     # 1. Initialization
    105     my $class_name = 'org.terrier.applications.FileIndexer';
    106     my $worker_count = 0;
    107     my $terrier_home = '';
    108     my $collection_path = '';
    109     my $batch_size = 0;
    110     my $debug = 0;
    111     # - parse arguments
    112     my $argument;
    113     for (my $i = 0; $i < scalar(@ARGV); $i++)
    114     {
    115       $argument = $ARGV[$i];
    116       if ('-workers' eq $argument)
    117       {
    118         $i++;
    119         $worker_count = $ARGV[$i];
    120       }
    121       elsif ('-terrier' eq $argument)
    122       {
    123         $i++;
    124         $terrier_home = $ARGV[$i];
    125       }
    126       elsif ('-collection' eq $argument)
    127       {
    128         $i++;
    129         $collection_path = $ARGV[$i];
    130       }
    131       elsif ('-batchsize' eq $argument)
    132       {
    133         $i++;
    134         $batch_size = $ARGV[$i];
    135       }
    136       elsif ('-debug' eq $argument)
    137       {
    138         $debug = 1;
    139       }
    140       else
    141       {
    142         &printUsage('Unrecognized argument: ' . $argument);
    143       }
    144     }
    145     print 'Worker Count: ' . $worker_count . "\n";
    146     print 'Terrier Home: ' . $terrier_home . "\n";
    147     print 'Collection:   ' . $collection_path . "\n";
    148     print 'Batch Size:   ' . $batch_size . "\n";
    149     print 'Debug:        ' . $debug . "\n";
    150     # - check arguments
    151     if ($worker_count !~ /^\d+$/)
    152     {
    153       &printUsage('Worker count must be an integer');
    154     }
    155     if ('' eq $terrier_home || !-d $terrier_home)
    156     {
    157       &printUsage('Terrier home path given doesn\'t exist or isn\'t a directory');
    158     }
    159     if ('' eq $collection_path || !-d $collection_path)
    160     {
    161       &printUsage('Collection path given doesn\'t exist or isn\'t a directory');
    162     }
    163     if ($batch_size !~ /^\d+$/)
    164     {
    165       &printUsage('Batch size count must be an integer');
    166     }
    167     if (0 == $worker_count || 0 == $batch_size)
    168     {
    169       print STDOUT "Warning! Zero workers or a batch size of zero causes a serial index.\n";
    170       $batch_size = 0;
    171     }
    172     # - derived variables
    173     my $anyclass_exe = &fileCat($terrier_home, 'bin', 'anyclass.sh');
    174 
    175 
    176     # 2. Remove any existing index
    177     print STDOUT " * Removing any old index\n";
    178     my $var_path = &fileCat($terrier_home, 'var');
    179     opendir(DH, $var_path) or die('Error! Failed to open var path for reading: ' . $!);
    180     my @old_files = readdir(DH);
    181     closedir(DH);
    182     my $old_file;
    183     foreach $old_file (@old_files)
    184     {
    185       if ($old_file =~ /^manifest-\d+.spec/)
    186       {
    187         my $old_path = &fileCat($var_path, $old_file);
    188         &debugPrint($debug, 'deleting ' . $old_path . "\n");
    189         unlink($old_path);
    190       }
    191     }
    192     my $index_path = &fileCat($var_path, 'index');
    193     my $delete_command = 'rm -rf "' . $index_path . '"';
    194     &debugPrint($debug, 'command: ' . $delete_command . "\n");
    195     `$delete_command`;
    196     my $assoc_path = &fileCat($var_path, 'share', 'images', 'assoc');
    197     $delete_command  = 'rm -rf "' . $assoc_path . '"';
    198     &debugPrint($debug, 'command: ' . $delete_command . "\n");
    199     `$delete_command`;
    200     print STDOUT ' - cleanup complete!' . "\n";
    201 
    202     # 3. Prepare the collection for parallel indexing
    203     print STDOUT " * Prepare collection for indexing\n";
     102  print '[SCRIPT:' . time() . "] Starting Parallel FileIndexer\n";
     103
     104  # 1. Initialization
     105  my $class_name = 'org.terrier.applications.FileIndexer';
     106  my $worker_count = 0;
     107  my $terrier_home = '';
     108  my $collection_path = '';
     109  my $batch_size = 0;
     110  my $debug = 0;
     111  # - parse arguments
     112  my $argument;
     113  for (my $i = 0; $i < scalar(@ARGV); $i++)
     114  {
     115    $argument = $ARGV[$i];
     116    if ('-workers' eq $argument)
     117    {
     118      $i++;
     119      $worker_count = $ARGV[$i];
     120    }
     121    elsif ('-terrier' eq $argument)
     122    {
     123      $i++;
     124      $terrier_home = $ARGV[$i];
     125    }
     126    elsif ('-collection' eq $argument)
     127    {
     128      $i++;
     129      $collection_path = $ARGV[$i];
     130    }
     131    elsif ('-batchsize' eq $argument)
     132    {
     133      $i++;
     134      $batch_size = $ARGV[$i];
     135    }
     136    elsif ('-debug' eq $argument)
     137    {
     138      $debug = 1;
     139    }
     140    else
     141    {
     142      &printUsage('Unrecognized argument: ' . $argument);
     143    }
     144  }
     145  print '[SCRIPT] Worker Count: ' . $worker_count . "\n";
     146  print '[SCRIPT] Terrier Home: ' . $terrier_home . "\n";
     147  print '[SCRIPT] Collection:   ' . $collection_path . "\n";
     148  print '[SCRIPT] Batch Size:   ' . $batch_size . "\n";
     149  print '[SCRIPT] Debug:        ' . $debug . "\n";
     150
     151  # - check arguments
     152  if ($worker_count !~ /^\d+$/)
     153  {
     154    &printUsage('Worker count must be an integer');
     155  }
     156  if ('' eq $terrier_home || !-d $terrier_home)
     157  {
     158    &printUsage('Terrier home path given doesn\'t exist or isn\'t a directory');
     159  }
     160  if ('' eq $collection_path || !-d $collection_path)
     161  {
     162    &printUsage('Collection path given doesn\'t exist or isn\'t a directory');
     163  }
     164  if ($batch_size !~ /^\d+$/)
     165  {
     166    &printUsage('Batch size count must be an integer');
     167  }
     168  if (0 == $worker_count || 0 == $batch_size)
     169  {
     170    print STDOUT "Warning! Zero workers or a batch size of zero causes a serial index.\n";
     171    $batch_size = 0;
     172  }
     173  # - derived variables
     174  my $anyclass_exe = &fileCat($terrier_home, 'bin', 'anyclass.sh');
     175
     176  # 2. Remove any existing index
     177  print STDOUT "[SCRIPT] Removing old index files...\n";
     178  my $var_path = &fileCat($terrier_home, 'var');
     179  opendir(DH, $var_path) or die('Error! Failed to open var path for reading: ' . $!);
     180  my @old_files = readdir(DH);
     181  closedir(DH);
     182  my $old_file;
     183  foreach $old_file (@old_files)
     184  {
     185    if ($old_file =~ /^manifest-\d+.spec/)
     186    {
     187      my $old_path = &fileCat($var_path, $old_file);
     188      &debugPrint($debug, 'deleting ' . $old_path . "\n");
     189      unlink($old_path);
     190    }
     191  }
     192  my $index_path = &fileCat($var_path, 'index');
     193  my $delete_command = 'rm -rf "' . $index_path . '"';
     194  &debugPrint($debug, 'command: ' . $delete_command . "\n");
     195  `$delete_command`;
     196  my $assoc_path = &fileCat($terrier_home, 'share', 'images', 'assoc');
     197  $delete_command  = 'rm -rf "' . $assoc_path . '"';
     198  &debugPrint($debug, 'command: ' . $delete_command . "\n");
     199  `$delete_command`;
     200
     201  # 3. Prepare the collection for parallel indexing
     202  print STDOUT "[SCRIPT] Prepare collection for indexing...\n";
    204203    my $prepare_command = $anyclass_exe . ' ' . $class_name . ' -prepare -path "' . $collection_path . '"';
    205204    if (0 < $batch_size)
     
    221220      }
    222221    }
    223     print STDOUT ' - generated ' . $manifest_count . ' manifest files' . "\n";
    224     print STDOUT ' - preparation complete!' . "\n";
     222    print STDOUT '[SCRIPT] => generated ' . $manifest_count . " manifest files\n";
    225223    if (0 >= $manifest_count)
    226224    {
     
    231229    if (1 == $manifest_count)
    232230    {
    233       print STDOUT ' * Index collection with serial processing' . "\n";
     231      print STDOUT "[SCRIPT] Index collection using serial processing\n";
    234232      my $manifest_path = &fileCat($var_path, 'manifest-000.spec');
    235233      my $index_command = $anyclass_exe . ' ' . $class_name . ' -index -path "' . $manifest_path . '" -prefix 000';
     
    240238    else
    241239    {
    242       print STDOUT ' * Index collection with parallel processing (' . $worker_count . ' workers)' . "\n";
    243       my $mpi_flags = '--show-progress --timestamp-output --verbose --report-bindings --tag-output';
     240      print STDOUT '[SCRIPT] Index collection using parallel processing (' . $worker_count . " workers)";
     241      my $mpi_flags = '--show-progress --verbose ';
    244242      my $mpi_conf_path = &fileCat($terrier_home, 'mpi.conf');
    245243      if (-f $mpi_conf_path)
    246244      {
    247         print STDOUT ' - parallel processing on cluster' . "\n";
     245        print STDOUT "(cluster)\n";
    248246        $mpi_flags .= ' -nolocal -machinefile "' . $mpi_conf_path . '"';
    249247      }
    250248      else
    251249      {
    252         print STDOUT ' - parallel processing on multicore computer' . "\n";
     250        print STDOUT "(multicore)\n";
    253251      }
    254252      my $mpi_command = 'mpirun ' . $mpi_flags . ' -np ' . ($worker_count + 1) . ' mpiterrierfileindexer "' . $ENV{'GSDLHOME'} . '" "' . $terrier_home . '" ' . $manifest_count;
     
    256254      `$mpi_command`;
    257255    }
    258     print STDOUT ' - indexing complete!' . "\n";
    259256
    260257    # 5. Merge the indexes
    261258    # - if we performed a serial process above, then this will just rename the
    262259    #   index files
    263     print STDOUT ' * Merging Indexes' . "\n";
     260    print STDOUT "[SCRIPT] Merging Indexes\n";
    264261    my $merge_command = $anyclass_exe . ' ' . $class_name . ' -merge';
    265262    &debugPrint($debug, 'command: ' . $merge_command . "\n");
    266263    `$merge_command`;
    267     print STDOUT ' - merging complete!' . "\n";
    268264
    269265    # Complete!
    270     print '[' . time() . '] Parallel FileIndexer Complete: ' . localtime() . "\n";
     266    print '[SCRIPT:' . time() . "] Complete!\n\n";
    271267  }
    272268
Note: See TracChangeset for help on using the changeset viewer.