Changeset 26998

Show
Ignore:
Timestamp:
05.03.2013 11:07:26 (7 years ago)
Author:
jmt12
Message:

Added the maxfiles variable (the -maxfiles option) and lots of debug comments. Directories are now tested for existence before being deleted; previously, deleting a directory that wasn't there caused non-fatal errors. Also added some extra flags to ensure MPI binds to the correct network interface, as Medusa has some phantom virtual interfaces that occasionally interfere.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/bin/script/parallel_terrier_fileindex.pl

    r26242 r26998  
    9090    print STDERR 'Error! ' . $message . "\n"; 
    9191  } 
    92   print STDERR 'Usage: parallel_terrier_fileindexer.pl -workers <int> -terrier <path> -collection <path> -batchsize <int> [-debug]' . "\n\n"; 
     92  print STDERR 'Usage: parallel_terrier_fileindexer.pl -terrier <path> -collection <path> -workers <num> -batchsize <num> [-maxfiles <num>] [-debug]' . "\n\n"; 
    9393  print '[' . time() . ']Parallel FileIndexer Complete: ' . localtime() . "\n"; 
    9494  exit(0); 
     
    109109  my $batch_size = 0; 
    110110  my $debug = 0; 
     111  my $max_files = 0; 
    111112  # - parse arguments 
    112113  my $argument; 
     
    137138    { 
    138139      $debug = 1; 
     140    } 
     141    elsif ('-maxfiles' eq $argument) 
     142    { 
     143      $i++; 
     144      $max_files = $ARGV[$i]; 
    139145    } 
    140146    else 
     
    189195      unlink($old_path); 
    190196    } 
    191   } 
    192   my $index_path = &fileCat($var_path, 'index'); 
    193   my $delete_command = 'rm -rf "' . $index_path . '"'; 
    194   &debugPrint($debug, 'command: ' . $delete_command . "\n"); 
    195   `$delete_command`; 
    196   my $assoc_path = &fileCat($terrier_home, 'share', 'images', 'assoc'); 
    197   $delete_command  = 'rm -rf "' . $assoc_path . '"'; 
    198   &debugPrint($debug, 'command: ' . $delete_command . "\n"); 
    199   `$delete_command`; 
     197    my $index_path = &fileCat($var_path, 'index'); 
     198    if (-d $index_path) 
     199    { 
     200      my $delete_command = 'rm -rf "' . $index_path . '"'; 
     201      &debugPrint($debug, 'command: ' . $delete_command . "\n"); 
     202      `$delete_command`; 
     203    } 
     204    my $assoc_path = &fileCat($terrier_home, 'share', 'images', 'assoc'); 
     205    if (-d $assoc_path) 
     206    { 
     207      my $delete_command2  = 'rm -rf "' . $assoc_path . '"'; 
     208      &debugPrint($debug, 'command: ' . $delete_command2 . "\n"); 
     209      `$delete_command2`; 
     210    } 
     211  } 
    200212 
    201213  # 3. Prepare the collection for parallel indexing 
     
    205217    { 
    206218      $prepare_command .= ' -batchsize ' . $batch_size; 
     219    } 
     220    if (0 < $max_files) 
     221    { 
     222      $prepare_command .= ' -maxfiles ' . $max_files; 
    207223    } 
    208224    &debugPrint($debug, 'command: ' . $prepare_command . "\n"); 
     
    238254    else 
    239255    { 
    240       print STDOUT '[SCRIPT] Index collection using parallel processing (' . $worker_count . " workers)"; 
     256      print STDOUT "[SCRIPT] Index collection with parallel processing (" . $worker_count . " workers\n"; 
    241257      my $mpi_flags = '--show-progress --verbose '; 
     258      # Excessive force! Ensure we bind to the correct network interface 
     259      $mpi_flags .= '--mca btl tcp,sm,self --mca btl_tcp_if_include eth0 '; 
     260      #$mpi_flags .= '-nolocal '; 
    242261      my $mpi_conf_path = &fileCat($terrier_home, 'mpi.conf'); 
    243262      if (-f $mpi_conf_path) 
    244263      { 
    245264        print STDOUT "(cluster)\n"; 
    246         $mpi_flags .= ' -nolocal -machinefile "' . $mpi_conf_path . '"'; 
     265        $mpi_flags .= '-machinefile "' . $mpi_conf_path . '" '; 
    247266      } 
    248267      else