Changeset 26998 for gs2-extensions


Ignore:
Timestamp:
2013-03-05T11:07:26+13:00 (11 years ago)
Author:
jmt12
Message:

Added the maxdocs variable (appearing in this diff as `$max_files`, set via the `-maxfiles` option) and lots of debug comments. Directories are now tested for existence before being deleted; previously they were simply deleted, which caused non-fatal errors if the directories weren't there. Also added some extra flags to ensure MPI binds to the correct network interface, as Medusa has some phantom virtual interfaces that occasionally interfere.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/bin/script/parallel_terrier_fileindex.pl

    r26242 r26998  
    9090    print STDERR 'Error! ' . $message . "\n";
    9191  }
    92   print STDERR 'Usage: parallel_terrier_fileindexer.pl -workers <int> -terrier <path> -collection <path> -batchsize <int> [-debug]' . "\n\n";
     92  print STDERR 'Usage: parallel_terrier_fileindexer.pl -terrier <path> -collection <path> -workers <num> -batchsize <num> [-maxfiles <num>] [-debug]' . "\n\n";
    9393  print '[' . time() . ']Parallel FileIndexer Complete: ' . localtime() . "\n";
    9494  exit(0);
     
    109109  my $batch_size = 0;
    110110  my $debug = 0;
     111  my $max_files = 0;
    111112  # - parse arguments
    112113  my $argument;
     
    137138    {
    138139      $debug = 1;
     140    }
     141    elsif ('-maxfiles' eq $argument)
     142    {
     143      $i++;
     144      $max_files = $ARGV[$i];
    139145    }
    140146    else
     
    189195      unlink($old_path);
    190196    }
    191   }
    192   my $index_path = &fileCat($var_path, 'index');
    193   my $delete_command = 'rm -rf "' . $index_path . '"';
    194   &debugPrint($debug, 'command: ' . $delete_command . "\n");
    195   `$delete_command`;
    196   my $assoc_path = &fileCat($terrier_home, 'share', 'images', 'assoc');
    197   $delete_command  = 'rm -rf "' . $assoc_path . '"';
    198   &debugPrint($debug, 'command: ' . $delete_command . "\n");
    199   `$delete_command`;
     197    my $index_path = &fileCat($var_path, 'index');
     198    if (-d $index_path)
     199    {
     200      my $delete_command = 'rm -rf "' . $index_path . '"';
     201      &debugPrint($debug, 'command: ' . $delete_command . "\n");
     202      `$delete_command`;
     203    }
     204    my $assoc_path = &fileCat($terrier_home, 'share', 'images', 'assoc');
     205    if (-d $assoc_path)
     206    {
     207      my $delete_command2  = 'rm -rf "' . $assoc_path . '"';
     208      &debugPrint($debug, 'command: ' . $delete_command2 . "\n");
     209      `$delete_command2`;
     210    }
     211  }
    200212
    201213  # 3. Prepare the collection for parallel indexing
     
    205217    {
    206218      $prepare_command .= ' -batchsize ' . $batch_size;
     219    }
     220    if (0 < $max_files)
     221    {
     222      $prepare_command .= ' -maxfiles ' . $max_files;
    207223    }
    208224    &debugPrint($debug, 'command: ' . $prepare_command . "\n");
     
    238254    else
    239255    {
    240       print STDOUT '[SCRIPT] Index collection using parallel processing (' . $worker_count . " workers)";
     256      print STDOUT "[SCRIPT] Index collection with parallel processing (" . $worker_count . " workers\n";
    241257      my $mpi_flags = '--show-progress --verbose ';
     258      # Excessive force! Ensure we bind to the correct network interface
     259      $mpi_flags .= '--mca btl tcp,sm,self --mca btl_tcp_if_include eth0 ';
     260      #$mpi_flags .= '-nolocal ';
    242261      my $mpi_conf_path = &fileCat($terrier_home, 'mpi.conf');
    243262      if (-f $mpi_conf_path)
    244263      {
    245264        print STDOUT "(cluster)\n";
    246         $mpi_flags .= ' -nolocal -machinefile "' . $mpi_conf_path . '"';
     265        $mpi_flags .= '-machinefile "' . $mpi_conf_path . '" ';
    247266      }
    248267      else
Note: See TracChangeset for help on using the changeset viewer.