Changeset 26071

Show
Ignore:
Timestamp:
06.08.2012 11:10:22 (7 years ago)
Author:
jmt12
Message:

Randomizing the order in which files are imported and restoring debugging flags to the MPI call

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/perllib/ParallelInexport.pm

    r24839 r26071  
    2828use strict; 
    2929 
     30# Randomize the order of files in the filelist 
     31use List::Util qw( shuffle ); 
    3032 
    3133# index the files in parallel using MPI farmer to farm off multiple processes 
     
    4244 
    4345   # create the list of files to import 
     46   my $overwrite = 1; 
    4447   my $tmp_filelist = &util::filename_cat($tmp_dir_path, "filelist.txt"); 
    4548   # - if the file is already there (which it should be during testing) then 
    4649   #   don't regenerate. This is especially important for imports of 1 million 
    4750   #   documents as just the directory scan can take several hours. 
    48    if (!-f $tmp_filelist) 
     51   if ($overwrite || !-f $tmp_filelist) 
    4952   { 
    5053     open (my $filelist, ">$tmp_filelist"); 
    51      foreach my $filename (sort keys %{$block_hash->{'all_files'}}) 
     54     my @filenames = keys %{$block_hash->{'all_files'}}; 
     55     @filenames = shuffle(@filenames); 
     56     foreach my $filename (@filenames) 
    5257     { 
    5358       my $full_filename = &util::filename_cat($importdir,$filename); 
     
    6570   my $gsdlhome = $ENV{'GSDLHOME'}; 
    6671   my $farmer_exe = 'mpiimport'; # will be on PATH 
    67    my $mpi_cmd = "mpirun -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site"; 
    68 #   my $mpi_cmd = "mpirun --show-progress --timestamp-output --verbose --report-bindings --tag-output -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site"; 
     72   #my $mpi_cmd = "mpirun -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site"; 
     73   my $mpi_cmd = "mpirun --show-progress --timestamp-output --verbose --report-bindings --tag-output -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site"; 
    6974   print STDERR "MPI Command: \"" . $mpi_cmd . "\"\n"; 
    7075#   system ($mpi_cmd);