Changeset 26071


Ignore:
Timestamp:
2012-08-06T11:10:22+12:00 (12 years ago)
Author:
jmt12
Message:

Randomizing the order in which files are imported and restoring debugging flags to the MPI call

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/perllib/ParallelInexport.pm

    r24839 r26071  
    2828use strict;
    2929
     30# Randomize the order of files in the filelist
     31use List::Util qw( shuffle );
    3032
    3133# index the files in parallel using MPI farmer to farm off multiple processes
     
    4244
    4345   # create the list of files to import
     46   my $overwrite = 1;
    4447   my $tmp_filelist = &util::filename_cat($tmp_dir_path, "filelist.txt");
    4548   # - if the file is already there (which it should be during testing) then
    4649   #   don't regenerate. This is especially important for imports of 1 million
    4750   #   documents as just the directory scan can take several hours.
    48    if (!-f $tmp_filelist)
     51   if ($overwrite || !-f $tmp_filelist)
    4952   {
    5053     open (my $filelist, ">$tmp_filelist");
    51      foreach my $filename (sort keys %{$block_hash->{'all_files'}})
     54     my @filenames = keys %{$block_hash->{'all_files'}};
     55     @filenames = shuffle(@filenames);
     56     foreach my $filename (@filenames)
    5257     {
    5358       my $full_filename = &util::filename_cat($importdir,$filename);
     
    6570   my $gsdlhome = $ENV{'GSDLHOME'};
    6671   my $farmer_exe = 'mpiimport'; # will be on PATH
    67    my $mpi_cmd = "mpirun -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site";
    68 #   my $mpi_cmd = "mpirun --show-progress --timestamp-output --verbose --report-bindings --tag-output -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site";
     72   #my $mpi_cmd = "mpirun -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site";
     73   my $mpi_cmd = "mpirun --show-progress --timestamp-output --verbose --report-bindings --tag-output -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site";
    6974   print STDERR "MPI Command: \"" . $mpi_cmd . "\"\n";
    7075#   system ($mpi_cmd);
Note: See TracChangeset for help on using the changeset viewer.