Ignore:
Timestamp:
2011-12-01T12:24:10+13:00 (12 years ago)
Author:
jmt12
Message:

If the filelist is already present in tmp, don't regenerate it (just for testing), and make the MPI launch run in a pipe so we can continually trap output (rather than waiting until a system() call has completed).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/parallel-building/trunk/src/perllib/ParallelInexport.pm

    r24698 r24839  
    4141   }
    4242
     43   # create the list of files to import
    4344   my $tmp_filelist = &util::filename_cat($tmp_dir_path, "filelist.txt");
    44 
    45    # create the list of files to import
    46    open (my $filelist, ">$tmp_filelist");
    47    foreach my $filename (sort keys %{$block_hash->{'all_files'}})
     45   # - if the file is already there (which it should be during testing) then
     46   #   don't regenerate. This is especially important for imports of 1 million
     47   #   documents as just the directory scan can take several hours.
     48   if (!-f $tmp_filelist)
    4849   {
     50     open (my $filelist, ">$tmp_filelist");
     51     foreach my $filename (sort keys %{$block_hash->{'all_files'}})
     52     {
    4953       my $full_filename = &util::filename_cat($importdir,$filename);
    5054       if ((! exists $block_hash->{'file_blocks'}->{$full_filename})
    5155       && ($filename !~ m/metadata\.xml$/))
    5256       {
    53        print $filelist "$filename\n";
     57         print $filelist "$filename\n";
    5458       }
     59     }
     60     close ($filelist);
    5561   }
    56    close ($filelist);
    5762
    5863   # invoke the farmer to start processing the files
     
    6065   my $gsdlhome = $ENV{'GSDLHOME'};
    6166   my $farmer_exe = 'mpiimport'; # will be on PATH
    62    my $mpi_cmd = "mpirun -np $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site";
    63    system ($mpi_cmd);
     67   my $mpi_cmd = "mpirun -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site";
     68#   my $mpi_cmd = "mpirun --show-progress --timestamp-output --verbose --report-bindings --tag-output -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site";
     69   print STDERR "MPI Command: \"" . $mpi_cmd . "\"\n";
     70#   system ($mpi_cmd);
     71   open(MPI, $mpi_cmd . " |") or die("Couldn't Execute MPI");
     72   while ( defined( my $line = <MPI> )  )
     73   {
     74     chomp($line);
     75     print "$line\n";
     76   }
     77   close(MPI);
    6478}
    6579
Note: See TracChangeset for help on using the changeset viewer.