Changeset 26071
- Timestamp:
- 2012-08-06T11:10:22+12:00 (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/parallel-building/trunk/src/perllib/ParallelInexport.pm
r24839 r26071 28 28 use strict; 29 29 30 # Randomize the order of files in the filelist 31 use List::Util qw( shuffle ); 30 32 31 33 # index the files in parallel using MPI farmer to farm off multiple processes … … 42 44 43 45 # create the list of files to import 46 my $overwrite = 1; 44 47 my $tmp_filelist = &util::filename_cat($tmp_dir_path, "filelist.txt"); 45 48 # - if the file is already there (which it should be during testing) then 46 49 # don't regenerate. This is especially important for imports of 1 million 47 50 # documents as just the directory scan can take several hours. 48 if ( !-f $tmp_filelist) 51 if ($overwrite || !-f $tmp_filelist) 49 52 { 50 53 open (my $filelist, ">$tmp_filelist"); 51 foreach my $filename (sort keys %{$block_hash->{'all_files'}}) 54 my @filenames = keys %{$block_hash->{'all_files'}}; 55 @filenames = shuffle(@filenames); 56 foreach my $filename (@filenames) 52 57 { 53 58 my $full_filename = &util::filename_cat($importdir,$filename); … … 65 70 my $gsdlhome = $ENV{'GSDLHOME'}; 66 71 my $farmer_exe = 'mpiimport'; # will be on PATH 67 my $mpi_cmd = "mpirun -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site"; 68 #my $mpi_cmd = "mpirun --show-progress --timestamp-output --verbose --report-bindings --tag-output -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site"; 72 #my $mpi_cmd = "mpirun -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site"; 73 my $mpi_cmd = "mpirun --show-progress --timestamp-output --verbose --report-bindings --tag-output -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site"; 69 74 print STDERR "MPI Command: \"" . $mpi_cmd . "\"\n"; 70 75 # system ($mpi_cmd);
Note:
See TracChangeset
for help on using the changeset viewer.