###########################################################################
#
# ParallelInexport.pm -- useful class to support parallel_import.pl
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package ParallelInexport;

use strict;
use warnings;

# index the files in parallel using MPI farmer to farm off multiple processes
# [hs, 1 july 2010]
#
# Parameters:
#   $jobs       - value passed to "mpirun -n"
#   $epoch      - passed through unchanged to the farmer executable
#   $importdir  - directory prefix joined onto each filename before it is
#                 looked up in $block_hash->{'file_blocks'}
#   $block_hash - hash ref; the keys of {'all_files'} are the candidate
#                 filenames and the keys of {'file_blocks'} are full paths
#                 that must be left out of the file list
#   $collection - passed through unchanged to the farmer executable
#   $site       - passed through to the farmer executable; undef is treated
#                 as the empty string
#
# Side effects: creates <GSDLCOLLECTDIR>/tmp and <GSDLCOLLECTDIR>/tmp/filelist.txt
# when they do not already exist, runs mpirun, and copies its standard output
# to our standard output line by line.
#
# Returns the result of closing the pipe to mpirun (false when the command
# exited with a non-zero status).  Dies if the tmp directory or file list
# cannot be created, or if the MPI command cannot be started.
sub farm_out_processes
{
    my ($jobs, $epoch, $importdir, $block_hash, $collection, $site) = @_;

    my $tmp_dir_path = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, 'tmp');
    if (!-d $tmp_dir_path)
    {
        mkdir($tmp_dir_path, 0777)
            or die("Couldn't create directory $tmp_dir_path: $!");
    }

    # create the list of files to import
    my $tmp_filelist = &util::filename_cat($tmp_dir_path, "filelist.txt");
    # - if the file is already there (which it should be during testing) then
    #   don't regenerate. This is especially important for imports of 1 million
    #   documents as just the directory scan can take several hours.
    if (!-f $tmp_filelist)
    {
        open(my $filelist, '>', $tmp_filelist)
            or die("Couldn't write file list $tmp_filelist: $!");
        foreach my $filename (sort keys %{$block_hash->{'all_files'}})
        {
            my $full_filename = &util::filename_cat($importdir, $filename);
            # skip blocked files and metadata.xml files
            if ((! exists $block_hash->{'file_blocks'}->{$full_filename})
                && ($filename !~ m/metadata\.xml$/))
            {
                print $filelist "$filename\n";
            }
        }
        # buffered write errors only surface at close, so check it
        close($filelist)
            or die("Couldn't finish writing file list $tmp_filelist: $!");
    }

    # invoke the farmer to start processing the files
    $site = "" if (!defined $site);
    my $gsdlhome = $ENV{'GSDLHOME'};
    my $farmer_exe = 'mpiimport'; # will be on PATH
    # NOTE: the command is a single string and so is interpreted by the shell;
    # paths containing whitespace or shell metacharacters are not supported.
    my $mpi_cmd = "mpirun -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site";
    # my $mpi_cmd = "mpirun --show-progress --timestamp-output --verbose --report-bindings --tag-output -n $jobs $farmer_exe $tmp_filelist $epoch $gsdlhome $collection $site";
    print STDERR "MPI Command: \"" . $mpi_cmd . "\"\n";

    # system ($mpi_cmd);
    open(my $mpi_fh, '-|', $mpi_cmd)
        or die("Couldn't Execute MPI: $!");
    while (defined(my $line = <$mpi_fh>))
    {
        # chomp and re-append so a final line without a newline is terminated
        chomp($line);
        print "$line\n";
    }

    # For a piped open, close returns false if the command exited non-zero;
    # in that case $! is 0 and the wait status is in $?.
    my $closed_ok = close($mpi_fh);
    if (!$closed_ok)
    {
        my ($os_error, $wait_status) = ($!, $?);
        if ($os_error)
        {
            print STDERR "Error closing pipe from MPI command: $os_error\n";
        }
        else
        {
            print STDERR "MPI command failed (wait status $wait_status)\n";
        }
    }
    return $closed_ok;
}

1;