########################################################################### # # parallelbuildingbuildcolutils.pm -- # # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
#
###########################################################################

# search for: parallel indexname indexlevel

package parallelbuildingbuildcolutils;

# Pragma
use strict;
no strict 'refs';   # allow filehandles to be variables and vice versa
no strict 'subs';   # allow barewords (eg STDERR) as function arguments

# Greenstone Modules
use buildcolutils;
use gsprintf;

BEGIN
{
    @parallelbuildingbuildcolutils::ISA = ('buildcolutils');
}

# Parallel Build Customization
# Arguments specific to this subclass; exposed to buildcol.pl through
# getSupportedArguments() below.
my $arguments = [
    { 'name'      => "workers",
      'desc'      => "**Parallel Processing** The number of 'worker' threads to spawn when parallel processing",
      'type'      => "int",
      'range'     => "0,",
      'reqd'      => "no",
      'hiddengli' => "yes" }
];

## @method new()
#
# Construct a parallelbuildingbuildcolutils object, delegating all
# initialisation to the parent buildcolutils class and reblessing the
# result into this package.
#
sub new
{
    my $class = shift(@_);
    my $self = new buildcolutils(@_);
    # Sanity checks
    return bless($self, $class);
}
# new()

# @function getSupportedArguments
# Retrieve the list of arguments that are specific to this subclass of
# buildcolutils so they can be added to the list of supported arguments to
# buildcol.pl. The use of any of these arguments automatically causes this
# subclass to be instantiated and used in preference to the parent class.
# ATM it is up to the implementer to ensure these arguments are unique
# between subclasses.
sub getSupportedArguments
{
    return $arguments;
}
# getSupportedArguments()

# @function set_collection_options
# Apply the collection configuration via the parent class, then adjust the
# options so that the parallel-building variants of the indexers are used.
sub set_collection_options
{
    my $self = shift @_;
    my ($collectcfg) = @_;
    $self->SUPER::set_collection_options($collectcfg);

    # Sanity tests
    if ($collectcfg->{'infodbtype'} eq 'sqlite' && $self->{'workers'} > 0)
    {
        # Typo fixed in message: "not current supported" -> "not currently supported"
        print STDERR "WARNING: Parallel builds not currently supported by SQLite - reverting to serial build\n";
        $self->{'workers'} = 0;
    }

    # Add parallel building prefix to the requested buildertype as necessary
    if ($self->{'buildtype'} !~ /^parallelbuilding/)
    {
        print STDERR "WARNING: using parallel processing version of indexer: " . $self->{'buildtype'} . "\n";
        # BUGFIX: previously prepended only 'parallel', which matches neither
        # the /^parallelbuilding/ guard above nor the parallelbuilding*
        # builder module naming convention.
        $self->{'buildtype'} = 'parallelbuilding' . $self->{'buildtype'};
    }
}
# set_collection_options()

# @function build_collection()
# Parallel Building Support
# - if parallel building is requested then we subvert the normal 'all' mode
#   process, instead attempting to create an XML 'recipe' for building this
#   collection. We then pass this recipe to an Open MPI augmented compiled
#   executable (which will in turn make multiple calls back to buildcol.pl
#   according to the instructions in the recipe)!
sub build_collection
{
    my $self = shift(@_);
    my $builders_ref = shift(@_);
    my $out = $self->{'out'};

    if ($self->{'workers'} > 0)
    {
        print $out "*** parallel building\n";

        # Some infodb modes (namely GDBMServer at the moment) need to open the
        # connection to the database in such a way that it persists over the
        # child threads. We do this by adding a dummy call to build the file
        # path to archiveinf-doc as it is the database in question. The '1' at
        # the end means launch the server... it will then persist until this
        # block passes out of scope (presumably after all the child mpi
        # processes are done).
        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-doc", $self->{'archivedir'}, 1);

        # we initially create the recipe as a datastructure to make it easier
        # for each builder to determine what has already been defined
        # - each step of the recipe will have a command as a string and a
        #   (possibly empty) array of steps that depend on this step
        #   (possibly recursive)
        print $out "Generating indexing 'recipe'\n";
        my $recipe = [];
        # pass to each builder to have it populated with appropriate commands
        # (plain foreach rather than map-in-void-context)
        foreach my $builder (@{$builders_ref})
        {
            $builder->prepareIndexRecipe($self->{'collection'}, $recipe);
        }

        # now write the recipe to an XML file, resolving any path macros
        my $max_parallel_tasks = scalar(@{$recipe});
        # BUGFIX: was "my $xml_lines = ();" which assigns undef and relied on
        # autovivification
        my $xml_lines = [];
        push(@{$xml_lines}, '<?xml version="1.0" encoding="UTF-8" standalone="no" ?>');
        push(@{$xml_lines}, '<recipe>');
        foreach my $item (@{$recipe})
        {
            my $max_parallel_child_tasks = &print_recipe($xml_lines, $item);
            if ($max_parallel_child_tasks > $max_parallel_tasks)
            {
                $max_parallel_tasks = $max_parallel_child_tasks;
            }
        }
        push(@{$xml_lines}, '</recipe>');

        my $recipe_path = &util::get_tmp_filename('.xml');
        open(XMLOUT, ">:utf8", $recipe_path) or die("Error! Failed to open recipe file for writing: " . $recipe_path . "\nReason: " . $!);
        print XMLOUT join("\n", @{$xml_lines});
        close(XMLOUT);

        # call mpibuildcol executable using mpirun and passing path to recipe
        # (workers + 1: one rank for the coordinating master process)
        my $number_of_threads = $self->{'workers'} + 1;
        my $mpirun_cmd = 'mpirun -n ' . $number_of_threads . ' mpibuildcol "' . $recipe_path . '"';
        print $out "Running command: " . $mpirun_cmd . "\n";
        print `$mpirun_cmd`;

        # clean up recipe
        unlink($recipe_path);
    }
    else
    {
        # BUGFIX: was "$self::SUPER->build_collection(...)", which is not a
        # superclass method call (it dereferences the scalar $SUPER in
        # package 'self', i.e. undef, and crashes).
        $self->SUPER::build_collection($builders_ref);
    }
}
# build_collection()

# @function build_auxiliary_files
# Delegate auxiliary-file creation to the parent class unless debugging.
# NOTE(review): 'parallel' is never set by this class (the declared option is
# 'workers'), so the first test is presumably always true-negated — confirm
# whether this should instead check $self->{'workers'}.
sub build_auxiliary_files
{
    my $self = shift(@_);
    my ($builders_ref) = @_;
    if (!$self->{'parallel'} && !$self->{'debug'})
    {
        $self->SUPER::build_auxiliary_files($builders_ref);
    }
}
# build_auxiliary_files()

# @function print_recipe
# Recursively serialise a recipe item (and any dependent child steps) as
# <task>/<command> XML elements, appending the lines to the $xml_lines
# arrayref. Returns the largest number of sibling tasks found at any level,
# i.e. the maximum useful degree of parallelism for this subtree.
sub print_recipe
{
    my ($xml_lines, $item) = @_;
    my $max_parallel_tasks = 0;
    # start building up the command in our xml buffer
    push(@{$xml_lines}, '<task>');
    # Escape XML special characters in the command string.
    # BUGFIX: the entity replacements were mangled (s/&/&/g was a no-op and
    # s//>/g reused the previous pattern); restored proper XML escaping.
    my $command = $item->{'command'};
    $command =~ s/&/&amp;/g;
    $command =~ s/</&lt;/g;
    $command =~ s/>/&gt;/g;
    push(@{$xml_lines}, '<command>' . $command . '</command>');
    # - print children before closing task
    if (defined $item->{'children'})
    {
        $max_parallel_tasks = scalar(@{$item->{'children'}});
        foreach my $child_item (@{$item->{'children'}})
        {
            my $max_parallel_child_tasks = &print_recipe($xml_lines, $child_item);
            if ($max_parallel_child_tasks > $max_parallel_tasks)
            {
                $max_parallel_tasks = $max_parallel_child_tasks;
            }
        }
    }
    # - now we can close the task having printed nested children
    push(@{$xml_lines}, '</task>');
    # done
    return $max_parallel_tasks;
}
# print_recipe()

# Modules must end with a true value.
1;