###########################################################################
#
# parallelbuildingbuildcolutils.pm --
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################
# search for: parallel indexname indexlevel
package parallelbuildingbuildcolutils;

# Pragma
use strict;
no strict 'refs'; # allow filehandles to be variables and vice versa
no strict 'subs'; # allow barewords (eg STDERR) as function arguments

# Greenstone Modules
use buildcolutils;
use gsprintf;

# Establish inheritance at compile time: this class extends the standard
# (serial) buildcolutils collection-building driver.
BEGIN
{
@parallelbuildingbuildcolutils::ISA = ('buildcolutils');
}

# Parallel Build Customization
# Extra buildcol.pl arguments recognized only by this subclass (exposed via
# getSupportedArguments()). 'workers' is the number of worker threads to
# spawn when parallel processing; 0 means build serially.
my $arguments = [
{ 'name' => "workers",
'desc' => "**Parallel Processing** The number of 'worker' threads to spawn when parallel processing",
'type' => "int",
'range' => "0,", # zero or more; no upper bound
'reqd' => "no",
'hiddengli' => "yes" } # not shown in the GLI interface
];
## @method new()
#
## @function new
#
# Construct a parallelbuildingbuildcolutils instance by delegating to the
# parent buildcolutils constructor and reblessing the result into this
# class.
#
# @param $class the class name to bless into
# @param @_     remaining arguments are passed verbatim to the parent
#               buildcolutils constructor
# @return a blessed parallelbuildingbuildcolutils instance
sub new
{
    my $class = shift(@_);
    # Direct method-call syntax rather than the ambiguous indirect object
    # form ("new buildcolutils(...)"), which Perl can misparse.
    my $self = buildcolutils->new(@_);
    # Sanity checks
    return bless($self, $class);
}
## new()
# @function getSupportedArguments
# Retrieve the list of arguments that are specific to this subclass of
# buildcolutils so they can be added to the list of supported arguments to
# buildcol.pl. The use of any of these arguments automatically causes this
# subclass to be instantiated and used in preference to the parent class.
# ATM it is up to the implementer to ensure these arguments are unique between
# subclasses
## @function getSupportedArguments
#
# Retrieve the argument descriptors specific to this subclass so they can
# be merged into buildcol.pl's supported-argument list. Use of any of
# these arguments automatically causes this subclass to be instantiated
# in preference to the parent class; it is up to the implementer to keep
# the argument names unique between subclasses.
#
# @return a reference to the file-scoped argument descriptor array
sub getSupportedArguments
{
    return $arguments;
}
# getSupportedArguments()
# @function set_collection_options
#
## @function set_collection_options
#
# Apply the collection configuration via the parent class, then adjust the
# options that interact with parallel building: SQLite info databases do
# not support concurrent writers, and the buildtype must name the parallel
# variant of the indexer so the parallel builder classes are loaded.
#
# @param $collectcfg hashref of parsed collection configuration
sub set_collection_options
{
    my $self = shift @_;
    my ($collectcfg) = @_;
    $self->SUPER::set_collection_options($collectcfg);
    # Sanity tests: a parallel build against SQLite would have multiple
    # worker processes writing the same database file - force serial.
    if ($collectcfg->{'infodbtype'} eq 'sqlite' && $self->{'workers'} > 0)
    {
        print STDERR "WARNING: Parallel builds not currently supported by SQLite - reverting to serial build\n";
        $self->{'workers'} = 0;
    }
    # Add parallel building prefix to requested buildertype as necessary.
    # The prefix must match the guard regex above it ('parallelbuilding',
    # matching this module's own naming convention); the previous bare
    # 'parallel' prefix never satisfied the guard, so it would have been
    # re-applied on any repeat call.
    if ($self->{'buildtype'} !~ /^parallelbuilding/)
    {
        print STDERR "WARNING: using parallel processing version of indexer: " . $self->{'buildtype'} . "\n";
        $self->{'buildtype'} = 'parallelbuilding' . $self->{'buildtype'};
    }
}
## set_collection_options()
# set_collection_options()
# @function build_collection()
# Parallel Building Support
# - if parallel building is requested then we subvert the normal 'all' mode
# process, insert attempting to create an XML 'recipe' for building this
# collection. We then pass this recipe to an Open MPI augmented compiled
# executable (which will in turn make multiple calls back to buildcol.pl
# according to the instructions in the recipe)!
## @function build_collection
#
# Parallel Building Support
# - if parallel building is requested then we subvert the normal 'all' mode
#   process, instead attempting to create an XML 'recipe' for building this
#   collection. We then pass this recipe to an Open MPI augmented compiled
#   executable (which will in turn make multiple calls back to buildcol.pl
#   according to the instructions in the recipe). With no workers we simply
#   fall through to the serial implementation in the parent class.
#
# @param $builders_ref arrayref of builder objects for this collection
sub build_collection
{
    my $self = shift(@_);
    my $builders_ref = shift(@_);
    my $out = $self->{'out'};
    if ($self->{'workers'} > 0)
    {
        print $out "*** parallel building\n";
        # Some infodb modes (namely GDBMServer at the moment) need to open the
        # connection to the database in such a way that it persists over the
        # child threads. We do this by adding a dummy call to build the file
        # path to archiveinf-doc as it is the database in question. The '1' at
        # the end means launch the server... it will then persist until this
        # block passes out of scope (presumably after all the child mpi
        # processes are done).
        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-doc", $self->{'archivedir'}, 1);
        # We initially create the recipe as a datastructure to make it easier
        # for each builder to determine what has already been defined.
        # - each step of the recipe has a command string and a (possibly
        #   empty) array of steps that depend on this step (possibly
        #   recursive).
        print $out "Generating indexing 'recipe'\n";
        my $recipe = [];
        # Pass the recipe to each builder to have it populated with the
        # appropriate commands (plain foreach; the previous map-in-void-
        # context form discarded its results anyway).
        foreach my $builder (@{$builders_ref})
        {
            $builder->prepareIndexRecipe($self->{'collection'}, $recipe);
        }
        # Now write the recipe to an XML file, tracking the widest fan-out of
        # the dependency tree (the maximum number of parallelizable tasks).
        # TODO(review): confirm the document element name matches what the
        # mpibuildcol executable expects when parsing the recipe.
        my $max_parallel_tasks = scalar(@{$recipe});
        my $xml_lines = [];
        push(@{$xml_lines}, '<?xml version="1.0" encoding="UTF-8"?>');
        push(@{$xml_lines}, '<recipe>');
        foreach my $item (@{$recipe})
        {
            my $max_parallel_child_tasks = &print_recipe($xml_lines, $item);
            if ($max_parallel_child_tasks > $max_parallel_tasks)
            {
                $max_parallel_tasks = $max_parallel_child_tasks;
            }
        }
        push(@{$xml_lines}, '</recipe>');
        my $recipe_path = &util::get_tmp_filename('.xml');
        # Lexical filehandle instead of the old bareword XMLOUT; close is
        # checked because buffered write errors only surface at close time.
        open(my $xml_out, ">:utf8", $recipe_path) or die("Error! Failed to open recipe file for writing: " . $recipe_path . "\nReason: " . $!);
        print {$xml_out} join("\n", @{$xml_lines});
        close($xml_out) or die("Error! Failed to write recipe file: " . $recipe_path . "\nReason: " . $!);
        # Call mpibuildcol executable using mpirun and passing path to recipe.
        # workers + 1 because rank 0 acts as the coordinating master process.
        my $number_of_threads = $self->{'workers'} + 1;
        my $mpirun_cmd = 'mpirun -n ' . $number_of_threads . ' mpibuildcol "' . $recipe_path . '"';
        print $out "Running command: " . $mpirun_cmd . "\n";
        print `$mpirun_cmd`;
        # Clean up the temporary recipe file.
        unlink($recipe_path);
    }
    else
    {
        # Serial build: delegate to the parent implementation. Note the fixed
        # call syntax - the previous "$self::SUPER->build_collection(...)"
        # treated SUPER as a scalar in package 'self' and died at runtime.
        $self->SUPER::build_collection($builders_ref);
    }
}
## build_collection()
# build_collection()
# @function build_auxiliary_files
#
## @function build_auxiliary_files
#
# Build the auxiliary collection files, but only for a serial, non-debug
# run; parallel and debug builds skip this step entirely.
#
# @param $builders_ref arrayref of builder objects for this collection
sub build_auxiliary_files
{
    my ($self, $builders_ref) = @_;
    # Guard clause: nothing to do for parallel or debug builds.
    return if $self->{'parallel'} || $self->{'debug'};
    $self->SUPER::build_auxiliary_files($builders_ref);
}
## build_auxiliary_files()
# build_auxiliary_files()
# @function print_recipe
#
## @function print_recipe
#
# Recursively serialize one recipe step (and the child steps that depend on
# it) as nested XML task elements, appending the lines to the supplied
# buffer. Returns the widest fan-out found anywhere in this subtree, i.e.
# the maximum number of tasks that could run in parallel.
#
# Note: the previous revision's substitutions had lost their replacement
# text (leaving "s/</g;", a syntax error); the standard XML entity escapes
# are restored here, ampersand first so it doesn't re-escape the entities
# introduced for < and >.
#
# @param $xml_lines arrayref buffer of XML lines to append to
# @param $item      hashref with a 'command' string and an optional
#                   'children' arrayref of further recipe steps
# @return the maximum number of parallelizable tasks in this subtree
sub print_recipe
{
    my ($xml_lines, $item) = @_;
    my $max_parallel_tasks = 0;
    # Start building up the command in our xml buffer.
    # TODO(review): confirm the element names below match what the
    # mpibuildcol executable expects when parsing the recipe.
    push(@{$xml_lines}, '<task>');
    my $command = $item->{'command'};
    $command =~ s/&/&amp;/g;
    $command =~ s/</&lt;/g;
    $command =~ s/>/&gt;/g;
    push(@{$xml_lines}, '<command>' . $command . '</command>');
    # - print children (nested inside this task) before closing the task,
    #   tracking the widest fan-out seen in the subtree.
    if (defined $item->{'children'})
    {
        $max_parallel_tasks = scalar(@{$item->{'children'}});
        foreach my $child_item (@{$item->{'children'}})
        {
            my $max_parallel_child_tasks = &print_recipe($xml_lines, $child_item);
            if ($max_parallel_child_tasks > $max_parallel_tasks)
            {
                $max_parallel_tasks = $max_parallel_child_tasks;
            }
        }
    }
    # - now we can close the task having printed nested children.
    push(@{$xml_lines}, '</task>');
    # done
    return $max_parallel_tasks;
}
## print_recipe()
# print_recipe()