Index: /gs2-extensions/parallel-building/trunk/src/perllib/parallelbuildingbuildcolutils.pm
===================================================================
--- /gs2-extensions/parallel-building/trunk/src/perllib/parallelbuildingbuildcolutils.pm (revision 27280)
+++ /gs2-extensions/parallel-building/trunk/src/perllib/parallelbuildingbuildcolutils.pm (revision 27280)
@@ -0,0 +1,217 @@
+###########################################################################
+#
+# parallelbuildingbuildcolutils.pm --
+#
+# A component of the Greenstone digital library software
+# from the New Zealand Digital Library Project at the
+# University of Waikato, New Zealand.
+#
+# Copyright (C) 1999 New Zealand Digital Library Project
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+###########################################################################
+
+# search for: parallel indexname indexlevel
+
+package parallelbuildingbuildcolutils;
+
+# Pragma
+use strict;
+no strict 'refs'; # allow filehandles to be variables and vice versa
+no strict 'subs'; # allow barewords (eg STDERR) as function arguments
+
+# Greenstone Modules
+use buildcolutils;
+use gsprintf;
+
BEGIN
{
  # Register buildcolutils as the parent class so any methods not
  # overridden here fall through to the standard (serial) build code
  @parallelbuildingbuildcolutils::ISA = ('buildcolutils');
}
+
# Parallel Build Customization
# Argument definitions specific to this subclass; merged into buildcol.pl's
# supported options via getSupportedArguments() below
my $arguments = [
  { 'name' => "workers",
    'desc' => "**Parallel Processing** The number of 'worker' threads to spawn when parallel processing",
    'type' => "int",
    'range' => "0,",            # zero or more; 0 disables parallel building
    'reqd' => "no",
    'hiddengli' => "yes" }      # not exposed in the GLI interface
  ];
+
+## @method new()
+#
## @method new()
#
# Construct a parallel-building aware buildcolutils.  All real construction
# work is delegated to the parent class; we then re-bless the result into
# this package so the overridden methods below take effect.
#
sub new
{
  my $class = shift(@_);
  # Direct method-call syntax rather than the indirect-object form
  # ("new buildcolutils") which is ambiguous to the Perl parser
  my $self = buildcolutils->new(@_);

  # Sanity checks

  return bless($self, $class);
}
+
+# @function getSupportedArguments
+# Retrieve the list of arguments that are specific to this subclass of
+# buildcolutils so they can be added to the list of supported arguments to
+# buildcol.pl. The use of any of these arguments automatically causes this
+# subclass to be instantiated and used in preference to the parent class.
+# ATM it is up to the implementer to ensure these arguments are unique between
+# subclasses
sub getSupportedArguments
{
  # Hand back this subclass's argument definitions (see $arguments above)
  # so buildcol.pl can merge them into its supported-options list
  return $arguments;
}
# getSupportedArguments()
+
+# @function set_collection_options
+#
# @function set_collection_options
# Apply the standard collection options, then adjust them for parallel
# building: SQLite can't cope with multiple writer processes, and the
# buildtype needs a 'parallel' prefix so the parallel-capable indexer
# implementation is selected.
sub set_collection_options
{
  my $self = shift @_;
  my ($collectcfg) = @_;

  $self->SUPER::set_collection_options($collectcfg);

  # Sanity tests
  # - SQLite offers no support for concurrent writers, so fall back to a
  #   serial build rather than risk a corrupted database
  if ($collectcfg->{'infodbtype'} eq 'sqlite' && $self->{'workers'} > 0)
  {
    print STDERR "WARNING: Parallel builds not currently supported by SQLite - reverting to serial build\n";
    $self->{'workers'} = 0;
  }

  # Add parallel building prefix to the requested buildtype as necessary.
  # Test against /^parallel/ - the prefix actually added below - so the
  # prefix can never be applied twice (the original tested for
  # /^parallelbuilding/, which no prefixed buildtype would ever match)
  if ($self->{'buildtype'} !~ /^parallel/)
  {
    print STDERR "WARNING: using parallel processing version of indexer: " . $self->{'buildtype'} . "\n";
    $self->{'buildtype'} = 'parallel' . $self->{'buildtype'};
  }
}
# set_collection_options()
+
# @function build_collection()
# Parallel Building Support
# - if parallel building is requested then we subvert the normal 'all' mode
#   process, instead attempting to create an XML 'recipe' for building this
#   collection. We then pass this recipe to an Open MPI augmented compiled
#   executable (which will in turn make multiple calls back to buildcol.pl
#   according to the instructions in the recipe)!
# Build the collection, either serially (delegating to the parent class) or,
# when one or more workers are configured, by generating an XML 'recipe' of
# indexing tasks and handing it to the MPI-enabled mpibuildcol executable.
# @param $builders_ref arrayref of builder objects (each must support
#        prepareIndexRecipe())
sub build_collection
{
  my $self = shift(@_);
  my $builders_ref = shift(@_);
  my $out = $self->{'out'};

  if ($self->{'workers'} > 0)
  {
    print $out "*** parallel building\n";
    # Some infodb modes (namely GDBMServer at the moment) need to open the
    # connection to the database in such a way that it persists over the
    # child threads. We do this by adding a dummy call to build the file path
    # to archiveinf-doc as it is the database in question. The '1' at the end
    # means launch the server... it will then persist until this block passes
    # out of scope (presumably after all the child mpi processes are done)
    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-doc", $self->{'archivedir'}, 1);

    # we initially create the recipe as a datastructure to make it easier for
    # each builder to determine what has already been defined
    # - each step of the recipe will have a command as a string and a
    #   (possibly empty) array of steps that depend on this step (recursive)
    print $out "Generating indexing 'recipe'\n";
    my $recipe = [];
    # pass to each builder to have it populated with appropriate commands
    # (a plain loop - the original used map in void context for side effects)
    foreach my $builder (@{$builders_ref})
    {
      $builder->prepareIndexRecipe($self->{'collection'}, $recipe);
    }
    # now write the recipe to an XML file
    # - $max_parallel_tasks tracks the widest fan-out at any recipe level
    my $max_parallel_tasks = scalar(@{$recipe});
    # NOTE(review): the literal XML markup below was destroyed by entity
    # stripping in the original source; the declaration and <recipe> tags
    # are reconstructions - confirm against the schema mpibuildcol expects.
    # Also: the original "my $xml_lines = ();" left the scalar undef and
    # relied on autovivification; initialise an arrayref explicitly.
    my $xml_lines = [];
    push(@{$xml_lines}, '<?xml version="1.0" encoding="UTF-8"?>');
    push(@{$xml_lines}, '<recipe>');
    foreach my $item (@{$recipe})
    {
      my $max_parallel_child_tasks = &print_recipe($xml_lines, $item);
      if ($max_parallel_child_tasks > $max_parallel_tasks)
      {
        $max_parallel_tasks = $max_parallel_child_tasks;
      }
    }
    push(@{$xml_lines}, '</recipe>');
    my $recipe_path = &util::get_tmp_filename('.xml');
    # lexical filehandle, and check close() too so buffered write failures
    # on the recipe file aren't silently ignored
    open(my $xml_out, ">:utf8", $recipe_path) or die("Error! Failed to open recipe file for writing: " . $recipe_path . "\nReason: " . $!);
    print $xml_out join("\n", @{$xml_lines});
    close($xml_out) or die("Error! Failed to write recipe file: " . $recipe_path . "\nReason: " . $!);
    # call mpibuildcol executable using mpirun and passing path to recipe
    # (workers + 1: one MPI rank acts as the coordinator)
    my $number_of_threads = $self->{'workers'} + 1;
    my $mpirun_cmd = 'mpirun -n ' . $number_of_threads . ' mpibuildcol "' . $recipe_path . '"';
    print $out "Running command: " . $mpirun_cmd . "\n";
    print `$mpirun_cmd`;
    # clean up recipe
    unlink($recipe_path);
  }
  else
  {
    # serial build - delegate to the parent implementation.  The original
    # "$self::SUPER->build_collection(...)" was a broken parent call (it
    # resolves "self::SUPER" as a package name); SUPER:: is the correct form
    $self->SUPER::build_collection($builders_ref);
  }
}
# build_collection()
+
+# @function build_auxiliary_files
+#
# Build the auxiliary collection files, but only for a normal serial build:
# parallel worker invocations and debug runs skip this step.
sub build_auxiliary_files
{
  my $self = shift(@_);
  my ($builders_ref) = @_;

  # NOTE(review): this tests $self->{'parallel'}, not the 'workers' option
  # declared above - presumably 'parallel' is set on the worker processes
  # spawned via mpibuildcol; confirm against buildcol.pl
  return if ($self->{'parallel'} || $self->{'debug'});

  $self->SUPER::build_auxiliary_files($builders_ref);
}
# build_auxiliary_files()
+
+# @function print_recipe
+#
# Serialize one recipe item (and, recursively, its children) as XML task
# elements appended to the shared line buffer.
# NOTE(review): the literal XML tags and entity replacements in the original
# source were destroyed by entity stripping; <task>/<command> and the
# &amp;/&lt;/&gt; escapes below are reconstructions - confirm against the
# parser in the mpibuildcol executable.
# @param $xml_lines arrayref of output lines (appended to in place)
# @param $item hashref with a 'command' string and an optional 'children'
#        arrayref of further items
# @return the widest fan-out (maximum sibling count) seen at any level at or
#         below this item; 0 for a leaf
sub print_recipe
{
  my ($xml_lines, $item) = @_;
  my $max_parallel_tasks = 0;

  # start building up the command in our xml buffer
  push(@{$xml_lines}, '<task>');
  my $command = $item->{'command'};
  # escape characters significant to XML - ampersand first, so the entities
  # introduced by the other substitutions aren't themselves double-escaped
  $command =~ s/&/&amp;/g;
  $command =~ s/</&lt;/g;
  $command =~ s/>/&gt;/g;
  push(@{$xml_lines}, '<command>' . $command . '</command>');

  # - print children before closing task (they depend on this step)
  if (defined $item->{'children'})
  {
    $max_parallel_tasks = scalar(@{$item->{'children'}});

    foreach my $child_item (@{$item->{'children'}})
    {
      my $max_parallel_child_tasks = &print_recipe($xml_lines, $child_item);
      if ($max_parallel_child_tasks > $max_parallel_tasks)
      {
        $max_parallel_tasks = $max_parallel_child_tasks;
      }
    }
  }
  # - now we can close the task having printed nested children
  push(@{$xml_lines}, '</task>');
  # done
  return $max_parallel_tasks;
}
# print_recipe()