source: gs2-extensions/parallel-building/trunk/src/perllib/parallelbuildingbuildcolutils.pm@ 27280

Last change on this file since 27280 was 27280, checked in by jmt12, 11 years ago

A subclass of buildcolutils.pm with added functionality for parallel processing using OpenMPI

File size: 7.3 KB
Line 
1###########################################################################
2#
3# parallelbuildingbuildcolutils.pm --
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27# search for: parallel indexname indexlevel
28
29package parallelbuildingbuildcolutils;
30
31# Pragma
32use strict;
33no strict 'refs'; # allow filehandles to be variables and vice versa
34no strict 'subs'; # allow barewords (eg STDERR) as function arguments
35
36# Greenstone Modules
37use buildcolutils;
38use gsprintf;
39
# Register buildcolutils as the parent class. Doing this inside BEGIN means
# the inheritance chain is already in place while the rest of this file is
# being compiled.
BEGIN
{
  @parallelbuildingbuildcolutils::ISA = qw(buildcolutils);
}

# Parallel Build Customization
# Definitions of the extra command line arguments contributed by this
# subclass; each entry is a hash describing a single argument.
my $arguments =
  [
    {
      'name'      => 'workers',
      'desc'      => "**Parallel Processing** The number of 'worker' threads to spawn when parallel processing",
      'type'      => 'int',
      'range'     => '0,',
      'reqd'      => 'no',
      'hiddengli' => 'yes',
    },
  ];
54
## @method new()
# Constructor. Object creation is delegated to the parent buildcolutils
# constructor, with every remaining argument forwarded untouched; the result
# is then re-blessed into the requested class.
#
# @param  $class  the class name the new object is blessed into
# @param  ...     arguments handed straight through to buildcolutils->new()
# @return the object produced by the parent constructor, blessed into $class
#
sub new
{
  my $class = shift(@_);

  # Use an explicit class-method call rather than indirect object syntax
  # ("new buildcolutils(...)"). The indirect form is resolved heuristically
  # by the parser and can be mistaken for a plain function call - a real
  # risk here, where a sub called 'new' lives in this package and
  # 'strict subs' has been switched off.
  my $self = buildcolutils->new(@_);

  # Sanity checks (none implemented)

  return bless($self, $class);
}
66
# @function getSupportedArguments
# Hand back the argument definitions that belong specifically to this
# subclass of buildcolutils, so that they can be merged into the list of
# arguments buildcol.pl supports. Using any one of these arguments
# automatically results in this subclass being instantiated and used in
# preference to the parent class. For now it is the implementer's job to
# make sure argument names do not clash between subclasses.
#
# @return reference to the array of argument definition hashes
#
sub getSupportedArguments
{
  my $subclass_arguments = $arguments;
  return $subclass_arguments;
}
# getSupportedArguments()
79
# @function set_collection_options
# Let the parent class apply the collection configuration, then adjust the
# outcome for parallel building:
#  - when the collection's infodb type is 'sqlite' and workers were
#    requested, warn and fall back to a serial build (workers reset to 0)
#  - make sure the build type carries the 'parallel' prefix, warning on
#    STDERR whenever the prefix has to be added
#
# @param  $collectcfg  reference to the collection configuration hash; only
#                      its 'infodbtype' entry is read directly by this method
#
sub set_collection_options
{
  my $self = shift(@_);
  my ($collectcfg) = @_;

  $self->SUPER::set_collection_options($collectcfg);

  # Sanity tests
  if ($collectcfg->{'infodbtype'} eq 'sqlite' && $self->{'workers'} > 0)
  {
    print STDERR "WARNING: Parallel builds not current supported by SQLite - reverting to serial build\n";
    $self->{'workers'} = 0;
  }

  # Add parallel building prefix to requested buildertype as necessary.
  # The guard tests for exactly the prefix that gets prepended, so a build
  # type that is already prefixed (whether 'parallel...' or the longer
  # 'parallelbuilding...') is left alone rather than being prefixed twice.
  my $parallel_prefix = 'parallel';
  if ($self->{'buildtype'} !~ /^\Q$parallel_prefix\E/)
  {
    print STDERR "WARNING: using parallel processing version of indexer: " . $self->{'buildtype'} . "\n";
    $self->{'buildtype'} = $parallel_prefix . $self->{'buildtype'};
  }
}
# set_collection_options()
104
# @function build_collection()
# Parallel Building Support
# - if parallel building is requested (workers > 0) then we subvert the
#   normal 'all' mode process, instead attempting to create an XML 'recipe'
#   for building this collection. We then pass this recipe to an Open MPI
#   augmented compiled executable (which will in turn make multiple calls
#   back to buildcol.pl according to the instructions in the recipe)!
# - otherwise the build is handed over, unchanged, to the parent class
#
# @param  $builders_ref  reference to an array of builder objects; in
#                        parallel mode prepareIndexRecipe() is called on each
#
# Dies if the temporary recipe file cannot be opened or completely written.
#
sub build_collection
{
  my $self = shift(@_);
  my $builders_ref = shift(@_);
  my $out = $self->{'out'};

  if ($self->{'workers'} > 0)
  {
    print $out "*** parallel building\n";
    # Some infodb modes (namely GDBMServer at the moment) need to open the
    # connection to the database in such a way that it persists over the
    # child threads. We do this by adding a dummy call to build the file path
    # to archiveinf-doc as it is the database in question. The '1' at the end
    # means launch the server... it will then persist until this block passes
    # out of scope (presumably after all the child mpi processes are done)
    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-doc", $self->{'archivedir'}, 1);

    # we initially create the recipe as a datastructure to make it easier for
    # each builder to determine what has already been defined
    # - each step of the recipe will have a command as a string and a (possibly
    #   empty) array of steps that depend on this step (possibly recursive)
    print $out "Generating indexing 'recipe'\n";
    my $recipe = [];
    # pass to each builder to have it populated with appropriate commands
    foreach my $builder (@{$builders_ref})
    {
      $builder->prepareIndexRecipe($self->{'collection'}, $recipe);
    }

    # now serialise the recipe as lines of XML
    my $xml_lines = [];
    push(@{$xml_lines}, '<?xml version="1.0" standalone="no" ?>');
    push(@{$xml_lines}, '<Recipe>');
    foreach my $item (@{$recipe})
    {
      # print_recipe() also returns a child task count, which is not
      # needed here
      print_recipe($xml_lines, $item);
    }
    push(@{$xml_lines}, '</Recipe>');

    # write the recipe out to a temporary file
    my $recipe_path = &util::get_tmp_filename('.xml');
    open(my $xml_fh, ">:utf8", $recipe_path) or die("Error! Failed to open recipe file for writing: " . $recipe_path . "\nReason: " . $!);
    print $xml_fh join("\n", @{$xml_lines});
    # buffered write errors only surface at close, and a truncated recipe
    # must never be handed on to mpirun
    close($xml_fh) or die("Error! Failed to write recipe file: " . $recipe_path . "\nReason: " . $!);

    # call mpibuildcol executable using mpirun and passing path to recipe
    # - one process more than the number of workers is requested
    #   (presumably the extra one coordinates the workers - TODO confirm
    #   against mpibuildcol)
    my $number_of_threads = $self->{'workers'} + 1;
    my $mpirun_cmd = 'mpirun -n ' . $number_of_threads . ' mpibuildcol "' . $recipe_path . '"';
    print $out "Running command: " . $mpirun_cmd . "\n";
    my @mpirun_output = `$mpirun_cmd`;
    # remember the status straight away, before anything else can change $?
    my $mpirun_status = $?;
    print @mpirun_output;
    if ($mpirun_status != 0)
    {
      print STDERR "WARNING: mpirun did not complete successfully (status: " . $mpirun_status . ")\n";
    }

    # clean up recipe
    unlink($recipe_path);
  }
  else
  {
    # serial build - nothing special to do, so defer to the parent class
    $self->SUPER::build_collection($builders_ref);
  }
}
# build_collection()
169
# @function build_auxiliary_files
# Pass the building of auxiliary files on to the parent class, but only
# when neither the 'parallel' nor the 'debug' entry of this object is set.
# When either one is set nothing is done at all.
#
# @param  $builders_ref  handed through unchanged to the parent class
#
sub build_auxiliary_files
{
  my ($self, $builders_ref) = @_;

  my $skip_auxiliary_files = ($self->{'parallel'} || $self->{'debug'});
  if (!$skip_auxiliary_files)
  {
    $self->SUPER::build_auxiliary_files($builders_ref);
  }
}
# build_auxiliary_files()
182
# @function print_recipe
# Serialise a single recipe step - and, recursively, every step nested
# beneath it - as <Task> elements appended to a buffer of XML lines. The
# nested steps are written inside their parent's <Task> element.
#
# @param  $xml_lines  reference to the array of XML lines being built up
# @param  $item       hash reference describing the step: 'command' holds
#                     the command string and the optional 'children' holds
#                     an array reference of nested steps
# @return the largest number of direct children found on any one step in
#         this subtree (0 when the step has no children)
#
sub print_recipe
{
  my ($xml_lines, $item) = @_;

  # escape the characters that have special meaning in XML element content
  my %entity_for = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
  my $escaped_command = $item->{'command'};
  $escaped_command =~ s/([&<>])/$entity_for{$1}/g;

  # open the task and record its command
  push(@{$xml_lines}, '<Task>', '<Command>' . $escaped_command . '</Command>');

  # - nested steps have to be written before this task is closed
  my $widest = 0;
  my $children = $item->{'children'};
  if (defined $children)
  {
    $widest = scalar(@{$children});
    for my $child (@{$children})
    {
      my $child_widest = print_recipe($xml_lines, $child);
      $widest = $child_widest if ($child_widest > $widest);
    }
  }

  # - everything nested has been written, so the task can be closed
  push(@{$xml_lines}, '</Task>');
  return $widest;
}
# print_recipe()
Note: See TracBrowser for help on using the repository browser.