1 | ###########################################################################
|
---|
2 | #
|
---|
3 | # parallelbuildingbuildcolutils.pm --
|
---|
4 | #
|
---|
5 | # A component of the Greenstone digital library software
|
---|
6 | # from the New Zealand Digital Library Project at the
|
---|
7 | # University of Waikato, New Zealand.
|
---|
8 | #
|
---|
9 | # Copyright (C) 1999 New Zealand Digital Library Project
|
---|
10 | #
|
---|
11 | # This program is free software; you can redistribute it and/or modify
|
---|
12 | # it under the terms of the GNU General Public License as published by
|
---|
13 | # the Free Software Foundation; either version 2 of the License, or
|
---|
14 | # (at your option) any later version.
|
---|
15 | #
|
---|
16 | # This program is distributed in the hope that it will be useful,
|
---|
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
19 | # GNU General Public License for more details.
|
---|
20 | #
|
---|
21 | # You should have received a copy of the GNU General Public License
|
---|
22 | # along with this program; if not, write to the Free Software
|
---|
23 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
24 | #
|
---|
25 | ###########################################################################
|
---|
26 |
|
---|
27 | # search for: parallel indexname indexlevel
|
---|
28 |
|
---|
29 | package parallelbuildingbuildcolutils;
|
---|
30 |
|
---|
31 | # Pragma
|
---|
32 | use strict;
|
---|
33 | no strict 'refs'; # allow filehandles to be variables and vice versa
|
---|
34 | no strict 'subs'; # allow barewords (eg STDERR) as function arguments
|
---|
35 |
|
---|
36 | # Greenstone Modules
|
---|
37 | use buildcolutils;
|
---|
38 | use gsprintf;
|
---|
39 |
|
---|
# Declare buildcolutils as the parent class. The assignment sits in a BEGIN
# block so the inheritance is in place at compile time.
BEGIN
{
  @parallelbuildingbuildcolutils::ISA = qw(buildcolutils);
}
44 |
|
---|
# Parallel Build Customization
# The command line arguments that are specific to this subclass. Each entry
# is a hash describing one argument; the list is handed out unchanged by
# getSupportedArguments().
my $arguments =
  [
   {
     name      => "workers",
     desc      => "**Parallel Processing** The number of 'worker' threads to spawn when parallel processing",
     type      => "int",
     range     => "0,",
     reqd      => "no",
     hiddengli => "yes",
   },
  ];
54 |
|
---|
## @method new()
# Constructor. Creates a buildcolutils object from the remaining arguments
# and re-blesses it into the requested class.
# @param  $class  the class name the new object is blessed into
# @param  @_      every further argument is passed through unchanged to the
#                 buildcolutils constructor
# @return the object returned by buildcolutils, blessed into $class
#
sub new
{
  my $class = shift(@_);

  # Use the direct method call form. The indirect object form
  # ('new buildcolutils(@_)') leaves Perl guessing whether 'new' is a method
  # or a function, which is especially fragile in a package that declares its
  # own new().
  my $self = buildcolutils->new(@_);

  # Sanity checks (none at present)

  return bless($self, $class);
}
66 |
|
---|
# @function getSupportedArguments
# Hand back the arguments that only this subclass of buildcolutils
# understands, so that they can be appended to the arguments buildcol.pl
# supports. Using any one of them causes this subclass to be instantiated and
# used in preference to the parent class.
# ATM nothing enforces that argument names are unique between subclasses -
# that is left to the implementer.
# @return reference to the array of argument descriptions (the shared
#         file-level structure itself, not a copy)
sub getSupportedArguments
{
  my $supported_arguments = $arguments;
  return $supported_arguments;
}
# getSupportedArguments()
79 |
|
---|
# @function set_collection_options
# Lets the parent class apply the collection configuration, then adjusts the
# two settings parallel building cares about: the number of workers (forced
# back to 0 for SQLite) and the build type (given the 'parallel' prefix).
# @param  $collectcfg  reference to the collection configuration hash; it is
#                      passed to the parent class, and only its 'infodbtype'
#                      entry is read directly here
#
sub set_collection_options
{
  my $self = shift @_;
  my ($collectcfg) = @_;

  $self->SUPER::set_collection_options($collectcfg);

  # Sanity tests
  # - an absent infodbtype simply means this restriction does not apply
  my $infodbtype = $collectcfg->{'infodbtype'};
  if (defined $infodbtype && $infodbtype eq 'sqlite' && $self->{'workers'} > 0)
  {
    print STDERR "WARNING: Parallel builds not current supported by SQLite - reverting to serial build\n";
    $self->{'workers'} = 0;
  }

  # Add parallel building prefix to the requested buildertype as necessary.
  # The test has to match the prefix that is actually prepended below,
  # otherwise a build type that already carries it (e.g. when this is called
  # a second time) would be prefixed again.
  # NOTE(review): the prefix is added even when workers has just been reset
  # to 0 above - confirm that is intended.
  if ($self->{'buildtype'} !~ /^parallel/)
  {
    print STDERR "WARNING: using parallel processing version of indexer: " . $self->{'buildtype'} . "\n";
    $self->{'buildtype'} = 'parallel' . $self->{'buildtype'};
  }
}
# set_collection_options()
104 |
|
---|
# @function build_collection()
# Parallel Building Support
# - if parallel building is requested then we subvert the normal 'all' mode
#   process, instead attempting to create an XML 'recipe' for building this
#   collection. We then pass this recipe to an Open MPI augmented compiled
#   executable (which will in turn make multiple calls back to buildcol.pl
#   according to the instructions in the recipe)!
# - if no workers have been requested the parent class does the building
# @param  $builders_ref  reference to the array of builder objects; in the
#                        parallel case prepareIndexRecipe() is called on each
# Dies if the recipe file cannot be opened or completely written.
#
sub build_collection
{
  my $self = shift(@_);
  my $builders_ref = shift(@_);
  my $out = $self->{'out'};

  # Serial build: defer to the parent implementation. (This must be a SUPER::
  # method call on $self - '$self::SUPER' would be an unrelated, undefined
  # package variable.)
  unless ($self->{'workers'} > 0)
  {
    return $self->SUPER::build_collection($builders_ref);
  }

  print $out "*** parallel building\n";
  # Some infodb modes (namely GDBMServer at the moment) need to open the
  # connection to the database in such a way that it persists over the
  # child threads. We do this by adding a dummy call to build the file path
  # to archiveinf-doc as it is the database in question. The '1' at the end
  # means launch the server... it will then persist until this variable
  # passes out of scope (presumably after all the child mpi processes are
  # done). The returned path itself is not otherwise used.
  my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-doc", $self->{'archivedir'}, 1);

  # we initially create the recipe as a datastructure to make it easier for
  # each builder to determine what has already been defined
  # - each step of the recipe will have a command as a string and a (possibly
  #   empty) array of steps that depend on this step (possibly recursive)
  print $out "Generating indexing 'recipe'\n";
  my $recipe = [];
  # pass to each builder to have it populated with appropriate commands
  foreach my $builder (@{$builders_ref})
  {
    $builder->prepareIndexRecipe($self->{'collection'}, $recipe);
  }

  # now turn the recipe into the lines of an XML document
  my $xml_lines = [];
  push(@{$xml_lines}, '<?xml version="1.0" standalone="no" ?>');
  push(@{$xml_lines}, '<Recipe>');
  foreach my $item (@{$recipe})
  {
    # the task count print_recipe() returns is not needed here
    &print_recipe($xml_lines, $item);
  }
  push(@{$xml_lines}, '</Recipe>');

  # write the recipe to a temporary file
  my $recipe_path = &util::get_tmp_filename('.xml');
  open(my $xml_fh, ">:utf8", $recipe_path) or die("Error! Failed to open recipe file for writing: " . $recipe_path . "\nReason: " . $!);
  print $xml_fh join("\n", @{$xml_lines});
  # buffered write errors only surface on close, so check it
  close($xml_fh) or die("Error! Failed to write recipe file: " . $recipe_path . "\nReason: " . $!);

  # call mpibuildcol executable using mpirun and passing path to recipe
  # - one process per worker plus one more (presumably the coordinator)
  my $number_of_threads = $self->{'workers'} + 1;
  my $mpirun_cmd = 'mpirun -n ' . $number_of_threads . ' mpibuildcol "' . $recipe_path . '"';
  print $out "Running command: " . $mpirun_cmd . "\n";
  my @mpirun_output = `$mpirun_cmd`;
  my $mpirun_status = $?;
  print @mpirun_output;
  if ($mpirun_status != 0)
  {
    print $out "WARNING: mpirun command did not complete successfully (status: " . $mpirun_status . ")\n";
  }

  # clean up recipe
  unlink($recipe_path);
}
# build_collection()
169 |
|
---|
# @function build_auxiliary_files
# Passes the request on to the parent class implementation, but only when
# neither the 'parallel' nor the 'debug' flag is set on this object; when
# either one is set nothing is done.
# @param  $builders_ref  handed untouched to the parent implementation
#
sub build_auxiliary_files
{
  my ($self, $builders_ref) = @_;

  my $skip_auxiliary_files = ($self->{'parallel'} || $self->{'debug'});
  if (!$skip_auxiliary_files)
  {
    $self->SUPER::build_auxiliary_files($builders_ref);
  }
}
# build_auxiliary_files()
182 |
|
---|
# @function print_recipe
# Recursively serialises one step of the recipe, together with every step
# that depends on it, as nested <Task> elements appended to the XML buffer.
# @param  $xml_lines  reference to the array of XML lines; appended to
# @param  $item       hash reference describing the step: 'command' is the
#                     command string and the optional 'children' is a
#                     reference to an array of dependent steps, each of this
#                     same shape
# @return the largest number of children held by any single step at or below
#         this one (0 when this step has no children)
#
sub print_recipe
{
  my ($xml_lines, $item) = @_;
  my $max_parallel_tasks = 0;

  # start building up the command in our xml buffer
  push(@{$xml_lines}, '<Task>');
  my $command = $item->{'command'};
  $command = '' unless defined $command;
  # escape the characters that are special to XML - the ampersand has to go
  # first or the entities produced for '<' and '>' would be escaped again
  $command =~ s/&/&amp;/g;
  $command =~ s/</&lt;/g;
  $command =~ s/>/&gt;/g;
  push(@{$xml_lines}, '<Command>' . $command . '</Command>');

  # - print children before closing task
  if (defined $item->{'children'})
  {
    $max_parallel_tasks = scalar(@{$item->{'children'}});

    foreach my $child_item (@{$item->{'children'}})
    {
      my $max_parallel_child_tasks = &print_recipe($xml_lines, $child_item);
      if ($max_parallel_child_tasks > $max_parallel_tasks)
      {
        $max_parallel_tasks = $max_parallel_child_tasks;
      }
    }
  }
  # - now we can close the task having printed nested children
  push(@{$xml_lines}, '</Task>');
  # done
  return $max_parallel_tasks;
}
# print_recipe()