source: main/trunk/greenstone2/bin/script/buildcol.pl@ 27351

Last change on this file since 27351 was 27305, checked in by jmt12, 11 years ago

Add code to allow importing and building to load overriding versions of inexport.pm and buildcolutils.pm from extensions at runtime. When an extension provides a possible override, Greenstone will dynamically detect and add additional options (visible in the --help). When a user specifies one of these options the appropriate inexport/buildcolutils subclass will be loaded

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.1 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl --
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29# This program will build a particular collection.
30package buildcol;
31
32# Environment
33BEGIN
34{
35 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
36 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
37 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib');
38 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan');
39 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan/XML/XPath');
40 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/plugins');
41 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/classify');
42
43 if (defined $ENV{'GSDL-RUN-SETUP'})
44 {
45 require util;
46 &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
47 }
48
49 if (defined $ENV{'GSDLEXTS'})
50 {
51 my @extensions = split(/:/, $ENV{'GSDLEXTS'});
52 foreach my $e (@extensions)
53 {
54 my $ext_prefix = $ENV{'GSDLHOME'} . '/ext/' . $e;
55
56 unshift(@INC, $ext_prefix . '/perllib');
57 unshift(@INC, $ext_prefix . '/perllib/cpan');
58 unshift(@INC, $ext_prefix . '/perllib/plugins');
59 unshift(@INC, $ext_prefix . '/perllib/classify');
60 }
61 }
62 if (defined $ENV{'GSDL3EXTS'})
63 {
64 my @extensions = split(/:/, $ENV{'GSDL3EXTS'});
65 foreach my $e (@extensions)
66 {
67 my $ext_prefix = $ENV{'GSDL3SRCHOME'} . '/ext/' . $e;
68
69 unshift(@INC, $ext_prefix . '/perllib');
70 unshift(@INC, $ext_prefix . '/perllib/cpan');
71 unshift(@INC, $ext_prefix . '/perllib/plugins');
72 unshift(@INC, $ext_prefix . '/perllib/classify');
73 }
74 }
75}
76
77# Pragma
78use strict;
79no strict 'refs'; # allow filehandles to be variables and vice versa
80no strict 'subs'; # allow barewords (eg STDERR) as function arguments
81
82# Modules
83use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
84
85# Greenstone Modules
86use buildcolutils;
87use FileUtils;
88use util;
89
90# Globals
91# - build up arguments list/control
92my $mode_list =
93 [ { 'name' => "all",
94 'desc' => "{buildcol.mode.all}" },
95 { 'name' => "compress_text",
96 'desc' => "{buildcol.mode.compress_text}" },
97 { 'name' => "build_index",
98 'desc' => "{buildcol.mode.build_index}" },
99 { 'name' => "infodb",
100 'desc' => "{buildcol.mode.infodb}" } ];
101
102my $sec_index_list =
103 [ {'name' => "never",
104 'desc' => "{buildcol.sections_index_document_metadata.never}" },
105 {'name' => "always",
106 'desc' => "{buildcol.sections_index_document_metadata.always}" },
107 {'name' => "unless_section_metadata_exists",
108 'desc' => "{buildcol.sections_index_document_metadata.unless_section_metadata_exists}" }
109 ];
110
111my $arguments =
112 [ { 'name' => "remove_empty_classifications",
113 'desc' => "{buildcol.remove_empty_classifications}",
114 'type' => "flag",
115 'reqd' => "no",
116 'modegli' => "2" },
117 { 'name' => "archivedir",
118 'desc' => "{buildcol.archivedir}",
119 'type' => "string",
120 'reqd' => "no",
121 'hiddengli' => "yes" },
122 { 'name' => "builddir",
123 'desc' => "{buildcol.builddir}",
124 'type' => "string",
125 'reqd' => "no",
126 'hiddengli' => "yes" },
127# { 'name' => "cachedir",
128# 'desc' => "{buildcol.cachedir}",
129# 'type' => "string",
130# 'reqd' => "no" },
131 { 'name' => "collectdir",
132 'desc' => "{buildcol.collectdir}",
133 'type' => "string",
134 # parsearg left "" as default
135 #'deft' => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
136 'reqd' => "no",
137 'hiddengli' => "yes" },
138 { 'name' => "site",
139 'desc' => "{buildcol.site}",
140 'type' => "string",
141 'deft' => "",
142 'reqd' => "no",
143 'hiddengli' => "yes" },
144 { 'name' => "debug",
145 'desc' => "{buildcol.debug}",
146 'type' => "flag",
147 'reqd' => "no",
148 'hiddengli' => "yes" },
149 { 'name' => "faillog",
150 'desc' => "{buildcol.faillog}",
151 'type' => "string",
152 # parsearg left "" as default
153 #'deft' => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
154 'reqd' => "no",
155 'modegli' => "3" },
156 { 'name' => "index",
157 'desc' => "{buildcol.index}",
158 'type' => "string",
159 'reqd' => "no",
160 'modegli' => "3" },
161 { 'name' => "incremental",
162 'desc' => "{buildcol.incremental}",
163 'type' => "flag",
164 'hiddengli' => "yes" },
165 { 'name' => "keepold",
166 'desc' => "{buildcol.keepold}",
167 'type' => "flag",
168 'reqd' => "no",
169 #'modegli' => "3",
170 'hiddengli' => "yes" },
171 { 'name' => "removeold",
172 'desc' => "{buildcol.removeold}",
173 'type' => "flag",
174 'reqd' => "no",
175 #'modegli' => "3",
176 'hiddengli' => "yes" },
177 { 'name' => "language",
178 'desc' => "{scripts.language}",
179 'type' => "string",
180 'reqd' => "no",
181 'modegli' => "3" },
182 { 'name' => "maxdocs",
183 'desc' => "{buildcol.maxdocs}",
184 'type' => "int",
185 'reqd' => "no",
186 'hiddengli' => "yes" },
187 { 'name' => "maxnumeric",
188 'desc' => "{buildcol.maxnumeric}",
189 'type' => "int",
190 'reqd' => "no",
191 'deft' => "4",
192 'range' => "4,512",
193 'modegli' => "3" },
194 { 'name' => "mode",
195 'desc' => "{buildcol.mode}",
196 'type' => "enum",
197 'list' => $mode_list,
198 # parsearg left "" as default
199# 'deft' => "all",
200 'reqd' => "no",
201 'modegli' => "3" },
202 { 'name' => "no_strip_html",
203 'desc' => "{buildcol.no_strip_html}",
204 'type' => "flag",
205 'reqd' => "no",
206 'modegli' => "3" },
207 { 'name' => "store_metadata_coverage",
208 'desc' => "{buildcol.store_metadata_coverage}",
209 'type' => "flag",
210 'reqd' => "no",
211 'modegli' => "3" },
212 { 'name' => "no_text",
213 'desc' => "{buildcol.no_text}",
214 'type' => "flag",
215 'reqd' => "no",
216 'modegli' => "2" },
217 { 'name' => "sections_index_document_metadata",
218 'desc' => "{buildcol.sections_index_document_metadata}",
219 'type' => "enum",
220 'list' => $sec_index_list,
221 'reqd' => "no",
222 'modegli' => "2" },
223 { 'name' => "out",
224 'desc' => "{buildcol.out}",
225 'type' => "string",
226 'deft' => "STDERR",
227 'reqd' => "no",
228 'hiddengli' => "yes" },
229 { 'name' => "verbosity",
230 'desc' => "{buildcol.verbosity}",
231 'type' => "int",
232 # parsearg left "" as default
233 #'deft' => "2",
234 'reqd' => "no",
235 'modegli' => "3" },
236 { 'name' => "gli",
237 'desc' => "",
238 'type' => "flag",
239 'reqd' => "no",
240 'hiddengli' => "yes" },
241 { 'name' => "xml",
242 'desc' => "{scripts.xml}",
243 'type' => "flag",
244 'reqd' => "no",
245 'hiddengli' => "yes" },
246 { 'name' => "activate",
247 'desc' => "{buildcol.activate}",
248 'type' => "flag",
249 'reqd' => "no",
250 'hiddengli' => "yes" },
251 { 'name' => "indexname",
252 'desc' => "{buildcol.index}",
253 'type' => "string",
254 'reqd' => "no",
255 'modegli' => "3" },
256 { 'name' => "indexlevel",
257 'desc' => "{buildcol.indexlevel}",
258 'type' => "string",
259 'reqd' => "no",
260 'modegli' => "3" },
261 ];
262
263my $options = { 'name' => "buildcol.pl",
264 'desc' => "{buildcol.desc}",
265 'args' => $arguments };
266
267# The hash maps between argument and the buildcolutils subclass supporting that
268# argument - allowing for extensions to override the normal buildcolutils as
269# necessary
270my $function_to_subclass_mappings = {};
271
272# Lets get the party rolling... or ball started... hmmm
273&main();
274
275exit;
276
277sub main
278{
279 # Dynamically include arguments from any subclasses of buildcolutils we find
280 # in the extensions directory
281 if (defined $ENV{'GSDLEXTS'})
282 {
283 &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
284 }
285 if (defined $ENV{'GSDL3EXTS'})
286 {
287 &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
288 }
289
290 # Loop through arguments, checking to see if any depend on a specific
291 # subclass of buildcolutils. Note that we load the first subclass we
292 # encounter so only support a single 'override' ATM.
293 my $subclass;
294 foreach my $argument (@ARGV)
295 {
296 # proper arguments start with a hyphen
297 if ($argument =~ /^-/ && defined $function_to_subclass_mappings->{$argument})
298 {
299 my $required_subclass = $function_to_subclass_mappings->{$argument};
300 if (!defined $subclass)
301 {
302 $subclass = $required_subclass;
303 }
304 # Oh noes! The user has included specific arguments from two different
305 # subclasses... this isn't supported
306 elsif ($subclass ne $required_subclass)
307 {
308 print STDERR "Error! You cannot specify arguments from two different extention specific buildcolutils modules: " . $subclass . " != " . $required_subclass . "\n";
309 exit;
310 }
311 }
312 }
313
314 my $buildcolutils;
315 if (defined $subclass)
316 {
317 print "* Loading overriding buildcolutils module: " . $subclass . "\n";
318 require $subclass . '.pm';
319 $buildcolutils = new $subclass(\@ARGV, $options);
320 }
321 # We don't have a overridden buildcolutils, or the above command failed
322 # somehow so load the base class
323 if (!defined $buildcolutils)
324 {
325 $buildcolutils = new buildcolutils(\@ARGV, $options);
326 }
327
328 my $collection = $buildcolutils->get_collection();
329 if (defined $collection)
330 {
331 my ($config_filename,$collect_cfg) = $buildcolutils->read_collection_cfg($collection, $options);
332 $buildcolutils->set_collection_options($collect_cfg);
333
334 my $builders_ref = $buildcolutils->prepare_builders($config_filename, $collect_cfg);
335 $buildcolutils->build_collection($builders_ref);
336 $buildcolutils->build_auxiliary_files($builders_ref);
337 $buildcolutils->complete_builders($builders_ref);
338
339 # The user may have requested the collection be activated
340 $buildcolutils->activate_collection();
341 }
342
343 # Cleanup
344 $buildcolutils->deinit();
345}
346# main()
347
348# @function _scanForSubclasses()
349# @param $dir The extension directory to look within
350# @param $exts A list of the available extensions (as a colon separated string)
351# @return The number of subclasses of buildcolutils found as an Integer
352sub _scanForSubclasses
353{
354 my ($dir, $exts) = @_;
355 my $class_count = 0;
356 my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
357 my @extensions = split(/:/, $exts);
358 foreach my $e (@extensions)
359 {
360 # - any subclass must be prefixed with the name of the ext
361 my $package_name = $e . 'buildcolutils';
362 $package_name =~ s/[^a-z]//gi; # package names have limited characters
363 my $file_name = $package_name . '.pm';
364 my $file_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $file_name);
365 # see if we have a subclass lurking in that extension folder
366 if (&FileUtils::fileExists($file_path))
367 {
368 # - note we load the filename (with pm) unlike normal modules
369 require $file_name;
370 # - make call to the newly created package
371 my $symbol = qualify('getSupportedArguments', $package_name);
372 # - strict prevents strings being used as function calls, so temporarily
373 # disable that pragma
374 no strict;
375 # - lets check that the function we are about to call actually exists
376 if ( defined &{$symbol} )
377 {
378 my $extra_arguments = &{$symbol}();
379 foreach my $argument (@{$extra_arguments})
380 {
381 # - record a mapping from each extra arguments to the subclass
382 # that supports it. We put the hyphen on here to make comparing
383 # with command line arguments even easier
384 $function_to_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
385 # - and them add them as acceptable arguments to import.pl
386 push(@{$options->{'args'}}, $argument);
387 }
388 $class_count++;
389 }
390 else
391 {
392 print "Warning! A subclass of buildcolutils module (named '" . $file_name . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $file_path . "\n";
393 }
394 }
395 }
396 return $class_count;
397}
398# _scanForSubclasses()
Note: See TracBrowser for help on using the repository browser.