source: main/trunk/greenstone2/bin/script/buildcol.pl@ 33171

Last change on this file since 33171 was 33171, checked in by wy59, 5 years ago

First part of commit: following Dr Bainbridge's suggestion for how best to determine what phase of building we're in from within doc.pm. Needs a package level variable, used existing coding pattern in the file that used the our keyword.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.0 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl --
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29# This program will build a particular collection.
30package buildcol;
31
32# Environment
33BEGIN
34{
35 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
36 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
37
38 # Order is important. With unshift want our XMLParser to be
39 # found ahead of XML/XPath
40
41 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan/XML/XPath');
42 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/classify');
43 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/plugins');
44 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan');
45 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib');
46
47 if (defined $ENV{'GSDL-RUN-SETUP'})
48 {
49 require util;
50 &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
51 }
52
53 if (defined $ENV{'GSDLEXTS'})
54 {
55 my @extensions = split(/:/, $ENV{'GSDLEXTS'});
56 foreach my $e (@extensions)
57 {
58 my $ext_prefix = $ENV{'GSDLHOME'} . '/ext/' . $e;
59
60 unshift(@INC, $ext_prefix . '/perllib');
61 unshift(@INC, $ext_prefix . '/perllib/cpan');
62 unshift(@INC, $ext_prefix . '/perllib/plugins');
63 unshift(@INC, $ext_prefix . '/perllib/classify');
64 }
65 }
66 if (defined $ENV{'GSDL3EXTS'})
67 {
68 my @extensions = split(/:/, $ENV{'GSDL3EXTS'});
69 foreach my $e (@extensions)
70 {
71 my $ext_prefix = $ENV{'GSDL3SRCHOME'} . '/ext/' . $e;
72
73 unshift(@INC, $ext_prefix . '/perllib');
74 unshift(@INC, $ext_prefix . '/perllib/cpan');
75 unshift(@INC, $ext_prefix . '/perllib/plugins');
76 unshift(@INC, $ext_prefix . '/perllib/classify');
77 }
78 }
79}
80
81# Pragma
82use strict;
83no strict 'refs'; # allow filehandles to be variables and vice versa
84no strict 'subs'; # allow barewords (eg STDERR) as function arguments
85
86# Modules
87use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
88
89# Greenstone Modules
90use buildcolutils;
91use FileUtils;
92use util;
93use doc; # for the variable our $cmd_line_mode ($doc::cmd_line_mode)
94
95# Globals
96# - build up arguments list/control
97my $mode_list =
98 [ { 'name' => "all",
99 'desc' => "{buildcol.mode.all}" },
100 { 'name' => "compress_text",
101 'desc' => "{buildcol.mode.compress_text}" },
102 { 'name' => "build_index",
103 'desc' => "{buildcol.mode.build_index}" },
104 { 'name' => "infodb",
105 'desc' => "{buildcol.mode.infodb}" },
106 { 'name' => "extra",
107 'desc' => "{buildcol.mode.extra}" } ];
108
109my $sec_index_list =
110 [ {'name' => "never",
111 'desc' => "{buildcol.sections_index_document_metadata.never}" },
112 {'name' => "always",
113 'desc' => "{buildcol.sections_index_document_metadata.always}" },
114 {'name' => "unless_section_metadata_exists",
115 'desc' => "{buildcol.sections_index_document_metadata.unless_section_metadata_exists}" }
116 ];
117
118my $arguments =
119 [ { 'name' => "remove_empty_classifications",
120 'desc' => "{buildcol.remove_empty_classifications}",
121 'type' => "flag",
122 'reqd' => "no",
123 'modegli' => "2" },
124 { 'name' => "archivedir",
125 'desc' => "{buildcol.archivedir}",
126 'type' => "string",
127 'reqd' => "no",
128 'hiddengli' => "yes" },
129 { 'name' => "builddir",
130 'desc' => "{buildcol.builddir}",
131 'type' => "string",
132 'reqd' => "no",
133 'hiddengli' => "yes" },
134# { 'name' => "cachedir",
135# 'desc' => "{buildcol.cachedir}",
136# 'type' => "string",
137# 'reqd' => "no" },
138 { 'name' => "collectdir",
139 'desc' => "{buildcol.collectdir}",
140 'type' => "string",
141 # parsearg left "" as default
142 #'deft' => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
143 'reqd' => "no",
144 'hiddengli' => "yes" },
145 { 'name' => "site",
146 'desc' => "{buildcol.site}",
147 'type' => "string",
148 'deft' => "",
149 'reqd' => "no",
150 'hiddengli' => "yes" },
151 { 'name' => "debug",
152 'desc' => "{buildcol.debug}",
153 'type' => "flag",
154 'reqd' => "no",
155 'hiddengli' => "yes" },
156 { 'name' => "faillog",
157 'desc' => "{buildcol.faillog}",
158 'type' => "string",
159 # parsearg left "" as default
160 #'deft' => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
161 'reqd' => "no",
162 'modegli' => "3" },
163 { 'name' => "index",
164 'desc' => "{buildcol.index}",
165 'type' => "string",
166 'reqd' => "no",
167 'modegli' => "3" },
168 { 'name' => "incremental",
169 'desc' => "{buildcol.incremental}",
170 'type' => "flag",
171 'hiddengli' => "yes" },
172 { 'name' => "keepold",
173 'desc' => "{buildcol.keepold}",
174 'type' => "flag",
175 'reqd' => "no",
176 #'modegli' => "3",
177 'hiddengli' => "yes" },
178 { 'name' => "removeold",
179 'desc' => "{buildcol.removeold}",
180 'type' => "flag",
181 'reqd' => "no",
182 #'modegli' => "3",
183 'hiddengli' => "yes" },
184 { 'name' => "language",
185 'desc' => "{scripts.language}",
186 'type' => "string",
187 'reqd' => "no",
188 'modegli' => "3" },
189 { 'name' => "maxdocs",
190 'desc' => "{buildcol.maxdocs}",
191 'type' => "int",
192 'reqd' => "no",
193 'hiddengli' => "yes" },
194 { 'name' => "maxnumeric",
195 'desc' => "{buildcol.maxnumeric}",
196 'type' => "int",
197 'reqd' => "no",
198 'deft' => "4",
199 'range' => "4,512",
200 'modegli' => "3" },
201 { 'name' => "mode",
202 'desc' => "{buildcol.mode}",
203 'type' => "enum",
204 'list' => $mode_list,
205 # parsearg left "" as default
206# 'deft' => "all",
207 'reqd' => "no",
208 'modegli' => "3" },
209 { 'name' => "no_strip_html",
210 'desc' => "{buildcol.no_strip_html}",
211 'type' => "flag",
212 'reqd' => "no",
213 'modegli' => "3" },
214 { 'name' => "store_metadata_coverage",
215 'desc' => "{buildcol.store_metadata_coverage}",
216 'type' => "flag",
217 'reqd' => "no",
218 'modegli' => "3" },
219 { 'name' => "no_text",
220 'desc' => "{buildcol.no_text}",
221 'type' => "flag",
222 'reqd' => "no",
223 'modegli' => "2" },
224 { 'name' => "sections_index_document_metadata",
225 'desc' => "{buildcol.sections_index_document_metadata}",
226 'type' => "enum",
227 'list' => $sec_index_list,
228 'reqd' => "no",
229 'modegli' => "2" },
230 { 'name' => "sections_sort_on_document_metadata",
231 'desc' => "{buildcol.sections_sort_on_document_metadata}",
232 'type' => "enum",
233 'list' => $sec_index_list,
234 'reqd' => "no",
235 'modegli' => "2" },
236 { 'name' => "out",
237 'desc' => "{buildcol.out}",
238 'type' => "string",
239 'deft' => "STDERR",
240 'reqd' => "no",
241 'hiddengli' => "yes" },
242 { 'name' => "verbosity",
243 'desc' => "{buildcol.verbosity}",
244 'type' => "int",
245 # parsearg left "" as default
246 #'deft' => "2",
247 'reqd' => "no",
248 'modegli' => "3" },
249 { 'name' => "gli",
250 'desc' => "",
251 'type' => "flag",
252 'reqd' => "no",
253 'hiddengli' => "yes" },
254 { 'name' => "xml",
255 'desc' => "{scripts.xml}",
256 'type' => "flag",
257 'reqd' => "no",
258 'hiddengli' => "yes" },
259 { 'name' => "activate",
260 'desc' => "{buildcol.activate}",
261 'type' => "flag",
262 'reqd' => "no",
263 'hiddengli' => "yes" },
264 { 'name' => "skipactivation",
265 'desc' => "{buildcol.skipactivation}",
266 'type' => "flag",
267 'reqd' => "no",
268 'hiddengli' => "yes" },
269 { 'name' => "library_url",
270 'desc' => "{buildcol.library_url}",
271 'type' => "string",
272 'reqd' => "no",
273 'hiddengli' => "yes" },
274 { 'name' => "library_name",
275 'desc' => "{buildcol.library_name}",
276 'type' => "string",
277 'reqd' => "no",
278 'hiddengli' => "yes" },
279 { 'name' => "indexname",
280 'desc' => "{buildcol.index}",
281 'type' => "string",
282 'reqd' => "no",
283 'modegli' => "3" },
284 { 'name' => "indexlevel",
285 'desc' => "{buildcol.indexlevel}",
286 'type' => "string",
287 'reqd' => "no",
288 'modegli' => "3" },
289 ];
290
291my $options = { 'name' => "buildcol.pl",
292 'desc' => "{buildcol.desc}",
293 'args' => $arguments };
294
295# The hash maps between argument and the buildcolutils subclass supporting that
296# argument - allowing for extensions to override the normal buildcolutils as
297# necessary
298my $function_to_subclass_mappings = {};
299
300# Lets get the party rolling... or ball started... hmmm
301&main();
302
303exit;
304
305sub main
306{
307 $doc::cmd_line_mode = "buildcol";
308
309 # Dynamically include arguments from any subclasses of buildcolutils we find
310 # in the extensions directory
311 if (defined $ENV{'GSDLEXTS'})
312 {
313 &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
314 }
315 if (defined $ENV{'GSDL3EXTS'})
316 {
317 &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
318 }
319
320 # Loop through arguments, checking to see if any depend on a specific
321 # subclass of buildcolutils. Note that we load the first subclass we
322 # encounter so only support a single 'override' ATM.
323 my $subclass;
324 foreach my $argument (@ARGV)
325 {
326 # proper arguments start with a hyphen
327 if ($argument =~ /^-/ && defined $function_to_subclass_mappings->{$argument})
328 {
329 my $required_subclass = $function_to_subclass_mappings->{$argument};
330 if (!defined $subclass)
331 {
332 $subclass = $required_subclass;
333 }
334 # Oh noes! The user has included specific arguments from two different
335 # subclasses... this isn't supported
336 elsif ($subclass ne $required_subclass)
337 {
338 print STDERR "Error! You cannot specify arguments from two different extension specific buildcolutils modules: " . $subclass . " != " . $required_subclass . "\n";
339 exit;
340 }
341 }
342 }
343
344 my $buildcolutils;
345 if (defined $subclass)
346 {
347 print "* Loading overriding buildcolutils module: " . $subclass . "\n";
348 require $subclass . '.pm';
349 $buildcolutils = new $subclass(\@ARGV, $options);
350 }
351 # We don't have an overridden buildcolutils, or the above command failed
352 # somehow so load the base class
353 if (!defined $buildcolutils)
354 {
355 $buildcolutils = new buildcolutils(\@ARGV, $options);
356 }
357
358 my $collection = $buildcolutils->get_collection();
359 if (defined $collection)
360 {
361 my ($config_filename,$collect_cfg) = $buildcolutils->read_collection_cfg($collection, $options);
362 $buildcolutils->set_collection_options($collect_cfg);
363
364 my $builders_ref = $buildcolutils->prepare_builders($config_filename, $collect_cfg);
365 $buildcolutils->build_collection($builders_ref);
366 $buildcolutils->build_auxiliary_files($builders_ref);
367 $buildcolutils->complete_builders($builders_ref);
368
369 # The user may have requested the collection be activated
370 $buildcolutils->activate_collection();
371 }
372
373 # Cleanup
374 $buildcolutils->deinit();
375}
376# main()
377
378# @function _scanForSubclasses()
379# @param $dir The extension directory to look within
380# @param $exts A list of the available extensions (as a colon separated string)
381# @return The number of subclasses of buildcolutils found as an Integer
382sub _scanForSubclasses
383{
384 my ($dir, $exts) = @_;
385 my $class_count = 0;
386 my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
387 my @extensions = split(/:/, $exts);
388 foreach my $e (@extensions)
389 {
390 # - any subclass must be prefixed with the name of the ext
391 my $package_name = $e . 'buildcolutils';
392 $package_name =~ s/[^a-z]//gi; # package names have limited characters
393 my $file_name = $package_name . '.pm';
394 my $file_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $file_name);
395 # see if we have a subclass lurking in that extension folder
396 if (&FileUtils::fileExists($file_path))
397 {
398 # - note we load the filename (with pm) unlike normal modules
399 require $file_name;
400 # - make call to the newly created package
401 my $symbol = qualify('getSupportedArguments', $package_name);
402 # - strict prevents strings being used as function calls, so temporarily
403 # disable that pragma
404 no strict;
405 # - lets check that the function we are about to call actually exists
406 if ( defined &{$symbol} )
407 {
408 my $extra_arguments = &{$symbol}();
409 foreach my $argument (@{$extra_arguments})
410 {
411 # - record a mapping from each extra arguments to the subclass
412 # that supports it. We put the hyphen on here to make comparing
413 # with command line arguments even easier
414 $function_to_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
415 # - and them add them as acceptable arguments to import.pl
416 push(@{$options->{'args'}}, $argument);
417 }
418 $class_count++;
419 }
420 else
421 {
422 print "Warning! A subclass of buildcolutils module (named '" . $file_name . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $file_path . "\n";
423 }
424 }
425 }
426 return $class_count;
427}
428# _scanForSubclasses()
Note: See TracBrowser for help on using the repository browser.