source: main/trunk/greenstone2/bin/script/buildcol.pl@ 35233

Last change on this file since 35233 was 35233, checked in by anupama, 3 years ago

Extending changes of commits 35213-35215 as they broke diffcol on Linux (yet strangely not on Windows): diffcol exports empty GSDL3SRCHOME (and GSDLHOME) variable. This ends up as defined but with no value. After commit revisions 35213-35215 the import, buildcol (and activate) scripts test whether these variables are defined (which they are even for GS2 when diffcol runs) and then sets the site variable to localsite. Need to test these variables aren't just defined but also not empty string before setting site to localsite, in order for diffcol testing, which runs on GS2 installs, to still work.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.5 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl --
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29# This program will build a particular collection.
30package buildcol;
31
32# Environment
33BEGIN
34{
35 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
36 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
37
38 # Order is important. With unshift want our XMLParser to be
39 # found ahead of XML/XPath
40
41 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan/XML/XPath');
42 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/classify');
43 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/plugins');
44 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan');
45 unshift (@INC, $ENV{'GSDLHOME'} . '/perllib');
46
47 if (defined $ENV{'GSDL-RUN-SETUP'})
48 {
49 require util;
50 &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
51 }
52
53 if (defined $ENV{'GSDLEXTS'})
54 {
55 my @extensions = split(/:/, $ENV{'GSDLEXTS'});
56 foreach my $e (@extensions)
57 {
58 my $ext_prefix = $ENV{'GSDLHOME'} . '/ext/' . $e;
59
60 unshift(@INC, $ext_prefix . '/perllib');
61 unshift(@INC, $ext_prefix . '/perllib/cpan');
62 unshift(@INC, $ext_prefix . '/perllib/plugins');
63 unshift(@INC, $ext_prefix . '/perllib/classify');
64 }
65 }
66 if (defined $ENV{'GSDL3EXTS'})
67 {
68 my @extensions = split(/:/, $ENV{'GSDL3EXTS'});
69 foreach my $e (@extensions)
70 {
71 my $ext_prefix = $ENV{'GSDL3SRCHOME'} . '/ext/' . $e;
72
73 unshift(@INC, $ext_prefix . '/perllib');
74 unshift(@INC, $ext_prefix . '/perllib/cpan');
75 unshift(@INC, $ext_prefix . '/perllib/plugins');
76 unshift(@INC, $ext_prefix . '/perllib/classify');
77 }
78 }
79}
80
81# Pragma
82use strict;
83no strict 'refs'; # allow filehandles to be variables and vice versa
84no strict 'subs'; # allow barewords (eg STDERR) as function arguments
85
86# Modules
87use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
88
89# Greenstone Modules
90use buildcolutils;
91use FileUtils;
92use util;
93use doc; # for the variable our $cmd_line_mode ($doc::cmd_line_mode)
94
95# Globals
96# - build up arguments list/control
97my $mode_list =
98 [ { 'name' => "all",
99 'desc' => "{buildcol.mode.all}" },
100 { 'name' => "compress_text",
101 'desc' => "{buildcol.mode.compress_text}" },
102 { 'name' => "build_index",
103 'desc' => "{buildcol.mode.build_index}" },
104 { 'name' => "infodb",
105 'desc' => "{buildcol.mode.infodb}" },
106 { 'name' => "extra",
107 'desc' => "{buildcol.mode.extra}" } ];
108
109my $sec_index_list =
110 [ {'name' => "never",
111 'desc' => "{buildcol.sections_index_document_metadata.never}" },
112 {'name' => "always",
113 'desc' => "{buildcol.sections_index_document_metadata.always}" },
114 {'name' => "unless_section_metadata_exists",
115 'desc' => "{buildcol.sections_index_document_metadata.unless_section_metadata_exists}" }
116 ];
117
118my $arguments =
119 [ { 'name' => "remove_empty_classifications",
120 'desc' => "{buildcol.remove_empty_classifications}",
121 'type' => "flag",
122 'reqd' => "no",
123 'modegli' => "2" },
124 { 'name' => "archivedir",
125 'desc' => "{buildcol.archivedir}",
126 'type' => "string",
127 'reqd' => "no",
128 'hiddengli' => "yes" },
129 { 'name' => "builddir",
130 'desc' => "{buildcol.builddir}",
131 'type' => "string",
132 'reqd' => "no",
133 'hiddengli' => "yes" },
134# { 'name' => "cachedir",
135# 'desc' => "{buildcol.cachedir}",
136# 'type' => "string",
137# 'reqd' => "no" },
138 { 'name' => "collectdir",
139 'desc' => "{buildcol.collectdir}",
140 'type' => "string",
141 # parsearg left "" as default
142 #'deft' => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
143 'reqd' => "no",
144 'hiddengli' => "yes" },
145 { 'name' => "site",
146 'desc' => "{buildcol.site}",
147 'type' => "string",
148 'deft' => "",
149 'reqd' => "no",
150 'hiddengli' => "yes" },
151 { 'name' => "debug",
152 'desc' => "{buildcol.debug}",
153 'type' => "flag",
154 'reqd' => "no",
155 'hiddengli' => "yes" },
156 { 'name' => "faillog",
157 'desc' => "{buildcol.faillog}",
158 'type' => "string",
159 # parsearg left "" as default
160 #'deft' => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
161 'reqd' => "no",
162 'modegli' => "3" },
163 { 'name' => "index",
164 'desc' => "{buildcol.index}",
165 'type' => "string",
166 'reqd' => "no",
167 'modegli' => "3" },
168 { 'name' => "incremental",
169 'desc' => "{buildcol.incremental}",
170 'type' => "flag",
171 'hiddengli' => "yes" },
172 { 'name' => "keepold",
173 'desc' => "{buildcol.keepold}",
174 'type' => "flag",
175 'reqd' => "no",
176 #'modegli' => "3",
177 'hiddengli' => "yes" },
178 { 'name' => "removeold",
179 'desc' => "{buildcol.removeold}",
180 'type' => "flag",
181 'reqd' => "no",
182 #'modegli' => "3",
183 'hiddengli' => "yes" },
184 { 'name' => "language",
185 'desc' => "{scripts.language}",
186 'type' => "string",
187 'reqd' => "no",
188 'modegli' => "3" },
189 { 'name' => "maxdocs",
190 'desc' => "{buildcol.maxdocs}",
191 'type' => "int",
192 'reqd' => "no",
193 'hiddengli' => "yes" },
194 { 'name' => "maxnumeric",
195 'desc' => "{buildcol.maxnumeric}",
196 'type' => "int",
197 'reqd' => "no",
198 'deft' => "4",
199 'range' => "4,512",
200 'modegli' => "3" },
201 { 'name' => "mode",
202 'desc' => "{buildcol.mode}",
203 'type' => "enum",
204 'list' => $mode_list,
205 # parsearg left "" as default
206# 'deft' => "all",
207 'reqd' => "no",
208 'modegli' => "3" },
209 { 'name' => "no_strip_html",
210 'desc' => "{buildcol.no_strip_html}",
211 'type' => "flag",
212 'reqd' => "no",
213 'modegli' => "3" },
214 { 'name' => "store_metadata_coverage",
215 'desc' => "{buildcol.store_metadata_coverage}",
216 'type' => "flag",
217 'reqd' => "no",
218 'modegli' => "3" },
219 { 'name' => "no_text",
220 'desc' => "{buildcol.no_text}",
221 'type' => "flag",
222 'reqd' => "no",
223 'modegli' => "2" },
224 { 'name' => "sections_index_document_metadata",
225 'desc' => "{buildcol.sections_index_document_metadata}",
226 'type' => "enum",
227 'list' => $sec_index_list,
228 'reqd' => "no",
229 'modegli' => "2" },
230 { 'name' => "sections_sort_on_document_metadata",
231 'desc' => "{buildcol.sections_sort_on_document_metadata}",
232 'type' => "enum",
233 'list' => $sec_index_list,
234 'reqd' => "no",
235 'modegli' => "2" },
236 { 'name' => "out",
237 'desc' => "{buildcol.out}",
238 'type' => "string",
239 'deft' => "STDERR",
240 'reqd' => "no",
241 'hiddengli' => "yes" },
242 { 'name' => "verbosity",
243 'desc' => "{buildcol.verbosity}",
244 'type' => "int",
245 # parsearg left "" as default
246 #'deft' => "2",
247 'reqd' => "no",
248 'modegli' => "3" },
249 { 'name' => "gli",
250 'desc' => "",
251 'type' => "flag",
252 'reqd' => "no",
253 'hiddengli' => "yes" },
254 { 'name' => "xml",
255 'desc' => "{scripts.xml}",
256 'type' => "flag",
257 'reqd' => "no",
258 'hiddengli' => "yes" },
259 { 'name' => "activate",
260 'desc' => "{buildcol.activate}",
261 'type' => "flag",
262 'reqd' => "no",
263 'hiddengli' => "yes" },
264 { 'name' => "skipactivation",
265 'desc' => "{buildcol.skipactivation}",
266 'type' => "flag",
267 'reqd' => "no",
268 'hiddengli' => "yes" },
269 { 'name' => "library_url",
270 'desc' => "{buildcol.library_url}",
271 'type' => "string",
272 'reqd' => "no",
273 'hiddengli' => "yes" },
274 { 'name' => "library_name",
275 'desc' => "{buildcol.library_name}",
276 'type' => "string",
277 'reqd' => "no",
278 'hiddengli' => "yes" },
279 { 'name' => "indexname",
280 'desc' => "{buildcol.index}",
281 'type' => "string",
282 'reqd' => "no",
283 'modegli' => "3" },
284 { 'name' => "indexlevel",
285 'desc' => "{buildcol.indexlevel}",
286 'type' => "string",
287 'reqd' => "no",
288 'modegli' => "3" },
289 ];
290
291my $options = { 'name' => "buildcol.pl",
292 'desc' => "{buildcol.desc}",
293 'args' => $arguments };
294
295# The hash maps between argument and the buildcolutils subclass supporting that
296# argument - allowing for extensions to override the normal buildcolutils as
297# necessary
298my $function_to_subclass_mappings = {};
299
300# Lets get the party rolling... or ball started... hmmm
301&main();
302
303exit;
304
305sub main
306{
307 $doc::cmd_line_mode = "buildcol";
308
309 # Dynamically include arguments from any subclasses of buildcolutils we find
310 # in the extensions directory
311 if (defined $ENV{'GSDLEXTS'})
312 {
313 &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
314 }
315 if (defined $ENV{'GSDL3EXTS'})
316 {
317 &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
318 }
319
320 # Loop through arguments, checking to see if any depend on a specific
321 # subclass of buildcolutils. Note that we load the first subclass we
322 # encounter so only support a single 'override' ATM.
323 my $subclass;
324 foreach my $argument (@ARGV)
325 {
326 # proper arguments start with a hyphen
327 if ($argument =~ /^-/ && defined $function_to_subclass_mappings->{$argument})
328 {
329 my $required_subclass = $function_to_subclass_mappings->{$argument};
330 if (!defined $subclass)
331 {
332 $subclass = $required_subclass;
333 }
334 # Oh noes! The user has included specific arguments from two different
335 # subclasses... this isn't supported
336 elsif ($subclass ne $required_subclass)
337 {
338 print STDERR "Error! You cannot specify arguments from two different extension specific buildcolutils modules: " . $subclass . " != " . $required_subclass . "\n";
339 exit;
340 }
341 }
342 }
343
344 my $buildcolutils;
345 if (defined $subclass)
346 {
347 print "* Loading overriding buildcolutils module: " . $subclass . "\n";
348 require $subclass . '.pm';
349 $buildcolutils = new $subclass(\@ARGV, $options);
350 }
351 # We don't have an overridden buildcolutils, or the above command failed
352 # somehow so load the base class
353 if (!defined $buildcolutils)
354 {
355 $buildcolutils = new buildcolutils(\@ARGV, $options);
356 }
357
358 ## shortcut - if gsdl3srchome is set, and site is not set, set site to localsite
359 ## But need to check gsdl3srchome not empty even if defined, as that can
360 ## happen when nightly diffcol is run and doesn't mean it's a GS3 install
361 if (defined $ENV{'GSDL3SRCHOME'} && $ENV{'GSDL3SRCHOME'} ne "") {
362 if (!defined $buildcolutils->{'site'} || $buildcolutils->{'site'} eq "") {
363 $buildcolutils->{'site'} = "localsite";
364 print STDERR "setting site to localsite as GSDL3SRCHOME was set\n";
365 }
366 }
367 my $collection = $buildcolutils->get_collection();
368 if (defined $collection)
369 {
370 my ($config_filename,$collect_cfg) = $buildcolutils->read_collection_cfg($collection, $options);
371 $buildcolutils->set_collection_options($collect_cfg);
372
373 my $builders_ref = $buildcolutils->prepare_builders($config_filename, $collect_cfg);
374 $buildcolutils->build_collection($builders_ref);
375 $buildcolutils->build_auxiliary_files($builders_ref);
376 $buildcolutils->complete_builders($builders_ref);
377
378 # The user may have requested the collection be activated
379 $buildcolutils->activate_collection();
380 }
381
382 # Cleanup
383 $buildcolutils->deinit();
384}
385# main()
386
387# @function _scanForSubclasses()
388# @param $dir The extension directory to look within
389# @param $exts A list of the available extensions (as a colon separated string)
390# @return The number of subclasses of buildcolutils found as an Integer
391sub _scanForSubclasses
392{
393 my ($dir, $exts) = @_;
394 my $class_count = 0;
395 my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
396 my @extensions = split(/:/, $exts);
397 foreach my $e (@extensions)
398 {
399 # - any subclass must be prefixed with the name of the ext
400 my $package_name = $e . 'buildcolutils';
401 $package_name =~ s/[^a-z]//gi; # package names have limited characters
402 my $file_name = $package_name . '.pm';
403 my $file_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $file_name);
404 # see if we have a subclass lurking in that extension folder
405 if (&FileUtils::fileExists($file_path))
406 {
407 # - note we load the filename (with pm) unlike normal modules
408 require $file_name;
409 # - make call to the newly created package
410 my $symbol = qualify('getSupportedArguments', $package_name);
411 # - strict prevents strings being used as function calls, so temporarily
412 # disable that pragma
413 no strict;
414 # - lets check that the function we are about to call actually exists
415 if ( defined &{$symbol} )
416 {
417 my $extra_arguments = &{$symbol}();
418 foreach my $argument (@{$extra_arguments})
419 {
420 # - record a mapping from each extra arguments to the subclass
421 # that supports it. We put the hyphen on here to make comparing
422 # with command line arguments even easier
423 $function_to_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
424 # - and them add them as acceptable arguments to import.pl
425 push(@{$options->{'args'}}, $argument);
426 }
427 $class_count++;
428 }
429 else
430 {
431 print "Warning! A subclass of buildcolutils module (named '" . $file_name . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $file_path . "\n";
432 }
433 }
434 }
435 return $class_count;
436}
437# _scanForSubclasses()
Note: See TracBrowser for help on using the repository browser.