source: main/trunk/greenstone2/bin/script/buildcol.pl@ 30499

Last change on this file since 30499 was 28801, checked in by ak19, 10 years ago

Added a new mode to buildcol.pl called 'extra'. It restricts the build so that the archives content is sent to, and processed by, only the orthogonal indexes.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.5 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl --
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29# This program will build a particular collection.
30package buildcol;
31
32# Environment
BEGIN
{
    # Nothing below can work without these two variables, so fail early
    # with a clear message.
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Order is important. With unshift want our XMLParser to be
    # found ahead of XML/XPath
    unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan/XML/XPath');
    unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/classify');
    unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/plugins');
    unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan');
    unshift (@INC, $ENV{'GSDLHOME'} . '/perllib');

    # Only run the environment setup when explicitly asked to
    if (defined $ENV{'GSDL-RUN-SETUP'})
    {
        require util;
        &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
    }

    # Extensions are named in colon separated lists; each list is resolved
    # against its own home directory. GSDLEXTS is handled before GSDL3EXTS
    # so the resulting @INC order is the same as it has always been.
    foreach my $source (['GSDLHOME', 'GSDLEXTS'], ['GSDL3SRCHOME', 'GSDL3EXTS'])
    {
        my ($home_var, $exts_var) = @{$source};
        next unless defined $ENV{$exts_var};

        # Without a home directory the prefix would collapse to
        # '/ext/<name>', putting unrelated root-relative paths at the front
        # of @INC - so skip the list instead
        if (!defined $ENV{$home_var})
        {
            warn $exts_var . " is set but " . $home_var . " is not; ignoring " . $exts_var . "\n";
            next;
        }

        foreach my $e (split(/:/, $ENV{$exts_var}))
        {
            my $ext_prefix = $ENV{$home_var} . '/ext/' . $e;

            # Each unshift lands in front of the previous one, so the
            # effective search order is classify, plugins, cpan, perllib
            unshift(@INC, $ext_prefix . '/perllib');
            unshift(@INC, $ext_prefix . '/perllib/cpan');
            unshift(@INC, $ext_prefix . '/perllib/plugins');
            unshift(@INC, $ext_prefix . '/perllib/classify');
        }
    }
}
80
81# Pragma
82use strict;
83no strict 'refs'; # allow filehandles to be variables and vice versa
84no strict 'subs'; # allow barewords (eg STDERR) as function arguments
85
86# Modules
87use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
88
89# Greenstone Modules
90use buildcolutils;
91use FileUtils;
92use util;
93
94# Globals
95# - build up arguments list/control
# Values accepted by the 'mode' argument (an enum); each entry pairs the
# value with the key of its description string.
my $mode_list = [
    { name => 'all',           desc => '{buildcol.mode.all}' },
    { name => 'compress_text', desc => '{buildcol.mode.compress_text}' },
    { name => 'build_index',   desc => '{buildcol.mode.build_index}' },
    { name => 'infodb',        desc => '{buildcol.mode.infodb}' },
    { name => 'extra',         desc => '{buildcol.mode.extra}' },
];
107
# Values shared by the two 'sections_*_document_metadata' enum arguments;
# each entry pairs the value with the key of its description string.
my $sec_index_list = [
    { name => 'never',
      desc => '{buildcol.sections_index_document_metadata.never}' },
    { name => 'always',
      desc => '{buildcol.sections_index_document_metadata.always}' },
    { name => 'unless_section_metadata_exists',
      desc => '{buildcol.sections_index_document_metadata.unless_section_metadata_exists}' },
];
116
# The command line arguments understood by buildcol.pl. Each entry is a hash
# giving the argument's name, the key of its description string, its type
# and, optionally, a default ('deft'), a 'range', or the 'list' of values
# allowed for an enum.
# NOTE(review): 'modegli' and 'hiddengli' look like hints for the GLI front
# end - confirm their exact meaning against the argument parser.
my $arguments = [
    { name      => 'remove_empty_classifications',
      desc      => '{buildcol.remove_empty_classifications}',
      type      => 'flag',
      reqd      => 'no',
      modegli   => '2' },
    { name      => 'archivedir',
      desc      => '{buildcol.archivedir}',
      type      => 'string',
      reqd      => 'no',
      hiddengli => 'yes' },
    { name      => 'builddir',
      desc      => '{buildcol.builddir}',
      type      => 'string',
      reqd      => 'no',
      hiddengli => 'yes' },
    # Disabled argument, kept for reference:
    # { name => 'cachedir',
    #   desc => '{buildcol.cachedir}',
    #   type => 'string',
    #   reqd => 'no' },
    { name      => 'collectdir',
      desc      => '{buildcol.collectdir}',
      type      => 'string',
      # parsearg left "" as default
      #deft     => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
      reqd      => 'no',
      hiddengli => 'yes' },
    { name      => 'site',
      desc      => '{buildcol.site}',
      type      => 'string',
      deft      => '',
      reqd      => 'no',
      hiddengli => 'yes' },
    { name      => 'debug',
      desc      => '{buildcol.debug}',
      type      => 'flag',
      reqd      => 'no',
      hiddengli => 'yes' },
    { name      => 'faillog',
      desc      => '{buildcol.faillog}',
      type      => 'string',
      # parsearg left "" as default
      #deft     => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
      reqd      => 'no',
      modegli   => '3' },
    { name      => 'index',
      desc      => '{buildcol.index}',
      type      => 'string',
      reqd      => 'no',
      modegli   => '3' },
    # Unlike its neighbours this entry has never declared 'reqd'
    { name      => 'incremental',
      desc      => '{buildcol.incremental}',
      type      => 'flag',
      hiddengli => 'yes' },
    { name      => 'keepold',
      desc      => '{buildcol.keepold}',
      type      => 'flag',
      reqd      => 'no',
      #modegli  => '3',
      hiddengli => 'yes' },
    { name      => 'removeold',
      desc      => '{buildcol.removeold}',
      type      => 'flag',
      reqd      => 'no',
      #modegli  => '3',
      hiddengli => 'yes' },
    { name      => 'language',
      desc      => '{scripts.language}',
      type      => 'string',
      reqd      => 'no',
      modegli   => '3' },
    { name      => 'maxdocs',
      desc      => '{buildcol.maxdocs}',
      type      => 'int',
      reqd      => 'no',
      hiddengli => 'yes' },
    { name      => 'maxnumeric',
      desc      => '{buildcol.maxnumeric}',
      type      => 'int',
      reqd      => 'no',
      deft      => '4',
      range     => '4,512',
      modegli   => '3' },
    { name      => 'mode',
      desc      => '{buildcol.mode}',
      type      => 'enum',
      list      => $mode_list,
      # parsearg left "" as default
      #deft     => 'all',
      reqd      => 'no',
      modegli   => '3' },
    { name      => 'no_strip_html',
      desc      => '{buildcol.no_strip_html}',
      type      => 'flag',
      reqd      => 'no',
      modegli   => '3' },
    { name      => 'store_metadata_coverage',
      desc      => '{buildcol.store_metadata_coverage}',
      type      => 'flag',
      reqd      => 'no',
      modegli   => '3' },
    { name      => 'no_text',
      desc      => '{buildcol.no_text}',
      type      => 'flag',
      reqd      => 'no',
      modegli   => '2' },
    { name      => 'sections_index_document_metadata',
      desc      => '{buildcol.sections_index_document_metadata}',
      type      => 'enum',
      list      => $sec_index_list,
      reqd      => 'no',
      modegli   => '2' },
    { name      => 'sections_sort_on_document_metadata',
      desc      => '{buildcol.sections_sort_on_document_metadata}',
      type      => 'enum',
      list      => $sec_index_list,
      reqd      => 'no',
      modegli   => '2' },
    { name      => 'out',
      desc      => '{buildcol.out}',
      type      => 'string',
      deft      => 'STDERR',
      reqd      => 'no',
      hiddengli => 'yes' },
    { name      => 'verbosity',
      desc      => '{buildcol.verbosity}',
      type      => 'int',
      # parsearg left "" as default
      #deft     => '2',
      reqd      => 'no',
      modegli   => '3' },
    { name      => 'gli',
      desc      => '',
      type      => 'flag',
      reqd      => 'no',
      hiddengli => 'yes' },
    { name      => 'xml',
      desc      => '{scripts.xml}',
      type      => 'flag',
      reqd      => 'no',
      hiddengli => 'yes' },
    { name      => 'activate',
      desc      => '{buildcol.activate}',
      type      => 'flag',
      reqd      => 'no',
      hiddengli => 'yes' },
    # NOTE(review): reuses the {buildcol.index} description key - confirm
    # this is intended rather than a copy/paste leftover
    { name      => 'indexname',
      desc      => '{buildcol.index}',
      type      => 'string',
      reqd      => 'no',
      modegli   => '3' },
    { name      => 'indexlevel',
      desc      => '{buildcol.indexlevel}',
      type      => 'string',
      reqd      => 'no',
      modegli   => '3' },
];
274
# Top level description of this script: its name, the key of its description
# string and the argument definitions. The 'args' list is extended at runtime
# with any arguments contributed by extension subclasses.
my $options = {
    name => 'buildcol.pl',
    desc => '{buildcol.desc}',
    args => $arguments,
};

# Maps each extension specific argument (stored with its leading hyphen) to
# the buildcolutils subclass that supports it - allowing extensions to
# override the normal buildcolutils as necessary
my $function_to_subclass_mappings = {};

# Everything is set up, so run the build and finish
main();

exit 0;
288
# @function main()
# Script entry point. Registers any extension specific arguments, picks the
# buildcolutils implementation to use (the base class, or the single
# extension subclass whose arguments appear on the command line), then drives
# it through the build steps for the requested collection.
# Exits with status 1 if arguments belonging to two different extension
# subclasses are mixed on the command line.
sub main
{
    # Dynamically include arguments from any subclasses of buildcolutils we
    # find in the extensions directories
    if (defined $ENV{'GSDLEXTS'})
    {
        _scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
    }
    # - without GSDL3SRCHOME there is no directory to scan, so the list of
    #   extensions is ignored rather than scanning relative to an undefined
    #   path
    if (defined $ENV{'GSDL3EXTS'} && defined $ENV{'GSDL3SRCHOME'})
    {
        _scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
    }

    # Loop through arguments, checking to see if any depend on a specific
    # subclass of buildcolutils. Note that we load the first subclass we
    # encounter so only support a single 'override' ATM.
    my $subclass;
    foreach my $argument (@ARGV)
    {
        # proper arguments start with a hyphen, and the mappings are keyed
        # with the hyphen included
        next unless $argument =~ /^-/;
        my $required_subclass = $function_to_subclass_mappings->{$argument};
        next unless defined $required_subclass;

        if (!defined $subclass)
        {
            $subclass = $required_subclass;
        }
        # The user has included specific arguments from two different
        # subclasses... this isn't supported
        elsif ($subclass ne $required_subclass)
        {
            print STDERR "Error! You cannot specify arguments from two different extention specific buildcolutils modules: " . $subclass . " != " . $required_subclass . "\n";
            # - this is a failure, so report it through the exit status too
            exit 1;
        }
    }

    my $buildcolutils;
    if (defined $subclass)
    {
        print "* Loading overriding buildcolutils module: " . $subclass . "\n";
        # - loaded by file name, as the module is only known at runtime
        require $subclass . '.pm';
        $buildcolutils = $subclass->new(\@ARGV, $options);
    }
    # No override was requested, or its constructor returned nothing, so use
    # the base class
    if (!defined $buildcolutils)
    {
        $buildcolutils = buildcolutils->new(\@ARGV, $options);
    }

    # Only build when a collection has been identified
    my $collection = $buildcolutils->get_collection();
    if (defined $collection)
    {
        my ($config_filename, $collect_cfg) = $buildcolutils->read_collection_cfg($collection, $options);
        $buildcolutils->set_collection_options($collect_cfg);

        my $builders_ref = $buildcolutils->prepare_builders($config_filename, $collect_cfg);
        $buildcolutils->build_collection($builders_ref);
        $buildcolutils->build_auxiliary_files($builders_ref);
        $buildcolutils->complete_builders($builders_ref);

        # The user may have requested the collection be activated
        $buildcolutils->activate_collection();
    }

    # Cleanup
    $buildcolutils->deinit();
}
# main()
359
# @function _scanForSubclasses()
# Looks in each listed extension for a buildcolutils subclass, named
# '<ext>buildcolutils' with anything other than letters removed. Every
# argument such a subclass reports through getSupportedArguments() is
# recorded in $function_to_subclass_mappings and appended to the arguments
# accepted by buildcol.pl.
# @param $dir The directory containing the 'ext' directory to look within
# @param $exts A list of the available extensions (as a colon separated string)
# @return The number of subclasses of buildcolutils found as an Integer
sub _scanForSubclasses
{
    my ($dir, $exts) = @_;
    my $ext_root = &FileUtils::filenameConcatenate($dir, "ext");
    my $found = 0;

    foreach my $ext_name (split(/:/, $exts))
    {
        # - any subclass must be prefixed with the name of the ext, and
        #   package names have limited characters
        my $package = $ext_name . 'buildcolutils';
        $package =~ s/[^a-z]//gi;
        my $module_file = $package . '.pm';
        my $module_path = &FileUtils::filenameConcatenate($ext_root, $ext_name, 'perllib', $module_file);

        # - nothing to do unless a subclass is lurking in that extension
        next unless &FileUtils::fileExists($module_path);

        # - note we load the filename (with pm) unlike normal modules
        require $module_file;

        # - the function is looked up by its fully qualified name, which
        #   means using a string as a code reference
        my $getter = qualify('getSupportedArguments', $package);
        no strict 'refs';

        # - a subclass that cannot tell us its arguments is of no use
        unless (defined &{$getter})
        {
            print "Warning! A subclass of buildcolutils module (named '" . $module_file . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $module_path . "\n";
            next;
        }

        my $supported = &{$getter}();
        foreach my $extra (@{$supported})
        {
            # - the hyphen is included in the key so it can be compared
            #   directly with what appears on the command line
            $function_to_subclass_mappings->{'-' . $extra->{'name'}} = $package;
            # - and the argument becomes acceptable to buildcol.pl
            push(@{$options->{'args'}}, $extra);
        }
        $found++;
    }

    return $found;
}
# _scanForSubclasses()
Note: See TracBrowser for help on using the repository browser.