source: main/trunk/greenstone2/bin/script/buildcol.pl@ 28801

Last change on this file since 28801 was 28801, checked in by ak19, 7 years ago

New mode to buildcol.pl added called 'extra'. This restricts the build to only sending/processing the archives content by the orthogonal indexes

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.5 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl --
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29# This program will build a particular collection.
30package buildcol;
31
32# Environment
BEGIN
{
    # Compile-time environment setup: verify the Greenstone environment
    # variables and put the Greenstone (and extension) library directories at
    # the front of @INC so the 'use' statements further down can be resolved.
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Order is important. With unshift want our XMLParser to be
    # found ahead of XML/XPath
    unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan/XML/XPath');
    unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/classify');
    unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/plugins');
    unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan');
    unshift (@INC, $ENV{'GSDLHOME'} . '/perllib');

    if (defined $ENV{'GSDL-RUN-SETUP'})
    {
        require util;
        &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
    }

    # Prepends the perllib directories of every extension named in $exts (a
    # colon separated string) found under $home/ext to @INC. The unshift
    # order is kept exactly as it was for both extension flavours.
    my $add_extension_paths = sub
    {
        my ($home, $exts) = @_;
        foreach my $e (split(/:/, $exts))
        {
            my $ext_prefix = $home . '/ext/' . $e;

            unshift(@INC, $ext_prefix . '/perllib');
            unshift(@INC, $ext_prefix . '/perllib/cpan');
            unshift(@INC, $ext_prefix . '/perllib/plugins');
            unshift(@INC, $ext_prefix . '/perllib/classify');
        }
    };

    if (defined $ENV{'GSDLEXTS'})
    {
        $add_extension_paths->($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
    }
    if (defined $ENV{'GSDL3EXTS'})
    {
        # Greenstone 3 extensions live under GSDL3SRCHOME. Without it the
        # prefix would collapse to '/ext/<name>' and bogus root-relative
        # directories would end up in @INC, so skip them and say why.
        if (defined $ENV{'GSDL3SRCHOME'})
        {
            $add_extension_paths->($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
        }
        else
        {
            warn "GSDL3EXTS is set but GSDL3SRCHOME is not - ignoring Greenstone 3 extensions\n";
        }
    }
}
80
81# Pragma
82use strict;
83no strict 'refs'; # allow filehandles to be variables and vice versa
84no strict 'subs'; # allow barewords (eg STDERR) as function arguments
85
86# Modules
87use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
88
89# Greenstone Modules
90use buildcolutils;
91use FileUtils;
92use util;
93
94# Globals
95# - build up arguments list/control
# Values accepted by the -mode argument; each 'desc' is a key into the
# translated resource strings.
my $mode_list = [
    { name => 'all',           desc => '{buildcol.mode.all}' },
    { name => 'compress_text', desc => '{buildcol.mode.compress_text}' },
    { name => 'build_index',   desc => '{buildcol.mode.build_index}' },
    { name => 'infodb',        desc => '{buildcol.mode.infodb}' },
    { name => 'extra',         desc => '{buildcol.mode.extra}' },
];
107
# Values shared by the -sections_index_document_metadata and
# -sections_sort_on_document_metadata enum arguments.
my $sec_index_list = [
    {
        name => 'never',
        desc => '{buildcol.sections_index_document_metadata.never}',
    },
    {
        name => 'always',
        desc => '{buildcol.sections_index_document_metadata.always}',
    },
    {
        name => 'unless_section_metadata_exists',
        desc => '{buildcol.sections_index_document_metadata.unless_section_metadata_exists}',
    },
];
116
# Descriptors for every command line argument buildcol.pl accepts. Each entry
# is a hash of string attributes: 'name', 'desc' (a resource string key),
# 'type', and optionally 'reqd', 'deft', 'range', 'list' (for enums) and the
# GLI related 'modegli' / 'hiddengli' attributes. Entry order is preserved.
my $arguments = [
    {   name    => 'remove_empty_classifications',
        desc    => '{buildcol.remove_empty_classifications}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '2',
    },
    {   name      => 'archivedir',
        desc      => '{buildcol.archivedir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'builddir',
        desc      => '{buildcol.builddir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    # Disabled argument, kept for reference:
    # {   name => 'cachedir',
    #     desc => '{buildcol.cachedir}',
    #     type => 'string',
    #     reqd => 'no',
    # },
    {   name      => 'collectdir',
        desc      => '{buildcol.collectdir}',
        type      => 'string',
        # parsearg left "" as default
        #deft     => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'site',
        desc      => '{buildcol.site}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'debug',
        desc      => '{buildcol.debug}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'faillog',
        desc    => '{buildcol.faillog}',
        type    => 'string',
        # parsearg left "" as default
        #deft   => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
        reqd    => 'no',
        modegli => '3',
    },
    {   name    => 'index',
        desc    => '{buildcol.index}',
        type    => 'string',
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'incremental',
        desc      => '{buildcol.incremental}',
        type      => 'flag',
        hiddengli => 'yes',
    },
    {   name      => 'keepold',
        desc      => '{buildcol.keepold}',
        type      => 'flag',
        reqd      => 'no',
        #modegli  => '3',
        hiddengli => 'yes',
    },
    {   name      => 'removeold',
        desc      => '{buildcol.removeold}',
        type      => 'flag',
        reqd      => 'no',
        #modegli  => '3',
        hiddengli => 'yes',
    },
    {   name    => 'language',
        desc    => '{scripts.language}',
        type    => 'string',
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'maxdocs',
        desc      => '{buildcol.maxdocs}',
        type      => 'int',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'maxnumeric',
        desc    => '{buildcol.maxnumeric}',
        type    => 'int',
        reqd    => 'no',
        deft    => '4',
        range   => '4,512',
        modegli => '3',
    },
    {   name    => 'mode',
        desc    => '{buildcol.mode}',
        type    => 'enum',
        list    => $mode_list,
        # parsearg left "" as default
        #deft   => 'all',
        reqd    => 'no',
        modegli => '3',
    },
    {   name    => 'no_strip_html',
        desc    => '{buildcol.no_strip_html}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '3',
    },
    {   name    => 'store_metadata_coverage',
        desc    => '{buildcol.store_metadata_coverage}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '3',
    },
    {   name    => 'no_text',
        desc    => '{buildcol.no_text}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '2',
    },
    {   name    => 'sections_index_document_metadata',
        desc    => '{buildcol.sections_index_document_metadata}',
        type    => 'enum',
        list    => $sec_index_list,
        reqd    => 'no',
        modegli => '2',
    },
    {   name    => 'sections_sort_on_document_metadata',
        desc    => '{buildcol.sections_sort_on_document_metadata}',
        type    => 'enum',
        list    => $sec_index_list,
        reqd    => 'no',
        modegli => '2',
    },
    {   name      => 'out',
        desc      => '{buildcol.out}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'verbosity',
        desc    => '{buildcol.verbosity}',
        type    => 'int',
        # parsearg left "" as default
        #deft   => '2',
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'gli',
        desc      => '',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'xml',
        desc      => '{scripts.xml}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'activate',
        desc      => '{buildcol.activate}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    # NOTE(review): indexname reuses the '{buildcol.index}' description key -
    # confirm whether a dedicated resource string was intended.
    {   name    => 'indexname',
        desc    => '{buildcol.index}',
        type    => 'string',
        reqd    => 'no',
        modegli => '3',
    },
    {   name    => 'indexlevel',
        desc    => '{buildcol.indexlevel}',
        type    => 'string',
        reqd    => 'no',
        modegli => '3',
    },
];
274
# Script descriptor handed to the buildcolutils constructor: the script name,
# its description key and the list of supported arguments.
my $options = {
    name => 'buildcol.pl',
    desc => '{buildcol.desc}',
    args => $arguments,
};

# Maps an argument (with its leading hyphen) to the name of the buildcolutils
# subclass that supports it - allowing extensions to override the normal
# buildcolutils as necessary. Filled in by _scanForSubclasses().
my $function_to_subclass_mappings = {};

# Run the build, then leave the script.
main();

exit;
288
# @function main()
# Entry point: discovers extension specific subclasses of buildcolutils,
# selects the one (if any) implied by the command line arguments, then drives
# the build through the chosen buildcolutils object.
# Exits with a non-zero status if arguments from two different subclasses
# are mixed on the command line.
sub main
{
    # Dynamically include arguments from any subclasses of buildcolutils we find
    # in the extensions directory
    if (defined $ENV{'GSDLEXTS'})
    {
        &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
    }
    if (defined $ENV{'GSDL3EXTS'})
    {
        # Greenstone 3 extensions are located relative to GSDL3SRCHOME, so
        # there is nothing sensible to scan when it is missing.
        if (defined $ENV{'GSDL3SRCHOME'})
        {
            &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
        }
        else
        {
            print STDERR "Warning! GSDL3EXTS is set but GSDL3SRCHOME is not - not scanning Greenstone 3 extensions\n";
        }
    }

    # Loop through arguments, checking to see if any depend on a specific
    # subclass of buildcolutils. Note that we load the first subclass we
    # encounter so only support a single 'override' ATM.
    my $subclass;
    foreach my $argument (@ARGV)
    {
        # proper arguments start with a hyphen
        if ($argument =~ /^-/ && defined $function_to_subclass_mappings->{$argument})
        {
            my $required_subclass = $function_to_subclass_mappings->{$argument};
            if (!defined $subclass)
            {
                $subclass = $required_subclass;
            }
            # The user has included specific arguments from two different
            # subclasses... this isn't supported
            elsif ($subclass ne $required_subclass)
            {
                print STDERR "Error! You cannot specify arguments from two different extention specific buildcolutils modules: " . $subclass . " != " . $required_subclass . "\n";
                # This is a failure, so report it to the calling process
                exit(1);
            }
        }
    }

    my $buildcolutils;
    if (defined $subclass)
    {
        print "* Loading overriding buildcolutils module: " . $subclass . "\n";
        require $subclass . '.pm';
        $buildcolutils = $subclass->new(\@ARGV, $options);
    }
    # We don't have an overriding buildcolutils, or the above constructor
    # returned nothing, so load the base class
    if (!defined $buildcolutils)
    {
        $buildcolutils = buildcolutils->new(\@ARGV, $options);
    }

    my $collection = $buildcolutils->get_collection();
    if (defined $collection)
    {
        my ($config_filename, $collect_cfg) = $buildcolutils->read_collection_cfg($collection, $options);
        $buildcolutils->set_collection_options($collect_cfg);

        my $builders_ref = $buildcolutils->prepare_builders($config_filename, $collect_cfg);
        $buildcolutils->build_collection($builders_ref);
        $buildcolutils->build_auxiliary_files($builders_ref);
        $buildcolutils->complete_builders($builders_ref);

        # The user may have requested the collection be activated
        $buildcolutils->activate_collection();
    }

    # Cleanup
    $buildcolutils->deinit();
}
# main()
359
# @function _scanForSubclasses()
# Looks in each listed extension for a '<ext>buildcolutils.pm' module, loads
# it, and registers the extra arguments it reports via
# getSupportedArguments(): each one is mapped to its package in
# $function_to_subclass_mappings and appended to $options->{'args'}.
# @param $dir The extension directory to look within
# @param $exts A list of the available extensions (as a colon separated string)
# @return The number of subclasses of buildcolutils found as an Integer
sub _scanForSubclasses
{
    my ($dir, $exts) = @_;

    my $found = 0;
    my $ext_root = &FileUtils::filenameConcatenate($dir, "ext");

    foreach my $ext_name (split(/:/, $exts))
    {
        # - any subclass must be prefixed with the name of the ext, and
        #   package names have limited characters so only letters are kept
        (my $package_name = $ext_name . 'buildcolutils') =~ s/[^a-z]//gi;
        my $file_name = $package_name . '.pm';
        my $file_path = &FileUtils::filenameConcatenate($ext_root, $ext_name, 'perllib', $file_name);

        # - nothing to do unless a subclass is lurking in that extension folder
        next unless &FileUtils::fileExists($file_path);

        # - note we load the filename (with pm) unlike normal modules
        require $file_name;

        # - the fully qualified name of the function we expect the newly
        #   loaded package to provide
        my $symbol = qualify('getSupportedArguments', $package_name);

        # - calling a function through its name held in a string needs
        #   symbolic references
        no strict 'refs';

        # - lets check that the function we are about to call actually exists
        unless (defined &{$symbol})
        {
            print "Warning! A subclass of buildcolutils module (named '" . $file_name . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $file_path . "\n";
            next;
        }

        my $extra_arguments = &{$symbol}();
        foreach my $argument (@{$extra_arguments})
        {
            # - record a mapping from each extra argument to the subclass
            #   that supports it. We put the hyphen on here to make comparing
            #   with command line arguments even easier
            $function_to_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
            # - and then add it as an acceptable argument to buildcol.pl
            push(@{$options->{'args'}}, $argument);
        }
        $found++;
    }

    return $found;
}
# _scanForSubclasses()
Note: See TracBrowser for help on using the repository browser.