source: main/trunk/greenstone2/bin/script/buildcol.pl@ 27562

Last change on this file since 27562 was 27562, checked in by kjdon, 11 years ago

added new build option sections_sort_on_document_metadata. same as sections_index_document_metadata but used for the lucene sort fields, not the indexed fields

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.3 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl --
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29# This program will build a particular collection.
30package buildcol;
31
32# Environment
# Runs at compile time so that @INC is extended before the later 'use'
# statements for Greenstone modules are compiled.
BEGIN
{
    # Both variables are mandatory - every path below is derived from them
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Core Greenstone library directories. Each is unshifted in turn, so the
    # last one listed ends up first in @INC.
    foreach my $subdir ('/perllib', '/perllib/cpan', '/perllib/cpan/XML/XPath', '/perllib/plugins', '/perllib/classify')
    {
        unshift(@INC, $ENV{'GSDLHOME'} . $subdir);
    }

    # Optionally let util configure the Greenstone environment for us
    if (defined $ENV{'GSDL-RUN-SETUP'})
    {
        require util;
        &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
    }

    # Extension library directories. Each pair names the variable holding the
    # colon separated list of extensions and the variable holding the
    # directory whose 'ext' folder contains them.
    foreach my $ext_source (['GSDLEXTS', 'GSDLHOME'], ['GSDL3EXTS', 'GSDL3SRCHOME'])
    {
        my ($exts_var, $home_var) = @{$ext_source};
        next unless defined $ENV{$exts_var};

        # Without the home directory we would build root-relative paths
        # (eg '/ext/foo/perllib') from an undefined value, so skip instead
        if (!defined $ENV{$home_var})
        {
            warn $exts_var . " is set but " . $home_var . " is not - ignoring these extensions\n";
            next;
        }

        foreach my $e (split(/:/, $ENV{$exts_var}))
        {
            my $ext_prefix = $ENV{$home_var} . '/ext/' . $e;

            foreach my $subdir ('/perllib', '/perllib/cpan', '/perllib/plugins', '/perllib/classify')
            {
                unshift(@INC, $ext_prefix . $subdir);
            }
        }
    }
}
76
77# Pragma
78use strict;
79no strict 'refs'; # allow filehandles to be variables and vice versa
80no strict 'subs'; # allow barewords (eg STDERR) as function arguments
81
82# Modules
83use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
84
85# Greenstone Modules
86use buildcolutils;
87use FileUtils;
88use util;
89
90# Globals
91# - build up arguments list/control
# Permitted values for the 'mode' argument. Every entry pairs the value's
# name with a '{buildcol.mode.<name>}' description placeholder.
my $mode_list = [
    map { +{ 'name' => $_, 'desc' => '{buildcol.mode.' . $_ . '}' } }
        qw(all compress_text build_index infodb)
];
101
# Permitted values shared by the 'sections_index_document_metadata' and
# 'sections_sort_on_document_metadata' arguments. The description
# placeholders all use the 'sections_index_document_metadata' prefix.
my $sec_index_list = [
    map { +{ 'name' => $_, 'desc' => '{buildcol.sections_index_document_metadata.' . $_ . '}' } }
        qw(never always unless_section_metadata_exists)
];
110
# Command line arguments accepted by this script, in the order they are
# declared. Each entry is a hash of:
#   name  - the argument name
#   desc  - description placeholder of the form '{...}'
#   type  - one of flag, string, int or enum
#   list  - (enum only) reference to the list of permitted values
#   deft  - default value, where one is given here
#   range - permitted numeric range, where one is given here
#   reqd  - "no" for every entry that sets it
#   modegli / hiddengli - presumably control how GLI exposes the argument
#                         (TODO confirm against the GLI argument handling)
my $arguments = [
    { name => 'remove_empty_classifications',
      desc => '{buildcol.remove_empty_classifications}',
      type => 'flag', reqd => 'no', modegli => '2' },
    { name => 'archivedir',
      desc => '{buildcol.archivedir}',
      type => 'string', reqd => 'no', hiddengli => 'yes' },
    { name => 'builddir',
      desc => '{buildcol.builddir}',
      type => 'string', reqd => 'no', hiddengli => 'yes' },
#   { name => 'cachedir',
#     desc => '{buildcol.cachedir}',
#     type => 'string', reqd => 'no' },
    { name => 'collectdir',
      desc => '{buildcol.collectdir}',
      type => 'string',
      # parsearg left "" as default
      #deft => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
      reqd => 'no', hiddengli => 'yes' },
    { name => 'site',
      desc => '{buildcol.site}',
      type => 'string', deft => '', reqd => 'no', hiddengli => 'yes' },
    { name => 'debug',
      desc => '{buildcol.debug}',
      type => 'flag', reqd => 'no', hiddengli => 'yes' },
    { name => 'faillog',
      desc => '{buildcol.faillog}',
      type => 'string',
      # parsearg left "" as default
      #deft => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
      reqd => 'no', modegli => '3' },
    { name => 'index',
      desc => '{buildcol.index}',
      type => 'string', reqd => 'no', modegli => '3' },
    # NOTE(review): unlike every other entry this one sets no 'reqd' key
    { name => 'incremental',
      desc => '{buildcol.incremental}',
      type => 'flag', hiddengli => 'yes' },
    { name => 'keepold',
      desc => '{buildcol.keepold}',
      type => 'flag', reqd => 'no',
      #modegli => '3',
      hiddengli => 'yes' },
    { name => 'removeold',
      desc => '{buildcol.removeold}',
      type => 'flag', reqd => 'no',
      #modegli => '3',
      hiddengli => 'yes' },
    { name => 'language',
      desc => '{scripts.language}',
      type => 'string', reqd => 'no', modegli => '3' },
    { name => 'maxdocs',
      desc => '{buildcol.maxdocs}',
      type => 'int', reqd => 'no', hiddengli => 'yes' },
    { name => 'maxnumeric',
      desc => '{buildcol.maxnumeric}',
      type => 'int', reqd => 'no', deft => '4', range => '4,512',
      modegli => '3' },
    { name => 'mode',
      desc => '{buildcol.mode}',
      type => 'enum', list => $mode_list,
      # parsearg left "" as default
      #deft => 'all',
      reqd => 'no', modegli => '3' },
    { name => 'no_strip_html',
      desc => '{buildcol.no_strip_html}',
      type => 'flag', reqd => 'no', modegli => '3' },
    { name => 'store_metadata_coverage',
      desc => '{buildcol.store_metadata_coverage}',
      type => 'flag', reqd => 'no', modegli => '3' },
    { name => 'no_text',
      desc => '{buildcol.no_text}',
      type => 'flag', reqd => 'no', modegli => '2' },
    { name => 'sections_index_document_metadata',
      desc => '{buildcol.sections_index_document_metadata}',
      type => 'enum', list => $sec_index_list,
      reqd => 'no', modegli => '2' },
    { name => 'sections_sort_on_document_metadata',
      desc => '{buildcol.sections_sort_on_document_metadata}',
      type => 'enum', list => $sec_index_list,
      reqd => 'no', modegli => '2' },
    { name => 'out',
      desc => '{buildcol.out}',
      type => 'string', deft => 'STDERR', reqd => 'no', hiddengli => 'yes' },
    { name => 'verbosity',
      desc => '{buildcol.verbosity}',
      type => 'int',
      # parsearg left "" as default
      #deft => '2',
      reqd => 'no', modegli => '3' },
    { name => 'gli',
      desc => '',
      type => 'flag', reqd => 'no', hiddengli => 'yes' },
    { name => 'xml',
      desc => '{scripts.xml}',
      type => 'flag', reqd => 'no', hiddengli => 'yes' },
    { name => 'activate',
      desc => '{buildcol.activate}',
      type => 'flag', reqd => 'no', hiddengli => 'yes' },
    # NOTE(review): shares its description placeholder with 'index' - confirm
    # whether a dedicated description was intended
    { name => 'indexname',
      desc => '{buildcol.index}',
      type => 'string', reqd => 'no', modegli => '3' },
    { name => 'indexlevel',
      desc => '{buildcol.indexlevel}',
      type => 'string', reqd => 'no', modegli => '3' },
];
268
# Description of this script (its name, description placeholder and argument
# list) as handed to the buildcolutils constructor in main()
my $options = {
    name => 'buildcol.pl',
    desc => '{buildcol.desc}',
    args => $arguments,
};
272
# Lookup from a command line argument (stored with its leading hyphen) to the
# name of the buildcolutils subclass that declared it. Populated by
# _scanForSubclasses() and consulted by main() so that an extension can
# override the normal buildcolutils when one of its arguments is used.
my $function_to_subclass_mappings = {};

# Script entry point: run the build, then finish
main();

exit;
282
# @function main()
# Drives a collection build: discovers extension specific subclasses of
# buildcolutils, picks the one (if any) required by the command line
# arguments, then runs the build steps for the requested collection.
# Exits with a non-zero status if arguments belonging to two different
# subclasses are mixed on the command line.
sub main
{
    # Dynamically include arguments from any subclasses of buildcolutils we find
    # in the extension directories
    if (defined $ENV{'GSDLEXTS'})
    {
        _scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
    }
    # - Greenstone 3 extensions live under GSDL3SRCHOME; without it there is
    #   no directory to scan, so skip rather than build a path from undef
    if (defined $ENV{'GSDL3EXTS'} && defined $ENV{'GSDL3SRCHOME'})
    {
        _scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
    }

    # Loop through arguments, checking to see if any depend on a specific
    # subclass of buildcolutils. Note that we load the first subclass we
    # encounter so only support a single 'override' ATM.
    my $subclass;
    foreach my $argument (@ARGV)
    {
        # proper arguments start with a hyphen
        next unless $argument =~ /^-/;
        my $required_subclass = $function_to_subclass_mappings->{$argument};
        next unless defined $required_subclass;

        if (!defined $subclass)
        {
            $subclass = $required_subclass;
        }
        # Oh noes! The user has included specific arguments from two different
        # subclasses... this isn't supported
        elsif ($subclass ne $required_subclass)
        {
            print STDERR "Error! You cannot specify arguments from two different extention specific buildcolutils modules: " . $subclass . " != " . $required_subclass . "\n";
            # - this is a failure, so report it through the exit status too
            exit(1);
        }
    }

    my $buildcolutils;
    if (defined $subclass)
    {
        print "* Loading overriding buildcolutils module: " . $subclass . "\n";
        # - require dies if the module cannot be loaded
        require $subclass . '.pm';
        $buildcolutils = $subclass->new(\@ARGV, $options);
    }
    # We don't have an overriding buildcolutils, or its constructor returned
    # nothing, so fall back to the base class
    if (!defined $buildcolutils)
    {
        $buildcolutils = buildcolutils->new(\@ARGV, $options);
    }

    my $collection = $buildcolutils->get_collection();
    if (defined $collection)
    {
        my ($config_filename, $collect_cfg) = $buildcolutils->read_collection_cfg($collection, $options);
        $buildcolutils->set_collection_options($collect_cfg);

        my $builders_ref = $buildcolutils->prepare_builders($config_filename, $collect_cfg);
        $buildcolutils->build_collection($builders_ref);
        $buildcolutils->build_auxiliary_files($builders_ref);
        $buildcolutils->complete_builders($builders_ref);

        # The user may have requested the collection be activated
        $buildcolutils->activate_collection();
    }

    # Cleanup
    $buildcolutils->deinit();
}
# main()
353
# @function _scanForSubclasses()
# Looks inside each listed extension for a module called
# '<ext>buildcolutils.pm' (non-letters removed from the name). Every module
# found is loaded and asked, via getSupportedArguments(), for the extra
# arguments it supports; these are recorded in $function_to_subclass_mappings
# and appended to the arguments accepted by this script.
# @param  $dir  The directory containing the 'ext' folder to look within
# @param  $exts A list of the available extensions (as a colon separated string)
# @return The number of subclasses of buildcolutils found as an Integer
sub _scanForSubclasses
{
    my ($dir, $exts) = @_;
    my $class_count = 0;
    my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
    foreach my $e (split(/:/, $exts))
    {
        # - ignore empty entries (eg from "a::b"), otherwise the package name
        #   below would collapse to the base class 'buildcolutils'
        next if $e eq '';

        # - any subclass must be prefixed with the name of the ext
        my $package_name = $e . 'buildcolutils';
        $package_name =~ s/[^a-z]//gi; # package names have limited characters
        my $file_name = $package_name . '.pm';
        my $file_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $file_name);

        # see if we have a subclass lurking in that extension folder
        next unless &FileUtils::fileExists($file_path);

        # - note we load the filename (with pm) unlike normal modules
        require $file_name;
        # - make call to the newly created package
        my $symbol = qualify('getSupportedArguments', $package_name);
        # - strict refs prevents strings being used as function calls, so
        #   disable just that check for the rest of this iteration
        no strict 'refs';
        # - lets check that the function we are about to call actually exists
        if (!defined &{$symbol})
        {
            print "Warning! A subclass of buildcolutils module (named '" . $file_name . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $file_path . "\n";
            next;
        }

        my $extra_arguments = &{$symbol}();
        # - tolerate a subclass that returns nothing useful
        $extra_arguments = [] unless ref($extra_arguments) eq 'ARRAY';
        foreach my $argument (@{$extra_arguments})
        {
            # - record a mapping from each extra argument to the subclass
            #   that supports it. We put the hyphen on here to make comparing
            #   with command line arguments even easier
            $function_to_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
            # - and then add them as acceptable arguments to buildcol.pl
            push(@{$options->{'args'}}, $argument);
        }
        $class_count++;
    }
    return $class_count;
}
# _scanForSubclasses()
Note: See TracBrowser for help on using the repository browser.