root/main/trunk/greenstone2/bin/script/buildcol.pl @ 31753

Revision 31753, 12.9 KB (checked in by ak19, 12 months ago)

Two fixes Kathy requested: 1. when running buildcol, ONLY deactivate a collection surrounding the lock-sensitive make_infodatabase() calls IF incremental. 2. Allow buildcol.pl to accept activate parameters like library_url (library_name and skipactivation). full-(re)build and incremental-(re)build scripts already accept additional parameters such as with -activate:skipactivation, but buildcol.pl doesn't work that way.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl --
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29# This program will build a particular collection.
30package buildcol;
31
32# Environment
BEGIN
{
  # Both variables are mandatory: every library path below is derived from
  # GSDLHOME, and GSDLOS is passed to the optional environment setup call.
  die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
  die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

  # Order is important. With unshift want our XMLParser to be
  # found ahead of XML/XPath
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan/XML/XPath');
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/classify');
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/plugins');
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan');
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib');

  # Only run the environment setup when explicitly requested
  if (defined $ENV{'GSDL-RUN-SETUP'})
  {
    require util;
    util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
  }

  # Prepends the perllib directories of every extension named in the colon
  # separated string $ext_list, each rooted at "<$root>/ext/<extension>".
  # The unshift order within one extension is kept exactly as before, so
  # 'perllib/classify' ends up first in @INC and 'perllib' last.
  my $add_extension_paths = sub
  {
    my ($root, $ext_list) = @_;
    foreach my $e (split(/:/, $ext_list))
    {
      my $ext_prefix = $root . '/ext/' . $e;

      unshift(@INC, $ext_prefix . '/perllib');
      unshift(@INC, $ext_prefix . '/perllib/cpan');
      unshift(@INC, $ext_prefix . '/perllib/plugins');
      unshift(@INC, $ext_prefix . '/perllib/classify');
    }
  };

  if (defined $ENV{'GSDLEXTS'})
  {
    $add_extension_paths->($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
  }

  if (defined $ENV{'GSDL3EXTS'})
  {
    # Without GSDL3SRCHOME the prefix would collapse to "/ext/<name>", putting
    # root-relative directories at the front of @INC, so skip them instead.
    if (defined $ENV{'GSDL3SRCHOME'})
    {
      $add_extension_paths->($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
    }
    else
    {
      warn "GSDL3EXTS is set but GSDL3SRCHOME is not - ignoring GSDL3EXTS library paths\n";
    }
  }
}
80
81# Pragma
82use strict;
83no strict 'refs'; # allow filehandles to be variables and vice versa
84no strict 'subs'; # allow barewords (eg STDERR) as function arguments
85
86# Modules
87use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
88
89# Greenstone Modules
90use buildcolutils;
91use FileUtils;
92use util;
93
94# Globals
95# - build up arguments list/control
# Values accepted by the 'mode' argument (an enum). Each entry pairs a value
# with a brace-wrapped description placeholder.
my $mode_list = [
  { 'name' => 'all',           'desc' => '{buildcol.mode.all}' },
  { 'name' => 'compress_text', 'desc' => '{buildcol.mode.compress_text}' },
  { 'name' => 'build_index',   'desc' => '{buildcol.mode.build_index}' },
  { 'name' => 'infodb',        'desc' => '{buildcol.mode.infodb}' },
  { 'name' => 'extra',         'desc' => '{buildcol.mode.extra}' },
];

# Values shared by the two sections_*_document_metadata enum arguments
my $sec_index_list = [
  {
    'name' => 'never',
    'desc' => '{buildcol.sections_index_document_metadata.never}',
  },
  {
    'name' => 'always',
    'desc' => '{buildcol.sections_index_document_metadata.always}',
  },
  {
    'name' => 'unless_section_metadata_exists',
    'desc' => '{buildcol.sections_index_document_metadata.unless_section_metadata_exists}',
  },
];
116
# Specification of every command line argument this script declares itself.
# Each entry is a hash with a 'name', a brace-wrapped 'desc' placeholder and a
# 'type' (flag, string, int or enum); some also carry 'reqd', 'deft', 'range',
# 'list' (the permitted values of an enum), 'modegli' or 'hiddengli'.
my $arguments = [
  { 'name'    => 'remove_empty_classifications',
    'desc'    => '{buildcol.remove_empty_classifications}',
    'type'    => 'flag',
    'reqd'    => 'no',
    'modegli' => '2' },

  { 'name'      => 'archivedir',
    'desc'      => '{buildcol.archivedir}',
    'type'      => 'string',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  { 'name'      => 'builddir',
    'desc'      => '{buildcol.builddir}',
    'type'      => 'string',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

# { 'name' => "cachedir",
#   'desc' => "{buildcol.cachedir}",
#   'type' => "string",
#   'reqd' => "no" },

  { 'name'      => 'collectdir',
    'desc'      => '{buildcol.collectdir}',
    'type'      => 'string',
    # parsearg left "" as default
    #'deft' => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  { 'name'      => 'site',
    'desc'      => '{buildcol.site}',
    'type'      => 'string',
    'deft'      => '',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  { 'name'      => 'debug',
    'desc'      => '{buildcol.debug}',
    'type'      => 'flag',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  { 'name'    => 'faillog',
    'desc'    => '{buildcol.faillog}',
    'type'    => 'string',
    # parsearg left "" as default
    #'deft' => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
    'reqd'    => 'no',
    'modegli' => '3' },

  { 'name'    => 'index',
    'desc'    => '{buildcol.index}',
    'type'    => 'string',
    'reqd'    => 'no',
    'modegli' => '3' },

  # This entry declares no 'reqd' key
  { 'name'      => 'incremental',
    'desc'      => '{buildcol.incremental}',
    'type'      => 'flag',
    'hiddengli' => 'yes' },

  { 'name'      => 'keepold',
    'desc'      => '{buildcol.keepold}',
    'type'      => 'flag',
    'reqd'      => 'no',
    #'modegli' => "3",
    'hiddengli' => 'yes' },

  { 'name'      => 'removeold',
    'desc'      => '{buildcol.removeold}',
    'type'      => 'flag',
    'reqd'      => 'no',
    #'modegli' => "3",
    'hiddengli' => 'yes' },

  { 'name'    => 'language',
    'desc'    => '{scripts.language}',
    'type'    => 'string',
    'reqd'    => 'no',
    'modegli' => '3' },

  { 'name'      => 'maxdocs',
    'desc'      => '{buildcol.maxdocs}',
    'type'      => 'int',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  { 'name'    => 'maxnumeric',
    'desc'    => '{buildcol.maxnumeric}',
    'type'    => 'int',
    'reqd'    => 'no',
    'deft'    => '4',
    'range'   => '4,512',
    'modegli' => '3' },

  { 'name'    => 'mode',
    'desc'    => '{buildcol.mode}',
    'type'    => 'enum',
    'list'    => $mode_list,
    # parsearg left "" as default
    #'deft' => "all",
    'reqd'    => 'no',
    'modegli' => '3' },

  { 'name'    => 'no_strip_html',
    'desc'    => '{buildcol.no_strip_html}',
    'type'    => 'flag',
    'reqd'    => 'no',
    'modegli' => '3' },

  { 'name'    => 'store_metadata_coverage',
    'desc'    => '{buildcol.store_metadata_coverage}',
    'type'    => 'flag',
    'reqd'    => 'no',
    'modegli' => '3' },

  { 'name'    => 'no_text',
    'desc'    => '{buildcol.no_text}',
    'type'    => 'flag',
    'reqd'    => 'no',
    'modegli' => '2' },

  { 'name'    => 'sections_index_document_metadata',
    'desc'    => '{buildcol.sections_index_document_metadata}',
    'type'    => 'enum',
    'list'    => $sec_index_list,
    'reqd'    => 'no',
    'modegli' => '2' },

  # Shares its list of permitted values with the argument above
  { 'name'    => 'sections_sort_on_document_metadata',
    'desc'    => '{buildcol.sections_sort_on_document_metadata}',
    'type'    => 'enum',
    'list'    => $sec_index_list,
    'reqd'    => 'no',
    'modegli' => '2' },

  { 'name'      => 'out',
    'desc'      => '{buildcol.out}',
    'type'      => 'string',
    'deft'      => 'STDERR',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  { 'name'    => 'verbosity',
    'desc'    => '{buildcol.verbosity}',
    'type'    => 'int',
    # parsearg left "" as default
    #'deft' => "2",
    'reqd'    => 'no',
    'modegli' => '3' },

  { 'name'      => 'gli',
    'desc'      => '',
    'type'      => 'flag',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  { 'name'      => 'xml',
    'desc'      => '{scripts.xml}',
    'type'      => 'flag',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  { 'name'      => 'activate',
    'desc'      => '{buildcol.activate}',
    'type'      => 'flag',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  { 'name'      => 'skipactivation',
    'desc'      => '{buildcol.skipactivation}',
    'type'      => 'flag',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  { 'name'      => 'library_url',
    'desc'      => '{buildcol.library_url}',
    'type'      => 'string',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  { 'name'      => 'library_name',
    'desc'      => '{buildcol.library_name}',
    'type'      => 'string',
    'reqd'      => 'no',
    'hiddengli' => 'yes' },

  # NOTE(review): the description below is the same placeholder used by the
  # 'index' argument - possibly a copy/paste slip; confirm the intended key.
  { 'name'    => 'indexname',
    'desc'    => '{buildcol.index}',
    'type'    => 'string',
    'reqd'    => 'no',
    'modegli' => '3' },

  { 'name'    => 'indexlevel',
    'desc'    => '{buildcol.indexlevel}',
    'type'    => 'string',
    'reqd'    => 'no',
    'modegli' => '3' },
];
289
# Script description: its name, a description placeholder and the argument
# specifications declared above it in this file.
my $options = {
  'name' => 'buildcol.pl',
  'desc' => '{buildcol.desc}',
  'args' => $arguments,
};

# Maps a hyphen-prefixed argument name to the name of the buildcolutils
# subclass package that declared it, so an extension can supply its own
# replacement for the standard buildcolutils when its argument is used.
my $function_to_subclass_mappings = {};

# Run the build and then end the script
main();

exit;
303
# @function main()
# Drives a collection build: gathers extra arguments from extension supplied
# buildcolutils subclasses, picks the buildcolutils implementation to use,
# and then runs the build steps followed by the activation call.
# Reads @ARGV and the GSDLHOME, GSDLEXTS, GSDL3SRCHOME and GSDL3EXTS
# environment variables. Exits with status 1 if the command line mixes
# arguments belonging to two different extension subclasses.
sub main
{
  # Dynamically include arguments from any subclasses of buildcolutils we find
  # in the extensions directory
  if (defined $ENV{'GSDLEXTS'})
  {
    _scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
  }
  # - the Greenstone 3 extensions can only be located when their root
  #   directory is known, so never scan with an undefined directory
  if (defined $ENV{'GSDL3EXTS'} && defined $ENV{'GSDL3SRCHOME'})
  {
    _scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
  }

  # Loop through arguments, checking to see if any depend on a specific
  # subclass of buildcolutils. Note that we load the first subclass we
  # encounter so only support a single 'override' ATM.
  my $subclass;
  foreach my $argument (@ARGV)
  {
    # proper arguments start with a hyphen
    next unless $argument =~ /^-/;
    my $required_subclass = $function_to_subclass_mappings->{$argument};
    next unless defined $required_subclass;

    if (!defined $subclass)
    {
      $subclass = $required_subclass;
    }
    # Oh noes! The user has included specific arguments from two different
    # subclasses... this isn't supported
    elsif ($subclass ne $required_subclass)
    {
      print STDERR "Error! You cannot specify arguments from two different extension specific buildcolutils modules: " . $subclass . " != " . $required_subclass . "\n";
      # - report the failure to the caller; a bare exit would signal success
      exit(1);
    }
  }

  my $buildcolutils;
  if (defined $subclass)
  {
    print "* Loading overriding buildcolutils module: " . $subclass . "\n";
    require $subclass . '.pm';
    $buildcolutils = $subclass->new(\@ARGV, $options);
  }
  # We don't have an overridden buildcolutils, or its constructor returned
  # nothing, so load the base class
  if (!defined $buildcolutils)
  {
    $buildcolutils = buildcolutils->new(\@ARGV, $options);
  }

  my $collection = $buildcolutils->get_collection();
  if (defined $collection)
  {
    my ($config_filename, $collect_cfg) = $buildcolutils->read_collection_cfg($collection, $options);
    $buildcolutils->set_collection_options($collect_cfg);

    my $builders_ref = $buildcolutils->prepare_builders($config_filename, $collect_cfg);
    $buildcolutils->build_collection($builders_ref);
    $buildcolutils->build_auxiliary_files($builders_ref);
    $buildcolutils->complete_builders($builders_ref);

    # The user may have requested the collection be activated
    $buildcolutils->activate_collection();
  }

  # Cleanup
  $buildcolutils->deinit();
}
# main()
374
# @function _scanForSubclasses()
# Checks each named extension for a buildcolutils subclass module and, where
# one is found, registers the extra arguments it reports.
# @param $dir The directory holding the 'ext' folder to look within
# @param $exts The available extensions (as a colon separated string)
# @return The number of subclasses found that implement
#         getSupportedArguments(), as an Integer
sub _scanForSubclasses
{
  my ($dir, $exts) = @_;
  my $found = 0;
  my $ext_root = FileUtils::filenameConcatenate($dir, "ext");

  foreach my $ext_name (split(/:/, $exts))
  {
    # - the subclass is named after its extension, with every character
    #   other than a letter stripped out of the package name
    (my $package = $ext_name . 'buildcolutils') =~ s/[^a-z]//gi;
    my $module_file = $package . '.pm';
    my $module_path = FileUtils::filenameConcatenate($ext_root, $ext_name, 'perllib', $module_file);

    # - nothing to do for extensions that ship no such module
    next unless FileUtils::fileExists($module_path);

    # - loaded by file name (with .pm), so it is resolved through @INC
    require $module_file;

    # - the function is looked up by its fully qualified name held in a
    #   string, which needs symbolic references
    my $getter = qualify('getSupportedArguments', $package);
    no strict 'refs';
    if (!defined &{$getter})
    {
      print "Warning! A subclass of buildcolutils module (named '" . $module_file . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $module_path . "\n";
      next;
    }

    my $extra_arguments = &{$getter}();
    foreach my $spec (@{$extra_arguments})
    {
      # - remember which subclass supports this argument; the key carries
      #   the leading hyphen so it can be compared directly with @ARGV
      $function_to_subclass_mappings->{'-' . $spec->{'name'}} = $package;
      # - and accept it as an argument of this script
      push(@{$options->{'args'}}, $spec);
    }
    $found++;
  }

  return $found;
}
# _scanForSubclasses()
Note: See TracBrowser for help on using the browser.