root/main/trunk/greenstone2/bin/script/buildcol.pl @ 27634

Revision 27634, 12.4 KB (checked in by ak19, 7 years ago)

Changed the order of the @INC 'unshifts' due to a clash between Greenstone's own XMLParser.pm and XML/XPath/XMLParser. We want ours to be found first in @INC.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl --
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29# This program will build a particular collection.
30package buildcol;
31
32# Environment
# Compile-time setup of the module search path. This runs inside BEGIN so
# that @INC is populated before the 'use' statements further down the file
# are resolved. Dies if GSDLHOME or GSDLOS is missing from the environment.
BEGIN
{
  die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
  die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

  # Order is important. unshift prepends, so a directory unshifted LATER is
  # searched EARLIER. We want our own XMLParser to be found ahead of the one
  # under XML/XPath, hence XML/XPath is unshifted first (and so ends up
  # behind the other Greenstone directories).
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan/XML/XPath');
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/classify');
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/plugins');
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan');
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib');

  if (defined $ENV{'GSDL-RUN-SETUP'})
  {
    require util;
    &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
  }

  # Each extension contributes its own perllib directories. An extension set
  # is a pair of [home directory, colon separated list of extension names].
  # A set is skipped unless BOTH parts are defined: previously a GSDL3EXTS
  # without GSDL3SRCHOME put root-relative paths such as '/ext/foo/perllib'
  # onto @INC (and triggered an uninitialized value warning).
  my @extension_sets = (
    [ $ENV{'GSDLHOME'},     $ENV{'GSDLEXTS'}  ],
    [ $ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'} ],
  );
  foreach my $extension_set (@extension_sets)
  {
    my ($ext_home, $ext_names) = @{$extension_set};
    next unless defined $ext_home && defined $ext_names;

    foreach my $e (split(/:/, $ext_names))
    {
      my $ext_prefix = $ext_home . '/ext/' . $e;

      # Same unshift order as before, so 'classify' is searched first and
      # 'perllib' last within each extension.
      unshift(@INC, $ext_prefix . '/perllib');
      unshift(@INC, $ext_prefix . '/perllib/cpan');
      unshift(@INC, $ext_prefix . '/perllib/plugins');
      unshift(@INC, $ext_prefix . '/perllib/classify');
    }
  }
}
80
81# Pragma
82use strict;
83no strict 'refs'; # allow filehandles to be variables and vice versa
84no strict 'subs'; # allow barewords (eg STDERR) as function arguments
85
86# Modules
87use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
88
89# Greenstone Modules
90use buildcolutils;
91use FileUtils;
92use util;
93
94# Globals
95# - build up arguments list/control
# Allowed values for the -mode argument. Each entry pairs the value's name
# with the key of its description string.
my $mode_list = [
  { 'name' => "all",           'desc' => "{buildcol.mode.all}" },
  { 'name' => "compress_text", 'desc' => "{buildcol.mode.compress_text}" },
  { 'name' => "build_index",   'desc' => "{buildcol.mode.build_index}" },
  { 'name' => "infodb",        'desc' => "{buildcol.mode.infodb}" },
];

# Allowed values shared by the -sections_index_document_metadata and
# -sections_sort_on_document_metadata arguments.
my $sec_index_list = [
  {
    'name' => "never",
    'desc' => "{buildcol.sections_index_document_metadata.never}",
  },
  {
    'name' => "always",
    'desc' => "{buildcol.sections_index_document_metadata.always}",
  },
  {
    'name' => "unless_section_metadata_exists",
    'desc' => "{buildcol.sections_index_document_metadata.unless_section_metadata_exists}",
  },
];
114
# The command line arguments accepted by this script. Each entry is a hash
# with at least 'name', 'desc' and 'type'; the remaining keys ('reqd',
# 'deft', 'range', 'list', 'modegli', 'hiddengli') are optional. Several
# defaults are deliberately left unset here (see the commented out 'deft'
# lines) - the original notes that parsearg is left with "" as the default.
my $arguments = [
  {
    'name'    => "remove_empty_classifications",
    'desc'    => "{buildcol.remove_empty_classifications}",
    'type'    => "flag",
    'reqd'    => "no",
    'modegli' => "2",
  },
  {
    'name'      => "archivedir",
    'desc'      => "{buildcol.archivedir}",
    'type'      => "string",
    'reqd'      => "no",
    'hiddengli' => "yes",
  },
  {
    'name'      => "builddir",
    'desc'      => "{buildcol.builddir}",
    'type'      => "string",
    'reqd'      => "no",
    'hiddengli' => "yes",
  },
# {
#   'name' => "cachedir",
#   'desc' => "{buildcol.cachedir}",
#   'type' => "string",
#   'reqd' => "no",
# },
  {
    'name'      => "collectdir",
    'desc'      => "{buildcol.collectdir}",
    'type'      => "string",
    # parsearg left "" as default
    #'deft' => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
    'reqd'      => "no",
    'hiddengli' => "yes",
  },
  {
    'name'      => "site",
    'desc'      => "{buildcol.site}",
    'type'      => "string",
    'deft'      => "",
    'reqd'      => "no",
    'hiddengli' => "yes",
  },
  {
    'name'      => "debug",
    'desc'      => "{buildcol.debug}",
    'type'      => "flag",
    'reqd'      => "no",
    'hiddengli' => "yes",
  },
  {
    'name'    => "faillog",
    'desc'    => "{buildcol.faillog}",
    'type'    => "string",
    # parsearg left "" as default
    #'deft' => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
    'reqd'    => "no",
    'modegli' => "3",
  },
  {
    'name'    => "index",
    'desc'    => "{buildcol.index}",
    'type'    => "string",
    'reqd'    => "no",
    'modegli' => "3",
  },
  {
    # Note: unlike its neighbours this entry carries no 'reqd' key.
    'name'      => "incremental",
    'desc'      => "{buildcol.incremental}",
    'type'      => "flag",
    'hiddengli' => "yes",
  },
  {
    'name'      => "keepold",
    'desc'      => "{buildcol.keepold}",
    'type'      => "flag",
    'reqd'      => "no",
    #'modegli' => "3",
    'hiddengli' => "yes",
  },
  {
    'name'      => "removeold",
    'desc'      => "{buildcol.removeold}",
    'type'      => "flag",
    'reqd'      => "no",
    #'modegli' => "3",
    'hiddengli' => "yes",
  },
  {
    'name'    => "language",
    'desc'    => "{scripts.language}",
    'type'    => "string",
    'reqd'    => "no",
    'modegli' => "3",
  },
  {
    'name'      => "maxdocs",
    'desc'      => "{buildcol.maxdocs}",
    'type'      => "int",
    'reqd'      => "no",
    'hiddengli' => "yes",
  },
  {
    'name'    => "maxnumeric",
    'desc'    => "{buildcol.maxnumeric}",
    'type'    => "int",
    'reqd'    => "no",
    'deft'    => "4",
    'range'   => "4,512",
    'modegli' => "3",
  },
  {
    'name'    => "mode",
    'desc'    => "{buildcol.mode}",
    'type'    => "enum",
    'list'    => $mode_list,
    # parsearg left "" as default
    #'deft' => "all",
    'reqd'    => "no",
    'modegli' => "3",
  },
  {
    'name'    => "no_strip_html",
    'desc'    => "{buildcol.no_strip_html}",
    'type'    => "flag",
    'reqd'    => "no",
    'modegli' => "3",
  },
  {
    'name'    => "store_metadata_coverage",
    'desc'    => "{buildcol.store_metadata_coverage}",
    'type'    => "flag",
    'reqd'    => "no",
    'modegli' => "3",
  },
  {
    'name'    => "no_text",
    'desc'    => "{buildcol.no_text}",
    'type'    => "flag",
    'reqd'    => "no",
    'modegli' => "2",
  },
  {
    'name'    => "sections_index_document_metadata",
    'desc'    => "{buildcol.sections_index_document_metadata}",
    'type'    => "enum",
    'list'    => $sec_index_list,
    'reqd'    => "no",
    'modegli' => "2",
  },
  {
    # Shares its list of allowed values with the argument above.
    'name'    => "sections_sort_on_document_metadata",
    'desc'    => "{buildcol.sections_sort_on_document_metadata}",
    'type'    => "enum",
    'list'    => $sec_index_list,
    'reqd'    => "no",
    'modegli' => "2",
  },
  {
    'name'      => "out",
    'desc'      => "{buildcol.out}",
    'type'      => "string",
    'deft'      => "STDERR",
    'reqd'      => "no",
    'hiddengli' => "yes",
  },
  {
    'name'    => "verbosity",
    'desc'    => "{buildcol.verbosity}",
    'type'    => "int",
    # parsearg left "" as default
    #'deft' => "2",
    'reqd'    => "no",
    'modegli' => "3",
  },
  {
    'name'      => "gli",
    'desc'      => "",
    'type'      => "flag",
    'reqd'      => "no",
    'hiddengli' => "yes",
  },
  {
    'name'      => "xml",
    'desc'      => "{scripts.xml}",
    'type'      => "flag",
    'reqd'      => "no",
    'hiddengli' => "yes",
  },
  {
    'name'      => "activate",
    'desc'      => "{buildcol.activate}",
    'type'      => "flag",
    'reqd'      => "no",
    'hiddengli' => "yes",
  },
  {
    # NOTE(review): this reuses the description key of the 'index' argument
    # - confirm whether that is intentional.
    'name'    => "indexname",
    'desc'    => "{buildcol.index}",
    'type'    => "string",
    'reqd'    => "no",
    'modegli' => "3",
  },
  {
    'name'    => "indexlevel",
    'desc'    => "{buildcol.indexlevel}",
    'type'    => "string",
    'reqd'    => "no",
    'modegli' => "3",
  },
];
272
# Top level description of this script: its name, the key of its
# description string, and the argument specifications it accepts.
my $options = {
  'name' => "buildcol.pl",
  'desc' => "{buildcol.desc}",
  'args' => $arguments,
};

# Maps a command line argument (including its leading hyphen) to the name of
# the buildcolutils subclass that supports it. This lets extensions override
# the normal buildcolutils where necessary. Filled in by
# _scanForSubclasses().
my $function_to_subclass_mappings = {};

# Run the build, then finish.
main();

exit;
286
# @function main()
# Entry point of the script. Registers the extra arguments offered by any
# extension specific subclasses of buildcolutils, chooses which class to
# instantiate based on the arguments actually given in @ARGV, and then
# drives that object through the build steps.
# Exits with status 1 if the command line mixes arguments belonging to two
# different subclasses.
sub main
{
  # Dynamically include arguments from any subclasses of buildcolutils we find
  # in the extensions directory
  if (defined $ENV{'GSDLEXTS'})
  {
    _scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
  }
  # - only scan GS3 extensions when we know where they live; without
  #   GSDL3SRCHOME there is no directory to build the extension path from
  if (defined $ENV{'GSDL3EXTS'} && defined $ENV{'GSDL3SRCHOME'})
  {
    _scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
  }

  # Loop through arguments, checking to see if any depend on a specific
  # subclass of buildcolutils. Note that we load the first subclass we
  # encounter so only support a single 'override' ATM.
  my $subclass;
  foreach my $argument (@ARGV)
  {
    # proper arguments start with a hyphen
    next unless $argument =~ /^-/;
    my $required_subclass = $function_to_subclass_mappings->{$argument};
    next unless defined $required_subclass;

    if (!defined $subclass)
    {
      $subclass = $required_subclass;
    }
    # Oh noes! The user has included specific arguments from two different
    # subclasses... this isn't supported
    elsif ($subclass ne $required_subclass)
    {
      print STDERR "Error! You cannot specify arguments from two different extention specific buildcolutils modules: " . $subclass . " != " . $required_subclass . "\n";
      # - this is a failure, so report it through the exit status too (a bare
      #   exit would have reported success)
      exit(1);
    }
  }

  my $buildcolutils;
  if (defined $subclass)
  {
    print "* Loading overriding buildcolutils module: " . $subclass . "\n";
    require $subclass . '.pm';
    # - explicit method call; the indirect object form (new CLASS(...)) is
    #   ambiguous to the parser
    $buildcolutils = $subclass->new(\@ARGV, $options);
  }
  # We don't have a overridden buildcolutils, or the above constructor
  # returned nothing, so load the base class
  if (!defined $buildcolutils)
  {
    $buildcolutils = buildcolutils->new(\@ARGV, $options);
  }

  # Nothing is built unless get_collection() returns a collection
  my $collection = $buildcolutils->get_collection();
  if (defined $collection)
  {
    my ($config_filename,$collect_cfg) = $buildcolutils->read_collection_cfg($collection, $options);
    $buildcolutils->set_collection_options($collect_cfg);

    my $builders_ref = $buildcolutils->prepare_builders($config_filename, $collect_cfg);
    $buildcolutils->build_collection($builders_ref);
    $buildcolutils->build_auxiliary_files($builders_ref);
    $buildcolutils->complete_builders($builders_ref);

    # The user may have requested the collection be activated
    $buildcolutils->activate_collection();
  }

  # Cleanup
  $buildcolutils->deinit();
}
# main()
357
# @function _scanForSubclasses()
# Looks inside each named extension for a module called
# <ext>buildcolutils.pm (non-letters removed from the name). Every module
# found is loaded, and the arguments returned by its getSupportedArguments()
# function are both recorded in $function_to_subclass_mappings and appended
# to the arguments accepted by this script.
# @param $dir The directory containing the 'ext' folder to look within
# @param $exts A list of the available extensions (as a colon separated string)
# @return The number of subclasses of buildcolutils found (that implement
#         getSupportedArguments()) as an Integer
sub _scanForSubclasses
{
  my ($dir, $exts) = @_;
  my $subclasses_found = 0;
  my $ext_root = &FileUtils::filenameConcatenate($dir, "ext");
  foreach my $ext_name (split(/:/, $exts))
  {
    # - any subclass must be prefixed with the name of the ext, and package
    #   names have limited characters so everything but letters is dropped
    (my $package_name = $ext_name . 'buildcolutils') =~ s/[^a-z]//gi;
    my $module_file = $package_name . '.pm';
    my $module_path = &FileUtils::filenameConcatenate($ext_root, $ext_name, 'perllib', $module_file);

    # - most extensions won't have a subclass lurking in their folder
    next unless &FileUtils::fileExists($module_path);

    # - note we load the filename (with pm) unlike normal modules, so the
    #   module is located through @INC rather than via $module_path
    require $module_file;

    # - build the fully qualified name of the function in the new package
    my $getter = qualify('getSupportedArguments', $package_name);
    # - strict prevents strings being used as function calls, so disable
    #   that check for the rest of this iteration
    no strict 'refs';

    # - make sure the function we are about to call actually exists
    if (!defined &{$getter})
    {
      print "Warning! A subclass of buildcolutils module (named '" . $module_file . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $module_path . "\n";
      next;
    }

    my $extra_arguments = &{$getter}();
    foreach my $argument (@{$extra_arguments})
    {
      # - record a mapping from each extra argument to the subclass that
      #   supports it. The hyphen goes on here to make comparing with
      #   command line arguments even easier
      my $switch = '-' . $argument->{'name'};
      $function_to_subclass_mappings->{$switch} = $package_name;
      # - and then add it as an acceptable argument to this script
      push(@{$options->{'args'}}, $argument);
    }
    $subclasses_found++;
  }
  return $subclasses_found;
}
# _scanForSubclasses()
Note: See TracBrowser for help on using the browser.