root/main/trunk/greenstone2/bin/script/import.pl @ 27305

Revision 27305, 12.6 KB (checked in by jmt12, 7 years ago)

Add code to allow importing and building to load overriding versions of inexport.pm and buildcolutils.pm from extensions at runtime. When an extension provides a possible override, Greenstone will dynamically detect and add additional options (visible in the --help). When a user specifies one of these options the appropriate inexport/buildcolutils subclass will be loaded

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
# Compile-time setup: check that the Greenstone environment variables are
# present, then put the core and per-extension library directories at the
# front of @INC so Greenstone modules can be found.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugouts");

    # Greenstone 2 extensions: a colon separated list of extension names,
    # each living under $GSDLHOME/ext/<name>
    if (defined $ENV{'GSDLEXTS'}) {
        my @extensions = split(/:/,$ENV{'GSDLEXTS'});
        foreach my $e (@extensions) {
            my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";

            unshift (@INC, "$ext_prefix/perllib");
            unshift (@INC, "$ext_prefix/perllib/cpan");
            unshift (@INC, "$ext_prefix/perllib/plugins");
            unshift (@INC, "$ext_prefix/perllib/plugouts");
        }
    }
    # Greenstone 3 extensions: same layout, but rooted at $GSDL3SRCHOME/ext
    if (defined $ENV{'GSDL3EXTS'}) {
        my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
        foreach my $e (@extensions) {
            my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";

            unshift (@INC, "$ext_prefix/perllib");
            unshift (@INC, "$ext_prefix/perllib/cpan");
            unshift (@INC, "$ext_prefix/perllib/plugins");
            unshift (@INC, "$ext_prefix/perllib/plugouts");
        }
    }

    # Switch STDERR to UTF-8 only when DEBUG_UNICODE is set to a true value.
    # (The previous test repeated the 'defined' check twice, so the value of
    # the variable was never actually looked at.)
    if ((defined $ENV{'DEBUG_UNICODE'}) && ($ENV{'DEBUG_UNICODE'})) {
        binmode(STDERR,":utf8");
    }
}
68
69# Pragma
70use strict;
71use warnings;
72
73# Modules
74use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
75
76# Greenstone Modules
77use FileUtils;
78use inexport;
79use util;
80
# The values accepted by the -OIDtype argument, each paired with the key of
# its description string.
my $oidtype_list = [
    { name => 'hash',                  desc => '{import.OIDtype.hash}' },
    { name => 'hash_on_full_filename', desc => '{import.OIDtype.hash_on_full_filename}' },
    { name => 'assigned',              desc => '{import.OIDtype.assigned}' },
    { name => 'incremental',           desc => '{import.OIDtype.incremental}' },
    { name => 'dirname',               desc => '{import.OIDtype.dirname}' },
    { name => 'full_filename',         desc => '{import.OIDtype.full_filename}' },
];
94
95
# The output file formats that can be selected with the -saveas argument
my $saveas_list = [
    { name => 'GreenstoneXML',  desc => '{export.saveas.GreenstoneXML}' },
    { name => 'GreenstoneMETS', desc => '{export.saveas.GreenstoneMETS}' },
];
103
104
# Attributes an argument description may carry:
#   name      - the name of the argument
#   desc      - a description of the argument (more likely a reference to
#               a description)
#   type      - the kind of control used to represent the argument, e.g.
#               string, int, flag, regexp, metadata, language, enum
#   reqd      - is this argument required?
#   hiddengli - is this argument hidden in GLI?
#   modegli   - the lowest detail mode at which GLI shows this argument

# The -saveas argument: picks one of the formats in $saveas_list
my $saveas_argument = {
    name    => 'saveas',
    desc    => '{import.saveas}',
    type    => 'enum',
    list    => $saveas_list,
    deft    => 'GreenstoneXML',
    reqd    => 'no',
    modegli => '3',
};
121
122
# The full list of command line arguments understood by import.pl, in the
# order they are presented. Each entry is an argument description hash.
my $arguments = [
    $saveas_argument,
    {
        name      => 'archivedir',
        desc      => '{import.archivedir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'importdir',
        desc      => '{import.importdir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'collectdir',
        desc      => '{import.collectdir}',
        type      => 'string',
        # parsearg left "" as default
        #deft     => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'site',
        desc      => '{import.site}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'manifest',
        desc      => '{import.manifest}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'debug',
        desc      => '{import.debug}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'faillog',
        desc    => '{import.faillog}',
        type    => 'string',
        # parsearg left "" as default
        #deft   => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name      => 'incremental',
        desc      => '{import.incremental}',
        type      => 'flag',
        hiddengli => 'yes',
    },
    {
        name      => 'keepold',
        desc      => '{import.keepold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'removeold',
        desc      => '{import.removeold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'language',
        desc      => '{scripts.language}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'maxdocs',
        desc    => '{import.maxdocs}',
        type    => 'int',
        reqd    => 'no',
        # parsearg left "" as default
        #deft   => "-1",
        range   => '1,',
        modegli => '1',
    },
    # don't set the default to hash - want to allow this to come from
    # entry in collect.cfg but want to override it here
    {
        name    => 'OIDtype',
        desc    => '{import.OIDtype}',
        type    => 'enum',
        list    => $oidtype_list,
        # parsearg left "" as default
        #deft   => "hash",
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'OIDmetadata',
        desc    => '{import.OIDmetadata}',
        type    => 'string',
        #type   => "metadata", #doesn't work properly in GLI
        # parsearg left "" as default
        #deft   => "dc.Identifier",
        reqd    => 'no',
        modegli => '2',
    },
    {
        name      => 'out',
        desc      => '{import.out}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'sortmeta',
        desc    => '{import.sortmeta}',
        type    => 'string',
        #type   => "metadata", #doesn't work properly in GLI
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'removeprefix',
        desc    => '{BasClas.removeprefix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name    => 'removesuffix',
        desc    => '{BasClas.removesuffix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name    => 'groupsize',
        desc    => '{import.groupsize}',
        type    => 'int',
        deft    => '1',
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'gzip',
        desc    => '{import.gzip}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name      => 'statsfile',
        desc      => '{import.statsfile}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'verbosity',
        desc    => '{import.verbosity}',
        type    => 'int',
        range   => '0,',
        # parsearg left "" as default
        #deft   => "2",
        reqd    => 'no',
        modegli => '3',
    },
    {
        name      => 'gli',
        desc      => '{scripts.gli}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'xml',
        desc      => '{scripts.xml}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
];
273
# The description of this script: its name, its description string and the
# arguments it accepts
my $options = {
    name => 'import.pl',
    desc => '{import.desc}',
    args => $arguments,
};

# Lookup from a hyphen-prefixed argument name to the name of the inexport
# subclass package that supplied it. Starts out empty.
my $function_to_inexport_subclass_mappings = {};
279
# @function main()
# Entry point of import.pl. Registers any extra arguments offered by
# extension specific inexport subclasses, chooses which inexport class to
# instantiate based on the arguments in @ARGV, and then runs the import for
# the collection that object reports.
# Exits with a non-zero status if @ARGV mixes arguments belonging to two
# different inexport subclasses.
sub main
{
  # Dynamically include arguments from any subclasses of inexport we find
  # in the extensions directory
  if (defined $ENV{'GSDLEXTS'})
  {
    _scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
  }
  if (defined $ENV{'GSDL3EXTS'})
  {
    _scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
  }

  # Loop through arguments, checking to see if any depend on a specific
  # subclass of InExport. Note that we load the first subclass we encounter
  # so only support a single 'override' ATM.
  my $inexport_subclass;
  foreach my $argument (@ARGV)
  {
    # proper arguments start with a hyphen
    if ($argument =~ /^-/ && defined $function_to_inexport_subclass_mappings->{$argument})
    {
      my $required_inexport_subclass = $function_to_inexport_subclass_mappings->{$argument};
      if (!defined $inexport_subclass)
      {
        $inexport_subclass = $required_inexport_subclass;
      }
      # The user has included specific arguments from two different
      # inexport subclasses... this isn't supported
      elsif ($inexport_subclass ne $required_inexport_subclass)
      {
        print STDERR "Error! You cannot specify arguments from two different extention specific inexport modules: " . $inexport_subclass . " != " . $required_inexport_subclass . "\n";
        # - this is a failure, so report it through the exit status too
        #   (a bare exit would report success)
        exit 1;
      }
    }
  }

  my $inexport;
  if (defined $inexport_subclass)
  {
    print "* Loading Overriding InExport Module: " . $inexport_subclass . "\n";
    # - load by filename, as the package name is only known at runtime
    require $inexport_subclass . '.pm';
    $inexport = $inexport_subclass->new("import", \@ARGV, $options);
  }
  # We don't have an overridden inexport, or its constructor did not give
  # us an object, so use the base inexport class
  if (!defined $inexport)
  {
    $inexport = inexport->new("import", \@ARGV, $options);
  }

  my $collection = $inexport->get_collection();

  if (defined $collection)
  {
    my ($config_filename, $collect_cfg) = $inexport->read_collection_cfg($collection, $options);

    $inexport->set_collection_options($collect_cfg);

    my $pluginfo = $inexport->process_files($config_filename, $collect_cfg);

    $inexport->generate_statistics($pluginfo);
  }

  $inexport->deinit();
}
# main()
347
# @function _scanForSubclasses()
# Looks in each listed extension for a module named <ext>inexport.pm (the
# extension name with every non-letter removed, followed by 'inexport').
# Each one found is loaded, asked for its extra arguments through its
# getSupportedArguments() function, and those arguments are both appended
# to the arguments accepted by import.pl and recorded against the package
# that supplied them.
# @param $dir The extension directory to look within
# @param $exts A list of the available extensions (as a colon separated string)
# @return The number of subclasses of InExport found as an Integer
sub _scanForSubclasses
{
  my ($dir, $exts) = @_;
  my $inexport_class_count = 0;
  my $ext_prefix = FileUtils::filenameConcatenate($dir, "ext");
  my @extensions = split(/:/, $exts);
  foreach my $e (@extensions)
  {
    # - any subclass of InExport must be prefixed with the name of the ext
    my $package_name = $e . 'inexport';
    $package_name =~ s/[^a-z]//gi; # package names have limited characters
    my $inexport_filename = $package_name . '.pm';
    my $inexport_path = FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $inexport_filename);
    # see if we have a subclass of InExport lurking in that extension folder
    next unless (-f $inexport_path);

    # - note we load the filename (with pm) unlike normal modules
    require $inexport_filename;
    # - work out the full name of the function in the newly loaded package
    my $symbol = qualify('getSupportedArguments', $package_name);
    # - strict prevents strings being used as function names, so disable
    #   just that stricture, and only while we look the function up
    my $get_supported_arguments;
    {
      no strict 'refs';
      # - lets check that the function we are about to call actually exists
      if ( defined &{$symbol} )
      {
        $get_supported_arguments = \&{$symbol};
      }
    }

    if (!defined $get_supported_arguments)
    {
      print STDERR "Warning! A subclass of InExport module (named '" . $inexport_filename . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $inexport_path . "\n";
      next;
    }

    # - anything other than an array reference is treated as 'no extra
    #   arguments' rather than being dereferenced blindly
    my $extra_arguments = $get_supported_arguments->();
    my @supported_arguments = (ref($extra_arguments) eq 'ARRAY') ? @{$extra_arguments} : ();
    foreach my $argument (@supported_arguments)
    {
      # - record a mapping from each extra argument to the inexport class
      #   that supports it. We put the hyphen on here to make comparing
      #   with command line arguments even easier
      $function_to_inexport_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
      # - and then add them as acceptable arguments to import.pl
      push(@{$options->{'args'}}, $argument);
    }
    $inexport_class_count++;
  }
  return $inexport_class_count;
}
# _scanForSubclasses()
399
# Run the import
main();
Note: See TracBrowser for help on using the browser.