root/main/trunk/greenstone2/bin/script/import.pl @ 27351

Revision 27351, 12.7 KB (checked in by kjdon, 7 years ago)

added filename option to OIDtype - was there for plugins, but not for import

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
#!/usr/bin/perl -w

###########################################################################
#
# import.pl --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################


# This program will import a number of files into a particular collection

package import;

# Compile-time setup: verify the Greenstone environment and extend @INC so
# that the Greenstone perllib directories (core first, then every enabled
# extension) are searched when the 'use' statements below are compiled.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Sub-directories searched under each root. They are unshifted in this
    # order, so the last one listed ends up first in @INC.
    my @lib_subdirs = ('perllib', 'perllib/cpan', 'perllib/plugins', 'perllib/plugouts');

    # Prepend all library sub-directories of the given root to @INC.
    my $add_lib_dirs = sub {
        my ($root) = @_;
        foreach my $subdir (@lib_subdirs) {
            unshift (@INC, "$root/$subdir");
        }
    };

    $add_lib_dirs->($ENV{'GSDLHOME'});

    # Greenstone 2 extensions live under $GSDLHOME/ext/<name>
    if (defined $ENV{'GSDLEXTS'}) {
        foreach my $e (split(/:/, $ENV{'GSDLEXTS'})) {
            $add_lib_dirs->("$ENV{'GSDLHOME'}/ext/$e");
        }
    }

    # Greenstone 3 extensions live under $GSDL3SRCHOME/ext/<name>
    if (defined $ENV{'GSDL3EXTS'}) {
        if (defined $ENV{'GSDL3SRCHOME'}) {
            foreach my $e (split(/:/, $ENV{'GSDL3EXTS'})) {
                $add_lib_dirs->("$ENV{'GSDL3SRCHOME'}/ext/$e");
            }
        }
        else {
            # Without the base directory the paths would resolve to
            # "/ext/<name>/..." at the filesystem root, so do not add them.
            warn "GSDL3EXTS is set but GSDL3SRCHOME is not - ignoring Greenstone 3 extensions\n";
        }
    }

    # Any defined value of DEBUG_UNICODE (even an empty one) switches STDERR
    # to UTF-8 output.
    if (defined $ENV{'DEBUG_UNICODE'}) {
        binmode(STDERR,":utf8");
    }
}
68
69# Pragma
70use strict;
71use warnings;
72
73# Modules
74use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
75
76# Greenstone Modules
77use FileUtils;
78use inexport;
79use util;
80
# Values accepted by the -OIDtype argument (see $arguments below). Each
# 'desc' is a reference to a description string rather than literal text.
my $oidtype_list =
    [ { 'name' => "hash",
        'desc' => "{import.OIDtype.hash}" },
      { 'name' => "hash_on_full_filename",
        'desc' => "{import.OIDtype.hash_on_full_filename}" },
      { 'name' => "assigned",
        'desc' => "{import.OIDtype.assigned}" },
      { 'name' => "incremental",
        'desc' => "{import.OIDtype.incremental}" },
      { 'name' => "filename",
        'desc' => "{import.OIDtype.filename}" },
      { 'name' => "dirname",
        'desc' => "{import.OIDtype.dirname}" },
      { 'name' => "full_filename",
        'desc' => "{import.OIDtype.full_filename}" } ];
96
97
# used to control output file format
my $saveas_list =
    [ { 'name' => "GreenstoneXML",
        'desc' => "{export.saveas.GreenstoneXML}"},
      { 'name' => "GreenstoneMETS",
        'desc' => "{export.saveas.GreenstoneMETS}"},
      ];


# Possible attributes for each argument
# name: The name of the argument
# desc: A description (or more likely a reference to a description) for this argument
# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, language, enum etc
# reqd: Is this argument required?
# hiddengli: Is this argument hidden in GLI?
# modegli: The lowest detail mode this argument is visible at in GLI

# The -saveas argument: an enum over $saveas_list defaulting to GreenstoneXML.
# Kept in its own variable and used as the first entry of $arguments.
my $saveas_argument
    = { 'name' => "saveas",
        'desc' => "{import.saveas}",
        'type' => "enum",
        'list' => $saveas_list,
        'deft' => "GreenstoneXML",
        'reqd' => "no",
        'modegli' => "3" };
123
124
# The command line arguments accepted by import.pl, in the order they are
# declared to the argument parser. Entries use the attributes listed in the
# "Possible attributes for each argument" comment above. Several defaults are
# deliberately left unset (see the "parsearg left ... as default" notes).
my $arguments =
    [
      $saveas_argument,
      { 'name' => "archivedir",
        'desc' => "{import.archivedir}",
        'type' => "string",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "importdir",
        'desc' => "{import.importdir}",
        'type' => "string",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "collectdir",
        'desc' => "{import.collectdir}",
        'type' => "string",
        # parsearg left "" as default
        #'deft' => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
        'deft' => "",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "site",
        'desc' => "{import.site}",
        'type' => "string",
        'deft' => "",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "manifest",
        'desc' => "{import.manifest}",
        'type' => "string",
        'deft' => "",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "debug",
        'desc' => "{import.debug}",
        'type' => "flag",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "faillog",
        'desc' => "{import.faillog}",
        'type' => "string",
        # parsearg left "" as default
        #'deft' => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
        'deft' => "",
        'reqd' => "no",
        'modegli' => "3" },
      { 'name' => "incremental",
        'desc' => "{import.incremental}",
        'type' => "flag",
        'hiddengli' => "yes" },
      { 'name' => "keepold",
        'desc' => "{import.keepold}",
        'type' => "flag",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "removeold",
        'desc' => "{import.removeold}",
        'type' => "flag",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "language",
        'desc' => "{scripts.language}",
        'type' => "string",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "maxdocs",
        'desc' => "{import.maxdocs}",
        'type' => "int",
        'reqd' => "no",
        # parsearg left "" as default
        #'deft' => "-1",
        'range' => "1,",
        'modegli' => "1" },
      # don't set the default to hash - want to allow this to come from
      # entry in collect.cfg but want to override it here
      { 'name' => "OIDtype",
        'desc' => "{import.OIDtype}",
        'type' => "enum",
        'list' => $oidtype_list,
        # parsearg left "" as default
        #'deft' => "hash",
        'reqd' => "no",
        'modegli' => "2" },
      { 'name' => "OIDmetadata",
        'desc' => "{import.OIDmetadata}",
        'type' => "string",
        #'type' => "metadata", #doesn't work properly in GLI
        # parsearg left "" as default
        #'deft' => "dc.Identifier",
        'reqd' => "no",
        'modegli' => "2" },
      { 'name' => "out",
        'desc' => "{import.out}",
        'type' => "string",
        'deft' => "STDERR",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "sortmeta",
        'desc' => "{import.sortmeta}",
        'type' => "string",
        #'type' => "metadata", #doesn't work properly in GLI
        'reqd' => "no",
        'modegli' => "2" },
      { 'name' => "removeprefix",
        'desc' => "{BasClas.removeprefix}",
        'type' => "regexp",
        'deft' => "",
        'reqd' => "no",
        'modegli' => "3" },
      { 'name' => "removesuffix",
        'desc' => "{BasClas.removesuffix}",
        'type' => "regexp",
        'deft' => "",
        'reqd' => "no",
        'modegli' => "3" },
      { 'name' => "groupsize",
        'desc' => "{import.groupsize}",
        'type' => "int",
        'deft' => "1",
        'reqd' => "no",
        'modegli' => "2" },
      { 'name' => "gzip",
        'desc' => "{import.gzip}",
        'type' => "flag",
        'reqd' => "no",
        'modegli' => "3" },
      { 'name' => "statsfile",
        'desc' => "{import.statsfile}",
        'type' => "string",
        'deft' => "STDERR",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "verbosity",
        'desc' => "{import.verbosity}",
        'type' => "int",
        'range' => "0,",
        # parsearg left "" as default
        # 'deft' => "2",
        'reqd' => "no",
        'modegli' => "3" },
      { 'name' => "gli",
        'desc' => "{scripts.gli}",
        'type' => "flag",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "xml",
        'desc' => "{scripts.xml}",
        'type' => "flag",
        'reqd' => "no",
        'hiddengli' => "yes" }];
275
# Description of this script (name, description reference and accepted
# arguments) that is handed to the inexport constructor in main().
# _scanForSubclasses() may append further entries to its 'args' list.
my $options = { 'name' => "import.pl",
                'desc' => "{import.desc}",
                'args' => $arguments };

# Maps a command line flag (including its leading hyphen) to the name of the
# extension-specific inexport subclass that declared it. Filled in by
# _scanForSubclasses() and consulted by main().
my $function_to_inexport_subclass_mappings = {};
281
# @function main()
# Entry point of the script. Collects extra arguments contributed by
# extension-specific inexport subclasses, picks the inexport class to use
# based on the flags present in @ARGV, and then runs the import through it.
# Exits with a non-zero status if flags belonging to two different inexport
# subclasses are mixed on the command line.
sub main
{
  # Dynamically include arguments from any subclasses of inexport we find
  # in the extensions directory
  if (defined $ENV{'GSDLEXTS'})
  {
    _scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
  }
  # - Greenstone 3 extensions can only be located when their base directory
  #   is known, so skip the scan if GSDL3SRCHOME is missing
  if (defined $ENV{'GSDL3EXTS'} && defined $ENV{'GSDL3SRCHOME'})
  {
    _scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
  }

  # Loop through arguments, checking to see if any depend on a specific
  # subclass of InExport. Note that we load the first subclass we encounter
  # so only support a single 'override' ATM.
  my $inexport_subclass;
  foreach my $argument (@ARGV)
  {
    # proper arguments start with a hyphen
    next unless $argument =~ /^-/;
    my $required_inexport_subclass = $function_to_inexport_subclass_mappings->{$argument};
    next unless defined $required_inexport_subclass;

    if (!defined $inexport_subclass)
    {
      $inexport_subclass = $required_inexport_subclass;
    }
    # Oh noes! The user has included specific arguments from two different
    # inexport subclasses... this isn't supported
    elsif ($inexport_subclass ne $required_inexport_subclass)
    {
      print STDERR "Error! You cannot specify arguments from two different extention specific inexport modules: " . $inexport_subclass . " != " . $required_inexport_subclass . "\n";
      # - this is a failure, so report it through the exit status as well
      exit(1);
    }
  }

  my $inexport;
  if (defined $inexport_subclass)
  {
    print "* Loading Overriding InExport Module: " . $inexport_subclass . "\n";
    require $inexport_subclass . '.pm';
    $inexport = $inexport_subclass->new("import",\@ARGV,$options);
  }
  # We don't have a overridden inexport, or the above command failed somehow
  # so load the base inexport class
  if (!defined $inexport)
  {
    $inexport = inexport->new("import",\@ARGV,$options);
  }

  my $collection = $inexport->get_collection();

  if (defined $collection)
  {
    my ($config_filename,$collect_cfg) = $inexport->read_collection_cfg($collection,$options);

    $inexport->set_collection_options($collect_cfg);

    my $pluginfo = $inexport->process_files($config_filename,$collect_cfg);

    $inexport->generate_statistics($pluginfo);
  }

  $inexport->deinit();
}
# main()
349
# @function _scanForSubclasses()
# Looks in each listed extension for a module named '<ext>inexport.pm' (with
# every non-letter character removed from the name). Each module found is
# loaded and, if it provides getSupportedArguments(), the arguments it returns
# are appended to $options->{'args'} and recorded (as '-<name>') in
# $function_to_inexport_subclass_mappings.
# @param $dir The extension directory to look within
# @param $exts A list of the available extensions (as a colon separated string)
# @return The number of subclasses of InExport found as an Integer (modules
#         lacking getSupportedArguments() are warned about and not counted)
sub _scanForSubclasses
{
  my ($dir, $exts) = @_;
  my $inexport_class_count = 0;
  my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
  my @extensions = split(/:/, $exts);
  foreach my $e (@extensions)
  {
    # - any subclass of InExport must be prefixed with the name of the ext
    my $package_name = $e . 'inexport';
    $package_name =~ s/[^a-z]//gi; # package names have limited characters
    my $inexport_filename = $package_name . '.pm';
    my $inexport_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $inexport_filename);
    # see if we have a subclass of InExport lurking in that extension folder
    next unless (-f $inexport_path);

    # - note we load the filename (with pm) unlike normal modules
    require $inexport_filename;
    # - make call to the newly created package
    my $symbol = qualify('getSupportedArguments', $package_name);
    # - strict prevents strings being used as function names, so disable
    #   just the 'refs' stricture, and only while we look the function up.
    #   We also check that the function actually exists in that package.
    my $get_supported_arguments = do {
      no strict 'refs';
      defined &{$symbol} ? \&{$symbol} : undef;
    };
    if (defined $get_supported_arguments)
    {
      my $extra_arguments = $get_supported_arguments->();
      foreach my $argument (@{$extra_arguments})
      {
        # - record a mapping from each extra arguments to the inexport class
        #   that supports it. We put the hyphen on here to make comparing
        #   with command line arguments even easier
        $function_to_inexport_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
        # - and then add them as acceptable arguments to import.pl
        push(@{$options->{'args'}}, $argument);
      }
      $inexport_class_count++;
    }
    else
    {
      print "Warning! A subclass of InExport module (named '" . $inexport_filename . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $inexport_path . "\n";
    }
  }
  return $inexport_class_count;
}
# _scanForSubclasses()
401
# Run the import when this file is executed.
main();
Note: See TracBrowser for help on using the browser.