root/main/trunk/greenstone2/bin/script/import.pl @ 28640

Revision 28640, 11.0 KB (checked in by kjdon, 6 years ago)

fixed a few errors that I hadn't noticed - I obviously hadn't tested the last code changes I made :-(

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
33BEGIN {
34    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
35    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
36    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
37    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
38    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
39    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugouts");
40
41    if (defined $ENV{'GSDLEXTS'}) {
42    my @extensions = split(/:/,$ENV{'GSDLEXTS'});
43    foreach my $e (@extensions) {
44        my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
45
46        unshift (@INC, "$ext_prefix/perllib");
47        unshift (@INC, "$ext_prefix/perllib/cpan");
48        unshift (@INC, "$ext_prefix/perllib/plugins");
49        unshift (@INC, "$ext_prefix/perllib/plugouts");
50    }
51    }
52    if (defined $ENV{'GSDL3EXTS'}) {
53    my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
54    foreach my $e (@extensions) {
55        my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
56
57        unshift (@INC, "$ext_prefix/perllib");
58        unshift (@INC, "$ext_prefix/perllib/cpan");
59        unshift (@INC, "$ext_prefix/perllib/plugins");
60        unshift (@INC, "$ext_prefix/perllib/plugouts");
61    }
62    }
63
64    if ((defined $ENV{'DEBUG_UNICODE'}) && (defined $ENV{'DEBUG_UNICODE'})) {
65    binmode(STDERR,":utf8");
66    }
67}
68
69# Pragma
70use strict;
71use warnings;
72
73# Modules
74use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
75
76# Greenstone Modules
77use FileUtils;
78use inexport;
79use util;
80use gsprintf 'gsprintf';
81
82
83# used to control output file format
84my $saveas_list =
85    [ { 'name' => "GreenstoneXML",
86        'desc' => "{export.saveas.GreenstoneXML}"},
87      { 'name' => "GreenstoneMETS",
88        'desc' => "{export.saveas.GreenstoneMETS}"},
89      ];
90
91
92# Possible attributes for each argument
93# name: The name of the argument
94# desc: A description (or more likely a reference to a description) for this argument
95# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, language, enum etc
96# reqd: Is this argument required?
97# hiddengli: Is this argument hidden in GLI?
98# modegli: The lowest detail mode this argument is visible at in GLI
99
100my $saveas_argument
101    = { 'name' => "saveas",
102    'desc' => "{import.saveas}",
103    'type' => "enum",
104    'list' => $saveas_list,
105    #'deft' => "GreenstoneXML", # if saveas is defined a default here, then any valid value provided in collect.cfg is ignored
106    'reqd' => "no",
107    'modegli' => "3" };
108
109
110my $arguments =
111    [
112      $saveas_argument,
113    { 'name' => "sortmeta",
114    'desc' => "{import.sortmeta}",
115    'type' => "string",
116    #'type' => "metadata", #doesn't work properly in GLI
117    'reqd' => "no",
118    'modegli' => "2" },
119      { 'name' => "removeprefix",
120    'desc' => "{BasClas.removeprefix}",
121    'type' => "regexp",
122    'deft' => "",
123    'reqd' => "no",
124    'modegli' => "3" },
125      { 'name' => "removesuffix",
126    'desc' => "{BasClas.removesuffix}",
127    'type' => "regexp",
128    'deft' => "",
129    'reqd' => "no",
130    'modegli' => "3" },
131      { 'name' => "groupsize",
132    'desc' => "{import.groupsize}",
133    'type' => "int",
134    'deft' => "1",
135    'reqd' => "no",
136    'modegli' => "2" },
137      { 'name' => "archivedir",
138    'desc' => "{import.archivedir}",
139    'type' => "string",
140    'reqd' => "no",
141        'hiddengli' => "yes" },
142      @$inexport::directory_arguments,
143      { 'name' => "gzip",
144    'desc' => "{import.gzip}",
145    'type' => "flag",
146    'reqd' => "no",
147    'modegli' => "3" },
148     @$inexport::arguments
149];
150
151my $options = { 'name' => "import.pl",
152        'desc' => "{import.desc}",
153        'args' => $arguments };
154
155my $function_to_inexport_subclass_mappings = {};
156
157sub main
158{
159  # Dynamically include arguments from any subclasses of inexport we find
160  # in the extensions directory
161  if (defined $ENV{'GSDLEXTS'})
162  {
163    &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
164  }
165  if (defined $ENV{'GSDL3EXTS'})
166  {
167    &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
168  }
169
170  # Loop through arguments, checking to see if any depend on a specific
171  # subclass of InExport. Note that we load the first subclass we encounter
172  # so only support a single 'override' ATM.
173  my $inexport_subclass;
174  foreach my $argument (@ARGV)
175  {
176    # proper arguments start with a hyphen
177    if ($argument =~ /^-/ && defined $function_to_inexport_subclass_mappings->{$argument})
178    {
179      my $required_inexport_subclass = $function_to_inexport_subclass_mappings->{$argument};
180      if (!defined $inexport_subclass)
181      {
182        $inexport_subclass = $required_inexport_subclass;
183      }
184      # Oh noes! The user has included specific arguments from two different
185      # inexport subclasses... this isn't supported
186      elsif ($inexport_subclass ne $required_inexport_subclass)
187      {
188        print STDERR "Error! You cannot specify arguments from two different extention specific inexport modules: " . $inexport_subclass . " != " . $required_inexport_subclass . "\n";
189        exit;
190      }
191    }
192  }
193
194  my $inexport;
195  if (defined $inexport_subclass)
196  {
197    print "* Loading Overriding InExport Module: " . $inexport_subclass . "\n";
198    require $inexport_subclass . '.pm';
199    $inexport = new $inexport_subclass("import",\@ARGV,$options);
200  }
201  # We don't have a overridden inexport, or the above command failed somehow
202  # so load the base inexport class
203  if (!defined $inexport)
204  {
205    $inexport = new inexport("import",\@ARGV,$options);
206  }
207
208  my $collection = $inexport->get_collection();
209
210  if (defined $collection)
211  {
212    my ($config_filename,$collect_cfg) = $inexport->read_collection_cfg($collection,$options);
213
214    #$inexport->set_collection_options($collect_cfg);
215    &set_collection_options($inexport, $collect_cfg);
216
217    my $pluginfo = $inexport->process_files($config_filename,$collect_cfg);
218
219    $inexport->generate_statistics($pluginfo);
220  }
221
222  $inexport->deinit();
223}
224# main()
225
226# @function _scanForSubclasses()
227# @param $dir The extension directory to look within
228# @param $exts A list of the available extensions (as a colon separated string)
229# @return The number of subclasses of InExport found as an Integer
230sub _scanForSubclasses
231{
232  my ($dir, $exts) = @_;
233  my $inexport_class_count = 0;
234  my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
235  my @extensions = split(/:/, $exts);
236  foreach my $e (@extensions)
237  {
238    # - any subclass of InExport must be prefixed with the name of the ext
239    my $package_name = $e . 'inexport';
240    $package_name =~ s/[^a-z]//gi; # package names have limited characters
241    my $inexport_filename = $package_name . '.pm';
242    my $inexport_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $inexport_filename);
243    # see if we have a subclass of InExport lurking in that extension folder
244    if (-f $inexport_path)
245    {
246      # - note we load the filename (with pm) unlike normal modules
247      require $inexport_filename;
248      # - make call to the newly created package
249      my $symbol = qualify('getSupportedArguments', $package_name);
250      # - strict prevents strings being used as function calls, so temporarily
251      #   disable that pragma
252      no strict;
253      # - lets check that the function we are about to call actually exists
254      if ( defined &{$symbol} )
255      {
256        my $extra_arguments = &{$symbol}();
257        foreach my $argument (@{$extra_arguments})
258        {
259          # - record a mapping from each extra arguments to the inexport class
260          #   that supports it. We put the hyphen on here to make comparing
261          #   with command line arguments even easier
262          $function_to_inexport_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
263          # - and them add them as acceptable arguments to import.pl
264          push(@{$options->{'args'}}, $argument);
265        }
266        $inexport_class_count++;
267      }
268      else
269      {
270        print "Warning! A subclass of InExport module (named '" . $inexport_filename . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $inexport_path . "\n";
271      }
272    }
273  }
274  return $inexport_class_count;
275}
276# _scanForInExportModules()
277
278# look up collect.cfg for import options, then all inexport version for the
279# common ones
280sub set_collection_options
281{
282
283    my ($inexport, $collectcfg) = @_;
284    my $out        = $inexport->{'out'};
285
286    #groupsize can (currently) only be defined for import, not export, and
287    # will be set to 1 if the user has not set it
288    if ((defined $inexport->{'groupsize'}) && ($inexport->{'groupsize'} == 1)) {
289    if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) {
290        $inexport->{'groupsize'} = $collectcfg->{'groupsize'};
291    }
292    }
293
294    if (!defined $inexport->{'saveas'}) {
295    if (defined $collectcfg->{'saveas'}
296        && $collectcfg->{'saveas'} =~ /^(GreenstoneXML|GreenstoneMETS)$/) {
297        $inexport->{'saveas'} = $collectcfg->{'saveas'};
298    } else {
299        $inexport->{'saveas'} = "GreenstoneXML"; # the default
300    }
301    }
302   
303    my $sortmeta = $inexport->{'sortmeta'};
304    if (defined $collectcfg->{'sortmeta'} && (!defined $sortmeta || $sortmeta eq "")) {
305    $sortmeta = $collectcfg->{'sortmeta'};
306    }
307    # sortmeta cannot be used with group size
308    $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
309    if (defined $sortmeta && $inexport->{'groupsize'} > 1) {
310    &gsprintf($out, "{import.cannot_sort}\n\n");
311    $sortmeta = undef;
312    }
313    if (defined $sortmeta) {
314    &gsprintf($out, "{import.sortmeta_paired_with_ArchivesInfPlugin}\n\n");
315    }
316    $inexport->{'sortmeta'} = $sortmeta;
317
318    if (defined $collectcfg->{'removeprefix'} && $inexport->{'removeprefix'} eq "") {
319    $inexport->{'removeprefix'} = $collectcfg->{'removeprefix'};
320    }
321   
322    if (defined $collectcfg->{'removesuffix'} && $inexport->{'removesuffix'} eq "") {
323    $inexport->{'removesuffix'} = $collectcfg->{'removesuffix'};
324    }
325
326    $inexport->set_collection_options($collectcfg);
327 
328}
329&main();
Note: See TracBrowser for help on using the browser.