source: main/trunk/greenstone2/bin/script/import.pl@ 28077

Last change on this file since 28077 was 28077, checked in by ak19, 8 years ago

Fix noticed when doing diffcol. When the import option saveas was set to GreenstoneMETS in collect.cfg, this was ignored, although passing it as a cmdline option to import.pl worked. The reason was that the saveas option had a default assigned, unlike OIDtype, and if it already had a value, then any value in collect.cfg was never consulted. Now the METS tutorial collection should work from the cmdline.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.7 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
# Compile-time setup. Checks that the Greenstone environment variables are
# present and pushes the core and extension library directories onto the
# front of @INC, so the "use" statements further down can be resolved.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Library sub-directories added beneath every root. They are unshifted
    # in this order, so the last one listed ends up first in @INC.
    my @lib_subdirs = ('perllib', 'perllib/cpan', 'perllib/plugins', 'perllib/plugouts');

    foreach my $subdir (@lib_subdirs) {
        unshift (@INC, "$ENV{'GSDLHOME'}/$subdir");
    }

    # Each pair is (variable holding a colon separated list of extension
    # names, variable holding the directory whose "ext" folder contains them).
    foreach my $ext_source (['GSDLEXTS', 'GSDLHOME'], ['GSDL3EXTS', 'GSDL3SRCHOME']) {
        my ($exts_var, $home_var) = @{$ext_source};
        next unless defined $ENV{$exts_var};

        foreach my $e (split(/:/, $ENV{$exts_var})) {
            my $ext_prefix = "$ENV{$home_var}/ext/$e";
            foreach my $subdir (@lib_subdirs) {
                unshift (@INC, "$ext_prefix/$subdir");
            }
        }
    }

    # Only switch STDERR to UTF-8 when DEBUG_UNICODE is set to a true value.
    # The previous test repeated the "defined" check twice, so a setting
    # such as DEBUG_UNICODE=0 still turned the layer on.
    if ((defined $ENV{'DEBUG_UNICODE'}) && ($ENV{'DEBUG_UNICODE'})) {
        binmode(STDERR,":utf8");
    }
}
68
69# Pragma
70use strict;
71use warnings;
72
73# Modules
74use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
75
76# Greenstone Modules
77use FileUtils;
78use inexport;
79use util;
80
# Values accepted by the OIDtype argument. Every entry pairs the option name
# with the key of its description string, which follows the fixed pattern
# "{import.OIDtype.<name>}".
my $oidtype_list = [
    map { +{ 'name' => $_, 'desc' => "{import.OIDtype." . $_ . "}" } }
        qw(hash hash_on_full_filename assigned incremental
           filename dirname full_filename)
];
96
97
# Output file formats selectable through the saveas argument. Description
# keys follow the pattern "{export.saveas.<name>}".
my $saveas_list = [
    map { +{ 'name' => $_, 'desc' => "{export.saveas." . $_ . "}" } }
        qw(GreenstoneXML GreenstoneMETS)
];
105
106
# Attributes that may appear in an argument description:
#   name      - The name of the argument
#   desc      - A description (or more likely a reference to a description)
#               for this argument
#   type      - The type of control used to represent the argument. Options
#               include: string, int, flag, regexp, metadata, language, enum etc
#   reqd      - Is this argument required?
#   hiddengli - Is this argument hidden in GLI?
#   modegli   - The lowest detail mode this argument is visible at in GLI

# Description of the saveas argument. It deliberately has no 'deft' entry:
# if a default is defined here, then any valid value provided in collect.cfg
# is ignored.
my $saveas_argument = {
    name    => 'saveas',
    desc    => '{import.saveas}',
    type    => 'enum',
    list    => $saveas_list,
    #deft   => 'GreenstoneXML',
    reqd    => 'no',
    modegli => '3',
};
123
124
# Ordered list of the command line arguments import.pl accepts. Each entry is
# a hash using the name/desc/type/reqd/hiddengli/modegli attributes, plus
# 'deft' (default), 'range' and 'list' where they apply.
my $arguments = [
    $saveas_argument,
    {
        name      => 'archivedir',
        desc      => '{import.archivedir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'importdir',
        desc      => '{import.importdir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'collectdir',
        desc      => '{import.collectdir}',
        type      => 'string',
        # parsearg left "" as default
        #deft     => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect"),
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'site',
        desc      => '{import.site}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'manifest',
        desc      => '{import.manifest}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'debug',
        desc      => '{import.debug}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'faillog',
        desc    => '{import.faillog}',
        type    => 'string',
        # parsearg left "" as default
        #deft   => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log"),
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name      => 'incremental',
        desc      => '{import.incremental}',
        type      => 'flag',
        hiddengli => 'yes',
    },
    {
        name      => 'keepold',
        desc      => '{import.keepold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'removeold',
        desc      => '{import.removeold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'language',
        desc      => '{scripts.language}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'maxdocs',
        desc    => '{import.maxdocs}',
        type    => 'int',
        reqd    => 'no',
        # parsearg left "" as default
        #deft   => "-1",
        range   => '1,',
        modegli => '1',
    },
    # don't set the default to hash - want to allow this to come from
    # entry in collect.cfg but want to override it here
    {
        name    => 'OIDtype',
        desc    => '{import.OIDtype}',
        type    => 'enum',
        list    => $oidtype_list,
        # parsearg left "" as default
        #deft   => "hash",
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'OIDmetadata',
        desc    => '{import.OIDmetadata}',
        type    => 'string',
        #type   => "metadata", #doesn't work properly in GLI
        # parsearg left "" as default
        #deft   => "dc.Identifier",
        reqd    => 'no',
        modegli => '2',
    },
    {
        name      => 'out',
        desc      => '{import.out}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'sortmeta',
        desc    => '{import.sortmeta}',
        type    => 'string',
        #type   => "metadata", #doesn't work properly in GLI
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'removeprefix',
        desc    => '{BasClas.removeprefix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name    => 'removesuffix',
        desc    => '{BasClas.removesuffix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name    => 'groupsize',
        desc    => '{import.groupsize}',
        type    => 'int',
        deft    => '1',
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'gzip',
        desc    => '{import.gzip}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name      => 'statsfile',
        desc      => '{import.statsfile}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'verbosity',
        desc    => '{import.verbosity}',
        type    => 'int',
        range   => '0,',
        # parsearg left "" as default
        #deft   => "2",
        reqd    => 'no',
        modegli => '3',
    },
    {
        name      => 'gli',
        desc      => '{scripts.gli}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'xml',
        desc      => '{scripts.xml}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
];
275
# Description of this script (name, description key and accepted arguments)
# that is handed to the inexport constructor and to read_collection_cfg().
my $options = {
    name => 'import.pl',
    desc => '{import.desc}',
    args => $arguments,
};

# Maps a hyphen-prefixed argument name (e.g. "-foo") to the name of the
# inexport subclass that declared it. Filled in by _scanForSubclasses().
my $function_to_inexport_subclass_mappings = {};
281
# @function main()
# Entry point of the script. Collects extra arguments declared by inexport
# subclasses found in the extension directories, decides from @ARGV whether
# one of those subclasses has to be used instead of the base inexport class,
# then runs the import through that object.
# Exits with status 1 when the command line mixes arguments that belong to
# two different extension specific inexport subclasses.
sub main
{
    # Dynamically include arguments from any subclasses of inexport we find
    # in the extensions directory
    if (defined $ENV{'GSDLEXTS'})
    {
        _scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
    }
    if (defined $ENV{'GSDL3EXTS'})
    {
        _scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
    }

    # Loop through arguments, checking to see if any depend on a specific
    # subclass of InExport. Note that we load the first subclass we encounter
    # so only support a single 'override' ATM.
    my $inexport_subclass;
    foreach my $argument (@ARGV)
    {
        # proper arguments start with a hyphen
        next unless $argument =~ /^-/;

        my $required_inexport_subclass = $function_to_inexport_subclass_mappings->{$argument};
        next unless defined $required_inexport_subclass;

        if (!defined $inexport_subclass)
        {
            $inexport_subclass = $required_inexport_subclass;
        }
        # The user has included specific arguments from two different
        # inexport subclasses... this isn't supported
        elsif ($inexport_subclass ne $required_inexport_subclass)
        {
            print STDERR "Error! You cannot specify arguments from two different extention specific inexport modules: " . $inexport_subclass . " != " . $required_inexport_subclass . "\n";
            # This is an error path, so report failure to the caller; a bare
            # "exit" would return status 0 (success).
            exit(1);
        }
    }

    my $inexport;
    if (defined $inexport_subclass)
    {
        print "* Loading Overriding InExport Module: " . $inexport_subclass . "\n";
        require $inexport_subclass . '.pm';
        $inexport = $inexport_subclass->new("import", \@ARGV, $options);
    }
    # We don't have an overridden inexport, or the above command failed
    # somehow, so load the base inexport class
    if (!defined $inexport)
    {
        $inexport = inexport->new("import", \@ARGV, $options);
    }

    my $collection = $inexport->get_collection();

    # Only do the actual work when a collection could be determined
    if (defined $collection)
    {
        my ($config_filename, $collect_cfg) = $inexport->read_collection_cfg($collection, $options);

        $inexport->set_collection_options($collect_cfg);

        my $pluginfo = $inexport->process_files($config_filename, $collect_cfg);

        $inexport->generate_statistics($pluginfo);
    }

    $inexport->deinit();
}
# main()
349
# @function _scanForSubclasses()
# Looks in each listed extension for a module named "<ext>inexport.pm"
# (non-letter characters removed from the name). Every such module that
# implements getSupportedArguments() has its arguments appended to the
# arguments accepted by import.pl, and each argument is recorded against the
# module that declared it.
# @param $dir The extension directory to look within
# @param $exts A list of the available extensions (as a colon separated string)
# @return The number of subclasses of InExport found as an Integer
sub _scanForSubclasses
{
    my ($dir, $exts) = @_;
    my $inexport_class_count = 0;
    my $ext_prefix = FileUtils::filenameConcatenate($dir, "ext");

    foreach my $e (split(/:/, $exts))
    {
        # - any subclass of InExport must be prefixed with the name of the ext
        my $package_name = $e . 'inexport';
        $package_name =~ s/[^a-z]//gi; # package names have limited characters
        my $inexport_filename = $package_name . '.pm';
        my $inexport_path = FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $inexport_filename);

        # see if we have a subclass of InExport lurking in that extension folder
        next unless -f $inexport_path;

        # - note we load the filename (with pm) unlike normal modules
        require $inexport_filename;

        # - look up the function in the newly loaded package by name. Only
        #   this symbolic lookup needs strict 'refs' relaxed, so the pragma is
        #   confined to this small block instead of the rest of the loop body.
        my $symbol = qualify('getSupportedArguments', $package_name);
        my $get_supported_arguments = do {
            no strict 'refs';
            defined(&{$symbol}) ? \&{$symbol} : undef;
        };

        # - lets check that the function we are about to call actually exists
        if (!defined $get_supported_arguments)
        {
            print "Warning! A subclass of InExport module (named '" . $inexport_filename . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $inexport_path . "\n";
            next;
        }

        my $extra_arguments = $get_supported_arguments->();
        foreach my $argument (@{$extra_arguments})
        {
            # - record a mapping from each extra argument to the inexport class
            #   that supports it. We put the hyphen on here to make comparing
            #   with command line arguments even easier
            $function_to_inexport_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
            # - and then add them as acceptable arguments to import.pl
            push(@{$options->{'args'}}, $argument);
        }
        $inexport_class_count++;
    }
    return $inexport_class_count;
}
# _scanForSubclasses()
401
# Run the import when this file is executed as a script.
main();
Note: See TracBrowser for help on using the repository browser.