source: main/trunk/greenstone2/bin/script/import.pl@ 28077

Last change on this file since 28077 was 28077, checked in by ak19, 11 years ago

Fix for a problem noticed when running diffcol. When the import option saveas was set to GreenstoneMETS in collect.cfg, it was ignored, although passing it as a command-line option to import.pl worked. The reason was that the saveas option, unlike OIDtype, had a default assigned, and because it therefore always had a value, any value in collect.cfg was never consulted. Now the METS tutorial collection should work from the command line.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.7 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
# Compile-time setup. Verifies that the required Greenstone environment
# variables are present and puts the Greenstone library directories (and
# those of any listed extensions) at the front of @INC, so that the
# Greenstone modules 'use'd further down can be found.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Library sub-directories added beneath each root. They are unshifted
    # one at a time, so the last one listed ends up earliest in @INC.
    my @lib_subdirs = ('perllib', 'perllib/cpan', 'perllib/plugins', 'perllib/plugouts');

    my $add_lib_dirs = sub {
        my ($root) = @_;
        foreach my $subdir (@lib_subdirs) {
            unshift(@INC, "$root/$subdir");
        }
    };

    $add_lib_dirs->($ENV{'GSDLHOME'});

    # Extensions named in GSDLEXTS (colon separated) live in GSDLHOME/ext
    if (defined $ENV{'GSDLEXTS'}) {
        foreach my $e (split(/:/, $ENV{'GSDLEXTS'})) {
            $add_lib_dirs->("$ENV{'GSDLHOME'}/ext/$e");
        }
    }

    # Extensions named in GSDL3EXTS live in GSDL3SRCHOME/ext. Without
    # GSDL3SRCHOME there is no sensible root to build the paths from (we
    # would warn about an undefined value and add "/ext/..." to @INC), so
    # the extensions are skipped in that case.
    if (defined $ENV{'GSDL3EXTS'} && defined $ENV{'GSDL3SRCHOME'}) {
        foreach my $e (split(/:/, $ENV{'GSDL3EXTS'})) {
            $add_lib_dirs->("$ENV{'GSDL3SRCHOME'}/ext/$e");
        }
    }

    # Previously this tested 'defined' twice; the second test is assumed to
    # have been meant as a check of the value, so DEBUG_UNICODE must now be
    # set to a true value to switch STDERR to UTF-8.
    if ($ENV{'DEBUG_UNICODE'}) {
        binmode(STDERR, ":utf8");
    }
}
68
69# Pragma
70use strict;
71use warnings;
72
73# Modules
74use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
75
76# Greenstone Modules
77use FileUtils;
78use inexport;
79use util;
80
# The values accepted by the OIDtype argument. Every entry pairs the value
# name with a description key of the form {import.OIDtype.<name>}.
my $oidtype_list = [
    map { +{ 'name' => $_, 'desc' => '{import.OIDtype.' . $_ . '}' } }
        qw(
            hash
            hash_on_full_filename
            assigned
            incremental
            filename
            dirname
            full_filename
        )
];
96
97
# The output file formats that can be chosen with the saveas argument
my $saveas_list = [
    { name => 'GreenstoneXML',  desc => '{export.saveas.GreenstoneXML}'  },
    { name => 'GreenstoneMETS', desc => '{export.saveas.GreenstoneMETS}' },
];
105
106
# Attributes an argument description may carry:
#   name      - the name of the argument
#   desc      - a description (or, more likely, a reference to one)
#   type      - the kind of control used to represent the argument, e.g.
#               string, int, flag, regexp, metadata, language, enum
#   reqd      - whether the argument is required
#   hiddengli - whether the argument is hidden in GLI
#   modegli   - the lowest detail mode at which GLI shows the argument

# The saveas argument. It deliberately carries no 'deft' entry (it used to
# be "GreenstoneXML"): if a default is defined here, then any valid value
# provided in collect.cfg is ignored.
my $saveas_argument = {
    name    => 'saveas',
    desc    => '{import.saveas}',
    type    => 'enum',
    list    => $saveas_list,
    reqd    => 'no',
    modegli => '3',
};
123
124
# The command line arguments import.pl accepts, in the order they are
# declared. Where a 'deft' line is commented out, parsearg is left to use
# "" as the default.
my $arguments = [
    $saveas_argument,

    { name => 'archivedir', desc => '{import.archivedir}', type => 'string',
      reqd => 'no', hiddengli => 'yes' },

    { name => 'importdir', desc => '{import.importdir}', type => 'string',
      reqd => 'no', hiddengli => 'yes' },

    # parsearg left "" as default
    #   deft => &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect")
    { name => 'collectdir', desc => '{import.collectdir}', type => 'string',
      deft => '', reqd => 'no', hiddengli => 'yes' },

    { name => 'site', desc => '{import.site}', type => 'string',
      deft => '', reqd => 'no', hiddengli => 'yes' },

    { name => 'manifest', desc => '{import.manifest}', type => 'string',
      deft => '', reqd => 'no', hiddengli => 'yes' },

    { name => 'debug', desc => '{import.debug}', type => 'flag',
      reqd => 'no', hiddengli => 'yes' },

    # parsearg left "" as default
    #   deft => &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log")
    { name => 'faillog', desc => '{import.faillog}', type => 'string',
      deft => '', reqd => 'no', modegli => '3' },

    { name => 'incremental', desc => '{import.incremental}', type => 'flag',
      hiddengli => 'yes' },

    { name => 'keepold', desc => '{import.keepold}', type => 'flag',
      reqd => 'no', hiddengli => 'yes' },

    { name => 'removeold', desc => '{import.removeold}', type => 'flag',
      reqd => 'no', hiddengli => 'yes' },

    { name => 'language', desc => '{scripts.language}', type => 'string',
      reqd => 'no', hiddengli => 'yes' },

    # parsearg left "" as default
    #   deft => "-1"
    { name => 'maxdocs', desc => '{import.maxdocs}', type => 'int',
      reqd => 'no', range => '1,', modegli => '1' },

    # don't set the default to hash - want to allow this to come from
    # entry in collect.cfg but want to override it here
    # parsearg left "" as default
    #   deft => "hash"
    { name => 'OIDtype', desc => '{import.OIDtype}', type => 'enum',
      list => $oidtype_list, reqd => 'no', modegli => '2' },

    # type "metadata" doesn't work properly in GLI, hence "string"
    # parsearg left "" as default
    #   deft => "dc.Identifier"
    { name => 'OIDmetadata', desc => '{import.OIDmetadata}', type => 'string',
      reqd => 'no', modegli => '2' },

    { name => 'out', desc => '{import.out}', type => 'string',
      deft => 'STDERR', reqd => 'no', hiddengli => 'yes' },

    # type "metadata" doesn't work properly in GLI, hence "string"
    { name => 'sortmeta', desc => '{import.sortmeta}', type => 'string',
      reqd => 'no', modegli => '2' },

    { name => 'removeprefix', desc => '{BasClas.removeprefix}', type => 'regexp',
      deft => '', reqd => 'no', modegli => '3' },

    { name => 'removesuffix', desc => '{BasClas.removesuffix}', type => 'regexp',
      deft => '', reqd => 'no', modegli => '3' },

    { name => 'groupsize', desc => '{import.groupsize}', type => 'int',
      deft => '1', reqd => 'no', modegli => '2' },

    { name => 'gzip', desc => '{import.gzip}', type => 'flag',
      reqd => 'no', modegli => '3' },

    { name => 'statsfile', desc => '{import.statsfile}', type => 'string',
      deft => 'STDERR', reqd => 'no', hiddengli => 'yes' },

    # parsearg left "" as default
    #   deft => "2"
    { name => 'verbosity', desc => '{import.verbosity}', type => 'int',
      range => '0,', reqd => 'no', modegli => '3' },

    { name => 'gli', desc => '{scripts.gli}', type => 'flag',
      reqd => 'no', hiddengli => 'yes' },

    { name => 'xml', desc => '{scripts.xml}', type => 'flag',
      reqd => 'no', hiddengli => 'yes' },
];
275
# Description of this script (name, description key and accepted arguments)
# that is passed to the inexport constructor
my $options = {
    name => 'import.pl',
    desc => '{import.desc}',
    args => $arguments,
};

# Maps a hyphen-prefixed argument name to the name of the inexport subclass
# package that supports it; filled in by _scanForSubclasses()
my $function_to_inexport_subclass_mappings = {};
281
# @function main()
# Entry point of the script. Collects the extra arguments offered by any
# inexport subclasses found in the extensions, picks the inexport class
# required by the arguments in @ARGV (falling back to the base inexport
# class), and, when a collection is given, reads its configuration,
# processes its files and generates statistics.
# Exits with a non-zero status if @ARGV mixes arguments that belong to two
# different inexport subclasses.
sub main
{
    # Dynamically include arguments from any subclasses of inexport we find
    # in the extensions directory
    if (defined $ENV{'GSDLEXTS'})
    {
        &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
    }
    # - without GSDL3SRCHOME there is no directory to scan, so don't try
    if (defined $ENV{'GSDL3EXTS'} && defined $ENV{'GSDL3SRCHOME'})
    {
        &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
    }

    # Loop through arguments, checking to see if any depend on a specific
    # subclass of InExport. Note that we load the first subclass we encounter
    # so only support a single 'override' ATM.
    my $inexport_subclass;
    foreach my $argument (@ARGV)
    {
        # proper arguments start with a hyphen
        if ($argument =~ /^-/ && defined $function_to_inexport_subclass_mappings->{$argument})
        {
            my $required_inexport_subclass = $function_to_inexport_subclass_mappings->{$argument};
            if (!defined $inexport_subclass)
            {
                $inexport_subclass = $required_inexport_subclass;
            }
            # The user has included specific arguments from two different
            # inexport subclasses... this isn't supported
            elsif ($inexport_subclass ne $required_inexport_subclass)
            {
                print STDERR "Error! You cannot specify arguments from two different extention specific inexport modules: " . $inexport_subclass . " != " . $required_inexport_subclass . "\n";
                # - this is a failure, so report it through the exit status
                #   (a bare exit would signal success)
                exit(1);
            }
        }
    }

    my $inexport;
    if (defined $inexport_subclass)
    {
        print "* Loading Overriding InExport Module: " . $inexport_subclass . "\n";
        require $inexport_subclass . '.pm';
        # - arrow method call rather than the ambiguous indirect object form
        $inexport = $inexport_subclass->new("import", \@ARGV, $options);
    }
    # We don't have a overridden inexport, or the above command failed somehow
    # so load the base inexport class
    if (!defined $inexport)
    {
        $inexport = inexport->new("import", \@ARGV, $options);
    }

    my $collection = $inexport->get_collection();

    if (defined $collection)
    {
        my ($config_filename, $collect_cfg) = $inexport->read_collection_cfg($collection, $options);

        $inexport->set_collection_options($collect_cfg);

        my $pluginfo = $inexport->process_files($config_filename, $collect_cfg);

        $inexport->generate_statistics($pluginfo);
    }

    $inexport->deinit();
}
# main()
349
# @function _scanForSubclasses()
# Looks in each listed extension for a module named <ext>inexport.pm (the
# extension name stripped of everything but letters), loads it, and asks it
# for its extra arguments via getSupportedArguments(). Each extra argument
# is recorded in $function_to_inexport_subclass_mappings (keyed by the
# hyphen-prefixed argument name) and appended to the arguments accepted by
# import.pl.
# @param $dir The extension directory to look within
# @param $exts A list of the available extensions (as a colon separated string)
# @return The number of subclasses of InExport found as an Integer
sub _scanForSubclasses
{
    my ($dir, $exts) = @_;
    my $inexport_class_count = 0;
    my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
    my @extensions = split(/:/, $exts);
    foreach my $e (@extensions)
    {
        # - any subclass of InExport must be prefixed with the name of the ext
        my $package_name = $e . 'inexport';
        $package_name =~ s/[^a-z]//gi; # package names have limited characters
        my $inexport_filename = $package_name . '.pm';
        my $inexport_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $inexport_filename);
        # see if we have a subclass of InExport lurking in that extension folder
        if (-f $inexport_path)
        {
            # - note we load the filename (with pm) unlike normal modules
            require $inexport_filename;
            # - make call to the newly created package
            my $symbol = qualify('getSupportedArguments', $package_name);
            # - strict prevents strings being used as function calls, so
            #   disable just that check (symbolic references) for the rest of
            #   this block; the other strictures stay in force
            no strict 'refs';
            # - lets check that the function we are about to call actually exists
            if (defined &{$symbol})
            {
                my $extra_arguments = &{$symbol}();
                foreach my $argument (@{$extra_arguments})
                {
                    # - record a mapping from each extra argument to the inexport
                    #   class that supports it. We put the hyphen on here to make
                    #   comparing with command line arguments even easier
                    $function_to_inexport_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
                    # - and then add them as acceptable arguments to import.pl
                    push(@{$options->{'args'}}, $argument);
                }
                $inexport_class_count++;
            }
            else
            {
                print "Warning! A subclass of InExport module (named '" . $inexport_filename . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $inexport_path . "\n";
            }
        }
    }
    return $inexport_class_count;
}
# _scanForSubclasses()
401
# Run the import when this file is executed
main();
Note: See TracBrowser for help on using the repository browser.