source: main/trunk/greenstone2/bin/script/import.pl@ 27305

Last change on this file since 27305 was 27305, checked in by jmt12, 11 years ago

Add code to allow importing and building to load overriding versions of inexport.pm and buildcolutils.pm from extensions at runtime. When an extension provides a possible override, Greenstone will dynamically detect and add additional options (visible in the --help). When a user specifies one of these options, the appropriate inexport/buildcolutils subclass will be loaded.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.6 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
# Compile-time setup, run before any Greenstone module is loaded:
#  - abort unless GSDLHOME and GSDLOS are set in the environment
#  - put the core perllib directories, and those of every extension named in
#    GSDLEXTS / GSDL3EXTS (colon separated), at the front of @INC. Because
#    unshift() is used, directories added later are searched first.
#  - optionally switch STDERR to UTF-8 output when DEBUG_UNICODE is enabled
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugouts");

    # Greenstone 2 style extensions live under $GSDLHOME/ext/<name>
    if (defined $ENV{'GSDLEXTS'}) {
        my @extensions = split(/:/,$ENV{'GSDLEXTS'});
        foreach my $e (@extensions) {
            my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";

            unshift (@INC, "$ext_prefix/perllib");
            unshift (@INC, "$ext_prefix/perllib/cpan");
            unshift (@INC, "$ext_prefix/perllib/plugins");
            unshift (@INC, "$ext_prefix/perllib/plugouts");
        }
    }
    # Greenstone 3 style extensions live under $GSDL3SRCHOME/ext/<name>
    if (defined $ENV{'GSDL3EXTS'}) {
        my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
        foreach my $e (@extensions) {
            my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";

            unshift (@INC, "$ext_prefix/perllib");
            unshift (@INC, "$ext_prefix/perllib/cpan");
            unshift (@INC, "$ext_prefix/perllib/plugins");
            unshift (@INC, "$ext_prefix/perllib/plugouts");
        }
    }

    # Only enable UTF-8 on STDERR when DEBUG_UNICODE is set to a true value
    # (the previous test checked "defined" twice, so DEBUG_UNICODE=0 or an
    # empty value still switched the layer on).
    if ((defined $ENV{'DEBUG_UNICODE'}) && ($ENV{'DEBUG_UNICODE'})) {
        binmode(STDERR,":utf8");
    }
}
68
69# Pragma
70use strict;
71use warnings;
72
73# Modules
74use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
75
76# Greenstone Modules
77use FileUtils;
78use inexport;
79use util;
80
# Choices offered for the OIDtype argument. Every entry pairs the option
# name with a "{import.OIDtype.<name>}" description string.
my $oidtype_list = [
    map { +{ 'name' => $_, 'desc' => "{import.OIDtype." . $_ . "}" } }
        qw(hash hash_on_full_filename assigned incremental dirname full_filename)
];
94
95
# Output file formats that can be selected through the saveas argument
my $saveas_list = [
    { name => 'GreenstoneXML',  desc => '{export.saveas.GreenstoneXML}'  },
    { name => 'GreenstoneMETS', desc => '{export.saveas.GreenstoneMETS}' },
];
103
104
105# Possible attributes for each argument
106# name: The name of the argument
107# desc: A description (or more likely a reference to a description) for this argument
108# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, language, enum etc
109# reqd: Is this argument required?
110# hiddengli: Is this argument hidden in GLI?
111# modegli: The lowest detail mode this argument is visible at in GLI
112
# The saveas argument: an enum over $saveas_list, GreenstoneXML by default
my $saveas_argument = {
    name    => 'saveas',
    desc    => '{import.saveas}',
    type    => 'enum',
    list    => $saveas_list,
    deft    => 'GreenstoneXML',
    reqd    => 'no',
    modegli => '3',
};
121
122
# The arguments accepted by import.pl, in declaration order. Several entries
# deliberately omit 'deft' (or set it to ""); the historical defaults are kept
# as comments next to the entries concerned.
my $arguments = [
    $saveas_argument,
    {   name      => 'archivedir',
        desc      => '{import.archivedir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'importdir',
        desc      => '{import.importdir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'collectdir',
        desc      => '{import.collectdir}',
        type      => 'string',
        # default left as "" for parsearg; formerly:
        #   &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect")
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'site',
        desc      => '{import.site}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'manifest',
        desc      => '{import.manifest}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'debug',
        desc      => '{import.debug}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'faillog',
        desc    => '{import.faillog}',
        type    => 'string',
        # default left as "" for parsearg; formerly:
        #   &FileUtils::filenameConcatenate("<collectdir>", "colname", "etc", "fail.log")
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'incremental',
        desc      => '{import.incremental}',
        type      => 'flag',
        hiddengli => 'yes',
    },
    {   name      => 'keepold',
        desc      => '{import.keepold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'removeold',
        desc      => '{import.removeold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'language',
        desc      => '{scripts.language}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'maxdocs',
        desc    => '{import.maxdocs}',
        type    => 'int',
        reqd    => 'no',
        # no 'deft' here (parsearg default is ""); formerly "-1"
        range   => '1,',
        modegli => '1',
    },
    # OIDtype has no default of "hash" on purpose: the value may come from
    # collect.cfg, while a value given on the command line overrides it.
    {   name    => 'OIDtype',
        desc    => '{import.OIDtype}',
        type    => 'enum',
        list    => $oidtype_list,
        # no 'deft' here (parsearg default is ""); formerly "hash"
        reqd    => 'no',
        modegli => '2',
    },
    {   name    => 'OIDmetadata',
        desc    => '{import.OIDmetadata}',
        # type "metadata" doesn't work properly in GLI, so "string" is used
        type    => 'string',
        # no 'deft' here (parsearg default is ""); formerly "dc.Identifier"
        reqd    => 'no',
        modegli => '2',
    },
    {   name      => 'out',
        desc      => '{import.out}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'sortmeta',
        desc    => '{import.sortmeta}',
        # type "metadata" doesn't work properly in GLI, so "string" is used
        type    => 'string',
        reqd    => 'no',
        modegli => '2',
    },
    {   name    => 'removeprefix',
        desc    => '{BasClas.removeprefix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {   name    => 'removesuffix',
        desc    => '{BasClas.removesuffix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {   name    => 'groupsize',
        desc    => '{import.groupsize}',
        type    => 'int',
        deft    => '1',
        reqd    => 'no',
        modegli => '2',
    },
    {   name    => 'gzip',
        desc    => '{import.gzip}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'statsfile',
        desc      => '{import.statsfile}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'verbosity',
        desc    => '{import.verbosity}',
        type    => 'int',
        range   => '0,',
        # no 'deft' here (parsearg default is ""); formerly "2"
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'gli',
        desc      => '{scripts.gli}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'xml',
        desc      => '{scripts.xml}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
];
273
# Description of this script (name, description key, argument list) that is
# handed to the inexport constructor in main()
my $options = {
    name => 'import.pl',
    desc => '{import.desc}',
    args => $arguments,
};

# Lookup from a hyphen-prefixed argument name to the name of the inexport
# subclass that supplies it; populated by _scanForSubclasses()
my $function_to_inexport_subclass_mappings = {};
279
# @function main()
# Entry point of import.pl.
#  1. Scans the extensions named in GSDLEXTS / GSDL3EXTS for inexport
#     subclasses, which may register extra command line arguments.
#  2. Looks through @ARGV for any of those extra arguments to decide whether
#     an overriding inexport subclass has to be loaded. Only one subclass
#     can be in effect; mixing arguments of two subclasses is an error and
#     terminates the script with a non-zero exit status.
#  3. Creates the (possibly overriding) inexport object and, when it reports
#     a collection, reads the collection configuration, processes the files
#     and generates statistics, before calling deinit() on the object.
sub main
{
    # Dynamically include arguments from any subclasses of inexport we find
    # in the extensions directory
    if (defined $ENV{'GSDLEXTS'})
    {
        &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
    }
    if (defined $ENV{'GSDL3EXTS'})
    {
        &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
    }

    # Loop through arguments, checking to see if any depend on a specific
    # subclass of InExport. Note that we load the first subclass we encounter
    # so only support a single 'override' ATM.
    my $inexport_subclass;
    foreach my $argument (@ARGV)
    {
        # proper arguments start with a hyphen
        if ($argument =~ /^-/ && defined $function_to_inexport_subclass_mappings->{$argument})
        {
            my $required_inexport_subclass = $function_to_inexport_subclass_mappings->{$argument};
            if (!defined $inexport_subclass)
            {
                $inexport_subclass = $required_inexport_subclass;
            }
            # The user has included specific arguments from two different
            # inexport subclasses... this isn't supported
            elsif ($inexport_subclass ne $required_inexport_subclass)
            {
                print STDERR "Error! You cannot specify arguments from two different extention specific inexport modules: " . $inexport_subclass . " != " . $required_inexport_subclass . "\n";
                # report the failure to the caller (a bare exit returns 0)
                exit(1);
            }
        }
    }

    my $inexport;
    if (defined $inexport_subclass)
    {
        print "* Loading Overriding InExport Module: " . $inexport_subclass . "\n";
        require $inexport_subclass . '.pm';
        # explicit method call instead of indirect object syntax
        $inexport = $inexport_subclass->new("import",\@ARGV,$options);
    }
    # We don't have a overridden inexport, or the above command failed somehow
    # so load the base inexport class
    if (!defined $inexport)
    {
        $inexport = inexport->new("import",\@ARGV,$options);
    }

    my $collection = $inexport->get_collection();

    if (defined $collection)
    {
        my ($config_filename,$collect_cfg) = $inexport->read_collection_cfg($collection,$options);

        $inexport->set_collection_options($collect_cfg);

        my $pluginfo = $inexport->process_files($config_filename,$collect_cfg);

        $inexport->generate_statistics($pluginfo);
    }

    $inexport->deinit();
}
# main()
347
# @function _scanForSubclasses()
# Looks in <dir>/ext/<extension>/perllib for a module called
# <extension>inexport.pm (non-letters stripped from the name). Each module
# found is loaded, and the arguments returned by its getSupportedArguments()
# function are appended to the arguments of import.pl and recorded (with a
# leading hyphen) in $function_to_inexport_subclass_mappings.
# @param $dir The extension directory to look within
# @param $exts A list of the available extensions (as a colon separated string)
# @return The number of subclasses of InExport found as an Integer
sub _scanForSubclasses
{
    my ($dir, $exts) = @_;
    my $inexport_class_count = 0;
    my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
    my @extensions = split(/:/, $exts);
    foreach my $e (@extensions)
    {
        # - any subclass of InExport must be prefixed with the name of the ext
        my $package_name = $e . 'inexport';
        $package_name =~ s/[^a-z]//gi; # package names have limited characters
        my $inexport_filename = $package_name . '.pm';
        my $inexport_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $inexport_filename);
        # see if we have a subclass of InExport lurking in that extension folder
        if (-f $inexport_path)
        {
            # - note we load the filename (with pm) unlike normal modules
            require $inexport_filename;
            # - work out the fully qualified name of the function to call
            my $symbol = qualify('getSupportedArguments', $package_name);
            # - strict refs prevents strings being used as function names, so
            #   relax just that stricture, and only while turning the name
            #   into a code reference (undef if the function doesn't exist)
            my $get_supported_arguments = do
            {
                no strict 'refs';
                defined(&{$symbol}) ? \&{$symbol} : undef;
            };
            if (defined $get_supported_arguments)
            {
                my $extra_arguments = $get_supported_arguments->();
                # - anything other than an array reference is treated as
                #   "no extra arguments"
                if (ref($extra_arguments) ne 'ARRAY')
                {
                    $extra_arguments = [];
                }
                foreach my $argument (@{$extra_arguments})
                {
                    # - skip entries that aren't argument descriptions
                    next unless ref($argument) eq 'HASH';
                    # - record a mapping from each extra argument to the inexport
                    #   class that supports it. We put the hyphen on here to make
                    #   comparing with command line arguments even easier
                    $function_to_inexport_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
                    # - and then add them as acceptable arguments to import.pl
                    push(@{$options->{'args'}}, $argument);
                }
                $inexport_class_count++;
            }
            else
            {
                print "Warning! A subclass of InExport module (named '" . $inexport_filename . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $inexport_path . "\n";
            }
        }
    }
    return $inexport_class_count;
}
# _scanForSubclasses()
399
# Run the import
main();
Note: See TracBrowser for help on using the repository browser.