source: main/trunk/greenstone2/bin/script/import.pl@ 35213

Last change on this file since 35213 was 35213, checked in by kjdon, 3 years ago

if GSDL3SRCHOME is set, and -site is not, set site to localsite - shortcut so now you can 'import.pl demo' in gs3

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.4 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
package import;

# Compile-time setup. Verifies that the Greenstone environment variables
# this script depends on are present, then pushes the Greenstone library
# directories (core and per-extension) onto the front of @INC so that the
# 'use' statements further down can be resolved.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Sub-directories added for every library root. Each one is unshifted in
    # turn, so the last entry listed ends up first in @INC.
    my @lib_subdirs = ('perllib', 'perllib/cpan', 'perllib/plugins', 'perllib/plugouts');
    my $add_lib_dirs = sub {
        my ($prefix) = @_;
        foreach my $subdir (@lib_subdirs) {
            unshift (@INC, "$prefix/$subdir");
        }
    };

    # Core Greenstone libraries
    $add_lib_dirs->($ENV{'GSDLHOME'});

    # Greenstone 2 extensions (colon separated list of extension names)
    if (defined $ENV{'GSDLEXTS'}) {
        foreach my $e (split(/:/, $ENV{'GSDLEXTS'})) {
            $add_lib_dirs->("$ENV{'GSDLHOME'}/ext/$e");
        }
    }

    # Greenstone 3 extensions live under GSDL3SRCHOME. Skip them when that
    # variable is missing: interpolating an undefined value would warn and
    # add meaningless root-relative "/ext/..." paths to @INC.
    if (defined $ENV{'GSDL3EXTS'} && defined $ENV{'GSDL3SRCHOME'}) {
        foreach my $e (split(/:/, $ENV{'GSDL3EXTS'})) {
            $add_lib_dirs->("$ENV{'GSDL3SRCHOME'}/ext/$e");
        }
    }

    # Any defined value of DEBUG_UNICODE (even an empty one) switches STDERR
    # to UTF-8 output.
    if (defined $ENV{'DEBUG_UNICODE'}) {
        binmode(STDERR, ":utf8");
    }
}
68
69# Pragma
70use strict;
71no strict 'subs'; # allow barewords (eg STDERR) as function arguments
72use warnings;
73
74# Modules
75use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
76
77# Greenstone Modules
78use FileUtils;
79use inexport;
80use util;
81use gsprintf 'gsprintf';
82use doc; # for the variable our $cmd_line_mode ($doc::cmd_line_mode)
83
84
# Output formats that can be selected with -saveas
my $saveas_list = [
    { 'name' => "GreenstoneXML",  'desc' => "{export.saveas.GreenstoneXML}" },
    { 'name' => "GreenstoneMETS", 'desc' => "{export.saveas.GreenstoneMETS}" },
    { 'name' => "GreenstoneSQL",  'desc' => "{export.saveas.GreenstoneSQL}" }
];


# Each argument below is described by a hash with (some of) these keys:
#   name      - the name of the argument
#   desc      - a description (usually a reference to a description string)
#   type      - the kind of control used to represent the argument, e.g.
#               string, int, flag, regexp, metadata, language, enum
#   reqd      - whether the argument is required
#   hiddengli - whether the argument is hidden in GLI
#   modegli   - the lowest detail mode at which GLI shows the argument

# The -saveas argument, kept in its own variable as well as in $arguments
my $saveas_argument = {
    'name'    => "saveas",
    'desc'    => "{import.saveas}",
    'type'    => "enum",
    'list'    => $saveas_list,
    'deft'    => "GreenstoneXML",
    'reqd'    => "no",
    'modegli' => "3"
};


# Full argument list for import.pl: the import specific arguments plus the
# lists published by the inexport module
my $arguments = [
    $saveas_argument,
    {
        'name' => "saveas_options",
        'desc' => "{import.saveas_options}",
        'type' => "string",
        'reqd' => "no"
    },
    {
        'name'    => "sortmeta",
        'desc'    => "{import.sortmeta}",
        'type'    => "string",
        #'type' => "metadata", #doesn't work properly in GLI
        'reqd'    => "no",
        'modegli' => "2"
    },
    {
        'name'    => "removeprefix",
        'desc'    => "{BasClas.removeprefix}",
        'type'    => "regexp",
        'deft'    => "",
        'reqd'    => "no",
        'modegli' => "3"
    },
    {
        'name'    => "removesuffix",
        'desc'    => "{BasClas.removesuffix}",
        'type'    => "regexp",
        'deft'    => "",
        'reqd'    => "no",
        'modegli' => "3"
    },
    {
        'name'    => "groupsize",
        'desc'    => "{import.groupsize}",
        'type'    => "int",
        'deft'    => "1",
        'reqd'    => "no",
        'modegli' => "2"
    },
    {
        'name'      => "archivedir",
        'desc'      => "{import.archivedir}",
        'type'      => "string",
        'reqd'      => "no",
        'deft'      => "archives",
        'hiddengli' => "yes"
    },
    @$inexport::directory_arguments,
    {
        'name'    => "gzip",
        'desc'    => "{import.gzip}",
        'type'    => "flag",
        'reqd'    => "no",
        'modegli' => "3"
    },
    @$inexport::arguments,
    {
        'name'    => "NO_IMPORT",
        'desc'    => "{import.NO_IMPORT}",
        'type'    => "flag",
        'reqd'    => "no",
        'modegli' => "3"
    }
];

# Top level description of this script, handed to the inexport constructor
my $options = {
    'name' => "import.pl",
    'desc' => "{import.desc}",
    'args' => $arguments
};

# Maps an extension supplied command line argument (including its leading
# hyphen) to the name of the inexport subclass that supports it. Filled in
# by _scanForSubclasses().
my $function_to_inexport_subclass_mappings = {};
170
# @function main()
# Entry point of import.pl. Registers arguments contributed by extension
# specific inexport subclasses, selects the inexport class to use from the
# command line, then runs the import for the requested collection.
# Exits with status 0 when importing is disabled via NO_IMPORT, and with
# status 1 when arguments from two different inexport subclasses are mixed.
sub main
{
    $doc::cmd_line_mode = "import";

    # Dynamically include arguments from any subclasses of inexport we find
    # in the extensions directory
    if (defined $ENV{'GSDLEXTS'})
    {
        &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
    }
    # - Greenstone 3 extensions can only be located when GSDL3SRCHOME is known
    if (defined $ENV{'GSDL3EXTS'} && defined $ENV{'GSDL3SRCHOME'})
    {
        &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
    }

    # Loop through arguments, checking to see if any depend on a specific
    # subclass of InExport. Note that we load the first subclass we encounter
    # so only support a single 'override' ATM.
    my $inexport_subclass;
    foreach my $argument (@ARGV)
    {
        # - importing has been switched off on the command line: not an error
        if ($argument eq "-NO_IMPORT") {
            &gsprintf(STDERR, "{import.NO_IMPORT_set}\n\n");
            exit 0;
        }
        # proper arguments start with a hyphen
        if ($argument =~ /^-/ && defined $function_to_inexport_subclass_mappings->{$argument})
        {
            my $required_inexport_subclass = $function_to_inexport_subclass_mappings->{$argument};
            if (!defined $inexport_subclass)
            {
                $inexport_subclass = $required_inexport_subclass;
            }
            # The user has included specific arguments from two different
            # inexport subclasses... this isn't supported
            elsif ($inexport_subclass ne $required_inexport_subclass)
            {
                print STDERR "Error! You cannot specify arguments from two different extention specific inexport modules: " . $inexport_subclass . " != " . $required_inexport_subclass . "\n";
                # - this is a failure, so report a non-zero status to the caller
                exit 1;
            }
        }
    }

    my $inexport;
    if (defined $inexport_subclass)
    {
        print "* Loading Overriding InExport Module: " . $inexport_subclass . "\n";
        require $inexport_subclass . '.pm';
        $inexport = $inexport_subclass->new("import", \@ARGV, $options);
    }

    # We don't have an overridden inexport, or its constructor returned
    # nothing, so load the base inexport class
    if (!defined $inexport)
    {
        $inexport = inexport->new("import", \@ARGV, $options);
    }


    ## shortcut - if gsdl3srchome is set, and site is not set, set site to localsite
    if (defined $ENV{'GSDL3SRCHOME'}) {
        if (!defined $inexport->{'site'} || $inexport->{'site'} eq "") {
            $inexport->{'site'} = "localsite";
            print STDERR "setting site to localsite as GSDL3SRCHOME was set\n";
        }
    }
    my $collection = $inexport->get_collection();

    if (defined $collection)
    {
        my ($config_filename, $collect_cfg) = $inexport->read_collection_cfg($collection, $options);
        # - importing has been switched off in the collection configuration
        if ($collect_cfg->{'NO_IMPORT'}) {
            &gsprintf(STDERR, "{import.NO_IMPORT_set}\n\n");
            exit 0;
        }
        # - use the import specific version below, which in turn calls the
        #   inexport method of the same name
        &set_collection_options($inexport, $collect_cfg);

        my $pluginfo = $inexport->process_files($config_filename, $collect_cfg);

        $inexport->generate_statistics($pluginfo);
    }

    $inexport->deinit();
}
# main()
258
# @function _scanForSubclasses()
# Looks in each extension's perllib directory for a module named
# <extension>inexport.pm (non-letters removed from the name). Every module
# found is loaded and asked, through its getSupportedArguments() function,
# for the extra arguments it supports. Those arguments are appended to the
# arguments accepted by import.pl and recorded (with a leading hyphen) in
# $function_to_inexport_subclass_mappings.
# @param $dir The directory containing the 'ext' directory to look within
# @param $exts A list of the available extensions (as a colon separated string)
# @return The number of subclasses of InExport found that implement
#         getSupportedArguments(), as an Integer
sub _scanForSubclasses
{
    my ($dir, $exts) = @_;
    my $inexport_class_count = 0;
    # - nothing can be scanned without both a directory and an extension list
    return $inexport_class_count unless (defined $dir && defined $exts);

    my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
    foreach my $e (split(/:/, $exts))
    {
        # - any subclass of InExport must be prefixed with the name of the ext
        my $package_name = $e . 'inexport';
        $package_name =~ s/[^a-z]//gi; # package names have limited characters
        my $inexport_filename = $package_name . '.pm';
        my $inexport_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $inexport_filename);
        # see if we have a subclass of InExport lurking in that extension folder
        next unless (-f $inexport_path);

        # - note we load the filename (with pm) unlike normal modules
        require $inexport_filename;
        # - build the fully qualified name of the function we want to call
        my $symbol = qualify('getSupportedArguments', $package_name);
        my $has_function = 0;
        my $extra_arguments;
        {
            # - strict prevents strings being used as function calls, so
            #   disable just that check, and only for these few statements
            no strict 'refs';
            # - lets check that the function we are about to call actually exists
            if (defined &{$symbol})
            {
                $has_function = 1;
                $extra_arguments = &{$symbol}();
            }
        }

        if (!$has_function)
        {
            print "Warning! A subclass of InExport module (named '" . $inexport_filename . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $inexport_path . "\n";
            next;
        }

        # - anything other than an array reference is treated as "no extra
        #   arguments" rather than being dereferenced blindly
        my @supported_arguments = (ref($extra_arguments) eq 'ARRAY') ? @{$extra_arguments} : ();
        foreach my $argument (@supported_arguments)
        {
            # - record a mapping from each extra argument to the inexport class
            #   that supports it. We put the hyphen on here to make comparing
            #   with command line arguments even easier
            $function_to_inexport_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
            # - and then add it as an acceptable argument to import.pl
            push(@{$options->{'args'}}, $argument);
        }
        $inexport_class_count++;
    }
    return $inexport_class_count;
}
# _scanForSubclasses()
310
# @function set_collection_options()
# Fills in import specific options from collect.cfg when they were not given
# on the command line, then calls the inexport object's own
# set_collection_options() method for the common ones.
# @param $inexport The inexport object (a blessed hash) holding the parsed
#                  command line options; updated in place
# @param $collectcfg Hash reference of the values read from collect.cfg
sub set_collection_options
{
    my ($inexport, $collectcfg) = @_;
    my $out = $inexport->{'out'};

    # check all options for default_optname - this will be set if the parsing
    # code has just set the value based on the arg default. In this case,
    # check in collect.cfg for the option

    # groupsize can only be defined for import, not export, and actually only
    # applies to GreenstoneXML format. Only a whole number is accepted, as
    # the value is compared numerically further down.
    if (defined $inexport->{'default_groupsize'}) {
        if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\A\d+\z/) {
            $inexport->{'groupsize'} = $collectcfg->{'groupsize'};
        }
    }

    # accept every format that the -saveas argument offers
    if (defined $inexport->{'default_saveas'}) {
        if (defined $collectcfg->{'saveas'}
            && $collectcfg->{'saveas'} =~ /^(GreenstoneXML|GreenstoneMETS|GreenstoneSQL)$/) {
            $inexport->{'saveas'} = $collectcfg->{'saveas'};
        } else {
            $inexport->{'saveas'} = "GreenstoneXML"; # the default
        }
    }

    if (!defined $inexport->{'saveas_options'} || $inexport->{'saveas_options'} eq "") {
        if (defined $collectcfg->{'saveas_options'}) {
            $inexport->{'saveas_options'} = $collectcfg->{'saveas_options'};
        }
    }

    # sortmeta may be undefined here, so test for that before comparing it
    my $sortmeta = $inexport->{'sortmeta'};
    if (defined $collectcfg->{'sortmeta'} && (!defined $sortmeta || $sortmeta eq "")) {
        $sortmeta = $collectcfg->{'sortmeta'};
    }
    # a value with no visible characters counts as "not set"
    $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
    # sortmeta cannot be used with group size
    if (defined $sortmeta && $inexport->{'groupsize'} > 1) {
        &gsprintf($out, "{import.cannot_sort}\n\n");
        $sortmeta = undef;
    }
    if (defined $sortmeta) {
        &gsprintf($out, "{import.sortmeta_paired_with_ArchivesInfPlugin}\n\n");
    }
    $inexport->{'sortmeta'} = $sortmeta;

    # removeprefix / removesuffix: fall back to collect.cfg when nothing was
    # given on the command line
    foreach my $optname ('removeprefix', 'removesuffix') {
        my $current = $inexport->{$optname};
        if (defined $collectcfg->{$optname} && (!defined $current || $current eq "")) {
            $inexport->{$optname} = $collectcfg->{$optname};
        }
    }

    $inexport->set_collection_options($collectcfg);
}
# Run the import when this file is executed
main();
Note: See TracBrowser for help on using the repository browser.