source: main/trunk/greenstone2/bin/script/import.pl@ 31132

Last change on this file since 31132 was 31132, checked in by kjdon, 7 years ago

added a new flag to import -NO_IMPORT. set it in collectionConfig.xml as an importOption to prevent import.pl being accidentally run. If you are doing web editing of documents, it edits the archives files and running import will clobber any changes

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.5 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
33BEGIN {
34 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
35 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
36 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
37 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
38 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
39 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugouts");
40
41 if (defined $ENV{'GSDLEXTS'}) {
42 my @extensions = split(/:/,$ENV{'GSDLEXTS'});
43 foreach my $e (@extensions) {
44 my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
45
46 unshift (@INC, "$ext_prefix/perllib");
47 unshift (@INC, "$ext_prefix/perllib/cpan");
48 unshift (@INC, "$ext_prefix/perllib/plugins");
49 unshift (@INC, "$ext_prefix/perllib/plugouts");
50 }
51 }
52 if (defined $ENV{'GSDL3EXTS'}) {
53 my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
54 foreach my $e (@extensions) {
55 my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
56
57 unshift (@INC, "$ext_prefix/perllib");
58 unshift (@INC, "$ext_prefix/perllib/cpan");
59 unshift (@INC, "$ext_prefix/perllib/plugins");
60 unshift (@INC, "$ext_prefix/perllib/plugouts");
61 }
62 }
63
64 if ((defined $ENV{'DEBUG_UNICODE'}) && (defined $ENV{'DEBUG_UNICODE'})) {
65 binmode(STDERR,":utf8");
66 }
67}
68
69# Pragma
70use strict;
71no strict 'subs'; # allow barewords (eg STDERR) as function arguments
72use warnings;
73
74# Modules
75use Symbol qw<qualify>; # Needed for runtime loading of modules [jmt12]
76
77# Greenstone Modules
78use FileUtils;
79use inexport;
80use util;
81use gsprintf 'gsprintf';
82
83
84# used to control output file format
85my $saveas_list =
86 [ { 'name' => "GreenstoneXML",
87 'desc' => "{export.saveas.GreenstoneXML}"},
88 { 'name' => "GreenstoneMETS",
89 'desc' => "{export.saveas.GreenstoneMETS}"},
90 ];
91
92
93# Possible attributes for each argument
94# name: The name of the argument
95# desc: A description (or more likely a reference to a description) for this argument
96# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, language, enum etc
97# reqd: Is this argument required?
98# hiddengli: Is this argument hidden in GLI?
99# modegli: The lowest detail mode this argument is visible at in GLI
100
101my $saveas_argument
102 = { 'name' => "saveas",
103 'desc' => "{import.saveas}",
104 'type' => "enum",
105 'list' => $saveas_list,
106 'deft' => "GreenstoneXML",
107 'reqd' => "no",
108 'modegli' => "3" };
109
110
111my $arguments =
112 [
113 $saveas_argument,
114 { 'name' => "sortmeta",
115 'desc' => "{import.sortmeta}",
116 'type' => "string",
117 #'type' => "metadata", #doesn't work properly in GLI
118 'reqd' => "no",
119 'modegli' => "2" },
120 { 'name' => "removeprefix",
121 'desc' => "{BasClas.removeprefix}",
122 'type' => "regexp",
123 'deft' => "",
124 'reqd' => "no",
125 'modegli' => "3" },
126 { 'name' => "removesuffix",
127 'desc' => "{BasClas.removesuffix}",
128 'type' => "regexp",
129 'deft' => "",
130 'reqd' => "no",
131 'modegli' => "3" },
132 { 'name' => "groupsize",
133 'desc' => "{import.groupsize}",
134 'type' => "int",
135 'deft' => "1",
136 'reqd' => "no",
137 'modegli' => "2" },
138 { 'name' => "archivedir",
139 'desc' => "{import.archivedir}",
140 'type' => "string",
141 'reqd' => "no",
142 'deft' => "archives",
143 'hiddengli' => "yes" },
144 @$inexport::directory_arguments,
145 { 'name' => "gzip",
146 'desc' => "{import.gzip}",
147 'type' => "flag",
148 'reqd' => "no",
149 'modegli' => "3" },
150 @$inexport::arguments,
151 { 'name' => "NO_IMPORT",
152 'desc' => "{import.NO_IMPORT}",
153 'type' => "flag",
154 'reqd' => "no",
155 'modegli' => "3"}
156];
157
158my $options = { 'name' => "import.pl",
159 'desc' => "{import.desc}",
160 'args' => $arguments };
161
162my $function_to_inexport_subclass_mappings = {};
163
164sub main
165{
166 # Dynamically include arguments from any subclasses of inexport we find
167 # in the extensions directory
168 if (defined $ENV{'GSDLEXTS'})
169 {
170 &_scanForSubclasses($ENV{'GSDLHOME'}, $ENV{'GSDLEXTS'});
171 }
172 if (defined $ENV{'GSDL3EXTS'})
173 {
174 &_scanForSubclasses($ENV{'GSDL3SRCHOME'}, $ENV{'GSDL3EXTS'});
175 }
176
177 # Loop through arguments, checking to see if any depend on a specific
178 # subclass of InExport. Note that we load the first subclass we encounter
179 # so only support a single 'override' ATM.
180 my $inexport_subclass;
181 foreach my $argument (@ARGV)
182 {
183 if ($argument eq "-NO_IMPORT") {
184 &gsprintf(STDERR, "{import.NO_IMPORT_set}\n\n");
185 exit 0;
186 }
187 # proper arguments start with a hyphen
188 if ($argument =~ /^-/ && defined $function_to_inexport_subclass_mappings->{$argument})
189 {
190 my $required_inexport_subclass = $function_to_inexport_subclass_mappings->{$argument};
191 if (!defined $inexport_subclass)
192 {
193 $inexport_subclass = $required_inexport_subclass;
194 }
195 # Oh noes! The user has included specific arguments from two different
196 # inexport subclasses... this isn't supported
197 elsif ($inexport_subclass ne $required_inexport_subclass)
198 {
199 print STDERR "Error! You cannot specify arguments from two different extention specific inexport modules: " . $inexport_subclass . " != " . $required_inexport_subclass . "\n";
200 exit;
201 }
202 }
203 }
204
205 my $inexport;
206 if (defined $inexport_subclass)
207 {
208 print "* Loading Overriding InExport Module: " . $inexport_subclass . "\n";
209 require $inexport_subclass . '.pm';
210 $inexport = new $inexport_subclass("import",\@ARGV,$options);
211 }
212
213 # We don't have a overridden inexport, or the above command failed somehow
214 # so load the base inexport class
215 if (!defined $inexport)
216 {
217 $inexport = new inexport("import",\@ARGV,$options);
218 }
219
220 my $collection = $inexport->get_collection();
221
222 if (defined $collection)
223 {
224 my ($config_filename,$collect_cfg) = $inexport->read_collection_cfg($collection,$options);
225 if ($collect_cfg->{'NO_IMPORT'}) {
226 &gsprintf(STDERR, "{import.NO_IMPORT_set}\n\n");
227 exit 0;
228 }
229 #$inexport->set_collection_options($collect_cfg);
230 &set_collection_options($inexport, $collect_cfg);
231
232
233 my $pluginfo = $inexport->process_files($config_filename,$collect_cfg);
234
235 $inexport->generate_statistics($pluginfo);
236 }
237
238 $inexport->deinit();
239}
240# main()
241
242# @function _scanForSubclasses()
243# @param $dir The extension directory to look within
244# @param $exts A list of the available extensions (as a colon separated string)
245# @return The number of subclasses of InExport found as an Integer
246sub _scanForSubclasses
247{
248 my ($dir, $exts) = @_;
249 my $inexport_class_count = 0;
250 my $ext_prefix = &FileUtils::filenameConcatenate($dir, "ext");
251 my @extensions = split(/:/, $exts);
252 foreach my $e (@extensions)
253 {
254 # - any subclass of InExport must be prefixed with the name of the ext
255 my $package_name = $e . 'inexport';
256 $package_name =~ s/[^a-z]//gi; # package names have limited characters
257 my $inexport_filename = $package_name . '.pm';
258 my $inexport_path = &FileUtils::filenameConcatenate($ext_prefix, $e, 'perllib', $inexport_filename);
259 # see if we have a subclass of InExport lurking in that extension folder
260 if (-f $inexport_path)
261 {
262 # - note we load the filename (with pm) unlike normal modules
263 require $inexport_filename;
264 # - make call to the newly created package
265 my $symbol = qualify('getSupportedArguments', $package_name);
266 # - strict prevents strings being used as function calls, so temporarily
267 # disable that pragma
268 no strict;
269 # - lets check that the function we are about to call actually exists
270 if ( defined &{$symbol} )
271 {
272 my $extra_arguments = &{$symbol}();
273 foreach my $argument (@{$extra_arguments})
274 {
275 # - record a mapping from each extra arguments to the inexport class
276 # that supports it. We put the hyphen on here to make comparing
277 # with command line arguments even easier
278 $function_to_inexport_subclass_mappings->{'-' . $argument->{'name'}} = $package_name;
279 # - and them add them as acceptable arguments to import.pl
280 push(@{$options->{'args'}}, $argument);
281 }
282 $inexport_class_count++;
283 }
284 else
285 {
286 print "Warning! A subclass of InExport module (named '" . $inexport_filename . "') does not implement the required getSupportedArguments() function - ignoring. Found in: " . $inexport_path . "\n";
287 }
288 }
289 }
290 return $inexport_class_count;
291}
292# _scanForInExportModules()
293
294# look up collect.cfg for import options, then all inexport version for the
295# common ones
296sub set_collection_options
297{
298
299 my ($inexport, $collectcfg) = @_;
300 my $out = $inexport->{'out'};
301
302 # check all options for default_optname - this will be set if the parsing
303 # code has just set the value based on the arg default. In this case,
304 # check in collect.cfg for the option
305
306 # groupsize can only be defined for import, not export, and actually only
307 # applies to GreenstoneXML format.
308 if (defined $inexport->{'default_groupsize'}) {
309 if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) {
310 $inexport->{'groupsize'} = $collectcfg->{'groupsize'};
311 }
312
313 }
314 if (defined $inexport->{'default_saveas'}) {
315 if (defined $collectcfg->{'saveas'}
316 && $collectcfg->{'saveas'} =~ /^(GreenstoneXML|GreenstoneMETS)$/) {
317 $inexport->{'saveas'} = $collectcfg->{'saveas'};
318 } else {
319 $inexport->{'saveas'} = "GreenstoneXML"; # the default
320 }
321 }
322
323 my $sortmeta = $inexport->{'sortmeta'};
324 if (defined $collectcfg->{'sortmeta'} && $sortmeta eq "") {
325 $sortmeta = $collectcfg->{'sortmeta'};
326 }
327 # sortmeta cannot be used with group size
328 $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
329 if (defined $sortmeta && $inexport->{'groupsize'} > 1) {
330 &gsprintf($out, "{import.cannot_sort}\n\n");
331 $sortmeta = undef;
332 }
333 if (defined $sortmeta) {
334 &gsprintf($out, "{import.sortmeta_paired_with_ArchivesInfPlugin}\n\n");
335 }
336 $inexport->{'sortmeta'} = $sortmeta;
337
338 if (defined $collectcfg->{'removeprefix'} && $inexport->{'removeprefix'} eq "") {
339 $inexport->{'removeprefix'} = $collectcfg->{'removeprefix'};
340 }
341
342 if (defined $collectcfg->{'removesuffix'} && $inexport->{'removesuffix'} eq "") {
343 $inexport->{'removesuffix'} = $collectcfg->{'removesuffix'};
344 }
345
346 $inexport->set_collection_options($collectcfg);
347
348}
349&main();
Note: See TracBrowser for help on using the repository browser.