source: tags/gsdl-2_70u-distribution/gsdl/bin/script/export.pl@ 11745

Last change on this file since 11745 was 11745, checked in by (none), 18 years ago

This commit was manufactured by cvs2svn to create tag
'gsdl-2_70u-distribution'.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 14.5 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# export.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will export a particular collection into a specific Format (e.g. METS or DSpace)
30# Author: Chi-Yu Huang Date: 08-10-2004
31
32package export;
33
BEGIN {
    # Both Greenstone environment variables must be present before any of
    # the Greenstone modules below can be located.
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'}   or die "GSDLOS not set\n";

    # Prepend the Greenstone library directories to the module search path.
    # Each unshift puts its directory in front of the previous one, so the
    # last entry in this list ends up being searched first.
    foreach my $libdir ("perllib", "perllib/cpan", "perllib/plugins", "perllib/classify") {
        unshift (@INC, "$ENV{'GSDLHOME'}/$libdir");
    }
}
42
43use strict;
44no strict 'refs'; # allow filehandles to be variables and vice versa
45no strict 'subs'; # allow barewords (eg STDERR) as function arguments
46
47use arcinfo;
48use colcfg;
49use plugin;
50use docprint;
51use util;
52use scriptutil;
53use FileHandle;
54use gsprintf;
55use printusage;
56use parse2;
57
58
# The two recognised OIDtype values, each with the resource-bundle key of
# its description.  (Not referenced anywhere else in the visible part of
# this script.)
my $oidtype_list = [
    {
        name => "hash",
        desc => "{export.OIDtype.hash}",
    },
    {
        name => "incremental",
        desc => "{export.OIDtype.incremental}",
    },
];
64
# The export formats selectable through -saveas (DSpace or METS), each with
# the resource-bundle key of its description.
my $saveas_list = [
    {
        name => "DSpace",
        desc => "{export.saveas.DSpace}",
    },
    {
        name => "METS",
        desc => "{export.saveas.METS}",
    },
];
71
72
73# Possible attributes for each argument
74# name: The name of the argument
75# desc: A description (or more likely a reference to a description) for this argument
76# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, metadatum, language, enum etc
77# reqd: Is this argument required?
78# hiddengli: Is this argument hidden in GLI?
79# modegli: The lowest detail mode this argument is visible at in GLI
80
# Description of the -saveas option.  It is kept in its own variable because
# it appears both in the full argument list and in the reduced option set
# that is printed for -listall.
my $saveas_argument = {
    name    => "saveas",
    desc    => "{export.saveas}",
    type    => "enum",
    list    => $saveas_list,
    deft    => "METS",
    reqd    => "no",
    modegli => "3",
};
89
90
# Every command line option understood by export.pl, in the order in which
# it is listed in the usage output.
my $arguments = [
    $saveas_argument,
    {
        name => "saveas_version",
        desc => "{export.saveas_version}",
        type => "string",
        reqd => "no",
        deft => "greenstone",
    },
    {
        name      => "exportdir",
        desc      => "{export.exportdir}",
        type      => "string",
        reqd      => "no",
        hiddengli => "yes",
    },
    {
        name      => "importdir",
        desc      => "{import.importdir}",
        type      => "string",
        reqd      => "no",
        hiddengli => "yes",
    },
    {
        name      => "collectdir",
        desc      => "{export.collectdir}",
        type      => "string",
        # default is the "collect" directory inside the Greenstone home
        deft      => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"),
        reqd      => "no",
        hiddengli => "yes",
    },
    {
        name => "listall",
        desc => "{scripts.listall}",
        type => "flag",
        reqd => "no",
    },
    {
        name      => "debug",
        desc      => "{export.debug}",
        type      => "flag",
        reqd      => "no",
        hiddengli => "yes",
    },
    {
        name    => "faillog",
        desc    => "{export.faillog}",
        type    => "string",
        deft    => "",
        reqd    => "no",
        modegli => "4",
    },
    {
        name      => "keepold",
        desc      => "{export.keepold}",
        type      => "flag",
        reqd      => "no",
        hiddengli => "yes",
    },
    {
        name    => "removeold",
        desc    => "{export.removeold}",
        type    => "flag",
        reqd    => "no",
        modegli => "3",
    },
    {
        name    => "language",
        desc    => "{scripts.language}",
        type    => "string",
        reqd    => "no",
        modegli => "4",
    },
    {
        name    => "maxdocs",
        desc    => "{export.maxdocs}",
        type    => "int",
        reqd    => "no",
        range   => "1,",
        modegli => "1",
    },
    {
        name      => "out",
        desc      => "{export.out}",
        type      => "string",
        deft      => "STDERR",
        reqd      => "no",
        hiddengli => "yes",
    },
    {
        name      => "statsfile",
        desc      => "{export.statsfile}",
        type      => "string",
        deft      => "STDERR",
        reqd      => "no",
        hiddengli => "yes",
    },
    {
        name    => "verbosity",
        desc    => "{export.verbosity}",
        type    => "int",
        range   => "0,3",
        deft    => "2",
        reqd    => "no",
        modegli => "4",
    },
    {
        name      => "gli",
        desc      => "",
        type      => "flag",
        reqd      => "no",
        hiddengli => "yes",
    },
    {
        name      => "xml",
        desc      => "{scripts.xml}",
        type      => "flag",
        reqd      => "no",
        hiddengli => "yes",
    },
];
181
# Full description of the script, used for the normal usage output.
my $options = {
    name => "export.pl",
    desc => "{export.desc}",
    args => $arguments,
};

# Reduced description used for -listall: only the -saveas option is listed.
my $listall_options = {
    name => "export.pl",
    desc => "{export.desc}",
    args => [ $saveas_argument ],
};
189
# Convenience wrapper so the rest of this package can call gsprintf(...)
# directly: every argument is handed on unchanged to gsprintf::gsprintf and
# its result is returned to the caller.
sub gsprintf
{
    return gsprintf::gsprintf(@_);
}
194
195
&main();

# main -- entry point of export.pl.
#
# Parses the command line options described by $arguments, prints usage
# information when asked to (-listall / -xml) or when no collection name is
# left in @ARGV, and otherwise processes every remaining entry of @ARGV as a
# collection name: the collection's collect.cfg is read, its plugins are
# loaded and run over the import directory, and the documents are written to
# the export directory through a docsave processor (or a docprint processor
# when debugging).  A failure while processing one collection is reported on
# STDERR and does not stop the remaining collections from being processed.
#
# Takes no parameters and returns nothing useful.
sub main {
    # params (filled in from the command line by parse2::parse below)
    my ($language, $verbosity, $importdir, $exportdir, $keepold, $listall,
        $removeold, $saveas, $saveas_version, $debug,
        $maxdocs, $statsfile, $out, $faillog, $collectdir, $gli);
    my $xml = 0;

    # other vars
    my ($configfilename, $collection, $export_info_filename, $export_info, $processor, $pluginfo);
    my $service = "export";

    # Where the value of each parsed option is stored.  An explicit table of
    # references replaces the string eval that used to build the assignment
    # from the option name.
    my %option_target = (
        'language'       => \$language,
        'verbosity'      => \$verbosity,
        'importdir'      => \$importdir,
        'exportdir'      => \$exportdir,
        'keepold'        => \$keepold,
        'listall'        => \$listall,
        'removeold'      => \$removeold,
        'saveas'         => \$saveas,
        'saveas_version' => \$saveas_version,
        'debug'          => \$debug,
        'maxdocs'        => \$maxdocs,
        'statsfile'      => \$statsfile,
        'out'            => \$out,
        'faillog'        => \$faillog,
        'collectdir'     => \$collectdir,
        'gli'            => \$gli,
        'xml'            => \$xml,
    );

    my $hashParsingResult = {};
    # general options available to all plugins
    # can have more than one collection name, so don't check num args left
    parse2::parse(\@ARGV, $arguments, $hashParsingResult, "allow_extra_options");
    foreach my $strVariable (keys %$hashParsingResult) {
        # names without a matching variable are ignored, as before
        next unless exists $option_target{$strVariable};
        ${$option_target{$strVariable}} = $hashParsingResult->{$strVariable};
    }

    # Fall back to the defaults declared in $arguments in case the parser
    # did not supply them.
    $out       = "STDERR" unless defined $out;
    $statsfile = "STDERR" unless defined $statsfile;

    # these are options used by other things - we just set default values
    # undef means will be set from config file if there
    # NOTE(review): these keep their values from one collection to the next,
    # so a setting read from one collect.cfg also applies to the collections
    # that follow it -- confirm whether that is intended.
    my $gzip = undef;
    my $groupsize = 1;
    my $OIDtype = undef;
    my $sortmeta = undef;

    my $explicit_exportdir = (defined $exportdir) ? 1 : 0;

    # save these command line settings. don't want config file settings in one
    # coll used for other colls
    # does this apply to other vars???
    my $global_removeold = $removeold;
    my $global_keepold = $keepold;

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($listall) {
        if ($xml) {
            &PrintUsage::print_xml_usage($listall_options);
        }
        else {
            &PrintUsage::print_txt_usage($listall_options, "{export.params}");
        }
        die "\n";
    }
    elsif ($xml) {
        &PrintUsage::print_xml_usage($options);
        die "\n";
    }

    if (scalar(@ARGV) == 0) {
        &PrintUsage::print_txt_usage($options, "{export.params}");
        die "\n";
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8();
    }

    # Redirect the general output to a file unless it goes to STDERR/STDOUT.
    # From here on $out holds the *name* of the output handle.
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        unless (open (OUT, '>', $out)) {
            &gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
            die;
        }
        $out = 'export::OUT';
        $close_out = 1;
    }
    $out->autoflush(1);

    while (scalar(@ARGV) > 0) {
        my $collect_name = shift @ARGV;
        $ENV{'GSDLCOLLECTION'} = $collect_name;

        # Track which per-collection handles are open so that they can be
        # closed even when processing this collection dies half way through.
        my $faillog_opened = 0;
        my $stats_opened = 0;

        eval {
            # get and check the collection name
            if (($collection = &util::use_collection($collect_name, $collectdir)) eq "") {
                &PrintUsage::print_txt_usage($options, "{export.params}");
                die "\n";
            }

            # Work out the fail log for this collection.  $faillog itself is
            # left untouched so that the next collection does not mistake a
            # handle name (or the previous collection's log) for a file name.
            my $faillogname = (defined $faillog && $faillog ne "")
                ? $faillog
                : &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
            unless (open (FAILLOG, '>', $faillogname)) {
                &gsprintf(STDERR, "{export.cannot_open_fail_log}\n", $faillogname);
                die;
            }
            $faillog_opened = 1;
            my $faillog_handle = 'export::FAILLOG';
            $faillog_handle->autoflush(1);

            # check sortmeta
            $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
            if (defined $sortmeta && $groupsize > 1) {
                &gsprintf($out, "{export.cannot_sort}\n\n");
                $sortmeta = undef;
            }

            # dynamically load the 'docsave' module so that a collection
            # specific docsave.pm is picked up if one exists.
            unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
            require docsave;

            # get the list of plugins for this collection and set any options that
            # were specified in the collect.cfg (all export.pl options except
            # -collectdir, -out and -faillog may be specified in the collect.cfg (these
            # options must be known before we read the collect.cfg))
            my $plugins = [];
            my @global_opts = ();

            $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
            if (!-e $configfilename) {
                &gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename);
                die;
            }

            my $collectcfg = &colcfg::read_collect_cfg ($configfilename);
            if (defined $collectcfg->{'plugin'}) {
                $plugins = $collectcfg->{'plugin'};
            }

            if (!defined $verbosity || $verbosity !~ /\d+/) {
                if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
                    $verbosity = $collectcfg->{'verbosity'};
                } else {
                    $verbosity = 2; # the default
                }
            }
            if (defined $collectcfg->{'importdir'} && (!defined $importdir || $importdir eq "")) {
                $importdir = $collectcfg->{'importdir'};
            }
            if (defined $collectcfg->{'exportdir'} && (!defined $exportdir || $exportdir eq "")) {
                $exportdir = $collectcfg->{'exportdir'};
            }

            if (defined $collectcfg->{'gzip'} && !$gzip) {
                if ($collectcfg->{'gzip'} =~ /^true$/i) {
                    $gzip = 1;
                }
            }
            if (!defined $maxdocs || $maxdocs !~ /\-?\d+/) {
                if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
                    $maxdocs = $collectcfg->{'maxdocs'};
                } else {
                    $maxdocs = -1; # the default
                }
            }
            if ($groupsize == 1) {
                if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) {
                    $groupsize = $collectcfg->{'groupsize'};
                }
            }
            if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental)$/)) {
                if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental)$/) {
                    $OIDtype = $collectcfg->{'OIDtype'};
                } else {
                    $OIDtype = "hash"; # the default
                }
            }
            # At this point $sortmeta is either undef or contains a
            # non-blank character (see the check above), so "not defined"
            # is the same test as the former comparison with "".
            if (defined $collectcfg->{'sortmeta'} && !defined $sortmeta) {
                $sortmeta = $collectcfg->{'sortmeta'};
            }
            if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
                $debug = 1;
            }
            if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
                $gli = 1;
            }

            # global plugin stuff
            if (defined $collectcfg->{'separate_cjk'} && $collectcfg->{'separate_cjk'} =~ /^true$/i) {
                push @global_opts, "-separate_cjk";
            }

            ($removeold, $keepold) = &scriptutil::check_removeold_and_keepold($global_removeold, $global_keepold, "export", $collectcfg);

            $gli = 0 unless defined $gli;

            print STDERR "<export>\n" if $gli;

            # fill in the default import and export directories if none
            # were supplied, turn all \ into / and remove trailing /
            $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import")
                if (!defined $importdir || $importdir eq "");
            $importdir =~ s/[\\\/]+/\//g;
            $importdir =~ s/\/$//;
            $exportdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export")
                if (!defined $exportdir || $exportdir eq "");
            $exportdir =~ s/[\\\/]+/\//g;
            $exportdir =~ s/\/$//;

            # load all the plugins
            $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog_handle, \@global_opts);

            if (scalar(@$pluginfo) == 0) {
                &gsprintf($out, "{import.no_plugins_loaded}\n");
                die "\n";
            }

            # remove the old contents of the export directory if needed
            if ($removeold && -e $exportdir) {
                &gsprintf($out, "{export.removing_export}\n");
                &util::rm_r ($exportdir);
            }

            # read the export information file
            if (!$debug) {
                # Export to DSpace Archive format or METS format
                # If saveas=DSpace, a "contents" file will be created, otherwise "export.inf"

                if ($saveas eq "DSpace") {
                    $export_info_filename = &util::filename_cat ($exportdir, "contents");
                } elsif ($saveas eq "METS") {
                    $export_info_filename = &util::filename_cat ($exportdir, "export.inf");
                }

                $export_info = arcinfo->new();
                $export_info->load_info ($export_info_filename);

                $processor = docsave->new($collection, $export_info, $verbosity, $gzip, $groupsize, $out, $service, $saveas);

                $processor->setoutputdir ($exportdir);

                $processor->set_sortmeta ($sortmeta) if defined $sortmeta;
                $processor->set_OIDtype ($OIDtype);
                $processor->set_saveas ($saveas);
                $processor->set_saveas_version ($saveas_version);
            } else {
                $processor = docprint->new();
            }

            &plugin::begin($pluginfo, $importdir, $processor, $maxdocs);

            # process the import directory
            &plugin::read ($pluginfo, $importdir, "", {}, $processor, $maxdocs, 0, $gli);

            &plugin::end($pluginfo, $processor);

            &plugin::deinit($pluginfo, $processor);

            # write out the export information file
            if (!$debug) {
                $processor->close_file_output() if $groupsize > 1;
                if ($saveas eq "METS") {
                    $export_info->save_info($export_info_filename);
                }
            }

            # write out export stats
            # $statsfile keeps the user's setting; $stats_handle is the name
            # of the handle the statistics are actually written to.  The
            # handle is opened in this package, hence 'export::STATS'.
            my $stats_handle = $statsfile;
            if ($statsfile !~ /^(STDERR|STDOUT)$/i) {
                if (open (STATS, '>', $statsfile)) {
                    $stats_handle = 'export::STATS';
                    $stats_opened = 1;
                } else {
                    &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile);
                    &gsprintf($out, "{import.stats_backup}\n");
                    $stats_handle = 'STDERR';
                }
            }

            &gsprintf($out, "\n");
            &gsprintf($out, "*********************************************\n");

            &plugin::write_stats($pluginfo, $stats_handle, $faillogname, $gli);
            if ($stats_opened) {
                close STATS;
                $stats_opened = 0;
            }

            &gsprintf($out, "*********************************************\n");
        };

        if ($@) {
            print STDERR $@;
        }

        # Release the per-collection handles whether or not the export of
        # this collection succeeded.
        close STATS if $stats_opened;
        close FAILLOG if $faillog_opened;

##      $ENV{'GSDLCOLLECTION'} = undef;
        $importdir = "";
        # NOTE(review): $removeold is recomputed from $global_removeold at
        # the start of the next iteration, so this assignment appears to
        # have no lasting effect -- confirm the intent.
        $removeold = 0 if ($explicit_exportdir);

    } # while processing ARGV

    &gsprintf($out, "\n");
    &gsprintf($out, "*********************************************\n");
    &gsprintf($out, "* {export.complete}\n");
    &gsprintf($out, "*********************************************\n");

    # Only close the output file once everything has been written to it;
    # it is shared by all collections and by the final messages above.
    close OUT if $close_out;
}
Note: See TracBrowser for help on using the repository browser.