source: main/tags/2.60/gsdl/bin/script/export.pl@ 25196

Last change on this file since 25196 was 9235, checked in by davidb, 19 years ago

Don't want to -removeold if working with an explicit export directory on
the second, third, ... collection.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.9 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# export.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will export a particular collection into a specific Format (e.g. METS or DSpace)
30# Author: Chi-Yu Huang Date: 08-10-2004
31
32package export;
33
# Compile-time setup: refuse to start without the two environment variables
# checked below, then put the perllib directories under GSDLHOME at the front
# of @INC so that the 'use' statements following this block can be resolved.
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'}   or die "GSDLOS not set\n";

    # Listed in final search order: the last directory named here is
    # searched last among the four.
    unshift(@INC,
            "$ENV{'GSDLHOME'}/perllib/classify",
            "$ENV{'GSDLHOME'}/perllib/plugins",
            "$ENV{'GSDLHOME'}/perllib/cpan",
            "$ENV{'GSDLHOME'}/perllib");
}
42
43use expinfo;
44use colcfg;
45use plugin;
46use docprint;
47use util;
48use parsargv;
49use FileHandle;
50use gsprintf;
51use printusage;
52
# Names and description keys for the two OID types (hash, incremental).
my $oidtype_list = [
    { name => 'hash',        desc => '{export.OIDtype.hash}' },
    { name => 'incremental', desc => '{export.OIDtype.incremental}' },
];

# Names and description keys for the export formats selectable via -saveas:
# DSpace or METS.
my $saveas_list = [
    { name => 'DSpace', desc => '{export.saveas.DSpace}' },
    { name => 'METS',   desc => '{export.saveas.METS}' },
];
65
66
# Attributes that may appear in an argument description:
#   name      - the name of the argument
#   desc      - a description of the argument (more likely a reference to
#               a description)
#   type      - the kind of control used to represent the argument, e.g.
#               string, int, flag, regexp, metadata, metadatum, language, enum
#   reqd      - whether the argument is required
#   hiddengli - whether the argument is hidden in GLI
#   modegli   - the lowest detail mode at which GLI shows the argument

# Description of the -saveas option. It is kept in its own variable because
# it is used both in the full argument list and in the -listall option set.
my $saveas_argument = {
    name    => 'saveas',
    desc    => '{export.saveas}',
    type    => 'enum',
    list    => $saveas_list,
    deft    => 'METS',
    reqd    => 'no',
    modegli => '3',
};
83
84
# Descriptions of the options export.pl advertises in its usage output,
# one hash per option, in the order they are listed.
my $arguments = [
    {   name      => 'exportdir',
        desc      => '{export.exportdir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'collectdir',
        desc      => '{export.collectdir}',
        type      => 'string',
        deft      => &util::filename_cat($ENV{'GSDLHOME'}, 'collect'),
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name => 'listall',
        desc => '{scripts.listall}',
        type => 'flag',
        reqd => 'no',
    },
    {   name      => 'debug',
        desc      => '{export.debug}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'faillog',
        desc    => '{export.faillog}',
        type    => 'string',
        deft    => &util::filename_cat('<collectdir>', 'colname', 'etc', 'fail.log'),
        reqd    => 'no',
        modegli => '4',
    },
    {   name      => 'importdir',
        desc      => '{import.importdir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'keepold',
        desc      => '{export.keepold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'language',
        desc    => '{scripts.language}',
        type    => 'string',
        reqd    => 'no',
        modegli => '4',
    },
    {   name    => 'maxdocs',
        desc    => '{export.maxdocs}',
        type    => 'int',
        reqd    => 'no',
        range   => '1,',
        modegli => '1',
    },
    {   name      => 'out',
        desc      => '{export.out}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'removeold',
        desc    => '{export.removeold}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '3',
    },
    $saveas_argument,
#   {   name      => 'statsfile',
#       desc      => '{export.statsfile}',
#       type      => 'string',
#       deft      => 'STDERR',
#       reqd      => 'no',
#       hiddengli => 'yes',
#   },
    {   name    => 'verbosity',
        desc    => '{export.verbosity}',
        type    => 'int',
        range   => '0,3',
        deft    => '2',
        reqd    => 'no',
        modegli => '4',
    },
];
158
# Full description of the script, handed to the usage printers.
my $options = {
    name => 'export.pl',
    desc => '{export.desc}',
    args => $arguments,
};

# Reduced description printed for -listall: only the -saveas option.
my $listall_options = {
    name => 'export.pl',
    desc => '{export.desc}',
    args => [ $saveas_argument ],
};
166
# Local shorthand: hand every argument straight on to gsprintf::gsprintf
# and return whatever it returns.
sub gsprintf {
    return gsprintf::gsprintf(@_);
}
171
172
# Run the export when the script is executed.
main();

# main -- entry point of export.pl.
#
# Parses the command line (@ARGV), then treats every remaining argument as a
# collection name and exports that collection's import directory through the
# collection's plugins into its export directory, in the format selected by
# -saveas (DSpace or METS).
#
# Takes no parameters and returns nothing useful.  Dies (with "\n") when the
# command line cannot be parsed, after printing usage for -listall / -xml,
# or when the -out file cannot be opened.  Problems with an individual
# collection are reported on the output handle and processing continues with
# the next collection.
sub main {
    my ($verbosity, $importdir, $exportdir, $keepold, $listall,
        $removeold, $saveas, $gzip, $groupsize, $OIDtype, $debug,
        $maxdocs, $collection, $configfilename, $language,
        $pluginfo, $sortmeta, $export_info_filename,
        $export_info, $processor, $out, $faillog, $collectdir, $gli);

    # ***** 11-04-03 - John Thompson *****
    my $xml = 0;
    # ************************************
    my $service = "export";

    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!parsargv::parse(\@ARGV,
                         'language/.*/', \$language,
                         'verbosity/\d+/', \$verbosity,
                         'importdir/.*/', \$importdir,
                         'exportdir/.*/', \$exportdir,
                         'listall', \$listall,
                         'keepold', \$keepold,
                         'removeold', \$removeold,
                         'saveas/^(DSpace|METS)$/METS', \$saveas,
                         'debug', \$debug,
                         'maxdocs/^\-?\d+/', \$maxdocs,
                         'collectdir/.*/', \$collectdir,
                         'out/.*/STDERR', \$out,
#                        'statsfile/.*/STDERR', \$statsfile,
                         'faillog/.*/', \$faillog,
                         'gli', \$gli,
                         q^xml^, \$xml)) {
        PrintUsage::print_txt_usage($options, "{export.params}");
        die "\n";
    }

    $gzip      = undef;
    $groupsize = 1;
    $OIDtype   = undef;
    $sortmeta  = undef;

    # The rest of this function treats the empty string as "not supplied",
    # so an option only counts as explicit when it is non-empty.
    my $explicit_exportdir = (defined $exportdir && $exportdir ne "") ? 1 : 0;
    my $explicit_faillog   = (defined $faillog   && $faillog   ne "") ? 1 : 0;

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language) {
        gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($listall) {
        if ($xml) {
            PrintUsage::print_xml_usage($listall_options);
        }
        else {
            PrintUsage::print_txt_usage($listall_options, "{export.params}");
        }
        die "\n";
    }
    elsif ($xml) {
        PrintUsage::print_xml_usage($options);
        die "\n";
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        gsprintf::output_strings_in_UTF8();
    }

    my $close_out = 0;
    if ($out =~ /^(STDERR|STDOUT)$/i) {
        # The match is case-insensitive but the value is used as a handle
        # name below, so normalise it to the real handle's spelling.
        $out = uc $out;
    }
    else {
        # Report and die as two separate steps so that the die does not
        # depend on the return value of gsprintf.
        unless (open(OUT, '>', $out)) {
            gsprintf('STDERR', "{common.cannot_open_output_file}\n", $out);
            die "\n";
        }
        $out = 'export::OUT';
        $close_out = 1;
    }
    $out->autoflush(1);

    # set removeold to false if it has been defined
    $removeold = 0 if ($keepold);

    while (scalar(@ARGV) > 0) {
        my $collect_name = shift @ARGV;

        $ENV{'GSDLCOLLECTION'} = $collect_name;

        # Set once FAILLOG has been opened so it can be closed again even
        # when the export of this collection dies part-way through.
        my $faillog_open = 0;

        eval {
            # get and check the collection name
            if (($collection = util::use_collection($collect_name, $collectdir)) eq "") {
                PrintUsage::print_txt_usage($options, "{export.params}");
                die "\n";
            }

            # Work out the fail log for THIS collection.  $faillog itself is
            # left untouched so the next collection does not mistake the
            # handle name for a file name.
            my $faillogname = $explicit_faillog
                ? $faillog
                : util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
            unless (open(FAILLOG, '>', $faillogname)) {
                gsprintf('STDERR', "{export.cannot_open_fail_log}\n", $faillogname);
                die "\n";
            }
            $faillog_open = 1;
            my $faillog_handle = 'export::FAILLOG';
            $faillog_handle->autoflush(1);

            # check sortmeta
            $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
            if (defined $sortmeta && $groupsize > 1) {
                gsprintf($out, "{export.cannot_sort}\n\n");
                $sortmeta = undef;
            }

            # dynamically load 'docsave' module so it can pick up on a
            # collection specific docsave.pm if one is specified.
            unshift(@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
            require docsave;

            # get the list of plugins for this collection and set any options that
            # were specified in the collect.cfg (all export.pl options except
            # -collectdir, -out and -faillog may be specified in the collect.cfg (these
            # options must be known before we read the collect.cfg))
            my $plugins = [];
            my @global_opts = ();

            $configfilename = util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
            unless (-e $configfilename) {
                gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename);
                die "\n";
            }

            my $collectcfg = colcfg::read_collect_cfg($configfilename);
            if (defined $collectcfg->{'plugin'}) {
                $plugins = $collectcfg->{'plugin'};
            }

            if ($verbosity !~ /\d+/) {
                if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
                    $verbosity = $collectcfg->{'verbosity'};
                }
                else {
                    $verbosity = 2; # the default
                }
            }
            if (defined $collectcfg->{'importdir'} && $importdir eq "") {
                $importdir = $collectcfg->{'importdir'};
            }
            if (defined $collectcfg->{'exportdir'} && $exportdir eq "") {
                $exportdir = $collectcfg->{'exportdir'};
            }
            if (defined $collectcfg->{'removeold'}) {
                if ($collectcfg->{'removeold'} =~ /^true$/i && !$keepold) {
                    $removeold = 1;
                }
                if ($collectcfg->{'removeold'} =~ /^false$/i && !$removeold) {
                    $removeold = 0;
                }
            }
            if (defined $collectcfg->{'keepold'}) {
                if ($collectcfg->{'keepold'} =~ /^false$/i && !$keepold) {
                    $removeold = 1;
                }
            }
            if (defined $collectcfg->{'gzip'} && !$gzip) {
                if ($collectcfg->{'gzip'} =~ /^true$/i) {
                    $gzip = 1;
                }
            }
            if ($maxdocs !~ /\-?\d+/) {
                if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
                    $maxdocs = $collectcfg->{'maxdocs'};
                }
                else {
                    $maxdocs = -1; # the default
                }
            }
            if ($groupsize == 1) {
                if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) {
                    $groupsize = $collectcfg->{'groupsize'};
                }
            }
            if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental)$/)) {
                if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental)$/) {
                    $OIDtype = $collectcfg->{'OIDtype'};
                }
                else {
                    $OIDtype = "hash"; # the default
                }
            }
            # $sortmeta is normally undef here; test that first so the
            # string comparison does not warn under -w.
            if (defined $collectcfg->{'sortmeta'}
                && (!defined $sortmeta || $sortmeta eq "")) {
                $sortmeta = $collectcfg->{'sortmeta'};
            }
            if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
                $debug = 1;
            }
            if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
                $gli = 1;
            }

            # global plugin stuff
            if (defined $collectcfg->{'separate_cjk'} && $collectcfg->{'separate_cjk'} =~ /^true$/i) {
                push @global_opts, "-separate_cjk";
            }

            $gli = 0 unless defined $gli;

            print STDERR "<export>\n" if $gli;

            # fill in the default import and export directories if none
            # were supplied, turn all \ into / and remove trailing /
            $importdir = util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq "";
            $importdir =~ s/[\\\/]+/\//g;
            $importdir =~ s/\/$//;
            $exportdir = util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "export") if $exportdir eq "";
            $exportdir =~ s/[\\\/]+/\//g;
            $exportdir =~ s/\/$//;

            # load all the plugins
            $pluginfo = plugin::load_plugins($plugins, $verbosity, $out, $faillog_handle, \@global_opts);

            if (scalar(@$pluginfo) == 0) {
                gsprintf($out, "{import.no_plugins_loaded}\n");
                die "\n";
            }

            # remove the old contents of the export directory if needed
            if ($removeold && -e $exportdir) {
                gsprintf($out, "{export.removing_export}\n");
                util::rm_r($exportdir);
            }

            # read the export information file
            if (!$debug) {
                # Export to DSpace Archive format or METS format
                # If saveas=DSpace, a "contents" file will be created, otherwise "export.inf"
                if ($saveas eq "DSpace") {
                    $export_info_filename = util::filename_cat($exportdir, "contents");
                }
                elsif ($saveas eq "METS") {
                    $export_info_filename = util::filename_cat($exportdir, "export.inf");
                }

                $export_info = expinfo->new();
                $export_info->load_info($export_info_filename);

                $processor = docsave->new($collection, $export_info, $verbosity, $gzip,
                                          $groupsize, $out, $service, $saveas);

                $processor->setexportdir($exportdir);

                $processor->set_sortmeta($sortmeta) if defined $sortmeta;
                $processor->set_OIDtype($OIDtype);
                $processor->set_saveas($saveas);
            }
            else {
                $processor = docprint->new();
            }

            plugin::begin($pluginfo, $importdir, $processor, $maxdocs);

            # process the import directory
            plugin::read($pluginfo, $importdir, "", {}, $processor, $maxdocs, $gli);

            plugin::end($pluginfo, $processor);

            # write out the export information file
            if (!$debug) {
                $processor->close_file_output() if $groupsize > 1;
                if ($saveas eq "METS") {
                    $export_info->save_info($export_info_filename);
                }
            }

#           # write out export stats
#           my $close_stats = 0;
#           if ($statsfile !~ /^(STDERR|STDOUT)$/i) {
#               if (open (STATS, ">$statsfile")) {
#                   $statsfile = 'import::STATS';
#                   $close_stats = 1;
#               } else {
#                   &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile);
#                   &gsprintf($out, "{import.stats_backup}\n");
#                   $statsfile = 'STDERR';
#               }
#           }
        };

        # Capture $@ straight away; later calls may clobber it.
        my $export_error = $@;

        # Closed here rather than inside the eval so the log is also closed
        # when the export died half-way.
        close FAILLOG if $faillog_open;

        # Deliberate exits above use die "\n" after printing their own
        # message; anything else (a missing module, a failure inside a
        # plugin, ...) would otherwise vanish without trace.
        if ($export_error =~ /\S/) {
            $export_error .= "\n" unless $export_error =~ /\n\z/;
            print {$out} "export.pl: export of '$collect_name' failed: $export_error";
        }

##      $ENV{'GSDLCOLLECTION'} = undef;
        $importdir = "";
        if ($explicit_exportdir) {
            # All collections share the one explicit export directory, so
            # only the first is allowed to clear it out.
            $removeold = 0;
        }
        else {
            # Let the next collection work out its own export directory
            # instead of reusing this one's.
            $exportdir = "";
        }
    }

    gsprintf($out, "\n");
    gsprintf($out, "*********************************************\n");
    gsprintf($out, "{export.complete}\n");
    gsprintf($out, "*********************************************\n");

#   &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli);
#   if ($close_stats) {
#       close STATS;
#   }

    close OUT if $close_out;
}
Note: See TracBrowser for help on using the repository browser.