source: gsdl/trunk/bin/script/import.pl@ 19303

Last change on this file since 19303 was 19303, checked in by davidb, 15 years ago

Tag looked for in manifest file changed back to 'import' to work with The Depositor

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.0 KB
RevLine 
[14031]1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
BEGIN {
    # Both environment variables must be present before any Greenstone
    # module can be located.
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'}   or die "GSDLOS not set\n";

    # Library sub-directories searched under every root.  They are
    # unshifted one at a time, so the last entry here ends up first
    # in @INC for its root.
    my @lib_subdirs = ("perllib",
                       "perllib/cpan",
                       "perllib/plugins",
                       "perllib/plugouts",
                       "perllib/classify");

    # The main installation comes first, followed by each extension
    # named in the colon-separated GSDLEXTS list (if any).
    my @lib_roots = ($ENV{'GSDLHOME'});
    if (defined $ENV{'GSDLEXTS'}) {
        foreach my $ext_name (split(/:/, $ENV{'GSDLEXTS'})) {
            push(@lib_roots, "$ENV{'GSDLHOME'}/ext/$ext_name");
        }
    }

    foreach my $root (@lib_roots) {
        foreach my $subdir (@lib_subdirs) {
            unshift(@INC, "$root/$subdir");
        }
    }
}
55
[14957]56use strict;
57no strict 'refs'; # allow filehandles to be variables and vice versa
58no strict 'subs'; # allow barewords (eg STDERR) as function arguments
59
[14031]60use arcinfo;
61use colcfg;
62use plugin;
63use plugout;
64use manifest;
[18456]65use inexport;
[14031]66use util;
67use scriptutil;
68use FileHandle;
69use gsprintf 'gsprintf';
70use printusage;
71use parse2;
72
73
74
# Values accepted by the -OIDtype option.
my $oidtype_list = [
    { 'name' => 'hash',        'desc' => '{import.OIDtype.hash}' },
    { 'name' => 'assigned',    'desc' => '{import.OIDtype.assigned}' },
    { 'name' => 'incremental', 'desc' => '{import.OIDtype.incremental}' },
    { 'name' => 'dirname',     'desc' => '{import.OIDtype.dirname}' },
];


# Values accepted by the -saveas option (controls the output file format).
my $saveas_list = [
    { 'name' => 'GreenstoneXML',  'desc' => '{export.saveas.GreenstoneXML}' },
    { 'name' => 'GreenstoneMETS', 'desc' => '{export.saveas.GreenstoneMETS}' },
];


# Keys that may appear in an argument description:
#   name      - the name of the argument
#   desc      - a description (or, more likely, a reference to a
#               description) for this argument
#   type      - the type of control used to represent the argument:
#               string, int, flag, regexp, metadata, language, enum etc
#   reqd      - is this argument required?
#   hiddengli - is this argument hidden in GLI?
#   modegli   - the lowest detail mode this argument is visible at in GLI

# Description of the -saveas option, kept in its own variable so that it
# can be placed at the head of the argument list.
my $saveas_argument = {
    'name'    => 'saveas',
    'desc'    => '{import.saveas}',
    'type'    => 'enum',
    'list'    => $saveas_list,
    'deft'    => 'GreenstoneXML',
    'reqd'    => 'no',
    'modegli' => '3',
};
111
112
# Every option import.pl understands, in the order in which they are
# listed in the usage output.  Several options deliberately carry no
# 'deft' value (see the commented-out defaults) so that main() can tell
# "not given on the command line" apart from an explicit value and fall
# back to the collection configuration file.
my $arguments = [
    $saveas_argument,
    {   'name'      => 'archivedir',
        'desc'      => '{import.archivedir}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'      => 'importdir',
        'desc'      => '{import.importdir}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'      => 'collectdir',
        'desc'      => '{import.collectdir}',
        'type'      => 'string',
        # parsearg left "" as default
        #'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"),
        'deft'      => '',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'      => 'site',
        'desc'      => '{import.site}',
        'type'      => 'string',
        'deft'      => '',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'      => 'manifest',
        'desc'      => '{import.manifest}',
        'type'      => 'string',
        'deft'      => '',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'      => 'debug',
        'desc'      => '{import.debug}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'    => 'faillog',
        'desc'    => '{import.faillog}',
        'type'    => 'string',
        # parsearg left "" as default
        #'deft' => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"),
        'deft'    => '',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {   'name'      => 'incremental',
        'desc'      => '{import.incremental}',
        'type'      => 'flag',
        'hiddengli' => 'yes',
    },
    {   'name'      => 'keepold',
        'desc'      => '{import.keepold}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'      => 'removeold',
        'desc'      => '{import.removeold}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'      => 'language',
        'desc'      => '{scripts.language}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'    => 'maxdocs',
        'desc'    => '{import.maxdocs}',
        'type'    => 'int',
        'reqd'    => 'no',
        # parsearg left "" as default
        #'deft' => "-1",
        'range'   => '1,',
        'modegli' => '1',
    },
    # don't set the default to hash - want to allow this to come from
    # entry in collect.cfg but want to override it here
    {   'name'    => 'OIDtype',
        'desc'    => '{import.OIDtype}',
        'type'    => 'enum',
        'list'    => $oidtype_list,
        # parsearg left "" as default
        #'deft' => "hash",
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {   'name'    => 'OIDmetadata',
        'desc'    => '{import.OIDmetadata}',
        'type'    => 'metadata',
        # parsearg left "" as default
        #'deft' => "dc.Identifier",
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {   'name'      => 'out',
        'desc'      => '{import.out}',
        'type'      => 'string',
        'deft'      => 'STDERR',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'    => 'sortmeta',
        'desc'    => '{import.sortmeta}',
        'type'    => 'metadata',
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {   'name'    => 'reversesort',
        'desc'    => '{import.reversesort}',
        'type'    => 'flag',
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {   'name'    => 'removeprefix',
        'desc'    => '{BasClas.removeprefix}',
        'type'    => 'regexp',
        'deft'    => '',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {   'name'    => 'removesuffix',
        'desc'    => '{BasClas.removesuffix}',
        'type'    => 'regexp',
        'deft'    => '',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {   'name'    => 'groupsize',
        'desc'    => '{import.groupsize}',
        'type'    => 'int',
        'deft'    => '1',
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {   'name'    => 'gzip',
        'desc'    => '{import.gzip}',
        'type'    => 'flag',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {   'name'      => 'statsfile',
        'desc'      => '{import.statsfile}',
        'type'      => 'string',
        'deft'      => 'STDERR',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'    => 'verbosity',
        'desc'    => '{import.verbosity}',
        'type'    => 'int',
        'range'   => '0,',
        # parsearg left "" as default
        #'deft' => "2",
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {   'name'      => 'gli',
        'desc'      => '{scripts.gli}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {   'name'      => 'xml',
        'desc'      => '{scripts.xml}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
];

# Top-level description of the script, handed to the usage printers.
my $options = {
    'name' => 'import.pl',
    'desc' => '{import.desc}',
    'args' => $arguments,
};
270
271
&main();

# main -- entry point of import.pl.
#
# Takes no parameters; everything is read from @ARGV, %ENV and the
# file-level $arguments / $options tables.  In order, it:
#   1. parses the command line and copies each option into a lexical;
#   2. prints XML usage and returns if -xml was given;
#   3. opens the output and fail-log handles;
#   4. selects the collection and reads its configuration file, using
#      configuration values for options not given on the command line;
#   5. loads the plugins and the plugout, prepares the archives directory;
#   6. imports either the whole import directory or just the files named
#      in the manifest;
#   7. writes out the statistics and closes the handles it opened.
#
# Dies (after printing usage or a message) on bad arguments, an unknown
# collection, an unopenable output/fail-log file, a missing import
# directory, or when no plugins could be loaded.
sub main {
    # params
    my ($language, $verbosity, $debug,
        $collectdir, $importdir, $archivedir, $site, $manifest,
        $incremental, $keepold, $removeold,
        $saveas,
        $OIDtype, $OIDmetadata,
        $maxdocs, $statsfile,
        $out, $faillog, $gli,
        $gzip, $groupsize,
        $sortmeta, $reversesort, $removeprefix, $removesuffix
        );

    my $xml = 0;

    # other vars
    my ($configfilename, $collection, $collectcfg,
        $arcinfo_doc_filename, $arcinfo_src_filename, $archive_info,
        $gs_mode,
        $processor, $pluginfo);

    my $hashParsingResult = {};
    # general options available to all plugins
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");
    # Parse returns -1 if something has gone wrong
    if ($intArgLeftinAfterParsing == -1) {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # Copy every parsed option into the lexical of the same name.  An
    # explicit name => reference table is used rather than a string eval,
    # so that only the variables listed here can ever be assigned.
    my %param_ref = (
        'language'     => \$language,
        'verbosity'    => \$verbosity,
        'debug'        => \$debug,
        'collectdir'   => \$collectdir,
        'importdir'    => \$importdir,
        'archivedir'   => \$archivedir,
        'site'         => \$site,
        'manifest'     => \$manifest,
        'incremental'  => \$incremental,
        'keepold'      => \$keepold,
        'removeold'    => \$removeold,
        'saveas'       => \$saveas,
        'OIDtype'      => \$OIDtype,
        'OIDmetadata'  => \$OIDmetadata,
        'maxdocs'      => \$maxdocs,
        'statsfile'    => \$statsfile,
        'out'          => \$out,
        'faillog'      => \$faillog,
        'gli'          => \$gli,
        'gzip'         => \$gzip,
        'groupsize'    => \$groupsize,
        'sortmeta'     => \$sortmeta,
        'reversesort'  => \$reversesort,
        'removeprefix' => \$removeprefix,
        'removesuffix' => \$removesuffix,
        'xml'          => \$xml,
    );
    foreach my $strVariable (keys %$hashParsingResult) {
        my $target = $param_ref{$strVariable};
        next unless defined $target;
        $$target = $hashParsingResult->{$strVariable};
    }

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8;
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    # Or if the user specified -h, then we output the usage also
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # $out is either the name of a standard handle or a file name; in the
    # latter case it is replaced by the name of the handle opened on it.
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        unless (open (OUT, ">", $out)) {
            &gsprintf(STDERR, "{common.cannot_open_output_file}: $!\n", $out);
            die "\n";
        }
        $out = 'import::OUT';
        $close_out = 1;
    }
    $out->autoflush(1);

    # get and check the collection name
    if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # add collection's perllib dir into include path in
    # case we have collection specific modules
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");

    # check that we can open the faillog
    if ($faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    unless (open (FAILLOG, ">", $faillog)) {
        &gsprintf(STDERR, "{import.cannot_open_fail_log}\n", $faillog);
        die "\n";
    }

    # keep the file name for the statistics report; from here on
    # $faillog holds the name of the open handle
    my $faillogname = $faillog;
    $faillog = 'import::FAILLOG';
    $faillog->autoflush(1);

    # Read in the collection configuration file.
    ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out);

    if ($gs_mode eq "gs2") {
        $collectcfg = &colcfg::read_collect_cfg ($configfilename);
    } elsif ($gs_mode eq "gs3") {
        $collectcfg = &colcfg::read_collection_cfg_xml ($configfilename);
    }

    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    # fill in the default import and archives directories if none
    # were supplied, turn all \ into / and remove trailing /
    $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq "";
    $importdir =~ s/[\\\/]+/\//g;
    $importdir =~ s/\/$//;
    if (!-e $importdir) {
        &gsprintf($out, "{import.no_import_dir}\n\n", $importdir);
        die "\n";
    }

    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;

    my $plugins = [];
    if (defined $collectcfg->{'plugin'}) {
        $plugins = $collectcfg->{'plugin'};
    }
    #some global options for the plugins
    my @global_opts = ();

    # For the options below, a value given on the command line wins,
    # then the collection configuration, then the built-in default.
    if ($verbosity !~ /\d+/) {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
            $verbosity = $collectcfg->{'verbosity'};
        } else {
            $verbosity = 2; # the default
        }
    }
    if (defined $collectcfg->{'manifest'} && $manifest eq "") {
        $manifest = $collectcfg->{'manifest'};
    }

    if (defined $collectcfg->{'gzip'} && !$gzip) {
        if ($collectcfg->{'gzip'} =~ /^true$/i) {
            $gzip = 1;
        }
    }

    if ($maxdocs !~ /\-?\d+/) {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
            $maxdocs = $collectcfg->{'maxdocs'};
        } else {
            $maxdocs = -1; # the default
        }
    }
    if ($groupsize == 1) {
        if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) {
            $groupsize = $collectcfg->{'groupsize'};
        }
    }

    if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/ )) {
        if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) {
            $OIDtype = $collectcfg->{'OIDtype'};
        } else {
            $OIDtype = "hash"; # the default
        }
    }

    if ((!defined $OIDmetadata) || ($OIDmetadata eq "")) {
        if (defined $collectcfg->{'OIDmetadata'}) {
            $OIDmetadata = $collectcfg->{'OIDmetadata'};
        } else {
            $OIDmetadata = "dc.Identifier"; # the default
        }
    }

    if (defined $collectcfg->{'sortmeta'} && (!defined $sortmeta || $sortmeta eq "")) {
        $sortmeta = $collectcfg->{'sortmeta'};
    }
    # sortmeta cannot be used with group size
    $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
    if (defined $sortmeta && $groupsize > 1) {
        &gsprintf($out, "{import.cannot_sort}\n\n");
        $sortmeta = undef;
    }

    if (defined $sortmeta) {
        if (defined $collectcfg->{'reversesort'} && $collectcfg->{'reversesort'} =~ /^true$/i) {
            $reversesort = 1;
        }
    } else {
        # reversesort only valid with sortmeta
        $reversesort = 0;
    }
    if (defined $collectcfg->{'removeprefix'} && $removeprefix eq "") {
        $removeprefix = $collectcfg->{'removeprefix'};
    }

    if (defined $collectcfg->{'removesuffix'} && $removesuffix eq "") {
        $removesuffix = $collectcfg->{'removesuffix'};
    }
    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
        $debug = 1;
    }
    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
        $gli = 1;
    }
    $gli = 0 unless defined $gli;

    # check keepold and removeold
    ($removeold, $keepold, $incremental) = &scriptutil::check_removeold_and_keepold($removeold, $keepold, $incremental, "archives", $collectcfg);


    print STDERR "<Import>\n" if $gli;

    my $manifest_lookup = manifest->new();
    if ($manifest ne "") {
        my $manifest_filename = $manifest;

        # a manifest name not starting with a slash is taken relative
        # to the collection directory
        if ($manifest_filename !~ m/^[\\\/]/) {
            $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename);
        }

        # NOTE(review): the slash normalisation is applied to $manifest,
        # not to $manifest_filename, so it has no effect on the file
        # that is parsed -- confirm which of the two was intended.
        $manifest =~ s/[\\\/]+/\//g;
        $manifest =~ s/\/$//;

        $manifest_lookup->parse($manifest_filename);
    }


    # load all the plugins
    $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $incremental);
    if (scalar(@$pluginfo) == 0) {
        &gsprintf($out, "{import.no_plugins_loaded}\n");
        die "\n";
    }

    # remove the old contents of the archives directory (and tmp directory) if needed
    if ($removeold) {
        if (-e $archivedir) {
            &gsprintf($out, "{import.removing_archives}\n");
            &util::rm_r ($archivedir);
        }
        my $tmpdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "tmp");
        $tmpdir =~ s/[\\\/]+/\//g;
        $tmpdir =~ s/\/$//;
        if (-e $tmpdir) {
            &gsprintf($out, "{import.removing_tmpdir}\n");
            &util::rm_r ($tmpdir);
        }
    }
    # create the archives dir if needed
    &util::mk_all_dir($archivedir);

    # read the archive information file
##    $arcinfo_doc_filename = &util::filename_cat ($archivedir, "archives.inf");

    $arcinfo_doc_filename = &util::filename_cat ($archivedir, "archiveinf-doc");
    &util::rename_gdbm_file($arcinfo_doc_filename); # ensures gdb
    $arcinfo_doc_filename .= ".gdb";

    $arcinfo_src_filename = &util::filename_cat ($archivedir, "archiveinf-src");
    &util::rename_gdbm_file($arcinfo_src_filename); # ensures gdb
    $arcinfo_src_filename .= ".gdb";


    $archive_info = arcinfo->new();
    $archive_info->load_info ($arcinfo_doc_filename);
    if ($reversesort) {
        $archive_info->reverse_sort();
    }

    if ($manifest eq "") {
        # Load in list of files in import folder from last import (if present)
        $archive_info->load_prev_import_filelist ($arcinfo_src_filename);
    }

    ####Use Plugout####
    # $plugout becomes the plugout name followed by its options.
    # NOTE(review): the pushes below treat $plugout as an array reference,
    # yet the configured value is matched against a regex as if it were a
    # plain string -- confirm what colcfg stores under 'plugout'.
    my ($plugout);
    if (defined $collectcfg->{'plugout'} && $collectcfg->{'plugout'} =~ /^(GreenstoneXML|GreenstoneMETS)Plugout/) {
        $plugout = $collectcfg->{'plugout'};
    }
    else {
        if ($saveas !~ /^(GreenstoneXML|GreenstoneMETS)$/) {
            push @$plugout,"GreenstoneXMLPlugout";
        }
        else {
            push @$plugout,$saveas."Plugout";
        }
    }

    push @$plugout,("-output_info",$archive_info) if (defined $archive_info);
    push @$plugout,("-verbosity",$verbosity) if (defined $verbosity);
    push @$plugout,("-gzip_output") if ($gzip);
    push @$plugout,("-group_size",$groupsize) if (defined $groupsize);
    push @$plugout,("-output_handle",$out) if (defined $out);
    push @$plugout,("-debug") if ($debug);

    $processor = &plugout::load_plugout($plugout);
    $processor->setoutputdir ($archivedir);
    $processor->set_sortmeta ($sortmeta, $removeprefix, $removesuffix) if defined $sortmeta;
    $processor->set_OIDtype ($OIDtype, $OIDmetadata);

    &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli);

    if ($manifest eq "") {
        # process the import directory
        my $block_hash = {};
        my $metadata = {};
        # global blocking pass may set up some metadata
        &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli);


        if (!$removeold) {

            &inexport::prime_doc_oid_count($archivedir);


            # Can now work out which files were new, already existed, and have
            # been deleted

            &inexport::new_vs_old_import_diff($archive_info,$block_hash,$importdir);

            my @deleted_files = sort keys %{$block_hash->{'deleted_files'}};
            if (scalar(@deleted_files) > 0) {
                print STDERR "Files deleted since last import:\n  ";
                print STDERR join("\n  ",@deleted_files), "\n";
            }

            my @new_files = sort keys %{$block_hash->{'new_files'}};
            if (scalar(@new_files) > 0) {
                print STDERR "New files since last import:\n  ";
                print STDERR join("\n  ",@new_files), "\n";
            }

            &inexport::mark_docs_for_deletion($archive_info,\@deleted_files,
                                              $archivedir,$verbosity);

            my @existing_files = sort keys %{$block_hash->{'existing_files'}};

            # the returned list is not used further here
            my @reindex_files
                = &inexport::mark_docs_for_reindex($archive_info,\@existing_files,
                                                   $archivedir,$verbosity);
        }

        &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
    }
    else
    {
        # process any files marked for importing
        foreach my $file (keys %{$manifest_lookup->{'import'}}) {
            &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli);
        }

        my @deleted_files = keys %{$manifest_lookup->{'delete'}};

        # pass $verbosity, as the directory-import branch above does
        &inexport::mark_docs_for_deletion($archive_info,\@deleted_files,
                                          $archivedir,$verbosity);
    }

    &plugin::end($pluginfo, $processor);

    &plugin::deinit($pluginfo, $processor);

    # Store the value of OIDCount (used in doc.pm) so it can be
    # restored correctly to this value on an incremental build
    &inexport::store_doc_oid_count($archivedir);

    # write out the archive information file
    $processor->close_file_output() if $groupsize > 1;
    $processor->close_group_output() if $processor->is_group();

# The following 'if' statement is in the export.pl version of the script,
# The reason for the 'if' statement is now given in export.pl
# Unclear at this point if the same should be done here
##    if (($saveas =~ m/^.*METS$/) || ($saveas eq "MARC")) {
    # Not all export types need this (e.g. DSpace)

    # should we still do this in debug mode??

    # for backwards compatibility with archives.inf file
    if ($arcinfo_doc_filename =~ m/\.inf$/) {
        $archive_info->save_info($arcinfo_doc_filename);
    }

##    }

    # write out import stats
    # ($statsfile is replaced by a handle name in the same way as $out;
    # if the file cannot be opened the stats go to STDERR instead)
    my $close_stats = 0;
    if ($statsfile !~ /^(STDERR|STDOUT)$/i) {
        if (open (STATS, ">", $statsfile)) {
            $statsfile = 'import::STATS';
            $close_stats = 1;
        } else {
            &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile);
            &gsprintf($out, "{import.stats_backup}\n");
            $statsfile = 'STDERR';
        }
    }

    &gsprintf($out, "\n");
    &gsprintf($out, "*********************************************\n");
    &gsprintf($out, "{import.complete}\n");
    &gsprintf($out, "*********************************************\n");

    &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli);
    if ($close_stats) {
        close STATS;
    }

    close OUT if $close_out;
    close FAILLOG;
}
Note: See TracBrowser for help on using the repository browser.