source: gsdl/trunk/bin/script/import.pl@ 20757

Last change on this file since 20757 was 20757, checked in by kjdon, 15 years ago

reversesort option not useful for import, as the sorting is done during build now. I have moved this option to ArchivesInfPlugin

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 22.1 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
BEGIN {
    # Both environment variables must be set before anything else happens
    if (!defined $ENV{'GSDLHOME'}) {
        die "GSDLHOME not set\n";
    }
    if (!defined $ENV{'GSDLOS'}) {
        die "GSDLOS not set\n";
    }

    # Prepend the core Greenstone library directories to @INC.  Each
    # directory is unshifted individually, so directories later in the
    # list end up earlier in @INC (i.e. they are searched first).
    my $core_perllib = "$ENV{'GSDLHOME'}/perllib";
    foreach my $subdir ("", "/cpan", "/cpan/perl-5.8", "/plugins", "/plugouts") {
        unshift(@INC, $core_perllib . $subdir);
    }

    # GSDLEXTS is a colon separated list of extension names; each
    # extension may supply its own perllib tree under GSDLHOME/ext
    if (defined $ENV{'GSDLEXTS'}) {
        foreach my $ext_name (split(/:/, $ENV{'GSDLEXTS'})) {
            my $ext_perllib = "$ENV{'GSDLHOME'}/ext/$ext_name/perllib";
            foreach my $subdir ("", "/cpan", "/plugins", "/plugouts") {
                unshift(@INC, $ext_perllib . $subdir);
            }
        }
    }
}
54
55use strict;
56no strict 'refs'; # allow filehandles to be variables and vice versa
57no strict 'subs'; # allow barewords (eg STDERR) as function arguments
58
59use arcinfo;
60use colcfg;
61use plugin;
62use plugout;
63use manifest;
64use inexport;
65use util;
66use scriptutil;
67use FileHandle;
68use gsprintf 'gsprintf';
69use printusage;
70use parse2;
71
72
73
# The values accepted by the -OIDtype option, each paired with the
# resource-bundle key of its description
my $oidtype_list = [
    { name => 'hash',        desc => '{import.OIDtype.hash}' },
    { name => 'assigned',    desc => '{import.OIDtype.assigned}' },
    { name => 'incremental', desc => '{import.OIDtype.incremental}' },
    { name => 'dirname',     desc => '{import.OIDtype.dirname}' },
];
83
84
# The values accepted by the -saveas option (controls the output file
# format), each paired with the resource-bundle key of its description
my $saveas_list = [
    { name => 'GreenstoneXML',  desc => '{export.saveas.GreenstoneXML}' },
    { name => 'GreenstoneMETS', desc => '{export.saveas.GreenstoneMETS}' },
];
92
93
# Attributes that an argument description may carry:
#   name:      the name of the argument
#   desc:      a description (or, more likely, a reference to a description)
#   type:      the kind of control used to represent the argument, e.g.
#              string, int, flag, regexp, metadata, language, enum
#   reqd:      is this argument required?
#   hiddengli: is this argument hidden in GLI?
#   modegli:   the lowest detail mode at which GLI shows this argument

# Description of the -saveas option (kept in its own variable, then placed
# first in the argument list)
my $saveas_argument = {
    name    => 'saveas',
    desc    => '{import.saveas}',
    type    => 'enum',
    list    => $saveas_list,
    deft    => 'GreenstoneXML',
    reqd    => 'no',
    modegli => '3',
};
110
111
# Every command line option understood by import.pl, in the order in which
# they are listed.  Options without a 'deft' entry deliberately have no
# default here (the option parser leaves them as ""); for several of them
# main() fills in the effective default after consulting the collection
# configuration file.
my $arguments = [
    $saveas_argument,
    { name => 'archivedir',   desc => '{import.archivedir}',     type => 'string',                    reqd => 'no', hiddengli => 'yes' },
    { name => 'importdir',    desc => '{import.importdir}',      type => 'string',                    reqd => 'no', hiddengli => 'yes' },
    # collectdir: default left as "" rather than GSDLHOME/collect
    { name => 'collectdir',   desc => '{import.collectdir}',     type => 'string', deft => '',        reqd => 'no', hiddengli => 'yes' },
    { name => 'site',         desc => '{import.site}',           type => 'string', deft => '',        reqd => 'no', hiddengli => 'yes' },
    { name => 'manifest',     desc => '{import.manifest}',       type => 'string', deft => '',        reqd => 'no', hiddengli => 'yes' },
    { name => 'debug',        desc => '{import.debug}',          type => 'flag',                      reqd => 'no', hiddengli => 'yes' },
    # faillog: default left as ""; main() falls back to etc/fail.log
    { name => 'faillog',      desc => '{import.faillog}',        type => 'string', deft => '',        reqd => 'no', modegli => '3' },
    { name => 'incremental',  desc => '{import.incremental}',    type => 'flag',                                    hiddengli => 'yes' },
    { name => 'keepold',      desc => '{import.keepold}',        type => 'flag',                      reqd => 'no', hiddengli => 'yes' },
    { name => 'removeold',    desc => '{import.removeold}',      type => 'flag',                      reqd => 'no', hiddengli => 'yes' },
    { name => 'language',     desc => '{scripts.language}',      type => 'string',                    reqd => 'no', hiddengli => 'yes' },
    # maxdocs: no default here; main() falls back to -1
    { name => 'maxdocs',      desc => '{import.maxdocs}',        type => 'int',    range => '1,',     reqd => 'no', modegli => '1' },
    # OIDtype: the default is not set to hash here, so that the value can
    # come from an entry in collect.cfg yet still be overridden on the
    # command line
    { name => 'OIDtype',      desc => '{import.OIDtype}',        type => 'enum',   list => $oidtype_list, reqd => 'no', modegli => '2' },
    # OIDmetadata: type "metadata" doesn't work properly in GLI; no default
    # here, main() falls back to dc.Identifier
    { name => 'OIDmetadata',  desc => '{import.OIDmetadata}',    type => 'string',                    reqd => 'no', modegli => '2' },
    { name => 'out',          desc => '{import.out}',            type => 'string', deft => 'STDERR',  reqd => 'no', hiddengli => 'yes' },
    # sortmeta: type "metadata" doesn't work properly in GLI
    { name => 'sortmeta',     desc => '{import.sortmeta}',       type => 'string',                    reqd => 'no', modegli => '2' },
    { name => 'removeprefix', desc => '{BasClas.removeprefix}',  type => 'regexp', deft => '',        reqd => 'no', modegli => '3' },
    { name => 'removesuffix', desc => '{BasClas.removesuffix}',  type => 'regexp', deft => '',        reqd => 'no', modegli => '3' },
    { name => 'groupsize',    desc => '{import.groupsize}',      type => 'int',    deft => '1',       reqd => 'no', modegli => '2' },
    { name => 'gzip',         desc => '{import.gzip}',           type => 'flag',                      reqd => 'no', modegli => '3' },
    { name => 'statsfile',    desc => '{import.statsfile}',      type => 'string', deft => 'STDERR',  reqd => 'no', hiddengli => 'yes' },
    # verbosity: no default here; main() falls back to 2
    { name => 'verbosity',    desc => '{import.verbosity}',      type => 'int',    range => '0,',     reqd => 'no', modegli => '3' },
    { name => 'gli',          desc => '{scripts.gli}',           type => 'flag',                      reqd => 'no', hiddengli => 'yes' },
    { name => 'xml',          desc => '{scripts.xml}',           type => 'flag',                      reqd => 'no', hiddengli => 'yes' },
];

# Top-level description of this script, handed to the usage printers
my $options = {
    name => 'import.pl',
    desc => '{import.desc}',
    args => $arguments,
};
266
267
&main();

# main --
# Runs one import of a collection: parses the command line, merges in
# settings from the collection configuration file, loads the plugins and
# the plugout, passes the import directory (or the files named in a
# manifest) through them, and finally writes out the archive information
# and the import statistics.
#
# Takes no arguments.  Options are read from @ARGV; exactly one leftover
# argument is expected, which is used as the collection name.  Dies (after
# printing the usage text or an error message) if the options cannot be
# parsed, the collection cannot be used, the output file or fail log cannot
# be opened, the import directory is missing, or no plugins were loaded.
sub main {
    # params -- these are filled in by name from the parsed command line
    # (see the eval loop below), so the variable names have to match the
    # option names declared in $arguments
    my ($language, $verbosity, $debug,
        $collectdir, $importdir, $archivedir, $site, $manifest,
        $incremental, $incremental_mode, $keepold, $removeold,
        $saveas,
        $OIDtype, $OIDmetadata,
        $maxdocs, $statsfile,
        $out, $faillog, $gli,
        $gzip, $groupsize,
        $sortmeta, $removeprefix, $removesuffix
        );

    my $xml = 0;

    # other vars
    my ($configfilename, $collection, $collectcfg,
        $arcinfo_doc_filename, $arcinfo_src_filename, $archive_info,
        $gs_mode,
        $processor, $pluginfo);

    my $service = "import";

    my $hashParsingResult = {};
    # general options available to all plugins
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");
    # Parse returns -1 if something has gone wrong
    if ($intArgLeftinAfterParsing == -1) {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # copy every parsed option into the lexical variable of the same name
    foreach my $strVariable (keys %$hashParsingResult) {
        eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}";
    }

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8;
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    # Or if the user specified -h, then we output the usage also
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # Open the output file unless output goes to STDERR/STDOUT.  From here
    # on $out holds the *name* of a filehandle (symbolic handle).
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        # die unconditionally on failure; this must not depend on the
        # return value of gsprintf
        unless (open (OUT, ">", $out)) {
            &gsprintf(STDERR, "{common.cannot_open_output_file}: $!\n", $out);
            die;
        }
        $out = 'import::OUT';
        $close_out = 1;
    }
    $out->autoflush(1);

    # get and check the collection name
    if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # add collection's perllib dir into include path in
    # case we have collection specific modules
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");

    # check that we can open the faillog
    if ($faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    unless (open (FAILLOG, ">", $faillog)) {
        &gsprintf(STDERR, "{import.cannot_open_fail_log}\n", $faillog);
        die;
    }

    # remember the file name for the stats report; $faillog itself becomes
    # the name of the filehandle
    my $faillogname = $faillog;
    $faillog = 'import::FAILLOG';
    $faillog->autoflush(1);

    # Read in the collection configuration file.
    ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out);
    $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode);

    # Throughout the following section a value given on the command line
    # wins; the collection configuration is only consulted when the option
    # was not supplied.
    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    # fill in the default import and archives directories if none
    # were supplied, turn all \ into / and remove trailing /
    $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq "";
    $importdir =~ s/[\\\/]+/\//g;
    $importdir =~ s/\/$//;
    if (!-e $importdir) {
        &gsprintf($out, "{import.no_import_dir}\n\n", $importdir);
        die "\n";
    }

    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;

    my $plugins = [];
    if (defined $collectcfg->{'plugin'}) {
        $plugins = $collectcfg->{'plugin'};
    }
    #some global options for the plugins
    my @global_opts = ();

    if (!defined $verbosity || $verbosity !~ /\d+/) {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
            $verbosity = $collectcfg->{'verbosity'};
        } else {
            $verbosity = 2; # the default
        }
    }
    if (defined $collectcfg->{'manifest'} && $manifest eq "") {
        $manifest = $collectcfg->{'manifest'};
    }

    if (defined $collectcfg->{'gzip'} && !$gzip) {
        if ($collectcfg->{'gzip'} =~ /^true$/i) {
            $gzip = 1;
        }
    }

    if (!defined $maxdocs || $maxdocs !~ /\-?\d+/) {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
            $maxdocs = $collectcfg->{'maxdocs'};
        } else {
            $maxdocs = -1; # the default
        }
    }
    # a groupsize of 1 is the option's default, so treat it as "not given"
    if ($groupsize == 1) {
        if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) {
            $groupsize = $collectcfg->{'groupsize'};
        }
    }

    if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/ )) {
        if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) {
            $OIDtype = $collectcfg->{'OIDtype'};
        } else {
            $OIDtype = "hash"; # the default
        }
    }

    if ((!defined $OIDmetadata) || ($OIDmetadata eq "")) {
        if (defined $collectcfg->{'OIDmetadata'}) {
            $OIDmetadata = $collectcfg->{'OIDmetadata'};
        } else {
            $OIDmetadata = "dc.Identifier"; # the default
        }
    }

    if (defined $collectcfg->{'sortmeta'} && (!defined $sortmeta || $sortmeta eq "")) {
        $sortmeta = $collectcfg->{'sortmeta'};
    }
    # sortmeta cannot be used with group size
    $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
    if (defined $sortmeta && $groupsize > 1) {
        &gsprintf($out, "{import.cannot_sort}\n\n");
        $sortmeta = undef;
    }

    if (defined $collectcfg->{'removeprefix'} && $removeprefix eq "") {
        $removeprefix = $collectcfg->{'removeprefix'};
    }

    if (defined $collectcfg->{'removesuffix'} && $removesuffix eq "") {
        $removesuffix = $collectcfg->{'removesuffix'};
    }
    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
        $debug = 1;
    }
    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
        $gli = 1;
    }
    $gli = 0 unless defined $gli;

    # check keepold and removeold
    ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
                                                   $incremental, "archives",
                                                   $collectcfg);

    print STDERR "<Import>\n" if $gli;

    my $manifest_lookup = new manifest();
    if ($manifest ne "") {
        my $manifest_filename = $manifest;

        # a manifest name that does not start with a slash or backslash is
        # taken to be relative to the collection directory
        if ($manifest_filename !~ m/^[\\\/]/) {
            $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename);
        }

        # NOTE(review): the separator clean-up is applied to $manifest,
        # while the file that gets parsed is $manifest_filename -- confirm
        # this is intended
        $manifest =~ s/[\\\/]+/\//g;
        $manifest =~ s/\/$//;

        $manifest_lookup->parse($manifest_filename);
    }

    # load all the plugins
    $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $incremental_mode);
    if (scalar(@$pluginfo) == 0) {
        &gsprintf($out, "{import.no_plugins_loaded}\n");
        die "\n";
    }

    # remove the old contents of the archives directory (and tmp directory) if needed
    if ($removeold) {
        if (-e $archivedir) {
            &gsprintf($out, "{import.removing_archives}\n");
            &util::rm_r ($archivedir);
        }
        my $tmpdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "tmp");
        $tmpdir =~ s/[\\\/]+/\//g;
        $tmpdir =~ s/\/$//;
        if (-e $tmpdir) {
            &gsprintf($out, "{import.removing_tmpdir}\n");
            &util::rm_r ($tmpdir);
        }
    }
    # create the archives dir if needed
    &util::mk_all_dir($archivedir);

    # read the archive information file
##  $arcinfo_doc_filename = &util::filename_cat ($archivedir, "archives.inf");

    $arcinfo_doc_filename = &util::filename_cat ($archivedir, "archiveinf-doc");
    &util::rename_gdbm_file($arcinfo_doc_filename); # ensures gdb
    $arcinfo_doc_filename .= ".gdb";

    $arcinfo_src_filename = &util::filename_cat ($archivedir, "archiveinf-src");
    &util::rename_gdbm_file($arcinfo_src_filename); # ensures gdb
    $arcinfo_src_filename .= ".gdb";

    $archive_info = new arcinfo ();
    $archive_info->load_info ($arcinfo_doc_filename);

    if ($manifest eq "") {
        # Load in list of files in import folder from last import (if present)
        $archive_info->load_prev_import_filelist ($arcinfo_src_filename);
    }

    ####Use Plugout####
    my ($plugout);
    if (defined $collectcfg->{'plugout'}) {
        # If a plugout was specified in the collect.cfg file, assume it is sensible
        # We can't check the name because it could be anything, if it is a custom plugout
        $plugout = $collectcfg->{'plugout'};
    }
    else {
        # otherwise derive the plugout name from -saveas, falling back to
        # GreenstoneXMLPlugout for anything unrecognised
        if ($saveas !~ /^(GreenstoneXML|GreenstoneMETS)$/) {
            push @$plugout,"GreenstoneXMLPlugout";
        }
        else {
            push @$plugout,$saveas."Plugout";
        }
    }

    push @$plugout,("-output_info",$archive_info) if (defined $archive_info);
    push @$plugout,("-verbosity",$verbosity) if (defined $verbosity);
    push @$plugout,("-gzip_output") if ($gzip);
    push @$plugout,("-group_size",$groupsize) if (defined $groupsize);
    push @$plugout,("-output_handle",$out) if (defined $out);
    push @$plugout,("-debug") if ($debug);

    $processor = &plugout::load_plugout($plugout);
    $processor->setoutputdir ($archivedir);
    $processor->set_sortmeta ($sortmeta, $removeprefix, $removesuffix) if defined $sortmeta;
    $processor->set_OIDtype ($OIDtype, $OIDmetadata);

    &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli);

    if ($manifest eq "") {
        # process the import directory
        my $block_hash = {};
        my $metadata = {};
        # global blocking pass may set up some metadata
        &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli);

        if ($incremental || $incremental_mode eq "onlyadd") {

            &inexport::prime_doc_oid_count($archivedir);

            # Can now work out which files were new, already existed, and have
            # been deleted

            &inexport::new_vs_old_import_diff($archive_info,$block_hash,$importdir,
                                              $archivedir,$verbosity,$incremental_mode);

            my @new_files = sort keys %{$block_hash->{'new_files'}};
            if (scalar(@new_files) > 0) {
                print STDERR "New files since last import:\n ";
                print STDERR join("\n ",@new_files), "\n";
            }

            if ($incremental) {
                # only look for deletions if we are truly incremental
                my @deleted_files = sort keys %{$block_hash->{'deleted_files'}};
                # Filter out any in gsdl/tmp area
                my @filtered_deleted_files = ();
                my $gsdl_tmp_area = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
                my $collect_tmp_area = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
                $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area);
                $collect_tmp_area = &util::filename_to_regex($collect_tmp_area);

                foreach my $df (@deleted_files) {
                    next if ($df =~ m/^$gsdl_tmp_area/);
                    next if ($df =~ m/^$collect_tmp_area/);

                    push(@filtered_deleted_files,$df);
                }

                @deleted_files = @filtered_deleted_files;

                if (scalar(@deleted_files) > 0) {
                    print STDERR "Files deleted since last import:\n ";
                    print STDERR join("\n ",@deleted_files), "\n";
                }

                &inexport::mark_docs_for_deletion($archive_info,$block_hash,\@deleted_files,
                                                  $archivedir,$verbosity);

                &inexport::mark_docs_for_reindex($archive_info,$block_hash,
                                                 $archivedir,$verbosity);

                my @reindex_files = sort keys %{$block_hash->{'reindex_files'}};

                if (scalar(@reindex_files) > 0) {
                    print STDERR "Files to reindex since last import:\n ";
                    print STDERR join("\n ",@reindex_files), "\n";
                }

                # not sure if the following will work -- will the metadata data-structure be correctly initialized
                # in the right order?
#               foreach my $file (@new_files, @reindex_files) {
#                   &plugin::read ($pluginfo, $importdir, $file, $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
#               }

            }

            # Play it safe, and run through the entire folder, only processing new or edited files
            &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);

        }
        else {
            &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
        }

    }
    else {
        # process any files marked for importing
        foreach my $file (keys %{$manifest_lookup->{'import'}}) {
            &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli);
        }

        my @deleted_files = keys %{$manifest_lookup->{'delete'}};

        # pass $verbosity here too, as the incremental branch above does
        &inexport::mark_docs_for_deletion($archive_info,{},\@deleted_files,$archivedir,$verbosity);
    }

    &plugin::end($pluginfo, $processor);

    &plugin::deinit($pluginfo, $processor);

    # Store the value of OIDCount (used in doc.pm) so it can be
    # restored correctly to this value on an incremental build
    &inexport::store_doc_oid_count($archivedir);

    # write out the archive information file
    $processor->close_file_output() if $groupsize > 1;
    $processor->close_group_output() if $processor->is_group();

# The following 'if' statement is in the export.pl version of the script,
# The reason for the 'if' statement is now given in export.pl
# Unclear at this point if the same should be done here
##  if (($saveas =~ m/^.*METS$/) || ($saveas eq "MARC")) {
    # Not all export types need this (e.g. DSpace)

    # should we still do this in debug mode??

    # for backwards compatibility with archives.inf file
    if ($arcinfo_doc_filename =~ m/(contents)|(\.inf)$/) {
        $archive_info->save_info($arcinfo_doc_filename);
    }
    else {
        $archive_info->save_revinfo_gdbm($arcinfo_src_filename);
    }

##  }

    # write out import stats; if the stats file cannot be opened the
    # stats are written to STDERR instead
    my $close_stats = 0;
    if ($statsfile !~ /^(STDERR|STDOUT)$/i) {
        if (open (STATS, ">", $statsfile)) {
            $statsfile = 'import::STATS';
            $close_stats = 1;
        } else {
            &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile);
            &gsprintf($out, "{import.stats_backup}\n");
            $statsfile = 'STDERR';
        }
    }

    &gsprintf($out, "\n");
    &gsprintf($out, "*********************************************\n");
    &gsprintf($out, "{import.complete}\n");
    &gsprintf($out, "*********************************************\n");

    &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli);
    if ($close_stats) {
        close STATS;
    }

    close OUT if $close_out;
    close FAILLOG;
}
Note: See TracBrowser for help on using the repository browser.