source: main/trunk/greenstone2/bin/script/import.pl@ 21822

Last change on this file since 21822 was 21822, checked in by ak19, 14 years ago

Dr Bainbridge has fixed several perl files that depended on perl 5.8 to work and used to fail with Perl 5.10.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 23.0 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
# Compile-time setup: check that the Greenstone environment is present and
# put the core and per-extension Perl library directories at the front of
# @INC, so that the 'use' statements further down can be resolved.
#
# Dies if GSDLHOME or GSDLOS is not set.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Every library root contributes the same four directories. They are
    # unshifted one at a time, so the last one listed ends up first in @INC.
    my $add_perllib_dirs = sub {
        my ($prefix) = @_;
        foreach my $subdir ("perllib", "perllib/cpan", "perllib/plugins", "perllib/plugouts") {
            unshift (@INC, "$prefix/$subdir");
        }
    };

    $add_perllib_dirs->($ENV{'GSDLHOME'});

    # Extensions named in GSDLEXTS (colon separated) live under $GSDLHOME/ext
    if (defined $ENV{'GSDLEXTS'}) {
        foreach my $e (split(/:/, $ENV{'GSDLEXTS'})) {
            $add_perllib_dirs->("$ENV{'GSDLHOME'}/ext/$e");
        }
    }

    # Extensions named in GSDL3EXTS live under $GSDL3SRCHOME/ext. Without
    # GSDL3SRCHOME the prefix would collapse to "/ext/<name>", so skip them
    # rather than add root-anchored directories to @INC.
    if (defined $ENV{'GSDL3EXTS'}) {
        if (defined $ENV{'GSDL3SRCHOME'}) {
            foreach my $e (split(/:/, $ENV{'GSDL3EXTS'})) {
                $add_perllib_dirs->("$ENV{'GSDL3SRCHOME'}/ext/$e");
            }
        }
        else {
            warn "GSDL3EXTS is set but GSDL3SRCHOME is not; ignoring GSDL3EXTS\n";
        }
    }
}
64
65use strict;
66no strict 'refs'; # allow filehandles to be variables and vice versa
67no strict 'subs'; # allow barewords (eg STDERR) as function arguments
68
69use arcinfo;
70use colcfg;
71use dbutil;
72use plugin;
73use plugout;
74use manifest;
75use inexport;
76use util;
77use scriptutil;
78use FileHandle;
79use gsprintf 'gsprintf';
80use printusage;
81use parse2;
82
83
84
# Values accepted by the -OIDtype option. Each entry pairs the option value
# with the resource-bundle key that holds its description.
my $oidtype_list = [
    { name => 'hash',        desc => '{import.OIDtype.hash}' },
    { name => 'assigned',    desc => '{import.OIDtype.assigned}' },
    { name => 'incremental', desc => '{import.OIDtype.incremental}' },
    { name => 'dirname',     desc => '{import.OIDtype.dirname}' },
];
94
95
# Output formats selectable through -saveas. Each entry pairs the option
# value with the resource-bundle key that holds its description.
my $saveas_list = [
    { name => 'GreenstoneXML',  desc => '{export.saveas.GreenstoneXML}' },
    { name => 'GreenstoneMETS', desc => '{export.saveas.GreenstoneMETS}' },
];
103
104
105# Possible attributes for each argument
106# name: The name of the argument
107# desc: A description (or more likely a reference to a description) for this argument
108# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, language, enum etc
109# reqd: Is this argument required?
110# hiddengli: Is this argument hidden in GLI?
111# modegli: The lowest detail mode this argument is visible at in GLI
112
# Description of the -saveas option: an enum over $saveas_list whose
# default is GreenstoneXML. It is held in its own variable and used as the
# first entry of $arguments.
my $saveas_argument = {
    name    => 'saveas',
    desc    => '{import.saveas}',
    type    => 'enum',
    list    => $saveas_list,
    deft    => 'GreenstoneXML',
    reqd    => 'no',
    modegli => '3',
};
121
122
# The command-line options understood by import.pl, in the order they are
# handed to parse2::parse() and to the usage printers. Several entries
# deliberately carry no 'deft' (or an empty one): for those, main() works
# out the effective value after the collection configuration is read.
my $arguments = [
    $saveas_argument,

    { name      => 'archivedir',
      desc      => '{import.archivedir}',
      type      => 'string',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'importdir',
      desc      => '{import.importdir}',
      type      => 'string',
      reqd      => 'no',
      hiddengli => 'yes' },

    # Default is intentionally empty rather than <GSDLHOME>/collect; the
    # value is passed on as-is to colcfg::use_collection() in main().
    { name      => 'collectdir',
      desc      => '{import.collectdir}',
      type      => 'string',
      deft      => '',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'site',
      desc      => '{import.site}',
      type      => 'string',
      deft      => '',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'manifest',
      desc      => '{import.manifest}',
      type      => 'string',
      deft      => '',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'debug',
      desc      => '{import.debug}',
      type      => 'flag',
      reqd      => 'no',
      hiddengli => 'yes' },

    # Empty default: main() substitutes <GSDLCOLLECTDIR>/etc/fail.log
    { name    => 'faillog',
      desc    => '{import.faillog}',
      type    => 'string',
      deft    => '',
      reqd    => 'no',
      modegli => '3' },

    { name      => 'incremental',
      desc      => '{import.incremental}',
      type      => 'flag',
      hiddengli => 'yes' },

    { name      => 'keepold',
      desc      => '{import.keepold}',
      type      => 'flag',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'removeold',
      desc      => '{import.removeold}',
      type      => 'flag',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'language',
      desc      => '{scripts.language}',
      type      => 'string',
      reqd      => 'no',
      hiddengli => 'yes' },

    # No default here: main() uses the collect.cfg value, or else -1
    { name    => 'maxdocs',
      desc    => '{import.maxdocs}',
      type    => 'int',
      reqd    => 'no',
      range   => '1,',
      modegli => '1' },

    # No default here, so that a value in collect.cfg can apply when the
    # option is not given; main() finally falls back to "hash".
    { name    => 'OIDtype',
      desc    => '{import.OIDtype}',
      type    => 'enum',
      list    => $oidtype_list,
      reqd    => 'no',
      modegli => '2' },

    # Declared as 'string' because the 'metadata' type doesn't work
    # properly in GLI. No default here: main() uses the collect.cfg value,
    # or else "dc.Identifier".
    { name    => 'OIDmetadata',
      desc    => '{import.OIDmetadata}',
      type    => 'string',
      reqd    => 'no',
      modegli => '2' },

    { name      => 'out',
      desc      => '{import.out}',
      type      => 'string',
      deft      => 'STDERR',
      reqd      => 'no',
      hiddengli => 'yes' },

    # Declared as 'string' because the 'metadata' type doesn't work
    # properly in GLI.
    { name    => 'sortmeta',
      desc    => '{import.sortmeta}',
      type    => 'string',
      reqd    => 'no',
      modegli => '2' },

    { name    => 'removeprefix',
      desc    => '{BasClas.removeprefix}',
      type    => 'regexp',
      deft    => '',
      reqd    => 'no',
      modegli => '3' },

    { name    => 'removesuffix',
      desc    => '{BasClas.removesuffix}',
      type    => 'regexp',
      deft    => '',
      reqd    => 'no',
      modegli => '3' },

    { name    => 'groupsize',
      desc    => '{import.groupsize}',
      type    => 'int',
      deft    => '1',
      reqd    => 'no',
      modegli => '2' },

    { name    => 'gzip',
      desc    => '{import.gzip}',
      type    => 'flag',
      reqd    => 'no',
      modegli => '3' },

    { name      => 'statsfile',
      desc      => '{import.statsfile}',
      type      => 'string',
      deft      => 'STDERR',
      reqd      => 'no',
      hiddengli => 'yes' },

    # No default here: main() uses the collect.cfg value, or else 2
    { name    => 'verbosity',
      desc    => '{import.verbosity}',
      type    => 'int',
      range   => '0,',
      reqd    => 'no',
      modegli => '3' },

    { name      => 'gli',
      desc      => '{scripts.gli}',
      type      => 'flag',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'xml',
      desc      => '{scripts.xml}',
      type      => 'flag',
      reqd      => 'no',
      hiddengli => 'yes' },
];
273
# Script-level description handed to the PrintUsage routines: the script
# name, the resource-bundle key of its summary, and its option list.
my $options = {
    name => 'import.pl',
    desc => '{import.desc}',
    args => $arguments,
};


# Run the import now that the option tables above are populated.
main();
280
# Entry point: import the documents of one collection into its archives
# directory.
#
# Options are parsed from @ARGV according to $arguments; exactly one
# argument must be left over, and it is taken as the collection name.
# Options not given on the command line are filled in from the collection
# configuration file where it has a matching entry, then from built-in
# defaults. The collection's plugins are loaded, a plugout is set up as the
# document processor, and the import directory (or, with -manifest, only
# the files the manifest lists) is run through the plugins. Finally the
# archive information and the import statistics are written.
#
# With -xml the option description is printed as XML and nothing else is
# done. Dies (after printing the usage text or an error message) on bad
# arguments, an unknown collection, an output or fail-log file that cannot
# be opened, a missing import directory, or when no plugins were loaded.
# The return value is not used by the caller.
sub main {
    # Option values. The string eval below assigns each parsed option to
    # the lexical of the same name, so these names must stay in step with
    # the 'name' fields in $arguments.
    my ($language, $verbosity, $debug,
        $collectdir, $importdir, $archivedir, $site, $manifest,
        $incremental, $incremental_mode, $keepold, $removeold,
        $saveas,
        $OIDtype, $OIDmetadata,
        $maxdocs, $statsfile,
        $out, $faillog, $gli,
        $gzip, $groupsize,
        $sortmeta, $removeprefix, $removesuffix
       );

    my $xml = 0;

    # other vars
    my ($configfilename, $collection, $collectcfg,
        $arcinfo_doc_filename, $arcinfo_src_filename, $archive_info,
        $gs_mode,
        $processor, $pluginfo);

    my $service = "import";

    my $hashParsingResult = {};
    # general options available to all plugins
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");
    # Parse returns -1 if something has gone wrong
    if ($intArgLeftinAfterParsing == -1) {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # Copy every parsed value into the lexical named after its option.
    # ($strVariable and $hashParsingResult are referenced by name inside
    # the evaluated string, so they must not be renamed.)
    foreach my $strVariable (keys %$hashParsingResult) {
        eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}";
    }

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8;
    }

    # Now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, because the
    # -xml arg doesn't need a collection name.
    # If the user specified -h, then we output the usage as well.
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # From here on $out holds the *name* of the handle that messages are
    # written to, not a file name.
    my $close_out = 0;
    if ($out =~ /^(STDERR|STDOUT)$/i) {
        # The match is case-insensitive, but inside a package only the
        # upper-case names refer to the standard handles.
        $out = uc($out);
    }
    else {
        # Three-argument open, so the path is never parsed for mode
        # characters. Die even if the message itself could not be printed.
        unless (open (OUT, '>', $out)) {
            &gsprintf(STDERR, "{common.cannot_open_output_file}: $!\n", $out);
            die "\n";
        }
        $out = 'import::OUT';
        $close_out = 1;
    }
    $out->autoflush(1);

    # get and check the collection name
    if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # add collection's perllib dir into include path in
    # case we have collection specific modules
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");

    # check that we can open the faillog
    if ($faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    unless (open (FAILLOG, '>', $faillog)) {
        &gsprintf(STDERR, "{import.cannot_open_fail_log}\n", $faillog);
        die "\n";
    }

    # Keep the file name for the statistics report; $faillog itself becomes
    # the name of the open handle.
    my $faillogname = $faillog;
    $faillog = 'import::FAILLOG';
    $faillog->autoflush(1);

    # Read in the collection configuration file.
    ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out);
    $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode);

    # If the infodbtype value wasn't defined in the collect.cfg file, use the default
    if (!defined($collectcfg->{'infodbtype'})) {
        $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
    }

    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    # fill in the default import and archives directories if none
    # were supplied, turn all \ into / and remove trailing /
    $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq "";
    $importdir =~ s/[\\\/]+/\//g;
    $importdir =~ s/\/$//;
    if (!-e $importdir) {
        &gsprintf($out, "{import.no_import_dir}\n\n", $importdir);
        die "\n";
    }

    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;

    my $plugins = [];
    if (defined $collectcfg->{'plugin'}) {
        $plugins = $collectcfg->{'plugin'};
    }
    # some global options for the plugins
    my @global_opts = ();

    # For the options below the precedence is: command line, then the
    # collection configuration, then the built-in default.
    if ($verbosity !~ /\d+/) {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
            $verbosity = $collectcfg->{'verbosity'};
        }
        else {
            $verbosity = 2; # the default
        }
    }
    if (defined $collectcfg->{'manifest'} && $manifest eq "") {
        $manifest = $collectcfg->{'manifest'};
    }

    if (defined $collectcfg->{'gzip'} && !$gzip) {
        if ($collectcfg->{'gzip'} =~ /^true$/i) {
            $gzip = 1;
        }
    }

    if ($maxdocs !~ /\-?\d+/) {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
            $maxdocs = $collectcfg->{'maxdocs'};
        }
        else {
            $maxdocs = -1; # the default
        }
    }
    # A group size of 1 is the option's default, so only then may the
    # configuration file supply a different value.
    if ($groupsize == 1) {
        if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) {
            $groupsize = $collectcfg->{'groupsize'};
        }
    }

    if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/ )) {
        if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) {
            $OIDtype = $collectcfg->{'OIDtype'};
        }
        else {
            $OIDtype = "hash"; # the default
        }
    }

    if ((!defined $OIDmetadata) || ($OIDmetadata eq "")) {
        if (defined $collectcfg->{'OIDmetadata'}) {
            $OIDmetadata = $collectcfg->{'OIDmetadata'};
        }
        else {
            $OIDmetadata = "dc.Identifier"; # the default
        }
    }

    if (defined $collectcfg->{'sortmeta'} && (!defined $sortmeta || $sortmeta eq "")) {
        $sortmeta = $collectcfg->{'sortmeta'};
    }
    # sortmeta cannot be used with group size
    $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
    if (defined $sortmeta && $groupsize > 1) {
        &gsprintf($out, "{import.cannot_sort}\n\n");
        $sortmeta = undef;
    }

    if (defined $collectcfg->{'removeprefix'} && $removeprefix eq "") {
        $removeprefix = $collectcfg->{'removeprefix'};
    }

    if (defined $collectcfg->{'removesuffix'} && $removesuffix eq "") {
        $removesuffix = $collectcfg->{'removesuffix'};
    }
    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
        $debug = 1;
    }
    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
        $gli = 1;
    }
    $gli = 0 unless defined $gli;

    # check keepold and removeold
    ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
                                                   $incremental, "archives",
                                                   $collectcfg);

    print STDERR "<Import>\n" if $gli;

    my $manifest_lookup = manifest->new();
    if ($manifest ne "") {
        my $manifest_filename = $manifest;

        # A name that does not start with a slash or backslash is taken
        # relative to the collection directory.
        if ($manifest_filename !~ m/^[\\\/]/) {
            $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename);
        }

        # NOTE(review): the separator clean-up is applied to $manifest, not
        # to $manifest_filename, which is the path actually parsed --
        # confirm which of the two was meant.
        $manifest =~ s/[\\\/]+/\//g;
        $manifest =~ s/\/$//;

        $manifest_lookup->parse($manifest_filename);
    }

    # load all the plugins
    $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $incremental_mode);
    if (scalar(@$pluginfo) == 0) {
        &gsprintf($out, "{import.no_plugins_loaded}\n");
        die "\n";
    }

    # remove the old contents of the archives directory (and tmp directory) if needed
    if ($removeold) {
        if (-e $archivedir) {
            &gsprintf($out, "{import.removing_archives}\n");
            &util::rm_r ($archivedir);
        }
        my $tmpdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "tmp");
        $tmpdir =~ s/[\\\/]+/\//g;
        $tmpdir =~ s/\/$//;
        if (-e $tmpdir) {
            &gsprintf($out, "{import.removing_tmpdir}\n");
            &util::rm_r ($tmpdir);
        }
    }

    # create the archives dir if needed
    &util::mk_all_dir($archivedir);

    # read the archive information file

    # BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files (won't do anything for other infodbtypes)
    &util::rename_ldb_or_bdb_file(&util::filename_cat($archivedir, "archiveinf-doc"));
    &util::rename_ldb_or_bdb_file(&util::filename_cat($archivedir, "archiveinf-src"));

    $arcinfo_doc_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-doc", $archivedir);
    $arcinfo_src_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-src", $archivedir);

    $archive_info = arcinfo->new($collectcfg->{'infodbtype'});
    $archive_info->load_info ($arcinfo_doc_filename);

    if ($manifest eq "") {
        # Load in list of files in import folder from last import (if present)
        $archive_info->load_prev_import_filelist ($arcinfo_src_filename);
    }

    #### Use Plugout ####
    # $plugout is an array ref: the plugout name followed by its options.
    my ($plugout);
    if (defined $collectcfg->{'plugout'}) {
        # If a plugout was specified in the collect.cfg file, assume it is sensible
        # We can't check the name because it could be anything, if it is a custom plugout
        $plugout = $collectcfg->{'plugout'};
    }
    else {
        if ($saveas !~ /^(GreenstoneXML|GreenstoneMETS)$/) {
            push @$plugout,"GreenstoneXMLPlugout";
        }
        else {
            push @$plugout,$saveas."Plugout";
        }
    }

    push @$plugout,("-output_info",$archive_info) if (defined $archive_info);
    push @$plugout,("-verbosity",$verbosity) if (defined $verbosity);
    push @$plugout,("-gzip_output") if ($gzip);
    push @$plugout,("-group_size",$groupsize) if (defined $groupsize);
    push @$plugout,("-output_handle",$out) if (defined $out);
    push @$plugout,("-debug") if ($debug);

    $processor = &plugout::load_plugout($plugout);
    $processor->setoutputdir ($archivedir);
    $processor->set_sortmeta ($sortmeta, $removeprefix, $removesuffix) if defined $sortmeta;
    $processor->set_OIDtype ($OIDtype, $OIDmetadata);

    &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli);

    if ($removeold) {
        # occasionally, plugins may want to do something on remove old, eg pharos image indexing
        &plugin::remove_all($pluginfo, $importdir, $processor, $maxdocs, $gli);
    }

    if ($manifest eq "") {
        # process the import directory
        my $block_hash = {};
        my $metadata = {};
        # global blocking pass may set up some metadata
        &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli);

        if ($incremental || $incremental_mode eq "onlyadd") {

            &inexport::prime_doc_oid_count($archivedir);

            # Can now work out which files were new, already existed, and have
            # been deleted
            &inexport::new_vs_old_import_diff($archive_info,$block_hash,$importdir,
                                              $archivedir,$verbosity,$incremental_mode);

            my @new_files = sort keys %{$block_hash->{'new_files'}};
            if (scalar(@new_files) > 0) {
                print STDERR "New files and modified metadata files since last import:\n ";
                print STDERR join("\n ",@new_files), "\n";
            }

            if ($incremental) {
                # only look for deletions if we are truly incremental
                my @deleted_files = sort keys %{$block_hash->{'deleted_files'}};
                # Filter out any in gsdl/tmp area
                my @filtered_deleted_files = ();
                my $gsdl_tmp_area = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
                my $collect_tmp_area = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
                $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area);
                $collect_tmp_area = &util::filename_to_regex($collect_tmp_area);

                foreach my $df (@deleted_files) {
                    next if ($df =~ m/^$gsdl_tmp_area/);
                    next if ($df =~ m/^$collect_tmp_area/);

                    push(@filtered_deleted_files,$df);
                }

                @deleted_files = @filtered_deleted_files;

                if (scalar(@deleted_files)>0) {
                    print STDERR "Files deleted since last import:\n ";
                    print STDERR join("\n ",@deleted_files), "\n";

                    &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@deleted_files);

                    &inexport::mark_docs_for_deletion($archive_info,$block_hash,\@deleted_files, $archivedir,$verbosity, "delete");
                }

                my @reindex_files = sort keys %{$block_hash->{'reindex_files'}};

                if (scalar(@reindex_files)>0) {
                    print STDERR "Files to reindex since last import:\n ";
                    print STDERR join("\n ",@reindex_files), "\n";
                    &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@reindex_files);
                    &inexport::mark_docs_for_deletion($archive_info,$block_hash,\@reindex_files, $archivedir,$verbosity, "reindex");
                }
            }

            # Play it safe, and run through the entire folder, only processing new or edited files
            &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
        }
        else {
            &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
        }
    }
    else {
        # process any files marked for importing
        foreach my $file (keys %{$manifest_lookup->{'import'}}) {
            &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli);
        }

        my @deleted_files = keys %{$manifest_lookup->{'delete'}};

        # NOTE(review): unlike the calls in the incremental branch, no
        # verbosity or mode argument is passed here -- confirm that is
        # intended.
        &inexport::mark_docs_for_deletion($archive_info,{},\@deleted_files,$archivedir);
    }

    &plugin::end($pluginfo, $processor);

    &plugin::deinit($pluginfo, $processor);

    # Store the value of OIDCount (used in doc.pm) so it can be
    # restored correctly to this value on an incremental build
    &inexport::store_doc_oid_count($archivedir);

    # write out the archive information file
    $processor->close_file_output() if $groupsize > 1;
    $processor->close_group_output() if $processor->is_group();

    # export.pl wraps the following save in a check on $saveas (METS or
    # MARC only), because not all export types need it. It is unclear
    # whether the same should be done here, so it is unconditional.

    # should we still do this in debug mode??

    # for backwards compatibility with the archives.inf file
    # NOTE(review): '$' binds tighter than '|', so "contents" matches
    # anywhere in the path while ".inf" must be at its end -- confirm that
    # this is the intended test.
    if ($arcinfo_doc_filename =~ m/(contents)|(\.inf)$/) {
        $archive_info->save_info($arcinfo_doc_filename);
    }
    else {
        $archive_info->save_revinfo_db($arcinfo_src_filename);
    }

    # write out import stats
    my $close_stats = 0;
    if ($statsfile =~ /^(STDERR|STDOUT)$/i) {
        # As for -out: use the upper-case name of the standard handle.
        $statsfile = uc($statsfile);
    }
    elsif (open (STATS, '>', $statsfile)) {
        $statsfile = 'import::STATS';
        $close_stats = 1;
    }
    else {
        # Not fatal: report the problem and send the statistics to STDERR.
        &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile);
        &gsprintf($out, "{import.stats_backup}\n");
        $statsfile = 'STDERR';
    }

    &gsprintf($out, "\n");
    &gsprintf($out, "*********************************************\n");
    &gsprintf($out, "{import.complete}\n");
    &gsprintf($out, "*********************************************\n");

    &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli);
    if ($close_stats) {
        close STATS;
    }

    close OUT if $close_out;
    close FAILLOG;
}
Note: See TracBrowser for help on using the repository browser.