source: main/trunk/greenstone2/bin/script/import.pl@ 21564

Last change on this file since 21564 was 21564, checked in by mdewsnip, 14 years ago

Changed lots of occurrences of "GDBM" in comments, variable names and function names, where the code isn't GDBM-specific. Part of making the code less GDBM-specific.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 22.6 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
# Compile-time setup of the module search path.
#
# Requires GSDLHOME and GSDLOS to be set in the environment (dies otherwise),
# then prepends the Greenstone perllib directories to @INC.  If GSDLEXTS
# (extensions under GSDLHOME) or GSDL3EXTS (extensions under GSDL3SRCHOME)
# is set, the perllib directories of every colon-separated extension named
# there are prepended as well.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Each directory is unshifted in turn, so the last one listed ends up
    # first in @INC.
    foreach my $subdir ("perllib", "perllib/cpan", "perllib/cpan/perl-5.8",
                        "perllib/plugins", "perllib/plugouts") {
        unshift (@INC, "$ENV{'GSDLHOME'}/$subdir");
    }

    my @ext_subdirs = ("perllib", "perllib/cpan",
                       "perllib/plugins", "perllib/plugouts");

    # [variable listing the extensions, variable holding their base directory]
    foreach my $source (['GSDLEXTS', 'GSDLHOME'], ['GSDL3EXTS', 'GSDL3SRCHOME']) {
        my ($exts_var, $home_var) = @$source;
        next unless defined $ENV{$exts_var};

        # Without a base directory the prefix would collapse to "/ext/...",
        # putting root-relative directories on the module search path.
        if (!defined $ENV{$home_var}) {
            warn "$exts_var is set but $home_var is not; ignoring these extensions\n";
            next;
        }

        foreach my $e (split(/:/, $ENV{$exts_var})) {
            my $ext_prefix = "$ENV{$home_var}/ext/$e";

            foreach my $subdir (@ext_subdirs) {
                unshift (@INC, "$ext_prefix/$subdir");
            }
        }
    }
}
65
66use strict;
67no strict 'refs'; # allow filehandles to be variables and vice versa
68no strict 'subs'; # allow barewords (eg STDERR) as function arguments
69
70use arcinfo;
71use colcfg;
72use plugin;
73use plugout;
74use manifest;
75use inexport;
76use util;
77use scriptutil;
78use FileHandle;
79use gsprintf 'gsprintf';
80use printusage;
81use parse2;
82
83
84
# Permitted values for the -OIDtype option.  Each entry pairs the value's
# name with the resource-bundle key of its description.
my $oidtype_list = [
    map { +{ 'name' => $_, 'desc' => "{import.OIDtype.$_}" } }
        qw(hash assigned incremental dirname)
];
94
95
# Permitted values for the -saveas option, which selects the output file
# format.  Each entry pairs the value's name with the resource-bundle key of
# its description.
my $saveas_list = [
    {
        'name' => 'GreenstoneXML',
        'desc' => '{export.saveas.GreenstoneXML}',
    },
    {
        'name' => 'GreenstoneMETS',
        'desc' => '{export.saveas.GreenstoneMETS}',
    },
];
103
104
105# Possible attributes for each argument
106# name: The name of the argument
107# desc: A description (or more likely a reference to a description) for this argument
108# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, language, enum etc
109# reqd: Is this argument required?
110# hiddengli: Is this argument hidden in GLI?
111# modegli: The lowest detail mode this argument is visible at in GLI
112
# Description of the -saveas option: an enumeration over $saveas_list that
# defaults to GreenstoneXML.
my $saveas_argument = {
    'name'    => 'saveas',
    'desc'    => '{import.saveas}',
    'type'    => 'enum',
    'list'    => $saveas_list,
    'deft'    => 'GreenstoneXML',
    'reqd'    => 'no',
    'modegli' => '3',
};
121
122
# Every command-line option import.pl accepts, in the order they are listed
# in the usage output.  Options without a 'deft' entry deliberately have no
# default here: main() fills them in, in several cases from collect.cfg.
my $arguments = [
    $saveas_argument,
    {
        'name'      => 'archivedir',
        'desc'      => '{import.archivedir}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'importdir',
        'desc'      => '{import.importdir}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        # default is left empty on purpose rather than GSDLHOME/collect
        'name'      => 'collectdir',
        'desc'      => '{import.collectdir}',
        'type'      => 'string',
        'deft'      => '',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'site',
        'desc'      => '{import.site}',
        'type'      => 'string',
        'deft'      => '',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'manifest',
        'desc'      => '{import.manifest}',
        'type'      => 'string',
        'deft'      => '',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'debug',
        'desc'      => '{import.debug}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        # default is left empty on purpose; main() falls back to
        # etc/fail.log inside the collection directory
        'name'    => 'faillog',
        'desc'    => '{import.faillog}',
        'type'    => 'string',
        'deft'    => '',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {
        'name'      => 'incremental',
        'desc'      => '{import.incremental}',
        'type'      => 'flag',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'keepold',
        'desc'      => '{import.keepold}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'removeold',
        'desc'      => '{import.removeold}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'language',
        'desc'      => '{scripts.language}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        # no default here; main() uses -1 when nothing else supplies a value
        'name'    => 'maxdocs',
        'desc'    => '{import.maxdocs}',
        'type'    => 'int',
        'reqd'    => 'no',
        'range'   => '1,',
        'modegli' => '1',
    },
    {
        # no default of "hash" here: the value may come from collect.cfg,
        # with the command line taking precedence over it
        'name'    => 'OIDtype',
        'desc'    => '{import.OIDtype}',
        'type'    => 'enum',
        'list'    => $oidtype_list,
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {
        # typed as a plain string because the "metadata" type doesn't work
        # properly in GLI; main() defaults it to dc.Identifier
        'name'    => 'OIDmetadata',
        'desc'    => '{import.OIDmetadata}',
        'type'    => 'string',
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {
        'name'      => 'out',
        'desc'      => '{import.out}',
        'type'      => 'string',
        'deft'      => 'STDERR',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        # typed as a plain string because the "metadata" type doesn't work
        # properly in GLI
        'name'    => 'sortmeta',
        'desc'    => '{import.sortmeta}',
        'type'    => 'string',
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {
        'name'    => 'removeprefix',
        'desc'    => '{BasClas.removeprefix}',
        'type'    => 'regexp',
        'deft'    => '',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {
        'name'    => 'removesuffix',
        'desc'    => '{BasClas.removesuffix}',
        'type'    => 'regexp',
        'deft'    => '',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {
        'name'    => 'groupsize',
        'desc'    => '{import.groupsize}',
        'type'    => 'int',
        'deft'    => '1',
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {
        'name'    => 'gzip',
        'desc'    => '{import.gzip}',
        'type'    => 'flag',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {
        'name'      => 'statsfile',
        'desc'      => '{import.statsfile}',
        'type'      => 'string',
        'deft'      => 'STDERR',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        # no default here; main() uses 2 when nothing else supplies a value
        'name'    => 'verbosity',
        'desc'    => '{import.verbosity}',
        'type'    => 'int',
        'range'   => '0,',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {
        'name'      => 'gli',
        'desc'      => '{scripts.gli}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'xml',
        'desc'      => '{scripts.xml}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
];
273
# Top-level description of this script, handed to the usage printers.
my $options = {
    'name' => 'import.pl',
    'desc' => '{import.desc}',
    'args' => $arguments,
};


# Run the import.
main();
280
# Runs a complete import of one collection.
#
# Takes no parameters; everything is read from @ARGV (options described by
# $arguments, followed by the collection name) and from the collection's
# configuration file.  In outline:
#   1. parse the command line and print usage (text or XML) where asked for;
#   2. open the output and fail-log handles;
#   3. read collect.cfg and use it to fill in any option not given on the
#      command line;
#   4. load the plugins and the plugout, clearing the old archives first if
#      -removeold is in effect;
#   5. process either the whole import directory (optionally incrementally)
#      or just the files named in a manifest;
#   6. save the archive information and write out the import statistics.
# Dies (after printing a message) when the arguments are unusable, when a
# required file or directory cannot be opened, or when no plugins load.
sub main {
    # params
    my ($language, $verbosity, $debug,
        $collectdir, $importdir, $archivedir, $site, $manifest,
        $incremental, $incremental_mode, $keepold, $removeold,
        $saveas,
        $OIDtype, $OIDmetadata,
        $maxdocs, $statsfile,
        $out, $faillog, $gli,
        $gzip, $groupsize,
        $sortmeta, $removeprefix, $removesuffix
        );

    my $xml = 0;

    # other vars
    my ($configfilename, $collection, $collectcfg,
        $arcinfo_doc_filename, $arcinfo_src_filename, $archive_info,
        $gs_mode,
        $processor, $pluginfo);

    my $hashParsingResult = {};
    # general options available to all plugins
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");
    # Parse returns -1 if something has gone wrong
    if ($intArgLeftinAfterParsing == -1)
    {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # Copy each parsed option into the lexical of the same name.  An explicit
    # table of references is used rather than a string eval, so that only the
    # variables listed here can ever be assigned to; any other key is ignored.
    my %option_target = (
        'language'         => \$language,
        'verbosity'        => \$verbosity,
        'debug'            => \$debug,
        'collectdir'       => \$collectdir,
        'importdir'        => \$importdir,
        'archivedir'       => \$archivedir,
        'site'             => \$site,
        'manifest'         => \$manifest,
        'incremental'      => \$incremental,
        'incremental_mode' => \$incremental_mode,
        'keepold'          => \$keepold,
        'removeold'        => \$removeold,
        'saveas'           => \$saveas,
        'OIDtype'          => \$OIDtype,
        'OIDmetadata'      => \$OIDmetadata,
        'maxdocs'          => \$maxdocs,
        'statsfile'        => \$statsfile,
        'out'              => \$out,
        'faillog'          => \$faillog,
        'gli'              => \$gli,
        'gzip'             => \$gzip,
        'groupsize'        => \$groupsize,
        'sortmeta'         => \$sortmeta,
        'removeprefix'     => \$removeprefix,
        'removesuffix'     => \$removesuffix,
        'xml'              => \$xml,
        );
    foreach my $strVariable (keys %$hashParsingResult)
    {
        my $target = $option_target{$strVariable};
        next unless defined $target;
        $$target = $hashParsingResult->{$strVariable};
    }

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8;
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    # Or if the user specified -h, then we output the usage also
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/))
    {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # Anything other than STDERR/STDOUT is treated as a file name.  The handle
    # is kept as a package-qualified name (a string) because that is the form
    # handed on to the plugins and the plugout below.
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        # three-argument open: the file name can never be read as a mode
        if (!open (OUT, '>', $out)) {
            &gsprintf(STDERR, "{common.cannot_open_output_file}: $!\n", $out);
            # die regardless of what gsprintf returned
            die;
        }
        $out = 'import::OUT';
        $close_out = 1;
    }
    $out->autoflush(1);

    # get and check the collection name
    if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # add collection's perllib dir into include path in
    # case we have collection specific modules
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");

    # check that we can open the faillog
    if ($faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    if (!open (FAILLOG, '>', $faillog)) {
        &gsprintf(STDERR, "{import.cannot_open_fail_log}\n", $faillog);
        # die regardless of what gsprintf returned
        die;
    }

    # remember the file name for the stats report; from here on $faillog
    # holds the name of the handle instead
    my $faillogname = $faillog;
    $faillog = 'import::FAILLOG';
    $faillog->autoflush(1);

    # Read in the collection configuration file.
    ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out);
    $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode);

    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    # fill in the default import and archives directories if none
    # were supplied, turn all \ into / and remove trailing /
    $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq "";
    $importdir =~ s/[\\\/]+/\//g;
    $importdir =~ s/\/$//;
    if (!-e $importdir) {
        &gsprintf($out, "{import.no_import_dir}\n\n", $importdir);
        die "\n";
    }

    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;

    my $plugins = [];
    if (defined $collectcfg->{'plugin'}) {
        $plugins = $collectcfg->{'plugin'};
    }
    #some global options for the plugins
    my @global_opts = ();

    # For each of the following options the command line wins; otherwise the
    # value comes from collect.cfg, and failing that from a built-in default.
    if ($verbosity !~ /\d+/) {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
            $verbosity = $collectcfg->{'verbosity'};
        } else {
            $verbosity = 2; # the default
        }
    }
    if (defined $collectcfg->{'manifest'} && $manifest eq "") {
        $manifest = $collectcfg->{'manifest'};
    }

    if (defined $collectcfg->{'gzip'} && !$gzip) {
        if ($collectcfg->{'gzip'} =~ /^true$/i) {
            $gzip = 1;
        }
    }

    if ($maxdocs !~ /\-?\d+/) {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
            $maxdocs = $collectcfg->{'maxdocs'};
        } else {
            $maxdocs = -1; # the default
        }
    }
    # a groupsize of 1 is the command-line default, so it is taken to mean
    # "not specified"
    if ($groupsize == 1) {
        if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) {
            $groupsize = $collectcfg->{'groupsize'};
        }
    }

    if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/ )) {
        if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) {
            $OIDtype = $collectcfg->{'OIDtype'};
        } else {
            $OIDtype = "hash"; # the default
        }
    }

    if ((!defined $OIDmetadata) || ($OIDmetadata eq "")) {
        if (defined $collectcfg->{'OIDmetadata'}) {
            $OIDmetadata = $collectcfg->{'OIDmetadata'};
        } else {
            $OIDmetadata = "dc.Identifier"; # the default
        }
    }

    if (defined $collectcfg->{'sortmeta'} && (!defined $sortmeta || $sortmeta eq "")) {
        $sortmeta = $collectcfg->{'sortmeta'};
    }
    # sortmeta cannot be used with group size
    $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
    if (defined $sortmeta && $groupsize > 1) {
        &gsprintf($out, "{import.cannot_sort}\n\n");
        $sortmeta = undef;
    }

    if (defined $collectcfg->{'removeprefix'} && $removeprefix eq "") {
        $removeprefix = $collectcfg->{'removeprefix'};
    }

    if (defined $collectcfg->{'removesuffix'} && $removesuffix eq "") {
        $removesuffix = $collectcfg->{'removesuffix'};
    }
    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
        $debug = 1;
    }
    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
        $gli = 1;
    }
    $gli = 0 unless defined $gli;

    # check keepold and removeold
    ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
                                                   $incremental, "archives",
                                                   $collectcfg);


    print STDERR "<Import>\n" if $gli;

    my $manifest_lookup = manifest->new();
    if ($manifest ne "") {
        my $manifest_filename = $manifest;

        # a relative manifest name is taken to be inside the collection directory
        if ($manifest_filename !~ m/^[\\\/]/) {
            $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename);
        }

        $manifest =~ s/[\\\/]+/\//g;
        $manifest =~ s/\/$//;

        $manifest_lookup->parse($manifest_filename);
    }


    # load all the plugins
    $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $incremental_mode);
    if (scalar(@$pluginfo) == 0) {
        &gsprintf($out, "{import.no_plugins_loaded}\n");
        die "\n";
    }

    # remove the old contents of the archives directory (and tmp directory) if needed
    if ($removeold) {
        if (-e $archivedir) {
            &gsprintf($out, "{import.removing_archives}\n");
            &util::rm_r ($archivedir);
        }
        my $tmpdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "tmp");
        $tmpdir =~ s/[\\\/]+/\//g;
        $tmpdir =~ s/\/$//;
        if (-e $tmpdir) {
            &gsprintf($out, "{import.removing_tmpdir}\n");
            &util::rm_r ($tmpdir);
        }
    }

    # create the archives dir if needed
    &util::mk_all_dir($archivedir);

    # read the archive information file
##  $arcinfo_doc_filename = &util::filename_cat ($archivedir, "archives.inf");

    $arcinfo_doc_filename = &util::filename_cat ($archivedir, "archiveinf-doc");
    &util::rename_gdbm_file($arcinfo_doc_filename); # ensures gdb
    $arcinfo_doc_filename .= ".gdb";

    $arcinfo_src_filename = &util::filename_cat ($archivedir, "archiveinf-src");
    &util::rename_gdbm_file($arcinfo_src_filename); # ensures gdb
    $arcinfo_src_filename .= ".gdb";


    $archive_info = arcinfo->new();
    $archive_info->load_info ($arcinfo_doc_filename);

    if ($manifest eq "") {
        # Load in list of files in import folder from last import (if present)
        $archive_info->load_prev_import_filelist ($arcinfo_src_filename);
    }

    ####Use Plugout####
    my ($plugout);
    if (defined $collectcfg->{'plugout'}) {
        # If a plugout was specified in the collect.cfg file, assume it is sensible
        # We can't check the name because it could be anything, if it is a custom plugout
        $plugout = $collectcfg->{'plugout'};
    }
    else {
        # otherwise derive the plugout from -saveas, falling back to
        # GreenstoneXML for any value that is not recognised
        if ($saveas !~ /^(GreenstoneXML|GreenstoneMETS)$/) {
            push @$plugout,"GreenstoneXMLPlugout";
        }
        else {
            push @$plugout,$saveas."Plugout";
        }
    }

    push @$plugout,("-output_info",$archive_info) if (defined $archive_info);
    push @$plugout,("-verbosity",$verbosity) if (defined $verbosity);
    push @$plugout,("-gzip_output") if ($gzip);
    push @$plugout,("-group_size",$groupsize) if (defined $groupsize);
    push @$plugout,("-output_handle",$out) if (defined $out);
    push @$plugout,("-debug") if ($debug);

    $processor = &plugout::load_plugout($plugout);
    $processor->setoutputdir ($archivedir);
    $processor->set_sortmeta ($sortmeta, $removeprefix, $removesuffix) if defined $sortmeta;
    $processor->set_OIDtype ($OIDtype, $OIDmetadata);

    &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli);

    if ($removeold) {
        # occasionally, plugins may want to do something on remove old, eg pharos image indexing
        &plugin::remove_all($pluginfo, $importdir, $processor, $maxdocs, $gli);
    }
    if ($manifest eq "") {
        # process the import directory
        my $block_hash = {};
        my $metadata = {};
        # global blocking pass may set up some metadata
        &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli);


        if ($incremental || $incremental_mode eq "onlyadd") {

            &inexport::prime_doc_oid_count($archivedir);


            # Can now work out which files were new, already existed, and have
            # been deleted

            &inexport::new_vs_old_import_diff($archive_info,$block_hash,$importdir,
                                              $archivedir,$verbosity,$incremental_mode);

            my @new_files = sort keys %{$block_hash->{'new_files'}};
            if (scalar(@new_files) > 0) {
                print STDERR "New files and modified metadata files since last import:\n ";
                print STDERR join("\n ",@new_files), "\n";
            }

            if ($incremental) {
                # only look for deletions if we are truly incremental
                my @deleted_files = sort keys %{$block_hash->{'deleted_files'}};
                # Filter out any in gsdl/tmp area
                my @filtered_deleted_files = ();
                my $gsdl_tmp_area = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
                my $collect_tmp_area = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
                $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area);
                $collect_tmp_area = &util::filename_to_regex($collect_tmp_area);


                foreach my $df (@deleted_files) {
                    next if ($df =~ m/^$gsdl_tmp_area/);
                    next if ($df =~ m/^$collect_tmp_area/);

                    push(@filtered_deleted_files,$df);
                }


                @deleted_files = @filtered_deleted_files;

                if (scalar(@deleted_files)>0) {
                    print STDERR "Files deleted since last import:\n ";
                    print STDERR join("\n ",@deleted_files), "\n";


                    &plugin::remove_some($pluginfo, $archivedir, \@deleted_files);

                    &inexport::mark_docs_for_deletion($archive_info,$block_hash,\@deleted_files, $archivedir,$verbosity, "delete");
                }

                my @reindex_files = sort keys %{$block_hash->{'reindex_files'}};

                if (scalar(@reindex_files)>0) {
                    print STDERR "Files to reindex since last import:\n ";
                    print STDERR join("\n ",@reindex_files), "\n";
                    &plugin::remove_some($pluginfo, $archivedir, \@reindex_files);
                    &inexport::mark_docs_for_deletion($archive_info,$block_hash,\@reindex_files, $archivedir,$verbosity, "reindex");
                }

            }

            # Play it safe, and run through the entire folder, only processing new or edited files
            &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);

        }
        else {
            &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
        }

    }
    else
    {
        # process any files marked for importing
        foreach my $file (keys %{$manifest_lookup->{'import'}}) {
            &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli);
        }

        my @deleted_files = keys %{$manifest_lookup->{'delete'}};

        # NOTE(review): the incremental branch above passes $verbosity and a
        # mode string as fifth and sixth arguments; confirm that omitting
        # them here is intended.
        &inexport::mark_docs_for_deletion($archive_info,{},\@deleted_files,$archivedir);
    }

    &plugin::end($pluginfo, $processor);

    &plugin::deinit($pluginfo, $processor);

    # Store the value of OIDCount (used in doc.pm) so it can be
    # restored correctly to this value on an incremental build
    &inexport::store_doc_oid_count($archivedir);

    # write out the archive information file
    $processor->close_file_output() if $groupsize > 1;
    $processor->close_group_output() if $processor->is_group();

# The following 'if' statement is in the export.pl version of the script,
# The reason for the 'if' statement is now given in export.pl
# Unclear at this point if the same should be done here
##  if (($saveas =~ m/^.*METS$/) || ($saveas eq "MARC")) {
    # Not all export types need this (e.g. DSpace)

    # should we still do this in debug mode??

    # for backwards compatibility with archives.inf file
    if ($arcinfo_doc_filename =~ m/(contents)|(\.inf)$/) {
        $archive_info->save_info($arcinfo_doc_filename);
    }
    else {
        $archive_info->save_revinfo_db($arcinfo_src_filename);
    }


##  }

    # write out import stats
    my $close_stats = 0;
    if ($statsfile !~ /^(STDERR|STDOUT)$/i) {
        # three-argument open: the file name can never be read as a mode
        if (open (STATS, '>', $statsfile)) {
            $statsfile = 'import::STATS';
            $close_stats = 1;
        } else {
            # fall back to STDERR rather than losing the statistics
            &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile);
            &gsprintf($out, "{import.stats_backup}\n");
            $statsfile = 'STDERR';
        }
    }

    &gsprintf($out, "\n");
    &gsprintf($out, "*********************************************\n");
    &gsprintf($out, "{import.complete}\n");
    &gsprintf($out, "*********************************************\n");

    &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli);
    if ($close_stats) {
        close STATS;
    }

    close OUT if $close_out;
    close FAILLOG;
}
Note: See TracBrowser for help on using the repository browser.