source: main/trunk/greenstone2/perllib/inexport.pm@ 26658

Last change on this file since 26658 was 26567, checked in by ak19, 12 years ago

When a GS2 collection contains both collect.cfg and collectionConfig.xml (as advanced beatles does) the old code used to end up reading in the GS3 collectionConfig.xml instead of the GS2 collect.cfg and set the GS_mode to GS3. Now colcfg::get_collect_cfg_name takes the gs_mode (instead of determining this and returning it) and works out the collectcfg file name for the gs_mode. That means that the calling functions now need to work out the gs_mode. They do so by setting the gs_mode to gs3 if the site flag is present in the commandline, if not then it defaults to gs2. So from now on, the site flag must be specified for GS3 collections.

  • Property svn:executable set to *
File size: 38.7 KB
RevLine 
[18457]1###########################################################################
2#
[22413]3# inexport.pm -- useful class to support import.pl and export.pl
[18457]4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package inexport;
27
28use strict;
29
[22413]30no strict 'refs'; # allow filehandles to be variables and vice versa
31no strict 'subs'; # allow barewords (eg STDERR) as function arguments
[19789]32
[22413]33use arcinfo;
34use colcfg;
[21553]35use dbutil;
[22464]36use doc;
[22413]37use plugin;
38use plugout;
39use manifest;
40use inexport;
[18457]41use util;
[22413]42use scriptutil;
43use FileHandle;
44use gsprintf 'gsprintf';
45use printusage;
46use parse2;
[18457]47
[22413]48use File::Basename;
[21563]49
# Constructor used by import.pl / export.pl.
#
# Parameters:
#   $mode                - "import" or "export"; stored in the object and used
#                          later to pick defaults (archives vs export dir, etc.)
#   $argv                - ref to the remaining command-line arguments
#   $options             - option spec hash whose {'args'} entry is handed to parse2
#   $opt_listall_options - option spec used only when -listall was given
#
# Side effects: may print usage text and die; may open the -out file on a
# bareword handle (see below); loads ParallelInexport when -jobs > 1.
# Returns: the blessed inexport object.
sub new
{
    my $class = shift (@_);
    my ($mode,$argv,$options,$opt_listall_options) = @_;

    my $self = { 'xml' => 0, 'mode' => $mode };

    # general options available to all plugins
    # parse2::parse fills $self in-place with the recognized options and
    # returns how many non-option arguments are left over
    my $arguments = $options->{'args'};
    my $intArgLeftinAfterParsing = parse2::parse($argv,$arguments,$self,"allow_extra_options");
    # Parse returns -1 if something has gone wrong
    if ($intArgLeftinAfterParsing == -1)
    {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    my $language = $self->{'language'};
    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    # -listall: dump every available option (xml or text form) and stop
    if ($self->{'listall'}) {
        if ($self->{'xml'}) {
            &PrintUsage::print_xml_usage($opt_listall_options);
        }
        else
        {
            &PrintUsage::print_txt_usage($opt_listall_options,"{export.params}");
        }
        die "\n";
    }


    # -xml: print machine-readable usage; unlike -listall we still return an
    # object (callers such as GLI expect a constructed object here)
    if ($self->{'xml'}) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return bless $self, $class;
    }

    if ($self->{'gli'}) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8;
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    # Or if the user specified -h, then we output the usage also

    if ($intArgLeftinAfterParsing != 1 || (@$argv && $argv->[0] =~ /^\-+h/))
    {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # Redirect output to the -out file unless it names STDERR/STDOUT.
    # NOTE: the handle is deliberately the package-global bareword OUT,
    # referred to by its fully-qualified name string below; generate_statistics
    # later does "close OUT" when close_out is set, so this pattern must not
    # be changed in isolation.
    $self->{'close_out'} = 0;
    my $out = $self->{'out'};
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        open (OUT, ">$out") ||
            (&gsprintf(STDERR, "{common.cannot_open_output_file}: $!\n", $out) && die);
        $out = 'inexport::OUT';
        $self->{'close_out'} = 1;
    }
    # autoflush on a handle-name string works via FileHandle's method dispatch
    $out->autoflush(1);
    $self->{'out'} = $out;

    # @ARGV should be only one item, the name of the collection
    $self->{'collection'} = shift @$argv;

    # MPI-based parallel import helper is only pulled in when actually needed
    if ((defined $self->{'jobs'}) && ($self->{'jobs'}>1)) {
        require ParallelInexport;
    }

    return bless $self, $class;
}
127
# Simplified constructor for use from CGI scripts: no command-line parsing,
# output fixed to STDERR, no fail log. $gsdl_cgi (optional) supplies the
# collect directory for the given $opt_site; without it a stand-alone GS2
# layout under $GSDLHOME/collect is assumed.
sub newCGI
{
    my $class = shift(@_);
    my ($mode, $collect, $gsdl_cgi, $opt_site) = @_;

    my $self = bless { 'xml' => 0, 'mode' => $mode }, $class;

    $self->{'out'} = STDERR;

    if (defined $gsdl_cgi) {
        # Running under the CGI wrapper: ask it where the collect area lives
        $self->{'site'}       = $opt_site;
        $self->{'collectdir'} = $gsdl_cgi->get_collection_dir($opt_site);
    }
    else {
        # Stand-alone layout: collections live under $GSDLHOME/collect
        $self->{'site'}       = "";
        $self->{'collectdir'} = &util::filename_cat($ENV{'GSDLHOME'}, "collect");
    }

    $self->{'faillog'}    = "";
    $self->{'collection'} = $collect;

    return $self;
}
# Accessor: name of the collection this object was constructed for.
sub get_collection
{
    my ($self) = @_;

    return $self->{'collection'};
}
159
160
# Locate and parse the collection's configuration file.
#
# Parameters:
#   $collection - collection name (as given on the command line)
#   $options    - option spec, used only to print usage on failure
#
# Side effects: sets $self->{'gs_version'}; prepends the collection's perllib
# to @INC; opens the fail log on the package-global FAILLOG handle and stores
# its name/handle-string in $self; dies on a bad collection name or fail log.
# Returns: ($config_filename, $collectcfg) - path of collect.cfg /
# collectionConfig.xml and the parsed config hash.
sub read_collection_cfg
{
    my $self = shift @_;
    my ($collection,$options) = @_;

    my $collectdir = $self->{'collectdir'};
    my $site = $self->{'site'};
    my $out = $self->{'out'};

    # use_collection validates the name and sets up $ENV{'GSDLCOLLECTDIR'};
    # an empty return means the collection could not be found
    if (($collection = &colcfg::use_collection($site, $collection, $collectdir)) eq "") {
        &PrintUsage::print_txt_usage($options, "{import.params}");
        die "\n";
    }

    # set gs_version 2/3
    # A non-empty -site flag is what marks a GS3 collection; without it we
    # default to GS2 (so GS3 collections must always be run with -site)
    $self->{'gs_version'} = "2";
    if ((defined $site) && ($site ne "")) {
        # gs3
        $self->{'gs_version'} = "3";
    }
    # add collection's perllib dir into include path in
    # case we have collection specific modules
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");

    # check that we can open the faillog
    my $faillog = $self->{'faillog'};
    if ($faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    # NOTE: FAILLOG is a package-global bareword handle, closed later in
    # generate_statistics; $faillog is replaced by the fully-qualified
    # handle-name string so it can be passed around and autoflushed
    open (FAILLOG, ">$faillog") ||
        (&gsprintf(STDERR, "{import.cannot_open_fail_log}\n", $faillog) && die);


    my $faillogname = $faillog;
    $faillog = 'inexport::FAILLOG';
    $faillog->autoflush(1);
    $self->{'faillog'} = $faillog;
    $self->{'faillogname'} = $faillogname;

    # Read in the collection configuration file.
    # get_collect_cfg_name picks collect.cfg (gs2) or collectionConfig.xml (gs3)
    my $gs_mode = "gs".$self->{'gs_version'}; #gs2 or gs3
    my $config_filename = &colcfg::get_collect_cfg_name($out, $gs_mode);
    my $collectcfg = &colcfg::read_collection_cfg ($config_filename, $gs_mode);

    return ($config_filename,$collectcfg);
}
207
# Merge command-line options (already stored in $self) with settings from the
# collection configuration ($collectcfg), applying built-in defaults last.
# Precedence throughout: command line > collect.cfg > hard-coded default.
#
# Side effects: normalizes and stores importdir/archivedir (dies if the import
# directory does not exist); fills in verbosity, manifest, gzip, maxdocs,
# groupsize, OIDtype, OIDmetadata, sortmeta, removeprefix/suffix, debug, gli;
# resolves removeold/keepold/incremental via scriptutil.
sub set_collection_options
{
    my $self = shift @_;
    my ($collectcfg) = @_;

    my $inexport_mode = $self->{'mode'};

    my $verbosity = $self->{'verbosity'};
    my $debug = $self->{'debug'};
    my $importdir = $self->{'importdir'};
    # export.pl callers use -exportdir rather than -archivedir
    my $archivedir = $self->{'archivedir'} || $self->{'exportdir'} || "";
    my $out = $self->{'out'};

    # If the infodbtype value wasn't defined in the collect.cfg file, use the default
    if (!defined($collectcfg->{'infodbtype'}))
    {
        $collectcfg->{'infodbtype'} = &dbutil::get_default_infodb_type();
    }
    if ($collectcfg->{'infodbtype'} eq "gdbm-txtgz") {
        # we can't use the text version for archives dbs.
        $collectcfg->{'infodbtype'} = "gdbm";
    }

    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    # fill in the default import and archives directories if none
    # were supplied, turn all \ into / and remove trailing /
    $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq "";
    $importdir =~ s/[\\\/]+/\//g;
    $importdir =~ s/\/$//;
    if (!-e $importdir) {
        &gsprintf($out, "{import.no_import_dir}\n\n", $importdir);
        die "\n";
    }
    $self->{'importdir'} = $importdir;

    # default output directory depends on whether we are importing or exporting
    if ($archivedir eq "") {
        if ($inexport_mode eq "import") {
            $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives");
        }
        elsif ($inexport_mode eq "export") {
            $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export");
        }
        else {
            print STDERR "Warning: Unrecognized import/export mode '$inexport_mode'\n";
            print STDERR " Defaulting to 'archives' for file output\n";
            $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives");
        }
    }

    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;
    $self->{'archivedir'} = $archivedir;

    if ($verbosity !~ /\d+/) {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
            $verbosity = $collectcfg->{'verbosity'};
        } else {
            $verbosity = 2; # the default
        }
    }
    $self->{'verbosity'} = $verbosity;

    if (defined $collectcfg->{'manifest'} && $self->{'manifest'} eq "") {
        $self->{'manifest'} = $collectcfg->{'manifest'};
    }

    if (defined $collectcfg->{'gzip'} && !$self->{'gzip'}) {
        if ($collectcfg->{'gzip'} =~ /^true$/i) {
            $self->{'gzip'} = 1;
        }
    }

    if ($self->{'maxdocs'} !~ /\-?\d+/) {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
            $self->{'maxdocs'} = $collectcfg->{'maxdocs'};
        } else {
            $self->{'maxdocs'} = -1; # the default
        }
    }

    # groupsize of 1 is the command-line default, so only then may the
    # collect.cfg value override it
    if ((defined $self->{'groupsize'}) && ($self->{'groupsize'} == 1)) {
        if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) {
            $self->{'groupsize'} = $collectcfg->{'groupsize'};
        }
    }

    if (!defined $self->{'OIDtype'}
        || ($self->{'OIDtype'} !~ /^(hash|hash_on_full_filename|incremental|assigned|dirname|full_filename)$/ )) {
        # OIDtype was either not defined on the command-line, or if it was not one of the recognized values
        if (defined $collectcfg->{'OIDtype'}
            && $collectcfg->{'OIDtype'} =~ /^(hash|hash_on_full_filename|incremental|assigned|dirname|full_filename)$/) {
            $self->{'OIDtype'} = $collectcfg->{'OIDtype'};
        } else {
            $self->{'OIDtype'} = "hash"; # the default
        }
    }

    if ((!defined $self->{'OIDmetadata'}) || ($self->{'OIDmetadata'} eq "")) {
        if (defined $collectcfg->{'OIDmetadata'}) {
            $self->{'OIDmetadata'} = $collectcfg->{'OIDmetadata'};
        } else {
            $self->{'OIDmetadata'} = "dc.Identifier"; # the default
        }
    }

    my $sortmeta = $self->{'sortmeta'};
    if (defined $collectcfg->{'sortmeta'} && (!defined $sortmeta || $sortmeta eq "")) {
        $sortmeta = $collectcfg->{'sortmeta'};
    }
    # sortmeta cannot be used with group size
    $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
    if (defined $sortmeta && $self->{'groupsize'} > 1) {
        &gsprintf($out, "{import.cannot_sort}\n\n");
        $sortmeta = undef;
    }
    $self->{'sortmeta'} = $sortmeta;

    if (defined $collectcfg->{'removeprefix'} && $self->{'removeprefix'} eq "") {
        $self->{'removeprefix'} = $collectcfg->{'removeprefix'};
    }

    if (defined $collectcfg->{'removesuffix'} && $self->{'removesuffix'} eq "") {
        $self->{'removesuffix'} = $collectcfg->{'removesuffix'};
    }
    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
        $self->{'debug'} = 1;
    }
    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
        $self->{'gli'} = 1;
    }
    $self->{'gli'} = 0 unless defined $self->{'gli'};

    # check keepold and removeold
    # the directory whose existence decides incremental behavior differs by mode
    my $checkdir = ($inexport_mode eq "import") ? "archives" : "export";

    my ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($self->{'removeold'}, $self->{'keepold'},
                                                   $self->{'incremental'}, $checkdir,
                                                   $collectcfg);

    $self->{'removeold'} = $removeold;
    $self->{'keepold'} = $keepold;
    $self->{'incremental'} = $incremental;
    $self->{'incremental_mode'} = $incremental_mode;
}
358
# Main work-horse: drive the whole import/export of a collection.
#
# Parameters:
#   $config_filename - path of the collection config (from read_collection_cfg)
#   $collectcfg      - parsed collection configuration hash
#
# Broad phases (in order):
#   1. load plugins; optionally wipe old archives/tmp dirs (-removeold)
#   2. load the archive-info databases (what was imported last time)
#   3. choose and configure a plugout for the output format
#   4. work out what to delete / reindex / add - either from a manifest file
#      or by diffing the import directory against the previous import
#   5. run the plugins over the import directory (optionally in parallel)
#   6. finalize: plugin end/deinit, persist OID counter and archive info
#
# Returns: $pluginfo (the loaded plugin list) for generate_statistics.
sub process_files
{
    my $self = shift @_;
    my ($config_filename,$collectcfg) = @_;

    my $inexport_mode = $self->{'mode'};

    my $verbosity = $self->{'verbosity'};
    my $debug = $self->{'debug'};

    my $importdir = $self->{'importdir'};
    my $archivedir = $self->{'archivedir'} || $self->{'exportdir'};

    my $incremental = $self->{'incremental'};
    my $incremental_mode = $self->{'incremental_mode'};

    my $gs_version = $self->{'gs_version'};

    my $removeold = $self->{'removeold'};
    my $keepold = $self->{'keepold'};

    my $saveas = $self->{'saveas'};
    my $OIDtype = $self->{'OIDtype'};
    my $OIDmetadata = $self->{'OIDmetadata'};

    my $out = $self->{'out'};
    my $faillog = $self->{'faillog'};

    my $maxdocs = $self->{'maxdocs'};
    my $gzip = $self->{'gzip'};
    my $groupsize = $self->{'groupsize'};
    my $sortmeta = $self->{'sortmeta'};

    my $removeprefix = $self->{'removeprefix'};
    my $removesuffix = $self->{'removesuffix'};

    my $gli = $self->{'gli'};

    my $jobs = $self->{'jobs'};
    my $epoch = $self->{'epoch'};

    # related to export
    my $xsltfile = $self->{'xsltfile'};
    my $group_marc = $self->{'group_marc'};
    my $mapping_file = $self->{'mapping_file'};
    my $xslt_mets = $self->{'xslt_mets'};
    my $xslt_txt = $self->{'xslt_txt'};
    my $fedora_namespace = $self->{'fedora_namespace'};
    my $metadata_prefix = $self->{'metadata_prefix'};

    # progress markers parsed by the GLI front end
    if ($inexport_mode eq "import") {
        print STDERR "<Import>\n" if $gli;
    }
    else {
        print STDERR "<export>\n" if $gli;
    }

    # A manifest file explicitly lists what to delete/reindex/index,
    # bypassing the directory diff below
    my $manifest_lookup = new manifest($collectcfg->{'infodbtype'},$archivedir);
    if ($self->{'manifest'} ne "") {
        my $manifest_filename = $self->{'manifest'};

        if (!&util::filename_is_absolute($manifest_filename)) {
            $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename);
        }

        $self->{'manifest'} =~ s/[\\\/]+/\//g;
        $self->{'manifest'} =~ s/\/$//;

        $manifest_lookup->parse($manifest_filename);
    }

    my $manifest = $self->{'manifest'};

    # load all the plugins
    my $plugins = [];
    if (defined $collectcfg->{'plugin'}) {
        $plugins = $collectcfg->{'plugin'};
    }

    my $plugin_incr_mode = $incremental_mode;
    if ($manifest ne "") {
        # if we have a manifest file, then we pretend we are fully incremental for plugins
        $plugin_incr_mode = "all";
    }
    #some global options for the plugins
    my @global_opts = ();

    my $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $plugin_incr_mode, $gs_version);
    if (scalar(@$pluginfo) == 0) {
        &gsprintf($out, "{import.no_plugins_loaded}\n");
        die "\n";
    }

    # remove the old contents of the archives directory (and tmp
    # directory) if needed

    if ($removeold) {
        if (-e $archivedir) {
            &gsprintf($out, "{import.removing_archives}\n");
            &util::rm_r ($archivedir);
        }
        my $tmpdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "tmp");
        $tmpdir =~ s/[\\\/]+/\//g;
        $tmpdir =~ s/\/$//;
        if (-e $tmpdir) {
            &gsprintf($out, "{import.removing_tmpdir}\n");
            &util::rm_r ($tmpdir);
        }
    }

    # create the archives dir if needed
    &util::mk_all_dir($archivedir);

    # read the archive information file

    # BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files (won't do anything for other infodbtypes)
    &util::rename_ldb_or_bdb_file(&util::filename_cat($archivedir, "archiveinf-doc"));
    &util::rename_ldb_or_bdb_file(&util::filename_cat($archivedir, "archiveinf-src"));

    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-doc", $archivedir);
    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-src", $archivedir);

    my $archive_info = new arcinfo ($collectcfg->{'infodbtype'});
    $archive_info->load_info ($arcinfo_doc_filename);

    if ($manifest eq "") {
        # Load in list of files in import folder from last import (if present)
        $archive_info->load_prev_import_filelist ($arcinfo_src_filename);
    }

    ####Use Plugout####
    # Decide which plugout (output format writer) to use: an explicit
    # collect.cfg setting wins; otherwise derive it from -saveas, falling
    # back to GreenstoneXML (import) / GreenstoneMETS (export)
    my $plugout;

    if ($inexport_mode eq "import") {
        if (defined $collectcfg->{'plugout'}) {
            # If a plugout was specified in the collect.cfg file, assume it is sensible
            # We can't check the name because it could be anything, if it is a custom plugout
            $plugout = $collectcfg->{'plugout'};
        }
        else{
            if ($saveas !~ /^(GreenstoneXML|GreenstoneMETS)$/) {
                push @$plugout,"GreenstoneXMLPlugout";
            }
            else{
                push @$plugout,$saveas."Plugout";
            }
        }
    }
    else {
        # export mode: only accept a configured plugout whose name looks like
        # one of the known export formats
        if (defined $collectcfg->{'plugout'} && $collectcfg->{'plugout'} =~ /^(.*METS|DSpace|MARCXML)Plugout/) {
            $plugout = $collectcfg->{'plugout'};
        }
        else{
            if ($saveas !~ /^(GreenstoneMETS|FedoraMETS|DSpace|MARCXML)$/) {
                push @$plugout,"GreenstoneMETSPlugout";
            }
            else{
                push @$plugout,$saveas."Plugout";
            }
        }
    }

    my $plugout_name = $plugout->[0];

    # append plugout options in command-line form; load_plugout parses them
    push @$plugout,("-output_info",$archive_info) if (defined $archive_info);
    push @$plugout,("-verbosity",$verbosity) if (defined $verbosity);
    push @$plugout,("-debug") if ($debug);
    push @$plugout,("-group_size",$groupsize) if (defined $groupsize);
    push @$plugout,("-gzip_output") if ($gzip);
    push @$plugout,("-output_handle",$out) if (defined $out);

    push @$plugout,("-xslt_file",$xsltfile) if (defined $xsltfile && $xsltfile ne "");

    # plugout-specific options
    if ($plugout_name =~ m/^MARCXMLPlugout$/) {
        push @$plugout,("-group") if ($group_marc);
        push @$plugout,("-mapping_file",$mapping_file) if (defined $mapping_file && $mapping_file ne "");
    }
    if ($plugout_name =~ m/^.*METSPlugout$/) {
        push @$plugout,("-xslt_mets",$xslt_mets) if (defined $xslt_mets && $xslt_mets ne "");
        push @$plugout,("-xslt_txt",$xslt_txt) if (defined $xslt_txt && $xslt_txt ne "");
    }

    if ($plugout_name eq "FedoraMETSPlugout") {
        push @$plugout,("-fedora_namespace",$fedora_namespace) if (defined $fedora_namespace && $fedora_namespace ne "");
    }

    if ($plugout_name eq "DSpacePlugout") {
        push @$plugout,("-metadata_prefix",$metadata_prefix) if (defined $metadata_prefix && $metadata_prefix ne "");
    }

    my $processor = &plugout::load_plugout($plugout);
    $processor->setoutputdir ($archivedir);
    $processor->set_sortmeta ($sortmeta, $removeprefix, $removesuffix) if defined $sortmeta;

    $processor->set_OIDtype ($OIDtype, $OIDmetadata);

    &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli);

    if ($removeold) {
        # occasionally, plugins may want to do something on remove
        # old, eg pharos image indexing
        &plugin::remove_all($pluginfo, $importdir, $processor, $maxdocs, $gli);
    }

    # process the import directory
    my $block_hash = {};
    $block_hash->{'new_files'} = {};
    $block_hash->{'reindex_files'} = {};
    my $metadata = {};

    # global blocking pass may set up some metadata
    &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli);

    if ($manifest ne "") {
        # Manifest-driven run: delete / reindex / index exactly what the
        # manifest file says, with no directory diffing
        #
        # 1. Process delete files first
        #
        my @deleted_files = keys %{$manifest_lookup->{'delete'}};
        my @full_deleted_files = ();

        # ensure all filenames are absolute
        foreach my $df (@deleted_files) {
            my $full_df =
                (&util::filename_is_absolute($df))
                ? $df
                : &util::filename_cat($importdir,$df);

            # a directory entry expands to everything underneath it
            if (-d $full_df) {
                &add_dir_contents_to_list($full_df, \@full_deleted_files);
            } else {
                push(@full_deleted_files,$full_df);
            }
        }

        &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_deleted_files);
        mark_docs_for_deletion($archive_info,{},
                               \@full_deleted_files,
                               $archivedir, $verbosity, "delete");


        #
        # 2. Now files for reindexing
        #

        my @reindex_files = keys %{$manifest_lookup->{'reindex'}};
        my @full_reindex_files = ();
        # ensure all filenames are absolute
        foreach my $rf (@reindex_files) {
            my $full_rf =
                (&util::filename_is_absolute($rf))
                ? $rf
                : &util::filename_cat($importdir,$rf);

            if (-d $full_rf) {
                &add_dir_contents_to_list($full_rf, \@full_reindex_files);
            } else {
                push(@full_reindex_files,$full_rf);
            }
        }

        &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_reindex_files);
        mark_docs_for_deletion($archive_info,{},\@full_reindex_files, $archivedir,$verbosity, "reindex");

        # And now to ensure the new version of the file processed by
        # appropriate plugin, we need to add it to block_hash reindex list
        foreach my $full_rf (@full_reindex_files) {
            $block_hash->{'reindex_files'}->{$full_rf} = 1;
        }


        #
        # 3. Now finally any new files - add to block_hash new_files list
        #

        my @new_files = keys %{$manifest_lookup->{'index'}};
        my @full_new_files = ();

        foreach my $nf (@new_files) {
            # ensure filename is absolute
            my $full_nf =
                (&util::filename_is_absolute($nf))
                ? $nf
                : &util::filename_cat($importdir,$nf);

            if (-d $full_nf) {
                &add_dir_contents_to_list($full_nf, \@full_new_files);
            } else {
                push(@full_new_files,$full_nf);
            }
        }

        # skip "new" files that the source archive db already knows about
        my $arcinfo_src_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-src", $archivedir);
        my $arcinfodb_map = {};
        &dbutil::read_infodb_file($collectcfg->{'infodbtype'}, $arcinfo_src_filename, $arcinfodb_map);
        foreach my $f (@full_new_files) {
            # check that we haven't seen it already
            if (defined $arcinfodb_map->{$f}) {
                # TODO make better warning
                print STDERR "Warning: $f already in src archive, \n";
            } else {
                $block_hash->{'new_files'}->{$f} = 1;
            }
        }

        undef $arcinfodb_map;
    }
    else {
        # if incremental, we read through the import folder to see whats changed.

        if ($incremental || $incremental_mode eq "onlyadd") {
            prime_doc_oid_count($archivedir);

            # Can now work out which files were new, already existed, and have
            # been deleted

            new_vs_old_import_diff($archive_info,$block_hash,$importdir,
                                   $archivedir,$verbosity,$incremental_mode);

            my @new_files = sort keys %{$block_hash->{'new_files'}};
            # NOTE(review): "scalar(@new_files>0)" works (array in numeric
            # comparison gives its element count) but was presumably meant to
            # be "scalar(@new_files) > 0"
            if (scalar(@new_files>0)) {
                print STDERR "New files and modified metadata files since last import:\n ";
                print STDERR join("\n ",@new_files), "\n";
            }

            if ($incremental) {
                # only look for deletions if we are truly incremental
                my @deleted_files = sort keys %{$block_hash->{'deleted_files'}};
                # Filter out any in gsdl/tmp area
                my @filtered_deleted_files = ();
                my $gsdl_tmp_area = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
                my $collect_tmp_area = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
                $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area);
                $collect_tmp_area = &util::filename_to_regex($collect_tmp_area);

                foreach my $df (@deleted_files) {
                    next if ($df =~ m/^$gsdl_tmp_area/);
                    next if ($df =~ m/^$collect_tmp_area/);

                    push(@filtered_deleted_files,$df);
                }


                @deleted_files = @filtered_deleted_files;

                if (scalar(@deleted_files)>0) {
                    print STDERR "Files deleted since last import:\n ";
                    print STDERR join("\n ",@deleted_files), "\n";


                    &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@deleted_files);

                    mark_docs_for_deletion($archive_info,$block_hash,\@deleted_files, $archivedir,$verbosity, "delete");
                }

                my @reindex_files = sort keys %{$block_hash->{'reindex_files'}};

                if (scalar(@reindex_files)>0) {
                    print STDERR "Files to reindex since last import:\n ";
                    print STDERR join("\n ",@reindex_files), "\n";
                    &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@reindex_files);
                    mark_docs_for_deletion($archive_info,$block_hash,\@reindex_files, $archivedir,$verbosity, "reindex");
                }

            }
        }
    }

    # Check for existence of the file that's to contain earliestDateStamp in archivesdir
    # Do nothing if the file already exists (file exists on incremental build).
    # If the file doesn't exist, as happens on full build, create it and write out the current datestamp into it
    # In buildcol, read the file's contents and set the earliestdateStamp in GS2's build.cfg / GS3's buildconfig.xml
    # In doc.pm have set_oaiLastModified similar to set_lastmodified, and create the doc fields
    # oailastmodified and oailastmodifieddate
    my $earliestDatestampFile = &util::filename_cat($archivedir, "earliestDatestamp");
    if (!-f $earliestDatestampFile && -d $archivedir) {
        my $current_time_in_seconds = time; # in seconds

        if(open(FOUT, ">$earliestDatestampFile")) {
            # || (&gsprintf(STDERR, "{common.cannot_open}: $!\n", $earliestDatestampFile) && die);
            print FOUT $current_time_in_seconds;
            close(FOUT);
        }
        else {
            &gsprintf(STDERR, "{import.cannot_write_earliestdatestamp}\n", $earliestDatestampFile);
        }

    }

    # now, whichever mode we are in, we can process the entire import folder
    if ((defined $jobs) && ($jobs > 1))
    {
        # if jobs are set to >1, run in parallel using MPI helper
        # [hs, 1 july 2010]
        &ParallelInexport::farm_out_processes($jobs, $epoch, $importdir, $block_hash,
                                              $self->{'collection'}, $self->{'site'});
    }
    else
    {
        &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
    }


    if ($saveas eq "FedoraMETS") {
        # create collection "doc obj" for Fedora that contains
        # collection-level metadata

        my $doc_obj = new doc($config_filename,"nonindexed_doc","none");
        $doc_obj->set_OID("collection");

        # NOTE(review): $col_name is assigned but never used
        my $col_name = undef;
        my $col_meta = $collectcfg->{'collectionmeta'};

        if (defined $col_meta) {
            store_collectionmeta($col_meta,"collectionname",$doc_obj); # in GS3 this is a collection's name
            store_collectionmeta($col_meta,"collectionextra",$doc_obj); # in GS3 this is a collection's description
        }
        $processor->process($doc_obj);
    }

    &plugin::end($pluginfo, $processor);

    &plugin::deinit($pluginfo, $processor);

    # Store the value of OIDCount (used in doc.pm) so it can be
    # restored correctly to this value on an incremental build
    store_doc_oid_count($archivedir);

    # write out the archive information file
    $processor->close_file_output() if (defined $groupsize) && ($groupsize > 1);
    $processor->close_group_output() if $processor->is_group();

    # for backwards compatibility with archives.inf file
    if ($arcinfo_doc_filename =~ m/(contents)|(\.inf)$/) {
        $archive_info->save_info($arcinfo_doc_filename);
    }
    else {
        $archive_info->save_revinfo_db($arcinfo_src_filename);
    }

    return $pluginfo;
}
800
801
# Print the end-of-run banner and per-plugin statistics, then close the
# output and fail-log handles opened earlier by new() / read_collection_cfg().
#
# Parameters:
#   $pluginfo - the plugin list returned by process_files
#
# Statistics go to -statsfile if given (falling back to STDERR if it cannot
# be opened); the completion banner always goes to $self->{'out'}.
sub generate_statistics
{
    my $self = shift @_;
    my ($pluginfo) = @_;

    my $inexport_mode = $self->{'mode'};

    my $statsfile = $self->{'statsfile'};
    my $out = $self->{'out'};
    my $faillogname = $self->{'faillogname'};
    my $gli = $self->{'gli'};
    my $jobs = $self->{'jobs'};

    # write out import stats

    if ((!defined $jobs) || ($jobs == 1))
    {
        # statistics are only produced for a single-job (non-parallel) run
        # [hs, 1 july 2010]

        my $close_stats = 0;
        # open the stats file on a package-global handle unless it names
        # STDERR/STDOUT; the handle-name string is what write_stats receives
        if ($statsfile !~ /^(STDERR|STDOUT)$/i) {
            if (open (STATS, ">$statsfile")) {
                $statsfile = 'inexport::STATS';
                $close_stats = 1;
            } else {
                &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile);
                &gsprintf($out, "{import.stats_backup}\n");
                $statsfile = 'STDERR';
            }
        }

        &gsprintf($out, "\n");
        &gsprintf($out, "*********************************************\n");
        &gsprintf($out, "{$inexport_mode.complete}\n");
        &gsprintf($out, "*********************************************\n");

        &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli);
        if ($close_stats) {
            close STATS;
        }
    }

    # OUT was opened by new() when -out named a file; FAILLOG was opened
    # unconditionally by read_collection_cfg()
    close OUT if $self->{'close_out'};
    close FAILLOG;
}
848
849
# Copy one collection-level metadata field (e.g. "collectionname") onto the
# top section of $doc_obj as "ex.<field>", with a "^<lang>" suffix when the
# value is keyed by a language qualifier of the form "[l=xx]".
# The English (or unqualified) collection name is additionally duplicated as
# dc.Title, mirroring the GS2->GS3 name mapping in collConfigxml.pm
# (GS2 "collectionname" == GS3 "name", GS2 "collectionextra" == GS3 "description").
sub store_collectionmeta
{
    my ($collectionmeta, $field, $doc_obj) = @_;

    my $top_section = $doc_obj->get_top_section();
    my $lang_values = $collectionmeta->{$field};

    while (my ($lang_key, $value) = each %$lang_values)
    {
        my $label = "ex.$field";

        # a key like "[l=en]" marks a language-specific value
        if ($lang_key =~ m/^\[l=(.*?)\]$/)
        {
            $label .= "^$1";
        }

        $doc_obj->add_utf8_metadata($top_section, $label, $value);

        if (($label eq "ex.collectionname^en") || ($label eq "ex.collectionname"))
        {
            $doc_obj->add_utf8_metadata($top_section, "dc.Title", $value);
        }
    }
}
[22413]887
888
# Full path of the "OIDcount" state file kept inside the archives directory.
sub oid_count_file {
    my $archivedir = shift @_;

    return &util::filename_cat($archivedir, "OIDcount");
}
893
894
# Restore doc.pm's document-OID counter from the "OIDcount" file written by a
# previous run (see store_doc_oid_count), so an incremental build carries on
# numbering where the last build stopped.
#
# Parameters:
#   $archivedir - the collection's archives directory
#
# Does nothing when no OIDcount file exists (full/first build). Fixes over
# the old code: lexical filehandle with three-arg open (was a bareword global
# with two-arg open), and an empty OIDcount file no longer clobbers
# $doc::OIDcount with undef (the old code chomp'd/assigned unconditionally).
sub prime_doc_oid_count
{
    my ($archivedir) = @_;
    my $oid_count_filename = &oid_count_file($archivedir);

    if (-e $oid_count_filename) {
        if (open(my $oid_in, "<", $oid_count_filename)) {
            my $OIDcount = <$oid_in>;
            close($oid_in);

            # guard against an empty file: only a real line updates the counter
            if (defined $OIDcount) {
                chomp $OIDcount;
                $doc::OIDcount = $OIDcount;
            }
        }
        else {
            &gsprintf(STDERR, "{import.cannot_read_OIDcount}\n", $oid_count_filename);
        }
    }

}
914
# Persist doc.pm's current document-OID counter ($doc::OIDcount) into the
# "OIDcount" file in the archives directory, so prime_doc_oid_count can
# restore it on the next incremental build.
#
# Parameters:
#   $archivedir - the collection's archives directory
#
# Fixes over the old code: lexical filehandle with three-arg open (was a
# bareword global with two-arg open), and the close of the write handle is
# now checked (buffered write errors only surface at close).
sub store_doc_oid_count
{
    # Use the file "OIDcount" in the archives directory to record
    # what value doc.pm got up to

    my ($archivedir) = @_;
    my $oid_count_filename = &oid_count_file($archivedir);


    if (open(my $oid_out, ">", $oid_count_filename)) {
        print $oid_out $doc::OIDcount, "\n";

        close($oid_out)
            or &gsprintf(STDERR, "{import.cannot_write_OIDcount}\n", $oid_count_filename);
    }
    else {
        &gsprintf(STDERR, "{import.cannot_write_OIDcount}\n", $oid_count_filename);
    }
}
933
934
935
# Compare the current contents of the import directory against the file
# list recorded during the previous import, and partition the files in
# $block_hash->{'all_files'} into the buckets the rest of the import
# pipeline consumes:
#   'new_files'                       - not seen in the previous import
#   'existing_files'                  - seen before and (so far) unchanged
#   'new_or_modified_metadata_files'  - metadata.xml-style files, new or
#                                       newer than the archive info database
#   'reindex_files'                   - previously imported files that must
#                                       be re-processed
#   'deleted_files'                   - files from the previous import that
#                                       have since disappeared
# Every entry processed is removed from 'all_files', and $block_hash is
# modified in place.
#
# Parameters:
#   $archive_info     - arcinfo object; 'prev_import_filelist' and
#                       'infodbtype' are read from it
#   $block_hash       - hash of file-list buckets (modified in place)
#   $importdir        - import directory, used to absolutise relative paths
#   $archivedir       - archives directory holding the info databases
#   $verbosity        - >= 2 enables per-file diagnostic output
#   $incremental_mode - "all" keeps previously seen files as 'existing';
#                       any other value (e.g. "onlyadd") re-treats them as new
sub new_vs_old_import_diff
{
    my ($archive_info,$block_hash,$importdir,$archivedir,$verbosity,$incremental_mode) = @_;

    # Get the infodbtype value for this collection from the arcinfo object
    my $infodbtype = $archive_info->{'infodbtype'};

    # in this method, we want to know if metadata files are modified or not.
    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archivedir);

    # -M gives the file age in days; "age < archive age" below means the
    # file was touched more recently than the last build's database
    my $archiveinf_timestamp = -M $arcinfo_doc_filename;

    # First convert all files to absolute form
    # This is to support the situation where the import folder is not
    # the default

    my $prev_all_files = $archive_info->{'prev_import_filelist'};
    my $full_prev_all_files = {};

    # map: absolute path -> path as recorded in the previous file list
    # (relative paths are taken to be relative to GSDLCOLLECTDIR)
    foreach my $prev_file (keys %$prev_all_files) {

	if (!&util::filename_is_absolute($prev_file)) {
	    my $full_prev_file = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$prev_file);
	    $full_prev_all_files->{$full_prev_file} = $prev_file;
	}
	else {
	    $full_prev_all_files->{$prev_file} = $prev_file;
	}
    }


    # Figure out which are the new files, existing files and so
    # by implication the files from the previous import that are not
    # there any more => mark them for deletion
    foreach my $curr_file (keys %{$block_hash->{'all_files'}}) {

	my $full_curr_file = $curr_file;

	# entry in 'all_files' is moved to either 'existing_files',
	# 'deleted_files', 'new_files', or 'new_or_modified_metadata_files'

	if (!&util::filename_is_absolute($curr_file)) {
	    # add in import dir to make absolute
	    $full_curr_file = &util::filename_cat($importdir,$curr_file);
	}

	# figure out if new file or not
	if (defined $full_prev_all_files->{$full_curr_file}) {
	    # delete it so that only files that need deleting are left
	    delete $full_prev_all_files->{$full_curr_file};

	    # had it before. is it a metadata file?
	    if ($block_hash->{'metadata_files'}->{$full_curr_file}) {

		# is it modified??
		if (-M $full_curr_file < $archiveinf_timestamp) {
		    print STDERR "*** Detected a *modified metadata* file: $full_curr_file\n" if $verbosity >= 2;
		    # its newer than last build
		    $block_hash->{'new_or_modified_metadata_files'}->{$full_curr_file} = 1;
		}
		# NOTE(review): an unmodified metadata file falls through into
		# no bucket at all (it is simply dropped from 'all_files') -
		# presumably intentional, since its content is already indexed
	    }
	    else {
		if ($incremental_mode eq "all") {

		    # had it before
		    $block_hash->{'existing_files'}->{$full_curr_file} = 1;

		}
		else {
		    # Warning in "onlyadd" mode, but had it before!
		    print STDERR "Warning: File $full_curr_file previously imported.\n";
		    print STDERR "         Treating as new file\n";

		    $block_hash->{'new_files'}->{$full_curr_file} = 1;

		}
	    }
	}
	else {
	    if ($block_hash->{'metadata_files'}->{$full_curr_file}) {
		# the new file is the special sort of file greenstone uses
		# to attach metadata to src documents
		# i.e metadata.xml
		# (but note, the filename used is not constrained in
		# Greenstone to always be this)

		print STDERR "*** Detected *new* metadata file: $full_curr_file\n" if $verbosity >= 2;
		$block_hash->{'new_or_modified_metadata_files'}->{$full_curr_file} = 1;
	    }
	    else {
		$block_hash->{'new_files'}->{$full_curr_file} = 1;
	    }
	}


	delete $block_hash->{'all_files'}->{$curr_file};
    }




    # Deal with complication of new or modified metadata files by forcing
    # everything from this point down in the file hierarchy to
    # be freshly imported.
    #
    # This may mean files that have not changed are reindexed, but does
    # guarantee by the end of processing all new metadata is correctly
    # associated with the relevant document(s).

    foreach my $new_mdf (keys %{$block_hash->{'new_or_modified_metadata_files'}}) {
	# split off the directory the metadata file sits in; everything
	# below that directory is a candidate for reindexing
	my ($fileroot,$situated_dir,$ext) = fileparse($new_mdf, "\\.[^\\.]+\$");

	$situated_dir =~ s/[\\\/]+$//; # remove tailing slashes
	$situated_dir = &util::filename_to_regex($situated_dir); # need to escape windows slash \ and brackets in regular expression

	# Go through existing_files, and mark anything that is contained
	# within 'situated_dir' to be reindexed (in case some of the metadata
	# attaches to one of these files)

	my $reindex_files = [];

	foreach my $existing_f (keys %{$block_hash->{'existing_files'}}) {

	    # prefix match: file path begins with the metadata file's directory
	    if ($existing_f =~ m/^$situated_dir/) {

		print STDERR "**** Existing file $existing_f\nis located within\n$situated_dir\n";

		push(@$reindex_files,$existing_f);
		$block_hash->{'reindex_files'}->{$existing_f} = 1;
		delete $block_hash->{'existing_files'}->{$existing_f};

	    }
	}

	# metadata file needs to be in new_files list so parsed by MetadataXMLPlug
	# (or equivalent)
	$block_hash->{'new_files'}->{$new_mdf} = 1;

    }

    # go through remaining existing files and work out what has changed and needs to be reindexed.
    my @existing_files = sort keys %{$block_hash->{'existing_files'}};

    my $reindex_files = [];

    foreach my $existing_filename (@existing_files) {
	if (-M $existing_filename < $archiveinf_timestamp) {
	    # file is newer than last build

	    my $existing_file = $existing_filename;
	    #my $collectdir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'});

	    #my $collectdir_resafe = &util::filename_to_regex($collectdir);
	    #$existing_file =~ s/^$collectdir_resafe(\\|\/)?//;

	    print STDERR "**** Reindexing existing file: $existing_file\n";

	    push(@$reindex_files,$existing_file);
	    $block_hash->{'reindex_files'}->{$existing_filename} = 1;
	}

    }


    # By this point full_prev_all_files contains the files
    # mentioned in archiveinf-src.db but are not in the 'import'
    # folder (or whatever was specified through -importdir ...)

    # This list can contain files that were created in the 'tmp' or
    # 'cache' areas (such as screen-size and thumbnail images).
    #
    # In building the final list of files to delete, we test to see if
    # it exists on the filesystem and if it does (unusual for a "normal"
    # file in import, but possible in the case of 'tmp' files),
    # supress it from going into the final list

    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    my @deleted_files = values %$full_prev_all_files;
    # NOTE(review): map in void context used purely for its side effect on
    # $block_hash; only files that no longer exist on disk are recorded
    map { my $curr_file = $_;
	  my $full_curr_file = $curr_file;

	  if (!&util::filename_is_absolute($curr_file)) {
	      # add in import dir to make absolute

	      $full_curr_file = &util::filename_cat($collectdir,$curr_file);
	  }


	  if (!-e $full_curr_file) {
	      $block_hash->{'deleted_files'}->{$curr_file} = 1;
	  }
      } @deleted_files;



}
1133
[19498]1134
# Mark documents for deletion or reindexing in the archive info databases.
# Used both to delete "deleted" docs and to remove old versions of
# "changed" docs before they are re-imported.
#
# For each file in @$deleted_files: its 'archiveinf-src' record is removed,
# and every OID that file contributed to has its status set to "D" (both in
# the arcinfo object and in the 'archiveinf-doc' database's <index-status>
# field) so buildcol.pl will drop or rebuild it. If the file was merely an
# associated/metadata file of a document, the document's primary source
# file is added to $block_hash->{'reindex_files'} so the doc is reimported.
#
# Parameters:
#   $archive_info   - arcinfo object (status info read and updated)
#   $block_hash     - 'reindex_files' bucket is updated in place
#   $deleted_files  - ref to array of file paths to process
#   $archivedir     - archives directory holding the info databases
#   $verbosity      - >1 prints a line per OID marked
#   $mode           - 'delete' or 'reindex'; only affects the log message,
#                     except that in 'delete' mode files that were themselves
#                     deleted are removed again from 'reindex_files' at the end
sub mark_docs_for_deletion
{
    my ($archive_info,$block_hash,$deleted_files,$archivedir,$verbosity,$mode) = @_;

    my $mode_text = "deleted from index";
    if ($mode eq "reindex") {
	$mode_text = "reindexed";
    }

    # Get the infodbtype value for this collection from the arcinfo object
    my $infodbtype = $archive_info->{'infodbtype'};

    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archivedir);
    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-src", $archivedir);


    # record files marked for deletion in arcinfo
    foreach my $file (@$deleted_files) {
	# use 'archiveinf-src' info database file to look up all the OIDs
	# that this file is used in (note in most cases, it's just one OID)

	my $src_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_src_filename, $file);
	my $oids = $src_rec->{'oid'};
	my $file_record_deleted = 0;

	# delete the src record
	my $src_infodb_file_handle = &dbutil::open_infodb_write_handle($infodbtype, $arcinfo_src_filename, "append");
	&dbutil::delete_infodb_entry($infodbtype, $src_infodb_file_handle, $file);
	&dbutil::close_infodb_write_handle($infodbtype, $src_infodb_file_handle);


	foreach my $oid (@$oids) {

	    # find the source doc (the primary file that becomes this oid)
	    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $oid);
	    my $doc_source_file = $doc_rec->{'src-file'}->[0];
	    if (!&util::filename_is_absolute($doc_source_file)) {
		$doc_source_file = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$doc_source_file);
	    }

	    if ($doc_source_file ne $file) {
		# its an associated or metadata file

		# mark source doc for reimport as one of its assoc files has changed or deleted
		$block_hash->{'reindex_files'}->{$doc_source_file} = 1;

	    }
	    # only touch the doc record once: skip OIDs already marked "D"
	    my $curr_status = $archive_info->get_status_info($oid);
	    if (defined($curr_status) && (($curr_status ne "D"))) {
		if ($verbosity>1) {
		    print STDERR "$oid ($doc_source_file) marked to be $mode_text on next buildcol.pl\n";
		}
		# mark oid for deletion (it will be deleted or reimported)
		$archive_info->set_status_info($oid,"D");
		# rewrite the raw db entry with its <index-status> forced to D
		my $val = &dbutil::read_infodb_rawentry($infodbtype, $arcinfo_doc_filename, $oid);
		$val =~ s/^<index-status>(.*)$/<index-status>D/m;

		my $val_rec = &dbutil::convert_infodb_string_to_hash($val);
		my $doc_infodb_file_handle = &dbutil::open_infodb_write_handle($infodbtype, $arcinfo_doc_filename, "append");

		&dbutil::write_infodb_entry($infodbtype, $doc_infodb_file_handle, $oid, $val_rec);
		&dbutil::close_infodb_write_handle($infodbtype, $doc_infodb_file_handle);
	    }
	}

    }

    # now go through and check that we haven't marked any primary
    # files for reindex (because their associated files have
    # changed/deleted) when they have been deleted themselves. only in
    # delete mode.

    if ($mode eq "delete") {
	foreach my $file (@$deleted_files) {
	    if (defined $block_hash->{'reindex_files'}->{$file}) {
		delete $block_hash->{'reindex_files'}->{$file};
	    }
	}
    }


}
1219
# Recursively append every file found under $dirname to the array ref
# $list. Directory entries "." and ".." and Subversion ".svn" directories
# are skipped; subdirectories are descended into.
#
# Parameters:
#   $dirname - directory to scan
#   $list    - ref to array that collected file paths are pushed onto
#
# Returns -1 (after printing a warning) if a directory could not be
# opened; otherwise returns no meaningful value - callers inspect @$list.
sub add_dir_contents_to_list {

    my ($dirname, $list) = @_;

    # Use a lexical directory handle rather than the bareword DIR: this sub
    # recurses, and a shared global handle is fragile practice even though
    # the handle is closed before recursion occurs.
    my $dir_handle;
    if (!opendir ($dir_handle, $dirname)) {
	print STDERR "inexport: WARNING - couldn't read directory $dirname\n";
	return -1; # error in processing
    }
    my @dir_entries = readdir ($dir_handle);
    closedir ($dir_handle);

    foreach my $subfile (@dir_entries) {
	next if ($subfile =~ m/^\.\.?$/);   # skip "." and ".."
	next if ($subfile =~ /^\.svn$/);    # skip subversion metadata dirs
	my $full_file = &util::filename_cat($dirname, $subfile);
	if (-d $full_file) {
	    &add_dir_contents_to_list($full_file, $list);
	} else {
	    push (@$list, $full_file);
	}
    }

}
[18554]1248
[23053]1249
[18457]12501;
Note: See TracBrowser for help on using the repository browser.