source: main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm@ 27306

Last change on this file since 27306 was 27306, checked in by jmt12, 11 years ago

Moving the critical file-related functions (copy, rm, etc.) out of util.pm into their own proper class, FileUtils. Use of the old functions in util.pm will prompt deprecation warning messages. There may be further functions that could be moved across in the future, but these are the critical ones when considering support for other filesystems (HTTP, HDFS, WebDAV, etc.). Updated some key files to use the new functions so that no deprecation messages are thrown when importing/building the demo collection 'out of the box'.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.9 KB
Line 
1###########################################################################
2#
3# DirectoryPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# DirectoryPlugin is a plugin which recurses through directories processing
27# each file it finds - which basically means passing it down the plugin
28# pipeline
29
30package DirectoryPlugin;
31
32use extrametautil;
33use PrintInfo;
34use plugin;
35use util;
36use FileUtils;
37use metadatautil;
38
39use File::Basename;
40use strict;
41no strict 'refs';
42no strict 'subs';
43
44use Encode;
45
BEGIN {
    # DirectoryPlugin inherits from PrintInfo only.
    @DirectoryPlugin::ISA = ('PrintInfo');
}

# Argument descriptions for this plugin. The 'desc' values are placeholder
# keys in braces, not literal descriptions.
my $arguments = [
    {
        'name' => 'block_exp',
        'desc' => '{BasePlugin.block_exp}',
        'type' => 'regexp',
        'deft' => &get_default_block_exp(),
        'reqd' => 'no',
    },

    # this option has been deprecated. leave it here for now so we can
    # warn people not to use it
    {
        'name'      => 'use_metadata_files',
        'desc'      => '{DirectoryPlugin.use_metadata_files}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name' => 'recheck_directories',
        'desc' => '{DirectoryPlugin.recheck_directories}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];

# Option record for this plugin; new() pushes it onto the caller's "OptList".
my $options = {
    'name'     => 'DirectoryPlugin',
    'desc'     => '{DirectoryPlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
72
# Constructor.
#
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed straight through to PrintInfo->new
#   $hashArgOptLists - hash ref; this plugin's argument and option tables
#                      are appended to its "ArgList" and "OptList" entries
#
# Returns the object built by PrintInfo->new, blessed into $class.
# Dies if the deprecated -use_metadata_files option is set.
sub new {
    my $class = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);
    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    # don't worry about any options or initialisations etc when the
    # object is only wanted for its description
    return bless($self, $class) if $self->{'info_only'};

    # we have left this option in so we can warn people who are still using it
    if ($self->{'use_metadata_files'}) {
        die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
    }

    # All statistics counters start from zero.
    @{$self}{qw(num_processed num_not_processed num_blocked num_archives)} = (0) x 4;

    # Extra-metadata records waiting for a sub-directory to be visited,
    # keyed by the directory path used in read().
    $self->{'subdir_extrametakeys'} = {};

    return bless($self, $class);
}
102
# called once, at the start of processing
#
# Stores the verbosity level and the fail handle on the plugin. The output
# handle is only replaced when a defined one is supplied, so an existing
# handle is kept otherwise.
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    # verbosity is passed through from the processor
    $self->{'verbosity'} = $verbosity;

    # as are the outhandle and failhandle
    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }
    $self->{'failhandle'} = $failhandle;
}
116
# called once, after all passes have finished
#
# DirectoryPlugin has nothing to tear down, so this hook is a no-op.
sub deinit {
    my ($self) = @_;
}
122
# called at the beginning of each plugin pass (import has one, building has many)
#
# For an incremental run with a processor whose class name does not end in
# "buildproc", records the -M age of the "archiveinf-doc" info database as
# $self->{'inf_timestamp'}, provided that file exists. Otherwise the plugin
# is left unchanged.
sub begin {
    my $self = shift(@_);
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    # Only lookup timestamp info for import.pl, and only if incremental is set
    my $is_build_pass = (ref($processor) =~ /buildproc$/);
    if (!$is_build_pass && $self->{'incremental'} == 1) {
        # Get the infodbtype value for this collection from the arcinfo object
        my $infodbtype = $processor->getoutputinfo()->{'infodbtype'};
        if ($infodbtype eq "gdbm-txtgz") {
            # in archives, cannot use txtgz version
            $infodbtype = "gdbm";
        }

        my $archives_inf = &dbutil::get_infodb_file_path(
            $infodbtype, "archiveinf-doc", $processor->getoutputdir());

        $self->{'inf_timestamp'} = -M $archives_inf if (-e $archives_inf);
    }
}
142
# Hook taking ($pluginfo, $base_dir, $processor, $maxdocs).
# DirectoryPlugin does nothing here: the arguments are unpacked and ignored.
sub remove_all {
    my $self = shift(@_);
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
147
148
# Hook taking ($file, $oids, $archivedir). Always returns undef.
sub remove_one {
    my ($self, $file, $oids, $archivedir) = @_;

    # this will never be called for directories (will it??)
    return undef;
}
155
156
# called at the end of each plugin pass
#
# DirectoryPlugin has no per-pass clean-up, so this hook is a no-op.
sub end {
    my ($self) = shift(@_);
}
162
163
164
# return 1 if this class might recurse using $pluginfo
#
# DirectoryPlugin always answers 1.
sub is_recursive {
    my ($self) = @_;
    return 1;
}
171
# Returns the default block_exp: a case-insensitive pattern, anchored at the
# end of the path, for version-control and OS housekeeping names (CVS, .svn,
# Thumbs.db, OIDcount, .DS_Store) and names ending in "~".
# Takes no meaningful arguments; it is also called as a plain function when
# the argument table is built.
sub get_default_block_exp {
    my $default_block_exp = '(?i)(CVS|\.svn|Thumbs\.db|OIDcount|\.DS_Store|~)$';
    return $default_block_exp;
}
177
# Decide whether the directory $dirname should be descended into.
#
# Returns:
#   undef - $dirname is not a directory
#   0     - $dirname is a directory that must be skipped because it matches
#           block_exp, looks like the archives/index directory of a
#           collection under GSDLHOME, is nested too deeply, or looks like a
#           recursive import path (a message is printed to the out handle
#           for the last three cases)
#   1     - the directory may be processed
sub check_directory_path {

    my $self = shift(@_);
    my ($dirname) = @_;

    return undef unless (-d $dirname);

    my $block_exp = $self->{'block_exp'};
    return 0 if (defined $block_exp && $block_exp ne "" && $dirname =~ /$block_exp/);

    my $outhandle = $self->{'outhandle'};

    # check to make sure we're not reading the archives or index directory
    my $gsdlhome = quotemeta(defined $ENV{'GSDLHOME'} ? $ENV{'GSDLHOME'} : "");
    if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
        print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    # The depth is measured by counting '/' separators. The previous pattern
    # used the quantifier "{,41}", which is literal text before Perl 5.34
    # (so the check never fired) and "0 to 41 times" from 5.34 on (so every
    # directory was rejected).
    my $max_depth = 40;
    my $depth = ($dirname =~ tr{/}{});
    if ($depth > $max_depth) {
        print $outhandle "DirectoryPlugin: $dirname is $max_depth directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
        print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
        return 0;
    }

    return 1;
}
210
# this may be called more than once
#
# Turns the 'shared_fileroot' records in $block_hash into blocked files plus
# 'gsdlassocfile_tobe' entries stored under $self->{'assocfile_info'}.
# A record is only used when it has a defined 'tie_to' and at least one
# entry in 'exts'. 'shared_fileroot' is emptied afterwards.
sub sort_out_associated_files {
    my ($self, $block_hash) = @_;

    return if (scalar(keys %{ $block_hash->{'shared_fileroot'} }) == 0);

    if (!defined $self->{'assocfile_info'}) {
        $self->{'assocfile_info'} = {};
    }
    my $assoc_info = $self->{'assocfile_info'};

    foreach my $prefix (keys %{ $block_hash->{'shared_fileroot'} }) {
        my $record = $block_hash->{'shared_fileroot'}->{$prefix};
        my ($tie_to, $exts) = ($record->{'tie_to'}, $record->{'exts'});

        # Nothing to associate without both a base extension and some others.
        next unless ((defined $tie_to) && (scalar(keys %$exts) > 0));

        # set up fileblocks and assocfile_tobe
        my $base_file = "$prefix$tie_to";
        if (!defined $assoc_info->{$base_file}) {
            $assoc_info->{$base_file} = {};
        }
        my $base_info = $assoc_info->{$base_file};
        if (!defined $base_info->{'gsdlassocfile_tobe'}) {
            $base_info->{'gsdlassocfile_tobe'} = [];
        }
        my $pending = $base_info->{'gsdlassocfile_tobe'};

        foreach my $e (keys %$exts) {
            # block the file
            &util::block_filename($block_hash, "$prefix$e");

            # set up as an associated file
            print STDERR " $self->{'plugin_type'}: Associating $prefix$e with $tie_to version\n";
            my $mime_type = ""; # let system auto detect this
            push(@$pending, "$prefix$e:$mime_type:");
        }
    } # foreach record

    # Every record has been handled; start again from an empty table.
    $block_hash->{'shared_fileroot'} = {};
}
252
253
254# do block exp OR special blocking ???
255
# Returns 1 (and increments $self->{'num_blocked'}) when $filename_full_path
# is listed in $block_hash->{'file_blocks'} or matches this plugin's
# block_exp; returns 0 otherwise.
sub file_is_blocked {
    my ($self, $block_hash, $filename_full_path) = @_;

    $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);

    # on windows, all block paths are lowercased.
    my $on_windows = ($ENV{'GSDLOS'} =~ m/^windows$/);
    my $lookup_key = $on_windows ? lc($filename_full_path) : $filename_full_path;

    my $blocked = 0;
    if (defined $block_hash->{'file_blocks'}->{$lookup_key}) {
        $blocked = 1;
    }
    elsif ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) {
        # check Directory plugin's own block_exp
        $blocked = 1;
    }

    return 0 unless $blocked;

    $self->{'num_blocked'}++;
    return 1;
}
284
285
286
# Blocking pass over one directory.
#
# Works out the directory from $base_dir and $file, checks it with
# check_directory_path(), makes sure the tables in $block_hash exist, then
# hands every entry except "." and ".." to plugin::file_block_read and
# finally calls sort_out_associated_files().
#
# Returns:
#   undef or 0 - whatever check_directory_path() returned when the directory
#                was not acceptable
#   -1         - the directory could not be opened
#   1          - the directory was scanned
sub file_block_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname = $file;
    $dirname = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\w/;

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    print $outhandle "Global file scan checking directory: $dirname\n";

    # Make sure every table used during the blocking pass exists.
    foreach my $table ('all_files', 'metadata_files', 'file_blocks', 'shared_fileroot') {
        $block_hash->{$table} = {} unless defined $block_hash->{$table};
    }

    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # A lexical handle is used because this method is re-entered through
    # plugin::file_block_read; a shared global handle would be fragile.
    my $dir_handle;
    if (!opendir ($dir_handle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = readdir ($dir_handle);
    closedir ($dir_handle);

    # Recur over directory contents.
    foreach my $raw_subfile (@dir) {
        next if ($raw_subfile =~ m/^\.\.?$/);

        my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2);

        &plugin::file_block_read ($pluginfo, $base_dir,
                                  $raw_file_subfile,
                                  $block_hash, $metadata, $gli);
    }

    $self->sort_out_associated_files($block_hash);
    return 1;
}
349
# We don't do metadata_read
#
# Accepts the metadata_read arguments and always returns undef.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}
359
360
# return number of files processed, undef if can't process
# Note that $base_dir might be "" and that $file might
# include directories
#
# This function passes around metadata hash structures. Metadata hash
# structures are hashes that map from a (scalar) key (the metadata element
# name) to either a scalar metadata value or a reference to an array of
# such values.
#
# Steps, in order:
#   1. work out the directory and check it with check_directory_path()
#   2. list the directory (names are held url-encoded; directories ending in
#      ".all" are moved to the end of the list)
#   3. pick up extra metadata stored for this directory by a parent, then run
#      plugin::metadata_read over every unblocked entry
#   4. set aside extra-metadata keys that name a sub-directory, for when that
#      sub-directory is read
#   5. pass every unblocked entry to plugin::read with its combined metadata,
#      stopping once $maxdocs is reached (-1 means no limit)
#
# Returns the summed plugin::read results, the undef/0 result of
# check_directory_path() when the directory is not acceptable, or -1 when
# the directory cannot be opened.

sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname;
    if ($file eq "") {
        $dirname = $base_dir;
    }
    else {
        $dirname = $file;
        $dirname = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\w/;
    }

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
        print $outhandle "DirectoryPlugin: metadata passed in: ",
            join(", ", keys %$in_metadata), "\n";
    }

    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # A lexical handle is used because this method is re-entered through
    # plugin::read; a shared global handle would be fragile.
    my $dir_handle;
    if (!opendir ($dir_handle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = readdir ($dir_handle);
    closedir ($dir_handle);
    foreach my $entry (@dir) {
        $entry = &unicode::raw_filename_to_url_encoded($entry);
    }

    # Re-order the files in the list so any directories ending with .all are moved to the end
    for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
        if (-d &FileUtils::filenameConcatenate($dirname, $dir[$i]) && $dir[$i] =~ /\.all$/) {
            push(@dir, splice(@dir, $i, 1));
        }
    }

    # setup the metadata structures. we do a metadata_read pass to see if there is any additional metadata, then pass it to read

    my $additionalmetadata = 0; # is there extra metadata available?
    my %extrametadata;          # maps from filespec to extra metadata keys
    my %extrametafile;          # maps from filespec to the metadata.xml (or similar) file it came from
    my @extrametakeys;          # keys of %extrametadata in order read

    # Want to get relative path of local_dirname within the base_directory
    # but with URL style slashes.
    my $local_dirname = &util::filename_within_directory_url_format($dirname, $base_dir);

    # if we are in import folder, then local_dirname will be empty
    if ($local_dirname ne "") {
        # look for extra metadata passed down from higher folders
        $local_dirname .= "/"; # closing slash must be URL type slash also and not $dirsep;
        if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
            my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
            foreach my $ek (@$extrakeys) {
                my $extrakeys_re = $ek->{'re'};
                my $extrakeys_md = $ek->{'md'};
                my $extrakeys_mf = $ek->{'mf'};
                &extrametautil::addmetakey(\@extrametakeys, $extrakeys_re);
                &extrametautil::setmetadata(\%extrametadata, $extrakeys_re, $extrakeys_md);
                &extrametautil::setmetafile(\%extrametafile, $extrakeys_re, $extrakeys_mf);
            }
            delete($self->{'subdir_extrametakeys'}->{$local_dirname});
        }
    }

    # apply metadata pass for each of the files in the directory -- ignore
    # maxdocs here
    my $num_files = scalar(@dir);
    foreach my $subfile (@dir) {
        next if ($subfile =~ m/^\.\.?$/);

        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
        my $raw_full_filename = &FileUtils::filenameConcatenate($base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2);
            next;
        }

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2);

        &plugin::metadata_read ($pluginfo, $base_dir,
                                $raw_file_subfile, $block_hash,
                                \@extrametakeys, \%extrametadata,
                                \%extrametafile,
                                $processor, $gli);
        $additionalmetadata = 1;
    }

    # filter out any extrametakeys that mention subdirectories and store
    # for later use (i.e. when that sub-directory is being processed)
    foreach my $ek (@extrametakeys) { # where each Extrametakey (which is a filename) is stored as a url-style regex

        my ($subdir_re, $extrakey_dir) = &util::url_fileparse($ek);

        if ($extrakey_dir ne "") {
            # a subdir was specified
            my $md = &extrametautil::getmetadata(\%extrametadata, $ek);
            my $mf = &extrametautil::getmetafile(\%extrametafile, $ek);

            my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};
            my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };

            # when it's looked up, it must be relative to the base dir
            push(@{$subdir_extrametakeys->{"$local_dirname$extrakey_dir"}}, $subdir_rec);
        }
    }

    # import each of the files in the directory
    my $count = 0;
    for (my $i = 0; $i <= scalar(@dir); $i++) {
        # When every file in the directory has been done, pause for a moment (figuratively!)
        # If the -recheck_directories argument hasn't been provided, stop now (default)
        # Otherwise, re-read the contents of the directory to check for new files
        # Any new files are added to the @dir list and are processed as normal
        # This is necessary when documents to be indexed are specified in bibliographic DBs
        # These files are copied/downloaded and stored in a new folder at import time
        if ($i == $num_files) {
            last unless $self->{'recheck_directories'};

            # Re-read the files in the directory to see if there are any new files
            my $recheck_handle;
            last if (!opendir ($recheck_handle, $dirname));
            my @dirnow = readdir ($recheck_handle);
            closedir ($recheck_handle);
            foreach my $entry (@dirnow) {
                $entry = &unicode::raw_filename_to_url_encoded($entry);
            }

            # We're only interested if there are more files than there were before
            last if (scalar(@dirnow) <= scalar(@dir));

            # Any new files are added to the end of @dir to get processed by the loop
            my %already_listed = map { $_ => 1 } @dir;
            my @new_entries = grep { !$already_listed{$_}++ } @dirnow;

            # Without a new entry $dir[$i] would not exist, so stop here.
            last unless @new_entries;
            push(@dir, @new_entries);

            # When the new files have been processed, check again
            $num_files = scalar(@dir);
        }

        my $subfile = $dir[$i];
        last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
        next if ($subfile =~ /^\.\.?$/);

        # Per-entry copies: following a shortcut below must not change the
        # paths used for the remaining entries of this directory.
        my $this_file_base_dir = $base_dir;
        my $this_file = $file;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &FileUtils::filenameConcatenate($this_file, $raw_subfile);
        my $raw_full_filename
            = &FileUtils::filenameConcatenate($this_file_base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2);
            next;
        }

        # Follow Windows shortcuts
        if ($raw_subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) {
            require Win32::Shortcut;
            my $shortcut = Win32::Shortcut->new(&FileUtils::filenameConcatenate($dirname, $raw_subfile));
            if ($shortcut) {
                # The file to be processed is now the target of the shortcut
                $this_file_base_dir = "";
                $this_file = "";
                $raw_subfile = $shortcut->Path;

                # Recompute the derived paths so that the target, not the
                # .lnk file itself, is what gets looked up and read below.
                $raw_file_subfile = &FileUtils::filenameConcatenate($this_file, $raw_subfile);
                $raw_full_filename
                    = &FileUtils::filenameConcatenate($this_file_base_dir, $raw_file_subfile);
            }
        }

        # check for a symlink pointing back to a leading directory
        if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") {
            # readlink gives a "fatal error" on systems that don't implement
            # symlinks. This assumes that the -l test above would fail on those.
            my $linkdest = readlink "$dirname/$raw_subfile";
            if (!defined ($linkdest)) {
                # system error - file not found?
                warn "DirectoryPlugin: symlink problem - $!";
            }
            else {
                # see if link points to current or a parent directory
                if ($linkdest =~ m@^[\./\\]+$@ ||
                    index($dirname, $linkdest) != -1) {
                    warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
                    next;
                }
            }
        }

        print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);

        # Make a copy of $in_metadata to pass to $raw_subfile
        my $out_metadata = {};
        &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);

        # check the assocfile_info
        if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
            &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
        }

        # $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8

        # Next add metadata read in XML files (if it is supplied)
        if ($additionalmetadata == 1) {
            foreach my $filespec (@extrametakeys) {
                ## use the url-encoded filename to do the filename comparison

                if ($subfile =~ /^$filespec$/) {
                    print $outhandle "File \"$subfile\" matches filespec \"$filespec\"\n"
                        if ($verbosity > 2);
                    my $mdref = &extrametautil::getmetadata(\%extrametadata, $filespec);
                    my $mfref = &extrametautil::getmetafile(\%extrametafile, $filespec);

                    # Add the list files where the metadata came from
                    # into the metadata table so we can track this
                    # This mechanism is similar to how gsdlassocfile works

                    my @metafile_pair = ();
                    foreach my $mf_key (keys %$mfref) {
                        my $mf_value = $mfref->{$mf_key};
                        push (@metafile_pair, "$mf_value : $mf_key");
                    }

                    $mdref->{'gsdlmetafile'} = \@metafile_pair;

                    &metadatautil::combine_metadata_structures($out_metadata, $mdref);
                }
            }
        }

        if (defined $self->{'inf_timestamp'}) {
            # Look to see if it's a completely new file

            if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
                # Not a new file, must be an existing file
                # Let's see if it's newer than the last import.pl

                if (! -d $raw_full_filename) {
                    if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
                        # filename has been around for longer than inf_timestamp
                        print $outhandle "**** Skipping $subfile\n" if ($verbosity > 3);
                        next;
                    }
                    else {
                        # Remove old folder in archives (might hash to something different)
                        # *** should be doing this on a Del one as well
                        # but leave folder name?? and ensure hashs to
                        # same again??

                        # Then let through as new doc??

                        # mark to doc-oids that rely on it for re-indexing
                    }
                }
            }
        }

        # Recursively read each $subfile
        print $outhandle "DirectoryPlugin recurring: $subfile\n" if ($verbosity > 2);

        $count += &plugin::read ($pluginfo, $this_file_base_dir,
                                 $raw_file_subfile, $block_hash,
                                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}
664
# Hook taking a $stats argument. DirectoryPlugin contributes nothing: the
# argument is unpacked and ignored.
sub compile_stats {
    my $self = shift(@_);
    my ($stats) = @_;
}
669
6701;
Note: See TracBrowser for help on using the repository browser.