source: main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm@ 23392

Last change on this file since 23392 was 23363, checked in by davidb, 13 years ago

Plugin code upgrade to support Greenstone working with filenames under Windows when they go beyond Latin-1 and start turning up in their DOS abbreviated form (e.g. Test~1.txt)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.4 KB
Line 
1###########################################################################
2#
3# DirectoryPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# DirectoryPlugin is a plugin which recurses through directories processing
27# each file it finds - which basically means passing it down the plugin
28# pipeline
29
30package DirectoryPlugin;
31
32use PrintInfo;
33use plugin;
34use util;
35use metadatautil;
36
37use File::Basename;
38use strict;
39no strict 'refs';
40no strict 'subs';
41
42use Encode;
43
BEGIN {
    # DirectoryPlugin gets its constructor plumbing from PrintInfo.
    @DirectoryPlugin::ISA = qw(PrintInfo);
}

# Descriptions of the command-line arguments this plugin accepts.
# Each record is appended to the shared "ArgList" by new().
my $arguments = [
    {
        'name' => "block_exp",
        'desc' => "{BasePlugin.block_exp}",
        'type' => "regexp",
        'deft' => &get_default_block_exp(),
        'reqd' => "no",
    },

    # This option has been deprecated. It is left here for now so that
    # new() can tell people not to use it.
    {
        'name'      => "use_metadata_files",
        'desc'      => "{DirectoryPlugin.use_metadata_files}",
        'type'      => "flag",
        'reqd'      => "no",
        'hiddengli' => "yes",
    },

    {
        'name' => "recheck_directories",
        'desc' => "{DirectoryPlugin.recheck_directories}",
        'type' => "flag",
        'reqd' => "no",
    },
];

# Self-description of the plugin; appended to the shared "OptList" by new().
my $options = {
    'name'     => "DirectoryPlugin",
    'desc'     => "{DirectoryPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
70
# Constructor.
#
# Arguments:
#   $class           - class name being constructed
#   $pluginlist      - array ref; the class name is appended to it
#   $inputargs       - passed straight through to the PrintInfo constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugin's argument and option descriptions
#
# Returns the new object blessed into $class.  Dies if the deprecated
# -use_metadata_files option was supplied.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    # When only plugin information was asked for, skip every other option
    # check and initialisation.
    return bless $self, $class if ($self->{'info_only'});

    # The option is still declared purely so that anyone still using it
    # gets told to stop.
    if ($self->{'use_metadata_files'}) {
        die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
    }

    # Counters start from zero.
    foreach my $counter ('num_processed', 'num_not_processed', 'num_blocked', 'num_archives') {
        $self->{$counter} = 0;
    }

    # Metadata that read() has found for sub-directories it has not yet visited.
    $self->{'subdir_extrametakeys'} = {};

    return bless $self, $class;
}
100
# Called once, at the start of processing.
#
# Stores the verbosity and the fail handle on the object.  The out handle
# is stored only when one is supplied, so an existing one is never
# replaced by undef.
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    # verbosity is passed through from the processor
    $self->{'verbosity'} = $verbosity;

    # as are the outhandle and failhandle
    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }
    $self->{'failhandle'} = $failhandle;
}
114
# Called once, after all passes have finished.
# This plugin has nothing to tidy up, so the hook does no work.
sub deinit {
    my ($self) = @_;
}
120
# Called at the beginning of each plugin pass (import has one, building
# has many).
#
# When the processor's class name does not end in "buildproc" and the
# 'incremental' setting is 1, this looks up the "archiveinf-doc" info
# database in the processor's output directory.  If that file exists, its
# age as reported by -M is remembered in $self->{'inf_timestamp'}.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    # Only lookup timestamp info for import.pl, and only if incremental is set
    my $processor_class = ref($processor);
    if ($processor_class !~ /buildproc$/ && $self->{'incremental'} == 1) {
        # Get the infodbtype value for this collection from the arcinfo object
        my $output_info = $processor->getoutputinfo();
        my $infodbtype  = $output_info->{'infodbtype'};
        if ($infodbtype eq "gdbm-txtgz") {
            # in archives, cannot use txtgz version
            $infodbtype = "gdbm";
        }

        my $archives_inf = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $processor->getoutputdir());

        if (-e $archives_inf) {
            $self->{'inf_timestamp'} = -M $archives_inf;
        }
    }
}
140
# Removal hook for a whole pass.  Takes the same arguments as begin() and
# does no work in this plugin.
sub remove_all {
    my $self = shift;
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
146
147
# Removal hook for a single file.  Always returns undef.
sub remove_one {
    my ($self, $file, $oids, $archivedir) = @_;

    # this will never be called for directories (will it??)
    return undef;
}
154
155
# Called at the end of each plugin pass.
# This plugin has no per-pass state to finalise, so the hook does no work.
sub end {
    my ($self) = shift(@_);
}
161
162
163
# Return 1 if this class might recurse using $pluginfo.
# DirectoryPlugin always answers 1.
sub is_recursive {
    my ($self) = @_;
    return 1;
}
170
# Returns the default block expression as a regular-expression string.
# It matches, case-insensitively, names ending in CVS, .svn, Thumbs.db,
# OIDcount or "~".  It is also called with no arguments when the argument
# table is built, so $self may be undefined and must not be used.
sub get_default_block_exp {
    my ($self) = @_;

    my $default_block_exp = '(?i)(CVS|\.svn|Thumbs\.db|OIDcount|~)$';
    return $default_block_exp;
}
176
# Decide whether $dirname is a directory this plugin should descend into.
#
# Returns:
#   undef - $dirname is not a directory
#   0     - it is a directory but must be skipped: it matches block_exp,
#           it is an archives/index directory below an import folder under
#           GSDLHOME, it is too deeply nested, or it looks like a cyclic path
#   1     - the directory is fine to process
#
# A message is printed to the plugin's outhandle for every skip reason
# except the block_exp match.
sub check_directory_path {

    my $self = shift(@_);
    my ($dirname) = @_;

    # callers tell "not a directory" apart from "blocked" with defined(),
    # so this has to be an explicit undef
    return undef unless (-d $dirname);

    return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);

    my $outhandle = $self->{'outhandle'};

    # check to make sure we're not reading the archives or index directory
    my $gsdlhome = quotemeta($ENV{'GSDLHOME'});
    if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
        print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    # The separators are counted directly.  The pattern used previously,
    # m%(/.*){,41}%, was never a "41 or more" test: older perls read {,41}
    # as literal text (so the guard never fired) and perl 5.34 and later
    # read it as {0,41} (so it fired for every path).
    my $max_directory_depth = 40;
    my $directory_depth = ($dirname =~ tr{/}{});
    if ($directory_depth > $max_directory_depth) {
        print $outhandle "DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
        print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
        return 0;
    }

    return 1;
}
209
# Turn the 'shared_fileroot' records gathered in $block_hash into file
# blocks and pending associated-file metadata.  This may be called more
# than once.
#
# Each 'shared_fileroot' entry maps a filename prefix to a record holding
# 'tie_to' (an extension) and 'exts' (a hash keyed by extension).  For every
# record with a defined 'tie_to' and at least one entry in 'exts':
#   * each "<prefix><ext>" is added to $block_hash->{'file_blocks'}
#   * "<prefix><ext>::" is appended to the 'gsdlassocfile_tobe' list kept
#     in $self->{'assocfile_info'} under "<prefix><tie_to>"
# Afterwards 'shared_fileroot' is reset to an empty hash.
sub sort_out_associated_files {

    my ($self, $block_hash) = @_;

    # nothing recorded, nothing to do
    return unless scalar(keys %{$block_hash->{'shared_fileroot'}});

    if (!defined $self->{'assocfile_info'}) {
        $self->{'assocfile_info'} = {};
    }
    my $assoc_info = $self->{'assocfile_info'};
    my $fileroots  = $block_hash->{'shared_fileroot'};

    foreach my $root (keys %$fileroots) {
        my $entry  = $fileroots->{$root};
        my $tie_to = $entry->{'tie_to'};
        my $exts   = $entry->{'exts'};

        # only records that name a primary file and have siblings matter
        next unless ((defined $tie_to) && (scalar(keys %$exts) > 0));

        # set up fileblocks and assocfile_tobe
        my $base_file = "${root}${tie_to}";
        if (!defined $assoc_info->{$base_file}) {
            $assoc_info->{$base_file} = {};
        }
        my $base_info = $assoc_info->{$base_file};

        if (!defined $base_info->{'gsdlassocfile_tobe'}) {
            $base_info->{'gsdlassocfile_tobe'} = [];
        }
        my $pending = $base_info->{'gsdlassocfile_tobe'};

        foreach my $ext (keys %$exts) {
            # block the file
            $block_hash->{'file_blocks'}->{"${root}${ext}"} = 1;

            # set up as an associated file
            print STDERR " $self->{'plugin_type'}: Associating ${root}${ext} with $tie_to version\n";
            my $mime_type = ""; # let system auto detect this
            push(@$pending, "${root}${ext}:${mime_type}:");
        }
    }

    # every record has been dealt with, so start again from empty
    $block_hash->{'shared_fileroot'} = {};

}
251
252
# do block exp OR special blocking ???

# Report whether a file must be skipped.
#
# The filename is first passed through util::upgrade_if_dos_filename.
# The file counts as blocked when it is listed in
# $block_hash->{'file_blocks'} or when it matches this plugin's own
# block_exp.  Returns 1 (and increments 'num_blocked') when blocked,
# otherwise 0.
sub file_is_blocked {
    my ($self, $block_hash, $filename_full_path) = @_;

    $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);
### print STDERR "*** DirectoryPlugin::file_is_blocked $filename_full_path\n";

    # check Directory plugin's own block_exp only when the file has not
    # already been blocked explicitly
    my $block_exp = $self->{'block_exp'};
    my $blocked = defined($block_hash->{'file_blocks'}->{$filename_full_path})
        || ($block_exp ne "" && $filename_full_path =~ /$block_exp/);

    return 0 unless $blocked;

    $self->{'num_blocked'}++;
    return 1; # blocked
}
273
274
275
# Global file scan: walk a directory and hand every entry to
# plugin::file_block_read so that plugins can record what to block.
#
# Arguments:
#   $pluginfo   - plugin list, passed on to plugin::file_block_read
#   $base_dir   - base directory; joined with $file when it contains a
#                 word character
#   $file       - directory to scan
#   $block_hash - hash ref collecting 'all_files', 'metadata_files',
#                 'file_blocks' and 'shared_fileroot'; missing sub-hashes
#                 are created here
#   $metadata   - passed through untouched
#   $gli        - when true, failures are also reported on STDERR in the
#                 <ProcessingError> form
#
# Returns undef or 0 (straight from check_directory_path) when the
# directory is not to be processed, -1 when it cannot be opened, and
# otherwise whatever sort_out_associated_files returns.
sub file_block_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname = $file;
    $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    print $outhandle "Global file scan checking directory: $dirname\n";

    foreach my $section ('all_files', 'metadata_files', 'file_blocks', 'shared_fileroot') {
        $block_hash->{$section} = {} unless defined $block_hash->{$section};
    }

    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # A lexical handle is used because this function is re-entered for
    # sub-directories; a shared bareword handle would be one global.
    my $dir_handle;
    if (!opendir ($dir_handle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = readdir ($dir_handle);
    closedir ($dir_handle);

    # Recur over directory contents.
    foreach my $raw_subfile (@dir) {
        next if ($raw_subfile =~ m/^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2);

        &plugin::file_block_read ($pluginfo, $this_file_base_dir,
                                  $raw_file_subfile,
                                  $block_hash, $metadata, $gli);
    }

    # the result of this call has always been this function's return value
    return $self->sort_out_associated_files($block_hash);
}
337
# We don't do metadata_read: directories carry no metadata of their own,
# so every argument is ignored and undef is always returned.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}
347
348
# return number of files processed, undef if can't process
# Note that $base_dir might be "" and that $file might
# include directories

# This function passes around metadata hash structures.  Metadata hash
# structures are hashes that map from a (scalar) key (the metadata element
# name) to either a scalar metadata value or a reference to an array of
# such values.
#
# Arguments:
#   $pluginfo    - plugin list, passed on to plugin::metadata_read/read
#   $base_dir    - base directory (may be "")
#   $file        - directory to process, relative to $base_dir (may be "")
#   $block_hash  - hash ref consulted for 'file_blocks', 'new_files' and
#                  'reindex_files'
#   $in_metadata - metadata inherited from the caller; copied for each entry
#   $processor   - passed through to the plugins
#   $maxdocs     - stop once this many documents are processed (-1 = no limit)
#   $total_count - documents already processed before this call
#   $gli         - when true, failures are also reported on STDERR in the
#                  <ProcessingError> form
#
# Returns undef or 0 (straight from check_directory_path) when the
# directory is not to be processed, -1 when it cannot be opened, and
# otherwise the sum of the values returned by plugin::read for its entries.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname;
    if ($file eq "") {
        $dirname = $base_dir;
    } else {
        $dirname = $file;
        $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    }

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
        print $outhandle "DirectoryPlugin: metadata passed in: ",
            join(", ", keys %$in_metadata), "\n";
    }

    # Recur over directory contents.
    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # A lexical handle is used because this function is re-entered for
    # sub-directories; a shared bareword handle would be one global.
    my $dir_handle;
    if (!opendir ($dir_handle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = readdir ($dir_handle);
    closedir ($dir_handle);

    # from here on the entries of @dir are held in url-encoded form
    foreach my $entry (@dir) {
        $entry = &unicode::raw_filename_to_url_encoded($entry);
    }

    # Re-order the files in the list so any directories ending with .all are moved to the end
    for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
        if (-d &util::filename_cat($dirname, $dir[$i]) && $dir[$i] =~ /\.all$/) {
            push(@dir, splice(@dir, $i, 1));
        }
    }

    # setup the metadata structures. we do a metadata_read pass to see if there is any additional metadata, then pass it to read

    my $additionalmetadata = 0; # is there extra metadata available?
    my %extrametadata;          # maps from filespec to extra metadata keys
    my %extrametafile;          # maps from filespec to the metadata.xml (or similar) file it came from
    my @extrametakeys;          # keys of %extrametadata in order read

    my $os_dirsep = &util::get_os_dirsep();
    my $dirsep = &util::get_dirsep();
    my $base_dir_regexp = $base_dir;
    $base_dir_regexp =~ s/\//$os_dirsep/g;
    my $local_dirname = $dirname;

    $local_dirname =~ s/^$base_dir_regexp($os_dirsep)*//;
    # if we are in import folder, then local_dirname will be empty
    if ($local_dirname ne "") {
        # look for extra metadata passed down from higher folders
        $local_dirname .= $dirsep;
        if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
            my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
            foreach my $ek (@$extrakeys) {
                my $extrakeys_re = $ek->{'re'};
                my $extrakeys_md = $ek->{'md'};
                my $extrakeys_mf = $ek->{'mf'};
                push(@extrametakeys, $extrakeys_re);
                $extrametadata{$extrakeys_re} = $extrakeys_md;
                $extrametafile{$extrakeys_re} = $extrakeys_mf;
            }
            delete($self->{'subdir_extrametakeys'}->{$local_dirname});
        }
    }

    # apply metadata pass for each of the files in the directory -- ignore
    # maxdocs here
    foreach my $subfile (@dir) {
        next if ($subfile =~ m/^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename = &util::filename_cat($this_file_base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2);
            next;
        }

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2);

        &plugin::metadata_read ($pluginfo, $this_file_base_dir,
                                $raw_file_subfile, $block_hash,
                                \@extrametakeys, \%extrametadata,
                                \%extrametafile,
                                $processor, $gli);
        $additionalmetadata = 1;
    }

    # filter out any extrametakeys that mention subdirectories and store
    # for later use (i.e. when that sub-directory is being processed)
    foreach my $ek (@extrametakeys) {
        my ($subdir_re, $extrakey_dir) = &File::Basename::fileparse($ek);
        $extrakey_dir =~ s/\\\./\./g; # remove RE syntax for .
        $extrakey_dir =~ s/\\\\/\\/g; # remove RE syntax for \

        my $dirsep_re = &util::get_re_dirsep();

        my $ek_non_re = $ek;
        $ek_non_re =~ s/\\\./\./g; # remove RE syntax for .
        $ek_non_re =~ s/\\\\/\\/g; # remove RE syntax for \
        if ($ek_non_re =~ m/$dirsep_re/) { # specifies at least one directory
            my $md = $extrametadata{$ek};
            my $mf = $extrametafile{$ek};

            my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};

            my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };

            # when its looked up, it must be relative to the base dir
            push(@{$subdir_extrametakeys->{"$local_dirname$extrakey_dir"}}, $subdir_rec);
        }
    }

    # import each of the files in the directory
    my $count = 0;
    my $num_files = scalar(@dir);
    for (my $i = 0; $i <= scalar(@dir); $i++) {
        # When every file in the directory has been done, pause for a moment (figuratively!)
        # If the -recheck_directories argument hasn't been provided, stop now (default)
        # Otherwise, re-read the contents of the directory to check for new files
        # Any new files are added to the @dir list and are processed as normal
        # This is necessary when documents to be indexed are specified in bibliographic DBs
        # These files are copied/downloaded and stored in a new folder at import time
        if ($i == $num_files) {
            last unless $self->{'recheck_directories'};

            # Re-read the files in the directory to see if there are any new files
            my $recheck_handle;
            last if (!opendir ($recheck_handle, $dirname));
            my @dirnow = readdir ($recheck_handle);
            closedir ($recheck_handle);
            foreach my $entry (@dirnow) {
                $entry = &unicode::raw_filename_to_url_encoded($entry);
            }

            # We're only interested if there are more files than there were before
            last if (scalar(@dirnow) <= scalar(@dir));

            # Any new files are added to the end of @dir to get processed by the loop
            my %already_listed = map { $_ => 1 } @dir;
            foreach my $subfilenow (@dirnow) {
                push(@dir, $subfilenow) unless $already_listed{$subfilenow};
            }

            # When the new files have been processed, check again
            $num_files = scalar(@dir);
        }

        my $subfile = $dir[$i];
        last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
        next if ($subfile =~ /^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename
            = &util::filename_cat($this_file_base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2);
            next;
        }

        # Follow Windows shortcuts
        if ($raw_subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) {
            require Win32::Shortcut;
            my $shortcut = Win32::Shortcut->new(&util::filename_cat($dirname, $raw_subfile));
            if ($shortcut) {
                # The file to be processed is now the target of the shortcut.
                # Only this entry's variables are redirected: $file has to
                # keep naming the current directory for the entries that
                # follow, and $raw_file_subfile (what is handed to
                # plugin::read below) has to be refreshed or the target
                # would never be reached.  $raw_full_filename still names
                # the .lnk file itself, which is the name the lookups
                # below have always used.
                $this_file_base_dir = "";
                $raw_subfile = $shortcut->Path;
                $raw_file_subfile = $raw_subfile;
            }
        }

        # check for a symlink pointing back to a leading directory
        if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") {
            # readlink gives a "fatal error" on systems that don't implement
            # symlinks. This assumes that the -l test above would fail on those.
            my $linkdest = readlink "$dirname/$raw_subfile";
            if (!defined ($linkdest)) {
                # system error - file not found?
                warn "DirectoryPlugin: symlink problem - $!";
            } else {
                # see if link points to current or a parent directory
                if ($linkdest =~ m@^[\./\\]+$@ ||
                    index($dirname, $linkdest) != -1) {
                    warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
                    next;
                }
            }
        }

        print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);

        # Make a copy of $in_metadata to pass to $raw_subfile
        my $out_metadata = {};
        &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);

        # check the assocfile_info
        if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
            &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
        }

        # $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8

        # Next add metadata read in XML files (if it is supplied)
        if ($additionalmetadata == 1) {
            foreach my $filespec (@extrametakeys) {
                ## use the url-encoded filename to do the filename comparison

                if ($subfile =~ /^$filespec$/) {
                    print $outhandle "File \"$subfile\" matches filespec \"$filespec\"\n"
                        if ($verbosity > 2);
                    my $mdref = $extrametadata{$filespec};
                    my $mfref = $extrametafile{$filespec};

                    # Add the list files where the metadata came from
                    # into the metadata table so we can track this
                    # This mechanism is similar to how gsdlassocfile works

                    my @metafile_pair = ();
                    foreach my $l (keys %$mfref) {
                        my $f = $mfref->{$l};
                        push (@metafile_pair, "$f : $l");
                    }

                    $mdref->{'gsdlmetafile'} = \@metafile_pair;

                    &metadatautil::combine_metadata_structures($out_metadata, $mdref);
                }
            }
        }

        if (defined $self->{'inf_timestamp'}) {
            # Look to see if it's a completely new file

            if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
                # Not a new file, must be an existing file
                # Let's see if it's newer than the last import.pl

                if (! -d $raw_full_filename) {
                    if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
                        # filename has been around for longer than inf_timestamp
                        print $outhandle "**** Skipping $subfile\n" if ($verbosity >3);
                        next;
                    }
                    else {
                        # Remove old folder in archives (might hash to something different)
                        # *** should be doing this on a Del one as well
                        # but leave folder name?? and ensure hashs to
                        # same again??

                        # Then let through as new doc??

                        # mark to doc-oids that rely on it for re-indexing
                    }
                }
            }
        }

        # Recursively read each $subfile
        print $outhandle "DirectoryPlugin recurring: $subfile\n" if ($verbosity > 2);

        $count += &plugin::read ($pluginfo, $this_file_base_dir,
                                 $raw_file_subfile, $block_hash,
                                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}
657
# Statistics hook.  Receives a $stats argument and does nothing with it.
sub compile_stats {
    my $self = shift;
    my ($stats) = @_;
}
662
6631;
Note: See TracBrowser for help on using the repository browser.