source: main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm@ 23484

Last change on this file since 23484 was 23419, checked in by max, 13 years ago

Setting the values to store as block files is now done through an API call to BasePlugin. This way, any uniform requirement (such as putting in both C:\... and c:\... entries for Windows) can be done in one place.
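As an illustration only (the helper name below is hypothetical, not BasePlugin's actual API), the idea is that a blocked filename is registered once and any uniform variants are added centrally:

    sub register_blocked_file {
        my ($block_hash, $filename) = @_;
        $block_hash->{'file_blocks'}->{$filename} = 1;
        # On Windows, also record a lowercase drive-letter form so that both
        # C:\... and c:\... spellings are caught by later lookups.
        if (($ENV{'GSDLOS'} || "") =~ /^windows$/i && $filename =~ m/^([A-Z]):/) {
            $block_hash->{'file_blocks'}->{lc($1) . substr($filename, 1)} = 1;
        }
    }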

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.4 KB
###########################################################################
#
# DirectoryPlugin.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# DirectoryPlugin is a plugin which recurses through directories processing
# each file it finds - which basically means passing it down the plugin
# pipeline
package DirectoryPlugin;

use PrintInfo;
use plugin;
use util;
use metadatautil;
# loaded for the fully-qualified &dbutil:: and &unicode:: calls used below
use dbutil;
use unicode;

use File::Basename;
use strict;
no strict 'refs';
no strict 'subs';

use Encode;

BEGIN {
    @DirectoryPlugin::ISA = ('PrintInfo');
}

my $arguments =
    [ { 'name' => "block_exp",
        'desc' => "{BasePlugin.block_exp}",
        'type' => "regexp",
        'deft' => &get_default_block_exp(),
        'reqd' => "no" },
      # this option has been deprecated. leave it here for now so we can warn people not to use it
      { 'name' => "use_metadata_files",
        'desc' => "{DirectoryPlugin.use_metadata_files}",
        'type' => "flag",
        'reqd' => "no",
        'hiddengli' => "yes" },
      { 'name' => "recheck_directories",
        'desc' => "{DirectoryPlugin.recheck_directories}",
        'type' => "flag",
        'reqd' => "no" } ];

my $options = { 'name' => "DirectoryPlugin",
                'desc' => "{DirectoryPlugin.desc}",
                'abstract' => "no",
                'inherits' => "yes",
                'args' => $arguments };

sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options or initialisations etc
        return bless $self, $class;
    }

    # we have left this option in so we can warn people who are still using it
    if ($self->{'use_metadata_files'}) {
        die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlugin to your plugin list instead!\n";
    }

    $self->{'num_processed'} = 0;
    $self->{'num_not_processed'} = 0;
    $self->{'num_blocked'} = 0;
    $self->{'num_archives'} = 0;

    # metadata specifications destined for subdirectories, filled in by read()
    # and consumed when the matching subdirectory is processed
    $self->{'subdir_extrametakeys'} = {};

    return bless $self, $class;
}

# called once, at the start of processing
sub init {
    my $self = shift (@_);
    my ($verbosity, $outhandle, $failhandle) = @_;

    # verbosity is passed through from the processor
    $self->{'verbosity'} = $verbosity;

    # as are the outhandle and failhandle
    $self->{'outhandle'} = $outhandle if defined $outhandle;
    $self->{'failhandle'} = $failhandle;

}

# called once, after all passes have finished
sub deinit {
    my ($self) = @_;

}

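# If this is an incremental import, begin() records the modification age
# (-M) of the collection's archiveinf-doc database as 'inf_timestamp'.
# read() later uses the presence of this value as the signal that an earlier
# import exists, so unchanged files can be skipped.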
# called at the beginning of each plugin pass (import has one, building has many)
sub begin {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    # Only lookup timestamp info for import.pl, and only if incremental is set
    my $proc_package_name = ref $processor;
    if ($proc_package_name !~ /buildproc$/ && $self->{'incremental'} == 1) {
        # Get the infodbtype value for this collection from the arcinfo object
        my $infodbtype = $processor->getoutputinfo()->{'infodbtype'};
        $infodbtype = "gdbm" if $infodbtype eq "gdbm-txtgz"; # in archives, cannot use txtgz version
        my $output_dir = $processor->getoutputdir();
        my $archives_inf = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $output_dir);

        if ( -e $archives_inf ) {
            $self->{'inf_timestamp'} = -M $archives_inf;
        }
    }
}

sub remove_all {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

}


sub remove_one {
    my $self = shift (@_);
    my ($file, $oids, $archivedir) = @_;
    return undef; # this will never be called for directories (will it??)

}


# called at the end of each plugin pass
sub end {
    my ($self) = shift (@_);

}


# return 1 if this class might recurse using $pluginfo
sub is_recursive {
    my $self = shift (@_);

    return 1;
}

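# Default block expression: skips CVS and .svn directories, Windows
# Thumbs.db files, OIDcount files, and anything ending in "~" (editor
# backup files), matched case-insensitively.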
sub get_default_block_exp {
    my $self = shift (@_);

    return '(?i)(CVS|\.svn|Thumbs\.db|OIDcount|~)$';
}

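# Returns undef if $dirname is not a directory, 0 if it is blocked or looks
# suspicious (a collection's archives or index directory, a path more than
# 40 directories deep, or an import path that appears to loop back on
# itself), and 1 if it is safe to descend into.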
sub check_directory_path {

    my $self = shift(@_);
    my ($dirname) = @_;

    return undef unless (-d $dirname);

    return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);

    my $outhandle = $self->{'outhandle'};

    # check to make sure we're not reading the archives or index directory
    my $gsdlhome = quotemeta($ENV{'GSDLHOME'});
    if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
        print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
        return 0;
    }

    # check the path isn't suspiciously deep - usually a sign of a cyclic path
    if ($dirname =~ m%(/[^/]*){41,}%) {
        print $outhandle "DirectoryPlugin: $dirname is more than 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
        print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
        return 0;
    }

    return 1;
}

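# Walks the 'shared_fileroot' records that plugins accumulated in $block_hash
# during the global file scan: for each filename prefix there is a record
# holding the extension to tie the group to ('tie_to') and the set of sibling
# extensions ('exts'). Every sibling file is blocked from normal processing
# and queued in $self->{'assocfile_info'} as a gsdlassocfile_tobe entry,
# which read() later merges into the tied-to document's metadata.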
# this may be called more than once
sub sort_out_associated_files {

    my $self = shift (@_);
    my ($block_hash) = @_;
    if (!scalar (keys %{$block_hash->{'shared_fileroot'}})) {
        return;
    }

    $self->{'assocfile_info'} = {} unless defined $self->{'assocfile_info'};
    my $metadata = $self->{'assocfile_info'};
    foreach my $prefix (keys %{$block_hash->{'shared_fileroot'}}) {
        my $record = $block_hash->{'shared_fileroot'}->{$prefix};

        my $tie_to = $record->{'tie_to'};
        my $exts = $record->{'exts'};

        if ((defined $tie_to) && (scalar (keys %$exts) > 0)) {
            # set up fileblocks and assocfile_tobe
            my $base_file = "$prefix$tie_to";
            $metadata->{$base_file} = {} unless defined $metadata->{$base_file};
            my $base_file_metadata = $metadata->{$base_file};

            $base_file_metadata->{'gsdlassocfile_tobe'} = [] unless defined $base_file_metadata->{'gsdlassocfile_tobe'};
            my $assoc_tobe = $base_file_metadata->{'gsdlassocfile_tobe'};
            foreach my $e (keys %$exts) {
                # block the file
                $self->block_filename($block_hash,"$prefix$e");
                # set up as an associated file
                print STDERR " $self->{'plugin_type'}: Associating $prefix$e with $tie_to version\n";
                my $mime_type = ""; # let system auto detect this
                push(@$assoc_tobe,"$prefix$e:$mime_type:");

            }
        }
    } # foreach record

    $block_hash->{'shared_fileroot'} = undef;
    $block_hash->{'shared_fileroot'} = {};

}


# do block exp OR special blocking ???

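# A file is blocked if the global file scan recorded it in
# $block_hash->{'file_blocks'} (for example because another plugin claimed it
# as an associated file), or if it matches this plugin's own block_exp.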
sub file_is_blocked {
    my $self = shift (@_);
    my ($block_hash, $filename_full_path) = @_;

    $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);
    ### print STDERR "*** DirectoryPlugin::file_is_blocked $filename_full_path\n";

    if (defined $block_hash->{'file_blocks'}->{$filename_full_path}) {
        $self->{'num_blocked'} ++;
        return 1;
    }
    # check Directory plugin's own block_exp
    if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) {
        $self->{'num_blocked'} ++;
        return 1; # blocked
    }
    return 0;
}


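# Called during the global "file scan" pass, before any documents are read.
# Recursively visits the directory tree so that every plugin gets a chance to
# register blocked files and shared-fileroot (associated file) information in
# $block_hash; the accumulated associated-file records are then folded into
# $self->{'assocfile_info'} by sort_out_associated_files().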
sub file_block_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname = $file;
    $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    print $outhandle "Global file scan checking directory: $dirname\n";

    $block_hash->{'all_files'} = {} unless defined $block_hash->{'all_files'};
    $block_hash->{'metadata_files'} = {} unless defined $block_hash->{'metadata_files'};

    $block_hash->{'file_blocks'} = {} unless defined $block_hash->{'file_blocks'};
    $block_hash->{'shared_fileroot'} = {} unless defined $block_hash->{'shared_fileroot'};

    # Recur over directory contents.
    my (@dir, $subfile);
    #my $count = 0;

    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    if (!opendir (DIR, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    @dir = readdir (DIR);
    closedir (DIR);

    for (my $i = 0; $i < scalar(@dir); $i++) {
        my $raw_subfile = $dir[$i];
        next if ($raw_subfile =~ m/^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2);

        #$count += &plugin::file_block_read ($pluginfo, $this_file_base_dir,

        &plugin::file_block_read ($pluginfo, $this_file_base_dir,
                                  $raw_file_subfile,
                                  $block_hash, $metadata, $gli);

    }
    $self->sort_out_associated_files($block_hash);
    #return $count;
    return 1;

}

# We don't do metadata_read
sub metadata_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}


# return number of files processed, undef if can't process,
# or -1 if the directory could not be read
# Note that $base_dir might be "" and that $file might
# include directories

# This function passes around metadata hash structures. Metadata hash
# structures are hashes that map from a (scalar) key (the metadata element
# name) to either a scalar metadata value or a reference to an array of
# such values.

sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname;
    if ($file eq "") {
        $dirname = $base_dir;
    } else {
        $dirname = $file;
        $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    }

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
        print $outhandle "DirectoryPlugin: metadata passed in: ",
            join(", ", keys %$in_metadata), "\n";
    }


    # Recur over directory contents.
    my (@dir, $subfile);

    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    if (!opendir (DIR, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    @dir = readdir (DIR);
    map { $_ = &unicode::raw_filename_to_url_encoded($_) } @dir;
    closedir (DIR);

    # Re-order the files in the list so any directories ending with .all are moved to the end
    for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
        if (-d &util::filename_cat($dirname, $dir[$i]) && $dir[$i] =~ /\.all$/) {
            push(@dir, splice(@dir, $i, 1));
        }
    }

    # Set up the metadata structures. We do a metadata_read pass to see if
    # there is any additional metadata, then pass it on to read.

    my $additionalmetadata = 0; # is there extra metadata available?
    my %extrametadata;          # maps from filespec to extra metadata keys
    my %extrametafile;          # maps from filespec to the metadata.xml (or similar) file it came from
    my @extrametakeys;          # keys of %extrametadata in order read


    my $os_dirsep = &util::get_os_dirsep();
    my $dirsep = &util::get_dirsep();
    my $base_dir_regexp = $base_dir;
    $base_dir_regexp =~ s/\//$os_dirsep/g;
    my $local_dirname = $dirname;

    $local_dirname =~ s/^$base_dir_regexp($os_dirsep)*//;
    # if we are in the import folder, then local_dirname will be empty
    if ($local_dirname ne "") {
        # look for extra metadata passed down from higher folders
        $local_dirname .= $dirsep;
        if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
            my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
            foreach my $ek (@$extrakeys) {
                my $extrakeys_re = $ek->{'re'};
                my $extrakeys_md = $ek->{'md'};
                my $extrakeys_mf = $ek->{'mf'};
                push(@extrametakeys,$extrakeys_re);
                $extrametadata{$extrakeys_re} = $extrakeys_md;
                $extrametafile{$extrakeys_re} = $extrakeys_mf;
            }
            delete($self->{'subdir_extrametakeys'}->{$local_dirname});
        }
    }
    # apply metadata pass for each of the files in the directory -- ignore
    # maxdocs here
    my $num_files = scalar(@dir);
    for (my $i = 0; $i < scalar(@dir); $i++) {
        my $subfile = $dir[$i];
        next if ($subfile =~ m/^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename = &util::filename_cat($this_file_base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash,$raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2);
            next;
        }

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2);

        &plugin::metadata_read ($pluginfo, $this_file_base_dir,
                                $raw_file_subfile, $block_hash,
                                \@extrametakeys, \%extrametadata,
                                \%extrametafile,
                                $processor, $gli);
        $additionalmetadata = 1;
    }

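    # For example (illustrative values only): a metadata.xml entry whose
    # FileName is "maps/.*\.jpg", read while processing this directory, ends
    # up in @extrametakeys. Because it names a subdirectory it is not applied
    # here; the block below stashes it in $self->{'subdir_extrametakeys'}
    # under "maps/" so that it is applied when that subdirectory is read.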
    # filter out any extrametakeys that mention subdirectories and store
    # for later use (i.e. when that sub-directory is being processed)
    foreach my $ek (@extrametakeys) {
        my ($subdir_re,$extrakey_dir) = &File::Basename::fileparse($ek);
        $extrakey_dir =~ s/\\\./\./g; # remove RE syntax for .
        $extrakey_dir =~ s/\\\\/\\/g; # remove RE syntax for \

        my $dirsep_re = &util::get_re_dirsep();

        my $ek_non_re = $ek;
        $ek_non_re =~ s/\\\./\./g; # remove RE syntax for .
        $ek_non_re =~ s/\\\\/\\/g; # remove RE syntax for \
        if ($ek_non_re =~ m/$dirsep_re/) { # specifies at least one directory
            my $md = $extrametadata{$ek};
            my $mf = $extrametafile{$ek};

            my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};

            my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };

            # when it's looked up, it must be relative to the base dir
            push(@{$subdir_extrametakeys->{"$local_dirname$extrakey_dir"}},$subdir_rec);
            #push(@{$subdir_extrametakeys->{"$extrakey_dir"}},$subdir_rec);
        }
    }

    # import each of the files in the directory
    my $count = 0;
    for (my $i = 0; $i <= scalar(@dir); $i++) {
        # When every file in the directory has been done, pause for a moment (figuratively!)
        # If the -recheck_directories argument hasn't been provided, stop now (default)
        # Otherwise, re-read the contents of the directory to check for new files
        # Any new files are added to the @dir list and are processed as normal
        # This is necessary when documents to be indexed are specified in bibliographic DBs
        # These files are copied/downloaded and stored in a new folder at import time
        if ($i == $num_files) {
            last unless $self->{'recheck_directories'};

            # Re-read the files in the directory to see if there are any new files
            last if (!opendir (DIR, $dirname));
            my @dirnow = readdir (DIR);
            map { $_ = &unicode::raw_filename_to_url_encoded($_) } @dirnow;
            closedir (DIR);

            # We're only interested if there are more files than there were before
            last if (scalar(@dirnow) <= scalar(@dir));

            # Any new files are added to the end of @dir to get processed by the loop
            my $j;
            foreach my $subfilenow (@dirnow) {
                for ($j = 0; $j < $num_files; $j++) {
                    last if ($subfilenow eq $dir[$j]);
                }
                if ($j == $num_files) {
                    # New file
                    push(@dir, $subfilenow);
                }
            }
            # When the new files have been processed, check again
            $num_files = scalar(@dir);
        }

        my $subfile = $dir[$i];
        last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
        next if ($subfile =~ /^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename
            = &util::filename_cat($this_file_base_dir,$raw_file_subfile);

        if ($self->file_is_blocked($block_hash,$raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2);
            next;
        }
        #print STDERR "processing $raw_full_filename\n";

        # Follow Windows shortcuts
        if ($raw_subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) {
            require Win32::Shortcut;
            my $shortcut = new Win32::Shortcut(&util::filename_cat($dirname, $raw_subfile));
            if ($shortcut) {
                # The file to be processed is now the target of the shortcut
                $this_file_base_dir = "";
                $file = "";
                $raw_subfile = $shortcut->Path;
            }
        }

        # check for a symlink pointing back to a leading directory
        if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") {
            # readlink gives a "fatal error" on systems that don't implement
            # symlinks. This assumes that the -l test above would fail on those.
            my $linkdest = readlink "$dirname/$raw_subfile";
            if (!defined ($linkdest)) {
                # system error - file not found?
                warn "DirectoryPlugin: symlink problem - $!";
            } else {
                # see if link points to current or a parent directory
                if ($linkdest =~ m@^[\./\\]+$@ ||
                    index($dirname, $linkdest) != -1) {
                    warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
                    next;
                }
            }
        }

        print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);

        # Make a copy of $in_metadata to pass to $raw_subfile
        my $out_metadata = {};
        &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);

        # check the assocfile_info
        if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
            &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
        }

        # $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8

        # Next add metadata read in XML files (if it is supplied)
        if ($additionalmetadata == 1) {
            foreach my $filespec (@extrametakeys) {
                ## use the url-encoded filename to do the filename comparison

                if ($subfile =~ /^$filespec$/) {
                    print $outhandle "File \"$subfile\" matches filespec \"$filespec\"\n"
                        if ($verbosity > 2);
                    my $mdref = $extrametadata{$filespec};
                    my $mfref = $extrametafile{$filespec};

                    # Add the list of files where the metadata came from
                    # into the metadata table so we can track this
                    # This mechanism is similar to how gsdlassocfile works

                    my @metafile_pair = ();
                    foreach my $l (keys %$mfref) {
                        my $f = $mfref->{$l};
                        push (@metafile_pair, "$f : $l");
                    }

                    $mdref->{'gsdlmetafile'} = \@metafile_pair;

                    &metadatautil::combine_metadata_structures($out_metadata, $mdref);
                }
            }
        }

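        # Incremental import: 'inf_timestamp' being set (see begin()) means an
        # earlier import left an archive info database behind. Files recorded
        # in $block_hash as neither new ('new_files') nor needing re-indexing
        # ('reindex_files') are skipped below.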
        if (defined $self->{'inf_timestamp'}) {
            # Look to see if it's a completely new file

            if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
                # Not a new file, must be an existing file
                # Let's see if it's newer than the last import.pl


                if (! -d $raw_full_filename) {
                    if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
                        # filename has been around for longer than inf_timestamp
                        print $outhandle "**** Skipping $subfile\n" if ($verbosity > 3);
                        next;
                    }
                    else {
                        # Remove old folder in archives (might hash to something different)
                        # *** should be doing this on a Del one as well
                        # but leave folder name?? and ensure it hashes to
                        # the same again??

                        # Then let through as new doc??

                        # mark the doc-oids that rely on it for re-indexing
                    }
                }
            }
        }

        # Recursively read each $subfile
        print $outhandle "DirectoryPlugin recurring: $subfile\n" if ($verbosity > 2);

        $count += &plugin::read ($pluginfo, $this_file_base_dir,
                                 $raw_file_subfile, $block_hash,
                                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}

sub compile_stats {
    my $self = shift(@_);
    my ($stats) = @_;
}

1;