source: main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm@ 24932

Last change on this file since 24932 was 24932, checked in by ak19, 12 years ago

Diego noticed how the metadata in a toplevel metadata.xml, which specifies metadata for files in import's subfolders, does not get attached to the files on Windows, while this works on Linux. It had to do with the difference between the file slashes used on the OS versus the URL-type fileslashes used in the metadata.xml Diego had constructed. This has now been fixed and Dr Bainbridge came up with a tidier solution of a new method in util.pm that would handle the details.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.7 KB
Line 
1###########################################################################
2#
3# DirectoryPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# DirectoryPlugin is a plugin which recurses through directories processing
27# each file it finds - which basically means passing it down the plugin
28# pipeline
29
30package DirectoryPlugin;
31
32use PrintInfo;
33use plugin;
34use util;
35use metadatautil;
36
37use File::Basename;
38use strict;
39no strict 'refs';
40no strict 'subs';
41
42use Encode;
43
# Set up inheritance at compile time: PrintInfo is the sole parent class
# of DirectoryPlugin.
BEGIN {
    @DirectoryPlugin::ISA = qw(PrintInfo);
}
47
# Descriptions of the command-line arguments this plugin accepts. Each entry
# is a hash of attributes; 'desc' values are keys of the form
# {Plugin.option} rather than literal text.
my $arguments = [
    {
        name => 'block_exp',
        desc => '{BasePlugin.block_exp}',
        type => 'regexp',
        deft => get_default_block_exp(),
        reqd => 'no',
    },

    # this option has been deprecated. leave it here for now so we can
    # warn people not to use it
    {
        name      => 'use_metadata_files',
        desc      => '{DirectoryPlugin.use_metadata_files}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name => 'recheck_directories',
        desc => '{DirectoryPlugin.recheck_directories}',
        type => 'flag',
        reqd => 'no',
    },
];

# Plugin-level option record; ties the plugin name to the argument list above.
my $options = {
    name     => 'DirectoryPlugin',
    desc     => '{DirectoryPlugin.desc}',
    abstract => 'no',
    inherits => 'yes',
    args     => $arguments,
};
70
# Constructor.
#
# Parameters:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to PrintInfo's constructor
#   $hashArgOptLists - hash ref; this plugin's argument list is appended to
#                      its "ArgList" entry and its option record to "OptList"
#
# Returns the object built by PrintInfo's constructor, re-blessed into $class.
# Dies if the deprecated -use_metadata_files option was supplied.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    # when only plugin information is wanted, don't worry about any
    # options or initialisations etc
    return bless $self, $class if ($self->{'info_only'});

    # we have left this option in so we can warn people who are still using it
    if ($self->{'use_metadata_files'}) {
        die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
    }

    # counters all start at zero
    foreach my $counter (qw(num_processed num_not_processed num_blocked num_archives)) {
        $self->{$counter} = 0;
    }

    # metadata specifications waiting for a sub-directory to be visited,
    # keyed by that sub-directory's relative path
    $self->{'subdir_extrametakeys'} = {};

    return bless $self, $class;
}
100
# Called once, at the start of processing.
#
# Stores the verbosity level and the fail handle on the plugin, and the
# output handle too when one is supplied (an undefined $outhandle leaves
# any existing 'outhandle' untouched).
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    # verbosity is passed through from the processor
    $self->{'verbosity'} = $verbosity;

    # as are the outhandle and failhandle
    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }
    $self->{'failhandle'} = $failhandle;
}
114
# Called once, after all passes have finished.
# This plugin has nothing to tear down, so the body only unpacks its
# invocant.
sub deinit {
    my ($plugin) = @_;
}
120
# Called at the beginning of each plugin pass (import has one, building has
# many).
#
# When $processor's class name does not end in "buildproc" and the plugin's
# 'incremental' setting is 1, records in $self->{'inf_timestamp'} the age
# (as reported by -M) of the "archiveinf-doc" info database, provided that
# file exists.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    # Only lookup timestamp info for import.pl, and only if incremental is set
    my $processor_class = ref($processor);
    if ($processor_class !~ /buildproc$/ && $self->{'incremental'} == 1) {
        # Get the infodbtype value for this collection from the arcinfo object
        my $infodbtype = $processor->getoutputinfo()->{'infodbtype'};
        if ($infodbtype eq "gdbm-txtgz") {
            # in archives, cannot use txtgz version
            $infodbtype = "gdbm";
        }

        my $archives_inf = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $processor->getoutputdir());
        if (-e $archives_inf) {
            $self->{'inf_timestamp'} = -M $archives_inf;
        }
    }
}
140
# Part of the plugin interface; this plugin takes no action here and only
# unpacks its arguments.
sub remove_all {
    my $plugin = shift(@_);
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
145
146
# Part of the plugin interface; always returns undef.
sub remove_one {
    my ($self, $file, $oids, $archivedir) = @_;

    # this will never be called for directories (will it??)
    return undef;
}
153
154
# Called at the end of each plugin pass.
# Nothing to do for this plugin; the body only unpacks its invocant.
sub end {
    my ($plugin) = shift(@_);
}
160
161
162
# Return 1 if this class might recurse using $pluginfo.
# DirectoryPlugin always does.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
169
# Default value for the block_exp option: a case-insensitive pattern matching
# names that end in CVS, .svn, Thumbs.db, OIDcount or "~".
sub get_default_block_exp {
    my ($self) = @_;

    return q{(?i)(CVS|\.svn|Thumbs\.db|OIDcount|~)$};
}
175
# Decide whether $dirname is a directory this plugin should descend into.
#
# Returns:
#   undef - $dirname is not a directory
#   0     - $dirname is a directory but must be skipped (it matches the
#           plugin's block_exp, looks like the archives/index directory of a
#           collection under GSDLHOME, or looks like a cyclic path)
#   1     - $dirname is fine to process
#
# A message is printed to the plugin's outhandle for every skip other than a
# block_exp match.
sub check_directory_path {

    my $self = shift(@_);
    my ($dirname) = @_;

    return undef unless (-d $dirname);

    return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);

    my $outhandle = $self->{'outhandle'};

    # check to make sure we're not reading the archives or index directory
    my $gsdlhome = quotemeta($ENV{'GSDLHOME'});
    if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
        print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    # The nesting depth is measured by counting '/' separators. (The previous
    # pattern used the quantifier "{,41}", which older perls treat as literal
    # text and newer perls treat as "0 to 41 times", so the test either never
    # fired or always fired.)
    my $max_depth = 40;
    my $depth = ($dirname =~ tr{/}{});
    if ($depth > $max_depth) {
        print $outhandle "DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    # (the same sub-path appearing after two successive "import/" components)
    if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
        print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
        return 0;
    }

    return 1;
}
208
# Turn the 'shared_fileroot' records collected in $block_hash into blocked
# files plus pending associated-file metadata. This may be called more than
# once.
#
# For every record that has a defined 'tie_to' and at least one entry in
# 'exts', each "$prefix$ext" file is blocked via util::block_filename and
# queued under $self->{'assocfile_info'}{"$prefix$tie_to"}{'gsdlassocfile_tobe'}.
# $block_hash->{'shared_fileroot'} is reset to an empty hash afterwards.
sub sort_out_associated_files {
    my ($self, $block_hash) = @_;

    # nothing has been collected, so there is nothing to sort out
    return unless scalar(keys %{$block_hash->{'shared_fileroot'}});

    if (!defined $self->{'assocfile_info'}) {
        $self->{'assocfile_info'} = {};
    }
    my $assoc_info = $self->{'assocfile_info'};

    foreach my $fileroot (keys %{$block_hash->{'shared_fileroot'}}) {
        my $entry = $block_hash->{'shared_fileroot'}->{$fileroot};

        my $tie_to = $entry->{'tie_to'};
        my $exts = $entry->{'exts'};

        # need both a file to tie to and something to tie to it
        next unless defined $tie_to;
        next unless scalar(keys %$exts) > 0;

        # set up fileblocks and assocfile_tobe
        my $base_file = "$fileroot$tie_to";
        if (!defined $assoc_info->{$base_file}) {
            $assoc_info->{$base_file} = {};
        }
        my $base_file_info = $assoc_info->{$base_file};

        if (!defined $base_file_info->{'gsdlassocfile_tobe'}) {
            $base_file_info->{'gsdlassocfile_tobe'} = [];
        }
        my $pending = $base_file_info->{'gsdlassocfile_tobe'};

        foreach my $ext (keys %$exts) {
            my $assoc_path = "$fileroot$ext";

            # block the file
            &util::block_filename($block_hash, $assoc_path);

            # set up as an associated file
            print STDERR " $self->{'plugin_type'}: Associating $assoc_path with $tie_to version\n";
            my $mime_type = ""; # let system auto detect this
            push(@$pending, $assoc_path . ":" . $mime_type . ":");
        }
    } # foreach record

    # start again with a clean slate for the next call
    $block_hash->{'shared_fileroot'} = {};
}
250
251
# Report whether $filename_full_path must be skipped.
#
# A file is blocked when it has an entry in $block_hash->{'file_blocks'}
# (looked up by its lowercased path when GSDLOS is "windows") or when it
# matches this plugin's own block_exp. Every blocked file increments
# $self->{'num_blocked'}.
#
# Returns 1 if blocked, 0 otherwise.
sub file_is_blocked {
    my ($self, $block_hash, $filename_full_path) = @_;

    $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);

    # on windows, all block paths are lowercased.
    my $lookup_key = ($ENV{'GSDLOS'} =~ m/^windows$/)
        ? lc($filename_full_path)
        : $filename_full_path;

    my $blocked = 0;
    if (defined($block_hash->{'file_blocks'}->{$lookup_key})) {
        $blocked = 1;
    }
    elsif ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) {
        # check Directory plugin's own block_exp
        $blocked = 1;
    }

    $self->{'num_blocked'}++ if $blocked;
    return $blocked;
}
282
283
284
# Global file-blocking scan of one directory.
#
# Works out the directory named by $base_dir and $file, checks it with
# check_directory_path, makes sure the bookkeeping tables exist in
# $block_hash, and hands every entry (other than "." and "..") to
# plugin::file_block_read. Finishes by calling sort_out_associated_files.
#
# Returns:
#   undef or 0 - whatever check_directory_path returned when the directory
#                was rejected
#   -1         - the directory could not be opened
#   1          - the directory was scanned
sub file_block_read {
    my $self = shift(@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname = $file;
    $dirname = &util::filename_cat($base_dir, $file) if $base_dir =~ /\w/;

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    print $outhandle "Global file scan checking directory: $dirname\n";

    # make sure every table used during the scan exists
    foreach my $table ('all_files', 'metadata_files', 'file_blocks', 'shared_fileroot') {
        $block_hash->{$table} = {} unless defined $block_hash->{$table};
    }

    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # (a lexical handle is used because this routine is re-entered through
    # the plugin pipeline for sub-directories)
    my $dh;
    if (!opendir($dh, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = readdir($dh);
    closedir($dh);

    # Recur over directory contents.
    foreach my $raw_subfile (@dir) {
        next if ($raw_subfile =~ m/\A\.\.?\z/);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2);

        &plugin::file_block_read($pluginfo, $base_dir,
                                 $raw_file_subfile,
                                 $block_hash, $metadata, $gli);
    }

    $self->sort_out_associated_files($block_hash);
    return 1;
}
347
# We don't do metadata_read: directories carry no metadata of their own,
# so this always returns undef.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}
357
358
# Process one directory: run a metadata_read pass over its entries, then
# pass each entry down the plugin pipeline via plugin::read.
#
# Returns the number of files processed (the sum of the plugin::read
# results), -1 if the directory could not be opened, or the undef/0 result
# of check_directory_path if the directory is not to be processed.
# Note that $base_dir might be "" and that $file might include directories.
#
# This function passes around metadata hash structures. Metadata hash
# structures are hashes that map from a (scalar) key (the metadata element
# name) to either a scalar metadata value or a reference to an array of
# such values.
#
# Parameters:
#   $pluginfo    - plugin pipeline, handed on to plugin::metadata_read/read
#   $base_dir    - base directory (may be "")
#   $file        - directory path, joined to $base_dir when that contains a
#                  word character; "" means $base_dir itself
#   $block_hash  - blocking tables consulted through file_is_blocked, plus
#                  'new_files'/'reindex_files' for incremental imports
#   $in_metadata - metadata hash structure inherited by every entry
#   $processor   - handed on unchanged
#   $maxdocs     - stop once $total_count plus the local count reaches this
#                  (-1 means no limit)
#   $total_count - number of documents processed before this call
#   $gli         - when true, errors are also reported as XML on STDERR
sub read {
    my $self = shift(@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname;
    if ($file eq "") {
        $dirname = $base_dir;
    }
    else {
        $dirname = $file;
        $dirname = &util::filename_cat($base_dir, $file) if $base_dir =~ /\w/;
    }

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
        print $outhandle "DirectoryPlugin: metadata passed in: ",
            join(", ", keys %$in_metadata), "\n";
    }

    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # (a lexical handle is used because this routine is re-entered through
    # the plugin pipeline for sub-directories)
    my $dh;
    if (!opendir($dh, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = readdir($dh);
    closedir($dh);

    # from here on @dir holds url-encoded names
    foreach my $entry (@dir) {
        $entry = &unicode::raw_filename_to_url_encoded($entry);
    }

    # Re-order the files in the list so any directories ending with .all are
    # moved to the end. The directory test must be made on the raw (on-disk)
    # name, not on the url-encoded one held in @dir.
    for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
        next unless ($dir[$i] =~ /\.all$/);
        my $raw_name = &unicode::url_encoded_to_raw_filename($dir[$i]);
        if (-d &util::filename_cat($dirname, $raw_name)) {
            push(@dir, splice(@dir, $i, 1));
        }
    }

    # setup the metadata structures. we do a metadata_read pass to see if
    # there is any additional metadata, then pass it to read

    my $additionalmetadata = 0; # is there extra metadata available?
    my %extrametadata;          # maps from filespec to extra metadata keys
    my %extrametafile;          # maps from filespec to the metadata.xml (or similar) file it came from
    my @extrametakeys;          # keys of %extrametadata in order read

    # Want to get relative path of local_dirname within the base_directory
    # but with URL style slashes.
    my $local_dirname = &util::filename_within_directory_url_format($dirname, $base_dir);

    # if we are in import folder, then local_dirname will be empty
    if ($local_dirname ne "") {
        # look for extra metadata passed down from higher folders
        $local_dirname .= "/"; # closing slash must be URL type slash also and not $dirsep;
        if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
            my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
            foreach my $ek (@$extrakeys) {
                my $extrakeys_re = $ek->{'re'};
                my $extrakeys_md = $ek->{'md'};
                my $extrakeys_mf = $ek->{'mf'};
                push(@extrametakeys, $extrakeys_re);
                $extrametadata{$extrakeys_re} = $extrakeys_md;
                $extrametafile{$extrakeys_re} = $extrakeys_mf;
            }
            delete($self->{'subdir_extrametakeys'}->{$local_dirname});
        }
    }

    # apply metadata pass for each of the files in the directory -- ignore
    # maxdocs here
    my $num_files = scalar(@dir);
    foreach my $subfile (@dir) {
        next if ($subfile =~ m/\A\.\.?\z/);

        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename = &util::filename_cat($base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2);
            next;
        }

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2);

        &plugin::metadata_read($pluginfo, $base_dir,
                               $raw_file_subfile, $block_hash,
                               \@extrametakeys, \%extrametadata,
                               \%extrametafile,
                               $processor, $gli);
        $additionalmetadata = 1;
    }

    # filter out any extrametakeys that mention subdirectories and store
    # for later use (i.e. when that sub-directory is being processed)
    my $dirsep_re = &util::get_re_dirsep();
    foreach my $ek (@extrametakeys) {
        my ($subdir_re, $extrakey_dir) = &File::Basename::fileparse($ek);
        $extrakey_dir = &util::unregex_filename($extrakey_dir);

        my $ek_non_re = &util::unregex_filename($ek);
        if ($ek_non_re =~ m/$dirsep_re/) { # specifies at least one directory
            my $md = $extrametadata{$ek};
            my $mf = $extrametafile{$ek};

            my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};

            my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };

            # when its looked up, it must be relative to the base dir
            push(@{$subdir_extrametakeys->{"$local_dirname$extrakey_dir"}}, $subdir_rec);
        }
    }

    # import each of the files in the directory
    my $count = 0;
    for (my $i = 0; $i <= scalar(@dir); $i++) {
        # When every file in the directory has been done, pause for a moment (figuratively!)
        # If the -recheck_directories argument hasn't been provided, stop now (default)
        # Otherwise, re-read the contents of the directory to check for new files
        # Any new files are added to the @dir list and are processed as normal
        # This is necessary when documents to be indexed are specified in bibliographic DBs
        # These files are copied/downloaded and stored in a new folder at import time
        if ($i == $num_files) {
            last unless $self->{'recheck_directories'};

            # Re-read the files in the directory to see if there are any new files
            my $recheck_dh;
            last if (!opendir($recheck_dh, $dirname));
            my @dirnow = readdir($recheck_dh);
            closedir($recheck_dh);
            foreach my $entry (@dirnow) {
                $entry = &unicode::raw_filename_to_url_encoded($entry);
            }

            # We're only interested if there are more files than there were before
            last if (scalar(@dirnow) <= scalar(@dir));

            # Any new files are added to the end of @dir to get processed by the loop
            my %already_listed = map { $_ => 1 } @dir;
            push(@dir, grep { !$already_listed{$_} } @dirnow);

            # When the new files have been processed, check again
            $num_files = scalar(@dir);

            # nothing was actually added, so there is nothing left to do
            last if ($i >= scalar(@dir));
        }

        my $subfile = $dir[$i];
        last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
        next if ($subfile =~ /\A\.\.?\z/);

        my $this_file_base_dir = $base_dir;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename
            = &util::filename_cat($this_file_base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2);
            next;
        }

        # Follow Windows shortcuts
        if ($raw_subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) {
            require Win32::Shortcut;
            my $shortcut = Win32::Shortcut->new(&util::filename_cat($dirname, $raw_subfile));
            if ($shortcut) {
                # The file to be processed is now the target of the shortcut.
                # Only this entry's paths are redirected; $file is left alone
                # so the remaining entries of this directory still resolve
                # relative to it.
                $this_file_base_dir = "";
                $raw_subfile = $shortcut->Path;
                $raw_file_subfile = $raw_subfile;
                $raw_full_filename = $raw_subfile;
            }
        }

        # check for a symlink pointing back to a leading directory
        if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") {
            # readlink gives a "fatal error" on systems that don't implement
            # symlinks. This assumes the the -l test above would fail on those.
            my $linkdest = readlink "$dirname/$raw_subfile";
            if (!defined($linkdest)) {
                # system error - file not found?
                warn "DirectoryPlugin: symlink problem - $!";
            }
            else {
                # see if link points to current or a parent directory
                if ($linkdest =~ m@^[\./\\]+$@ ||
                    index($dirname, $linkdest) != -1) {
                    warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
                    next;
                }
            }
        }

        print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);

        # Make a copy of $in_metadata to pass to $raw_subfile
        my $out_metadata = {};
        &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);

        # check the assocfile_info
        if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
            &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
        }

        # $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8

        # Next add metadata read in XML files (if it is supplied)
        if ($additionalmetadata == 1) {
            foreach my $filespec (@extrametakeys) {
                ## use the url-encoded filename to do the filename comparison

                if ($subfile =~ /^$filespec$/) {
                    print $outhandle "File \"$subfile\" matches filespec \"$filespec\"\n"
                        if ($verbosity > 2);
                    my $mdref = $extrametadata{$filespec};
                    my $mfref = $extrametafile{$filespec};

                    # Add the list files where the metadata came from
                    # into the metadata table so we can track this
                    # This mechanism is similar to how gsdlassocfile works

                    my @metafile_pair = ();
                    foreach my $l (keys %$mfref) {
                        my $f = $mfref->{$l};
                        push(@metafile_pair, "$f : $l");
                    }

                    $mdref->{'gsdlmetafile'} = \@metafile_pair;

                    &metadatautil::combine_metadata_structures($out_metadata, $mdref);
                }
            }
        }

        if (defined $self->{'inf_timestamp'}) {
            # Look to see if it's a completely new file

            if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
                # Not a new file, must be an existing file
                # Let' see if it's newer than the last import.pl

                if (! -d $raw_full_filename) {
                    if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
                        # filename has been around for longer than inf_timestamp
                        print $outhandle "**** Skipping $subfile\n" if ($verbosity >3);
                        next;
                    }
                    else {
                        # Remove old folder in archives (might hash to something different)
                        # *** should be doing this on a Del one as well
                        # but leave folder name?? and ensure hashs to
                        # same again??

                        # Then let through as new doc??

                        # mark to doc-oids that rely on it for re-indexing
                    }
                }
            }
        }

        # Recursively read each $subfile
        print $outhandle "DirectoryPlugin recurring: $subfile\n" if ($verbosity > 2);

        $count += &plugin::read($pluginfo, $this_file_base_dir,
                                $raw_file_subfile, $block_hash,
                                $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}
666
# Part of the plugin interface; this plugin contributes nothing to $stats
# and only unpacks its arguments.
sub compile_stats {
    my $plugin = shift(@_);
    my ($stats) = @_;
}
671
6721;
Note: See TracBrowser for help on using the repository browser.