source: main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm@ 23544

Last change on this file since 23544 was 23544, checked in by kjdon, 13 years ago

On Windows, if there is a .JPG cover image, then a -e xxx.jpg test succeeds, but if that (lowercase) filename is put into the block list, it will not match the .JPG name later on, and the file will not be blocked. Solution: lowercase the entire file path before adding it to, or checking it against, the block_hash->file_blocks list, but only on Windows. The alternative A: / a: options for drive letters are now no longer needed.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.7 KB
Line 
1###########################################################################
2#
3# DirectoryPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# DirectoryPlugin is a plugin which recurses through directories processing
27# each file it finds - which basically means passing it down the plugin
28# pipeline
29
30package DirectoryPlugin;
31
32use PrintInfo;
33use plugin;
34use util;
35use metadatautil;
36
37use File::Basename;
38use strict;
39no strict 'refs';
40no strict 'subs';
41
42use Encode;
43
# DirectoryPlugin derives from PrintInfo.
BEGIN {
    @DirectoryPlugin::ISA = ('PrintInfo');
}

# Command-line arguments understood by this plugin.  The 'desc' values are
# written as "{...}" keys rather than as literal descriptions.
my $arguments = [
    {
        'name' => 'block_exp',
        'desc' => '{BasePlugin.block_exp}',
        'type' => 'regexp',
        'deft' => get_default_block_exp(),
        'reqd' => 'no',
    },
    # this option has been deprecated. leave it here for now so we can
    # warn people not to use it
    {
        'name'      => 'use_metadata_files',
        'desc'      => '{DirectoryPlugin.use_metadata_files}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name' => 'recheck_directories',
        'desc' => '{DirectoryPlugin.recheck_directories}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];

# Plugin-level option record; new() pushes it onto the shared "OptList".
my $options = {
    'name'     => 'DirectoryPlugin',
    'desc'     => '{DirectoryPlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
70
# Constructor.
#
# Arguments:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed straight through to PrintInfo's constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugin's argument and option tables
#
# Returns the new object blessed into $class.  Dies if the deprecated
# -use_metadata_files option was supplied.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

    # Register this class and its argument/option tables before the base
    # class constructor runs.
    push(@$pluginlist, $class);
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    # When only plugin information is wanted, skip option checks and the
    # initialisation below.
    return bless $self, $class if ($self->{'info_only'});

    # we have left this option in so we can warn people who are still using it
    if ($self->{'use_metadata_files'}) {
        die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
    }

    # Counters all start at zero.
    foreach my $counter ('num_processed', 'num_not_processed', 'num_blocked', 'num_archives') {
        $self->{$counter} = 0;
    }

    # Metadata destined for sub-directories, filled in and consumed by read().
    $self->{'subdir_extrametakeys'} = {};

    return bless $self, $class;
}
100
101# called once, at the start of processing
# Called once, at the start of processing.
#
# Stores the verbosity and the fail handle on the object.  The out handle is
# only replaced when one is actually supplied, so an existing handle survives
# an undefined $outhandle.
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    # verbosity is passed through from the processor
    $self->{'verbosity'} = $verbosity;

    # as are the outhandle and failhandle
    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }
    $self->{'failhandle'} = $failhandle;
}
114
115# called once, after all passes have finished
# Called once, after all passes have finished.
# DirectoryPlugin has nothing to tear down, so this does no work.
sub deinit {
    my ($plugin) = @_;
}
120
121# called at the beginning of each plugin pass (import has one, building has many)
# Called at the beginning of each plugin pass (import has one, building has
# many).
#
# When the processor's class name does not end in "buildproc" and the
# object's 'incremental' setting is 1, this looks up the "archiveinf-doc"
# info database in the processor's output directory and, if that file
# exists, records its age (-M) in $self->{'inf_timestamp'}.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    # Only lookup timestamp info for import.pl, and only if incremental is set
    my $is_build_pass = (ref($processor) =~ /buildproc$/);
    if (!$is_build_pass && $self->{'incremental'} == 1) {
        # Get the infodbtype value for this collection from the arcinfo object
        my $infodbtype = $processor->getoutputinfo()->{'infodbtype'};
        if ($infodbtype eq "gdbm-txtgz") {
            # in archives, cannot use txtgz version
            $infodbtype = "gdbm";
        }

        my $archives_inf = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $processor->getoutputdir());
        $self->{'inf_timestamp'} = -M $archives_inf if (-e $archives_inf);
    }
}
140
# Placeholder: unpacks its arguments and does nothing else.
sub remove_all {
    my $plugin = shift(@_);
    my ($plugin_info, $import_dir, $doc_processor, $doc_limit) = @_;
}
146
147
# Always returns undef; the arguments are unpacked but not used.
sub remove_one {
    my ($plugin, $filename, $doc_ids, $archive_dir) = @_;

    # this will never be called for directories (will it??)
    return undef;
}
154
155
156# called at the end of each plugin pass
# Called at the end of each plugin pass.  Nothing to do for directories.
sub end {
    my ($plugin) = shift(@_);
}
161
162
163
164# return 1 if this class might recurse using $pluginfo
# Return 1 if this class might recurse using $pluginfo.
# A directory plugin always may, so the answer is unconditionally 1.
sub is_recursive {
    my ($plugin) = @_;

    return 1;
}
170
# Default value for the block_exp argument: a case-insensitive pattern
# matching names that end in CVS, .svn, Thumbs.db, OIDcount or "~".
# Takes no meaningful arguments; it is also called as a plain function when
# the argument table is built.
sub get_default_block_exp {
    my $default_block_exp = '(?i)(CVS|\.svn|Thumbs\.db|OIDcount|~)$';

    return $default_block_exp;
}
176
# Decide whether $dirname is a directory this plugin should descend into.
#
# Returns:
#   undef - $dirname is not a directory
#   0     - it is a directory but must be skipped: it matches block_exp,
#           looks like the archives/index directory of a collection under
#           GSDLHOME, is more than 40 directories deep, or repeats an
#           "import/<x>/import/<x>" sequence
#   1     - the directory is acceptable
#
# Every skip except the block_exp one is reported on $self->{'outhandle'}.
sub check_directory_path {
    my $self = shift(@_);
    my ($dirname) = @_;

    return undef unless (-d $dirname);

    return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);

    my $outhandle = $self->{'outhandle'};

    # check to make sure we're not reading the archives or index directory
    my $gsdlhome = quotemeta($ENV{'GSDLHOME'});
    if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
        print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    # Count the separators directly.  The previous regex quantifier "{,41}"
    # is literal text before Perl 5.34 (so the check never fired) and a
    # 0..41 quantifier from 5.34 on (so it fired for every path).
    my $max_depth = 40;
    my $depth = ($dirname =~ tr{/}{});
    if ($depth > $max_depth) {
        print $outhandle "DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
        print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
        return 0;
    }

    return 1;
}
209
210# this may be called more than once
# Turn the 'shared_fileroot' records collected in $block_hash into blocked
# files plus "associated file" metadata.  This may be called more than once.
#
# Each record is keyed by a filename prefix and carries 'tie_to' (the
# extension of the file the others are tied to) and 'exts' (a hash of the
# other extensions).  For a record with a defined 'tie_to' and at least one
# entry in 'exts', every "$prefix$ext" file is blocked and queued in
# $self->{'assocfile_info'}->{"$prefix$tie_to"}->{'gsdlassocfile_tobe'}.
# The 'shared_fileroot' table is emptied afterwards.
#
# NOTE(review): block_filename() is not defined in this file - confirm it is
# reachable through the inheritance chain.
sub sort_out_associated_files {
    my ($self, $block_hash) = @_;

    # Nothing collected, nothing to do.
    return if (scalar(keys %{$block_hash->{'shared_fileroot'}}) == 0);

    if (!defined $self->{'assocfile_info'}) {
        $self->{'assocfile_info'} = {};
    }
    my $assoc_info = $self->{'assocfile_info'};
    my $shared_roots = $block_hash->{'shared_fileroot'};

    foreach my $prefix (keys %$shared_roots) {
        my $record = $shared_roots->{$prefix};
        my $tie_to = $record->{'tie_to'};
        my $exts = $record->{'exts'};

        # Only records with a base file and at least one other extension
        # produce associations.
        next unless ((defined $tie_to) && (scalar (keys %$exts) > 0));

        # set up fileblocks and assocfile_tobe
        my $base_file = "$prefix$tie_to";
        if (!defined $assoc_info->{$base_file}) {
            $assoc_info->{$base_file} = {};
        }
        my $base_info = $assoc_info->{$base_file};
        if (!defined $base_info->{'gsdlassocfile_tobe'}) {
            $base_info->{'gsdlassocfile_tobe'} = [];
        }
        my $pending = $base_info->{'gsdlassocfile_tobe'};

        foreach my $ext (keys %$exts) {
            my $assoc_file = "$prefix$ext";

            # block the file
            $self->block_filename($block_hash, $assoc_file);

            # set up as an associated file
            print STDERR " $self->{'plugin_type'}: Associating $prefix$ext with $tie_to version\n";
            my $mime_type = ""; # let system auto detect this
            push(@$pending, "$assoc_file:$mime_type:");
        }
    }

    # Start afresh for the next directory.
    $block_hash->{'shared_fileroot'} = {};
}
251
252
253# do block exp OR special blocking ???
254
# Report whether a file must be skipped.
#
# A file is blocked when its full path is a key of
# $block_hash->{'file_blocks'} (looked up in lowercase when GSDLOS is
# "windows", because all block paths are lowercased there), or when it
# matches this plugin's own non-empty block_exp.
#
# Returns 1 and increments $self->{'num_blocked'} when blocked, 0 otherwise.
sub file_is_blocked {
    my ($self, $block_hash, $filename_full_path) = @_;

    $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);

    # on windows, all block paths are lowercased.
    my $lookup_key = ($ENV{'GSDLOS'} =~ m/^windows$/)
        ? lc ($filename_full_path)
        : $filename_full_path;

    my $blocked = defined $block_hash->{'file_blocks'}->{$lookup_key};
    if (!$blocked) {
        # check Directory plugin's own block_exp
        my $block_exp = $self->{'block_exp'};
        $blocked = ($block_exp ne "" && $filename_full_path =~ /$block_exp/);
    }

    return 0 unless $blocked;

    $self->{'num_blocked'} ++;
    return 1;
}
283
284
285
# Blocking pass over a directory: hand every entry of the directory to
# &plugin::file_block_read, then resolve the associated files collected
# along the way.
#
# Arguments:
#   $pluginfo   - passed through to &plugin::file_block_read
#   $base_dir   - base directory; joined with $file when it contains a word
#                 character
#   $file       - directory to scan (relative to $base_dir)
#   $block_hash - shared blocking tables; the 'all_files', 'metadata_files',
#                 'file_blocks' and 'shared_fileroot' tables are created
#                 here when missing
#   $metadata   - passed through to &plugin::file_block_read
#   $gli        - when true, an unreadable directory is also reported on
#                 STDERR as a <ProcessingError> element
#
# Returns whatever check_directory_path() returned when that is not 1
# (undef or 0), -1 if the directory cannot be opened, and 1 otherwise.
sub file_block_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname = $file;
    $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    print $outhandle "Global file scan checking directory: $dirname\n";

    # Make sure every table used during the blocking pass exists.
    foreach my $table ('all_files', 'metadata_files', 'file_blocks', 'shared_fileroot') {
        $block_hash->{$table} = {} unless defined $block_hash->{$table};
    }

    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # A lexical handle is used because this routine is re-entered for
    # sub-directories; a shared bareword handle would be global state.
    my $dirhandle;
    if (!opendir ($dirhandle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = readdir ($dirhandle);
    closedir ($dirhandle);

    # Recur over directory contents.
    foreach my $raw_subfile (@dir) {
        next if ($raw_subfile =~ m/^\.\.?$/);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2);

        &plugin::file_block_read ($pluginfo, $base_dir,
                                  $raw_file_subfile,
                                  $block_hash, $metadata, $gli);
    }

    $self->sort_out_associated_files($block_hash);
    return 1;
}
348
349# We don't do metadata_read
# We don't do metadata_read: directories carry no metadata of their own, so
# this always answers undef.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}
358
359
360# return number of files processed, undef if can't process
361# Note that $base_dir might be "" and that $file might
362# include directories
363
364# This function passes around metadata hash structures. Metadata hash
365# structures are hashes that map from a (scalar) key (the metadata element
366# name) to either a scalar metadata value or a reference to an array of
367# such values.
368
# Import pass over a directory: run a metadata pass over every entry, then
# hand each entry to &plugin::read together with the metadata that applies
# to it.
#
# Note that $base_dir might be "" and that $file might include directories.
#
# This function passes around metadata hash structures. Metadata hash
# structures are hashes that map from a (scalar) key (the metadata element
# name) to either a scalar metadata value or a reference to an array of
# such values.
#
# Arguments:
#   $pluginfo    - passed through to &plugin::metadata_read / &plugin::read
#   $base_dir    - base directory
#   $file        - directory to read, relative to $base_dir ("" means
#                  $base_dir itself)
#   $block_hash  - shared blocking tables, consulted through
#                  file_is_blocked() and for 'new_files' / 'reindex_files'
#   $in_metadata - metadata hash structure inherited from the caller
#   $processor   - passed through to the plugin pipeline
#   $maxdocs     - stop once $total_count plus the files processed here
#                  reaches this number; -1 disables the limit
#   $total_count - number of files already processed by the caller
#   $gli         - when true, an unreadable directory is also reported on
#                  STDERR as a <ProcessingError> element
#
# Returns the sum of the values returned by &plugin::read for the entries of
# this directory; whatever check_directory_path() returned when that is not
# 1 (undef or 0); or -1 when the directory cannot be opened.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname;
    if ($file eq "") {
        $dirname = $base_dir;
    } else {
        $dirname = $file;
        $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    }

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
        print $outhandle "DirectoryPlugin: metadata passed in: ",
            join(", ", keys %$in_metadata), "\n";
    }

    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # A lexical handle is used because this routine is re-entered for
    # sub-directories; a shared bareword handle would be global state.
    my $dirhandle;
    if (!opendir ($dirhandle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = readdir ($dirhandle);
    closedir ($dirhandle);

    # From here on @dir holds url-encoded names.
    foreach my $entry (@dir) {
        $entry = &unicode::raw_filename_to_url_encoded($entry);
    }

    # Re-order the files in the list so any directories ending with .all are moved to the end
    for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
        if (-d &util::filename_cat($dirname, $dir[$i]) && $dir[$i] =~ /\.all$/) {
            push(@dir, splice(@dir, $i, 1));
        }
    }

    # setup the metadata structures. we do a metadata_read pass to see if there is any additional metadata, then pass it to read

    my $additionalmetadata = 0; # is there extra metadata available?
    my %extrametadata;          # maps from filespec to extra metadata keys
    my %extrametafile;          # maps from filespec to the metadata.xml (or similar) file it came from
    my @extrametakeys;          # keys of %extrametadata in order read

    my $os_dirsep = &util::get_os_dirsep();
    my $dirsep = &util::get_dirsep();
    my $base_dir_regexp = $base_dir;
    $base_dir_regexp =~ s/\//$os_dirsep/g;
    my $local_dirname = $dirname;

    # NOTE(review): $base_dir is interpolated into this pattern unescaped;
    # confirm that base directories never contain regex metacharacters.
    $local_dirname =~ s/^$base_dir_regexp($os_dirsep)*//;
    # if we are in import folder, then local_dirname will be empty
    if ($local_dirname ne "") {
        # look for extra metadata passed down from higher folders
        $local_dirname .= $dirsep;
        if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
            my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
            foreach my $ek (@$extrakeys) {
                my $extrakeys_re = $ek->{'re'};
                my $extrakeys_md = $ek->{'md'};
                my $extrakeys_mf = $ek->{'mf'};
                push(@extrametakeys, $extrakeys_re);
                $extrametadata{$extrakeys_re} = $extrakeys_md;
                $extrametafile{$extrakeys_re} = $extrakeys_mf;
            }
            delete($self->{'subdir_extrametakeys'}->{$local_dirname});
        }
    }

    # apply metadata pass for each of the files in the directory -- ignore
    # maxdocs here
    my $num_files = scalar(@dir);
    foreach my $subfile (@dir) {
        next if ($subfile =~ m/^\.\.?$/);

        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename = &util::filename_cat($base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2);
            next;
        }

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2);

        &plugin::metadata_read ($pluginfo, $base_dir,
                                $raw_file_subfile, $block_hash,
                                \@extrametakeys, \%extrametadata,
                                \%extrametafile,
                                $processor, $gli);
        $additionalmetadata = 1;
    }

    # filter out any extrametakeys that mention subdirectories and store
    # for later use (i.e. when that sub-directory is being processed)
    foreach my $ek (@extrametakeys) {
        my ($subdir_re, $extrakey_dir) = &File::Basename::fileparse($ek);
        $extrakey_dir =~ s/\\\./\./g; # remove RE syntax for .
        $extrakey_dir =~ s/\\\\/\\/g; # remove RE syntax for \

        my $dirsep_re = &util::get_re_dirsep();

        my $ek_non_re = $ek;
        $ek_non_re =~ s/\\\./\./g; # remove RE syntax for .
        $ek_non_re =~ s/\\\\/\\/g; # remove RE syntax for \
        if ($ek_non_re =~ m/$dirsep_re/) { # specifies at least one directory
            my $md = $extrametadata{$ek};
            my $mf = $extrametafile{$ek};

            my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};

            my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };

            # when its looked up, it must be relative to the base dir
            push(@{$subdir_extrametakeys->{"$local_dirname$extrakey_dir"}}, $subdir_rec);
        }
    }

    # import each of the files in the directory
    # The loop deliberately runs one step past the end of @dir so that the
    # "all files done" check below is reached.
    my $count = 0;
    for (my $i = 0; $i <= scalar(@dir); $i++) {
        # When every file in the directory has been done, pause for a moment (figuratively!)
        # If the -recheck_directories argument hasn't been provided, stop now (default)
        # Otherwise, re-read the contents of the directory to check for new files
        # Any new files are added to the @dir list and are processed as normal
        # This is necessary when documents to be indexed are specified in bibliographic DBs
        # These files are copied/downloaded and stored in a new folder at import time
        if ($i == $num_files) {
            last unless $self->{'recheck_directories'};

            # Re-read the files in the directory to see if there are any new files
            my $recheck_handle;
            last if (!opendir ($recheck_handle, $dirname));
            my @dirnow = readdir ($recheck_handle);
            closedir ($recheck_handle);
            foreach my $entry (@dirnow) {
                $entry = &unicode::raw_filename_to_url_encoded($entry);
            }

            # We're only interested if there are more files than there were before
            last if (scalar(@dirnow) <= scalar(@dir));

            # Any new files are added to the end of @dir to get processed by the loop
            my %already_listed = map { $_ => 1 } @dir;
            push(@dir, grep { !$already_listed{$_} } @dirnow);

            # When the new files have been processed, check again
            $num_files = scalar(@dir);
        }

        my $subfile = $dir[$i];
        last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
        next if ($subfile =~ /^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename
            = &util::filename_cat($this_file_base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2);
            next;
        }

        # Follow Windows shortcuts
        if ($raw_subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) {
            require Win32::Shortcut;
            # Explicit method-call syntax, so the call cannot be confused
            # with this package's own new().
            my $shortcut = Win32::Shortcut->new(&util::filename_cat($dirname, $raw_subfile));
            if ($shortcut) {
                # The file to be processed is now the target of the shortcut.
                # Only the per-entry variables are redirected: $file stays
                # untouched because the remaining entries of this directory
                # still need it to build their own paths.
                $this_file_base_dir = "";
                $raw_subfile = $shortcut->Path;
                $raw_file_subfile = $raw_subfile;
                $raw_full_filename
                    = &util::filename_cat($this_file_base_dir, $raw_file_subfile);
            }
        }

        # check for a symlink pointing back to a leading directory
        if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") {
            # readlink gives a "fatal error" on systems that don't implement
            # symlinks. This assumes the -l test above would fail on those.
            my $linkdest = readlink "$dirname/$raw_subfile";
            if (!defined ($linkdest)) {
                # system error - file not found?
                warn "DirectoryPlugin: symlink problem - $!";
            } else {
                # see if link points to current or a parent directory
                if ($linkdest =~ m@^[\./\\]+$@ ||
                    index($dirname, $linkdest) != -1) {
                    warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
                    next;
                }
            }
        }

        print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);

        # Make a copy of $in_metadata to pass to $raw_subfile
        my $out_metadata = {};
        &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);

        # check the assocfile_info
        if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
            &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
        }

        # $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8

        # Next add metadata read in XML files (if it is supplied)
        if ($additionalmetadata == 1) {
            foreach my $filespec (@extrametakeys) {
                ## use the url-encoded filename to do the filename comparison

                if ($subfile =~ /^$filespec$/) {
                    print $outhandle "File \"$subfile\" matches filespec \"$filespec\"\n"
                        if ($verbosity > 2);
                    my $mdref = $extrametadata{$filespec};
                    my $mfref = $extrametafile{$filespec};

                    # Add the list files where the metadata came from
                    # into the metadata table so we can track this
                    # This mechanism is similar to how gsdlassocfile works

                    my @metafile_pair = ();
                    foreach my $l (keys %$mfref) {
                        my $f = $mfref->{$l};
                        push (@metafile_pair, "$f : $l");
                    }

                    $mdref->{'gsdlmetafile'} = \@metafile_pair;

                    &metadatautil::combine_metadata_structures($out_metadata, $mdref);
                }
            }
        }

        if (defined $self->{'inf_timestamp'}) {
            # Look to see if it's a completely new file

            if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
                # Not a new file, must be an existing file
                # Let's see if it's newer than the last import.pl

                if (! -d $raw_full_filename) {
                    if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
                        # filename has been around for longer than inf_timestamp
                        print $outhandle "**** Skipping $subfile\n" if ($verbosity >3);
                        next;
                    }
                    else {
                        # Remove old folder in archives (might hash to something different)
                        # *** should be doing this on a Del one as well
                        # but leave folder name?? and ensure hashs to
                        # same again??

                        # Then let through as new doc??

                        # mark to doc-oids that rely on it for re-indexing
                    }
                }
            }
        }

        # Recursively read each $subfile
        print $outhandle "DirectoryPlugin recurring: $subfile\n" if ($verbosity > 2);

        $count += &plugin::read ($pluginfo, $this_file_base_dir,
                                 $raw_file_subfile, $block_hash,
                                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}
668
# Placeholder: unpacks its arguments and leaves $stats untouched.
sub compile_stats {
    my $plugin = shift(@_);
    my ($plugin_stats) = @_;
}
673
6741;
Note: See TracBrowser for help on using the repository browser.