source: main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm@ 31492

Last change on this file since 31492 was 31492, checked in by kjdon, 7 years ago

renamed EncodingUtil to CommonUtil, BasePlugin to BaseImporter. The idea is that only top level plugins that you can specify in your collection get to have plugin in their name. Modified all other plugins to reflect these name changes

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.9 KB
Line 
1###########################################################################
2#
3# DirectoryPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# DirectoryPlugin is a plugin which recurses through directories processing
27# each file it finds - which basically means passing it down the plugin
28# pipeline
29
30package DirectoryPlugin;
31
32use extrametautil;
33use CommonUtil;
34use plugin;
35use util;
36use FileUtils;
37use metadatautil;
38
39use File::Basename;
40use strict;
41no strict 'refs';
42no strict 'subs';
43
44use Encode::Locale;
45use Encode;
46use Unicode::Normalize;
47
# Set up inheritance at compile time: DirectoryPlugin derives from CommonUtil.
BEGIN {
    @DirectoryPlugin::ISA = qw(CommonUtil);
}
51
# Option descriptors for this plugin. Each entry is a hash giving the option
# name, a description key, its type, whether it is required and (optionally)
# a default value.
my $arguments = [
    {
        'name' => "block_exp",
        'desc' => "{BaseImporter.block_exp}",
        'type' => "regexp",
        'deft' => get_default_block_exp(),
        'reqd' => "no",
    },

    # this option has been deprecated. leave it here for now so we can warn
    # people not to use it
    {
        'name'      => "use_metadata_files",
        'desc'      => "{DirectoryPlugin.use_metadata_files}",
        'type'      => "flag",
        'reqd'      => "no",
        'hiddengli' => "yes",
    },

    {
        'name' => "recheck_directories",
        'desc' => "{DirectoryPlugin.recheck_directories}",
        'type' => "flag",
        'reqd' => "no",
    },
];
68
# Plugin descriptor: ties the plugin name and description key to the option
# list declared in $arguments.
my $options = {
    'name'     => "DirectoryPlugin",
    'desc'     => "{DirectoryPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
74
# Constructor.
#
# Parameters:
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed straight through to the CommonUtil constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugin's argument and option descriptors
#
# Returns the object built by CommonUtil, re-blessed into $class.
# Dies if the deprecated -use_metadata_files option is set.
sub new {
    my $class = shift;
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push @{$pluginlist}, $class;
    push @{ $hashArgOptLists->{"ArgList"} }, @{$arguments};
    push @{ $hashArgOptLists->{"OptList"} }, $options;

    my $self = CommonUtil->new($pluginlist, $inputargs, $hashArgOptLists);

    # When only plugin information is wanted there is nothing to validate
    # or initialise.
    return bless $self, $class if $self->{'info_only'};

    # we have left this option in so we can warn people who are still using it
    if ($self->{'use_metadata_files'}) {
        die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
    }

    # Statistics counters all start at zero.
    foreach my $counter ('num_processed', 'num_not_processed', 'num_blocked', 'num_archives') {
        $self->{$counter} = 0;
    }

    # Extra-metadata records that apply to sub-directories, stored until
    # read() reaches the sub-directory in question.
    $self->{'subdir_extrametakeys'} = {};

    return bless $self, $class;
}
104
# Called once, at the start of processing.
#
# Stores the verbosity level and the fail handle passed in by the processor.
# The out handle is only replaced when one is actually supplied, so an
# existing handle survives a call with an undefined $outhandle.
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    $self->{'verbosity'} = $verbosity;

    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }

    $self->{'failhandle'} = $failhandle;
}
118
# Called once, after all passes have finished.
# This plugin holds nothing that needs tearing down, so the body is empty.
sub deinit {
    my ($plugin) = @_;
}
124
# Called at the beginning of each plugin pass (import has one, building has
# many).
#
# For a processor whose class name does not end in "buildproc", and only when
# the 'incremental' setting equals 1, the age of the archives info database
# (as reported by -M) is recorded in $self->{'inf_timestamp'} if that file
# exists.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    # Only lookup timestamp info for import.pl, and only if incremental is set
    my $processor_class = ref($processor);
    if ($processor_class !~ /buildproc$/ && $self->{'incremental'} == 1) {
        # Get the infodbtype value for this collection from the arcinfo object
        my $infodbtype = $processor->getoutputinfo()->{'infodbtype'};
        if ($infodbtype eq "gdbm-txtgz") {
            # in archives, cannot use txtgz version
            $infodbtype = "gdbm";
        }

        my $archives_inf = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $processor->getoutputdir());

        $self->{'inf_timestamp'} = -M $archives_inf if (-e $archives_inf);
    }
}
144
# Hook with an empty body: the arguments are unpacked but nothing is done
# with them.
sub remove_all {
    my $self = shift;
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
149
150
# Always returns undef; the arguments are unpacked but never used.
# The original author's note: this will never be called for directories
# (will it??)
sub remove_one {
    my $self = shift;
    my ($file, $oids, $archivedir) = @_;

    return undef;
}
157
158
# Called at the end of each plugin pass. The body is empty.
sub end {
    my ($self) = shift;
}
164
165
166
# Return 1 if this class might recurse using $pluginfo.
# This implementation always returns 1.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
173
# Default value for the block_exp option: a case-insensitive pattern that
# matches names ending in CVS, .svn, Thumbs.db, OIDcount, .DS_Store or "~".
# The pattern is returned as a plain string, not a compiled regex.
sub get_default_block_exp {
    my ($self) = @_;

    my $default_pattern = '(?i)(CVS|\.svn|Thumbs\.db|OIDcount|\.DS_Store|~)$';
    return $default_pattern;
}
179
# Decide whether $dirname is a directory that should be descended into.
#
# Parameters:
#   $dirname - path to test
#
# Returns:
#   undef - $dirname is not a directory
#   0     - it is a directory but must be skipped: it matches the block_exp
#           option, it is an archives/index folder below an import folder in
#           GSDLHOME, it is nested too deeply, or it looks like an import
#           loop
#   1     - the directory is fine to process
#
# A message is printed on $self->{'outhandle'} for every skip except the
# block_exp one.
sub check_directory_path {

    my $self = shift(@_);
    my ($dirname) = @_;

    return undef unless (-d $dirname);

    return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);

    my $outhandle = $self->{'outhandle'};

    # check to make sure we're not reading the archives or index directory
    my $gsdlhome = quotemeta(defined $ENV{'GSDLHOME'} ? $ENV{'GSDLHOME'} : "");
    if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
        print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    # The previous test used the pattern (/.*){,41}. Before Perl 5.34 "{,41}"
    # is literal text, so the test never fired; from 5.34 on it means
    # "at most 41 times", which matches every string and rejected every
    # directory. Counting the separators states the intent directly
    # (tr/// with an empty replacement counts without modifying $dirname).
    my $max_depth = 40;
    my $depth = ($dirname =~ tr{/}{});
    if ($depth > $max_depth) {
        print $outhandle "DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    # (an import folder that contains itself again further down)
    if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
        print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
        return 0;
    }

    return 1;
}
212
# Turn the 'shared_fileroot' records collected in $block_hash into blocked
# files plus "associated file to be" metadata. This may be called more than
# once.
#
# Each record is keyed by a filename prefix and holds 'tie_to' (the extension
# of the file the others are attached to) and 'exts' (a hash whose keys are
# the extensions of the files to attach). For every record with a defined
# 'tie_to' and at least one extension, each "$prefix$ext" file is blocked and
# appended to the 'gsdlassocfile_tobe' list kept for "$prefix$tie_to" in
# $self->{'assocfile_info'}.
#
# The 'shared_fileroot' table is emptied afterwards.
sub sort_out_associated_files {
    my ($self, $block_hash) = @_;

    my @prefixes = keys %{$block_hash->{'shared_fileroot'}};
    return unless scalar(@prefixes);

    if (!defined $self->{'assocfile_info'}) {
        $self->{'assocfile_info'} = {};
    }
    my $assoc_info = $self->{'assocfile_info'};

    foreach my $prefix (@prefixes) {
        my $record = $block_hash->{'shared_fileroot'}->{$prefix};
        my ($tie_to, $exts) = ($record->{'tie_to'}, $record->{'exts'});

        # Nothing to do without a file to tie to, or without extensions.
        next unless (defined $tie_to);
        next unless (scalar(keys %$exts) > 0);

        # set up fileblocks and assocfile_tobe
        my $base_file = "$prefix$tie_to";
        if (!defined $assoc_info->{$base_file}) {
            $assoc_info->{$base_file} = {};
        }
        my $base_info = $assoc_info->{$base_file};

        if (!defined $base_info->{'gsdlassocfile_tobe'}) {
            $base_info->{'gsdlassocfile_tobe'} = [];
        }
        my $pending = $base_info->{'gsdlassocfile_tobe'};

        foreach my $e (keys %$exts) {
            # block the file
            $self->block_filename($block_hash, "$prefix$e");

            # set up as an associated file
            print STDERR " $self->{'plugin_type'}: Associating $prefix$e with $tie_to version\n";
            my $mime_type = ""; # let system auto detect this
            push(@$pending, "$prefix$e:$mime_type:");
        }
    }

    # Start again with an empty table.
    $block_hash->{'shared_fileroot'} = {};
}
254
255
256
257
# Global file-scan pass over a directory: every entry is handed to
# plugin::file_block_read, then the shared-fileroot records gathered so far
# are resolved by sort_out_associated_files.
#
# Parameters:
#   $pluginfo   - passed through to plugin::file_block_read
#   $base_dir   - base directory; only prepended to $file when it contains a
#                 word character
#   $file       - directory to scan, relative to $base_dir
#   $block_hash - hash ref; its 'all_files', 'metadata_files', 'file_blocks'
#                 and 'shared_fileroot' tables are created when missing
#   $metadata   - passed through to plugin::file_block_read
#   $gli        - when true, a <ProcessingError> line goes to STDERR if the
#                 directory cannot be read
#
# Returns 1 on success, -1 when the directory cannot be opened, otherwise
# the undef/0 verdict of check_directory_path.
sub file_block_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname = ($base_dir =~ /\w/)
        ? &FileUtils::filenameConcatenate($base_dir, $file)
        : $file;

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    print $outhandle "Global file scan checking directory: $dirname\n";

    # Make sure every table used during blocking exists.
    foreach my $table ('all_files', 'metadata_files', 'file_blocks', 'shared_fileroot') {
        $block_hash->{$table} = {} unless defined $block_hash->{$table};
    }

    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    my $dir_handle;
    if (!opendir ($dir_handle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @entries = sort readdir ($dir_handle);
    closedir ($dir_handle);

    # Recur over directory contents.
    foreach my $raw_subfile (@entries) {
        next if ($raw_subfile =~ m/^\.\.?$/);

        my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2);

        &plugin::file_block_read ($pluginfo, $base_dir,
                                  $raw_file_subfile,
                                  $block_hash, $metadata, $gli);
    }

    $self->sort_out_associated_files($block_hash);

    return 1;
}
319
# We don't do metadata_read: the arguments are unpacked for reference and
# undef is always returned.
sub metadata_read {
    my $self = shift;
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}
329
330
# Import the contents of a directory by passing every entry down the plugin
# pipeline.
#
# Returns the number of files processed (the sum of what plugin::read
# reports for each entry), -1 when the directory cannot be opened, or the
# undef/0 verdict of check_directory_path when the path is not a usable
# directory.
# Note that $base_dir might be "" and that $file might include directories.
#
# This function passes around metadata hash structures. Metadata hash
# structures are hashes that map from a (scalar) key (the metadata element
# name) to either a scalar metadata value or a reference to an array of
# such values.
#
# Parameters:
#   $pluginfo    - passed through to plugin::metadata_read and plugin::read
#   $base_dir    - base directory
#   $file        - directory to read, relative to $base_dir ("" means
#                  $base_dir itself)
#   $block_hash  - hash ref holding the blocking tables and, for incremental
#                  imports, the 'new_files' and 'reindex_files' tables
#   $in_metadata - metadata hash structure inherited from the parent
#   $processor   - passed through to plugin::metadata_read and plugin::read
#   $maxdocs     - maximum number of documents to import, -1 for no limit
#   $total_count - number of documents imported before this call
#   $gli         - when true, a <ProcessingError> line goes to STDERR if the
#                  directory cannot be read
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname;
    if ($file eq "") {
        $dirname = $base_dir;
    } else {
        $dirname = $file;
        $dirname = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\w/;
    }

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
        print $outhandle "DirectoryPlugin: metadata passed in: ",
            join(", ", keys %$in_metadata), "\n";
    }

    # Recur over directory contents.
    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    my $dir_handle;
    if (!opendir ($dir_handle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = sort readdir ($dir_handle);
    closedir ($dir_handle);

    # From here on the entries of @dir are kept in URL-encoded form.
    foreach my $entry (@dir) {
        $entry = &unicode::raw_filename_to_url_encoded($entry);
    }

    # Re-order the files in the list so any directories ending with .all are moved to the end
    for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
        if (-d &FileUtils::filenameConcatenate($dirname, $dir[$i]) && $dir[$i] =~ /\.all$/) {
            push(@dir, splice(@dir, $i, 1));
        }
    }

    # setup the metadata structures. we do a metadata_read pass to see if there is any additional metadata, then pass it to read

    my $additionalmetadata = 0; # is there extra metadata available?
    my %extrametadata;          # maps from filespec to extra metadata keys
    my %extrametafile;          # maps from filespec to the metadata.xml (or similar) file it came from
    my @extrametakeys;          # keys of %extrametadata in order read

    # Want to get relative path of local_dirname within the base_directory
    # but with URL style slashes.
    my $local_dirname = &util::filename_within_directory_url_format($dirname, $base_dir);

    # if we are in import folder, then local_dirname will be empty
    if ($local_dirname ne "") {
        # convert to perl unicode
        $local_dirname = $self->raw_filename_to_unicode($local_dirname);

        # look for extra metadata passed down from higher folders
        $local_dirname .= "/"; # closing slash must be URL type slash also and not $dirsep;
        if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
            my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
            foreach my $ek (@$extrakeys) {
                my $extrakeys_re = $ek->{'re'};
                my $extrakeys_md = $ek->{'md'};
                my $extrakeys_mf = $ek->{'mf'};
                &extrametautil::addmetakey(\@extrametakeys, $extrakeys_re);
                &extrametautil::setmetadata(\%extrametadata, $extrakeys_re, $extrakeys_md);
                &extrametautil::setmetafile(\%extrametafile, $extrakeys_re, $extrakeys_mf);
            }
            delete($self->{'subdir_extrametakeys'}->{$local_dirname});
        }
    }

    # apply metadata pass for each of the files in the directory -- ignore
    # maxdocs here
    my $num_files = scalar(@dir);
    for (my $i = 0; $i < scalar(@dir); $i++) {
        my $subfile = $dir[$i];
        next if ($subfile =~ m/^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
        my $raw_full_filename = &FileUtils::filenameConcatenate($this_file_base_dir, $raw_file_subfile);
        if ($self->raw_file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n";# if ($verbosity > 2);
            print STDERR "raw file was blocked for metadata read\n";
            print STDERR &unicode::debug_unicode_string($raw_full_filename)."\n";
            next;
        }

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n"; # if ($verbosity > 2);

        &plugin::metadata_read ($pluginfo, $this_file_base_dir,
                                $raw_file_subfile,$block_hash,
                                \@extrametakeys, \%extrametadata,
                                \%extrametafile,
                                $processor, $gli);
        $additionalmetadata = 1;
    }

    # filter out any extrametakeys that mention subdirectories and store
    # for later use (i.e. when that sub-directory is being processed)
    foreach my $ek (@extrametakeys) { # where each Extrametakey (which is a filename) is stored as a url-style regex

        my ($subdir_re,$extrakey_dir) = &util::url_fileparse($ek);
        if ($extrakey_dir ne "") {
            # a subdir was specified
            my $md = &extrametautil::getmetadata(\%extrametadata, $ek);
            my $mf = &extrametautil::getmetafile(\%extrametafile, $ek);

            my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};
            my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };

            # when it's looked up, it must be relative to the base dir
            push(@{$subdir_extrametakeys->{"$local_dirname$extrakey_dir"}},$subdir_rec);
        }
    }

    # import each of the files in the directory
    my $count=0;
    for (my $i = 0; $i <= scalar(@dir); $i++) {
        # When every file in the directory has been done, pause for a moment (figuratively!)
        # If the -recheck_directories argument hasn't been provided, stop now (default)
        # Otherwise, re-read the contents of the directory to check for new files
        # Any new files are added to the @dir list and are processed as normal
        # This is necessary when documents to be indexed are specified in bibliographic DBs
        # These files are copied/downloaded and stored in a new folder at import time
        if ($i == $num_files) {
            last unless $self->{'recheck_directories'};

            # Re-read the files in the directory to see if there are any new files
            my $recheck_handle;
            last if (!opendir ($recheck_handle, $dirname));
            my @dirnow = sort readdir ($recheck_handle);
            closedir ($recheck_handle);
            foreach my $entry (@dirnow) {
                $entry = &unicode::raw_filename_to_url_encoded($entry);
            }

            # We're only interested if there are more files than there were before
            last if (scalar(@dirnow) <= scalar(@dir));

            # Any new files are added to the end of @dir to get processed by the loop
            my $j;
            foreach my $subfilenow (@dirnow) {
                for ($j = 0; $j < $num_files; $j++) {
                    last if ($subfilenow eq $dir[$j]);
                }
                if ($j == $num_files) {
                    # New file
                    push(@dir, $subfilenow);
                }
            }
            # When the new files have been processed, check again
            $num_files = scalar(@dir);
        }

        my $subfile = $dir[$i];
        last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
        next if ($subfile =~ /^\.\.?$/);

        # Per-entry copies: following a shortcut redirects this entry only
        # and must not disturb the paths built for its siblings.
        my $this_file_base_dir = $base_dir;
        my $this_file = $file;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);
        # get the canonical unicode version of the filename. This may not match
        # the filename on the file system. We will use it to compare to regex
        # in the metadata table.
        my $unicode_subfile = &util::raw_filename_to_unicode($dirname, $raw_subfile);
        my $raw_file_subfile = &FileUtils::filenameConcatenate($this_file, $raw_subfile);
        my $raw_full_filename
            = &FileUtils::filenameConcatenate($this_file_base_dir,$raw_file_subfile);
        my $full_unicode_file = $self->raw_filename_to_unicode($raw_full_filename);
        print STDERR "full unicode filename $full_unicode_file\n";
        print STDERR &unicode::debug_unicode_string($full_unicode_file)."\n";
        if ($self->file_is_blocked($block_hash,$full_unicode_file)) {
            print STDERR "Actually, we have blocked the unicode version\n";
            next;
        }
        if ($self->file_is_blocked($block_hash,$raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n"; # if ($verbosity > 2);
            next;
        }

        # Follow Windows shortcuts
        if ($raw_subfile =~ m/(?i)\.lnk$/ && (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin"))) {
            require Win32::Shortcut;
            my $shortcut = Win32::Shortcut->new(&FileUtils::filenameConcatenate($dirname, $raw_subfile));
            if ($shortcut) {
                # The file to be processed is now the target of the shortcut
                $this_file_base_dir = "";
                $this_file = "";
                $raw_subfile = $shortcut->Path;

                # Only close what was actually opened; calling Close on an
                # undefined object would be fatal.
                $shortcut->Close(); # see http://cpansearch.perl.org/src/JDB/Win32-Shortcut-0.08/docs/reference.html

                # Rebuild the paths computed above so that the target,
                # rather than the .lnk file, is what gets read below.
                $raw_file_subfile = &FileUtils::filenameConcatenate($this_file, $raw_subfile);
                $raw_full_filename
                    = &FileUtils::filenameConcatenate($this_file_base_dir,$raw_file_subfile);
            }
        }

        # check for a symlink pointing back to a leading directory
        if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") {
            # readlink gives a "fatal error" on systems that don't implement
            # symlinks. This assumes that the -l test above would fail on those.
            my $linkdest=readlink "$dirname/$raw_subfile";
            if (!defined ($linkdest)) {
                # system error - file not found?
                warn "DirectoryPlugin: symlink problem - $!";
            } else {
                # see if link points to current or a parent directory
                if ($linkdest =~ m@^[\./\\]+$@ ||
                    index($dirname, $linkdest) != -1) {
                    warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
                    next;
                }
            }
        }

        print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);

        # Make a copy of $in_metadata to pass to $raw_subfile
        my $out_metadata = {};
        &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);

        # check the assocfile_info
        if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
            &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
        }

        ### Now we need to look up the metadata table to see if there is any
        # extra metadata for us. We need the canonical unicode version here.
        if ($additionalmetadata == 1) {
            foreach my $filespec (@extrametakeys) {
                if ($unicode_subfile =~ /^$filespec$/) {
                    print $outhandle "File \"$unicode_subfile\" matches filespec \"$filespec\"\n"
                        if ($verbosity > 2);
                    my $mdref = &extrametautil::getmetadata(\%extrametadata, $filespec);
                    my $mfref = &extrametautil::getmetafile(\%extrametafile, $filespec);

                    # Add the list files where the metadata came from
                    # into the metadata table so we can track this
                    # This mechanism is similar to how gsdlassocfile works

                    my @metafile_pair = ();
                    foreach my $l (keys %$mfref) {
                        my $f = $mfref->{$l};
                        push (@metafile_pair, "$f : $l");
                    }

                    $mdref->{'gsdlmetafile'} = \@metafile_pair;

                    &metadatautil::combine_metadata_structures($out_metadata, $mdref);
                }
            }
        }

        if (defined $self->{'inf_timestamp'}) {
            # Look to see if it's a completely new file

            if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
                # Not a new file, must be an existing file
                # Let's see if it's newer than the last import.pl

                if (! -d $raw_full_filename) {
                    if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
                        # filename has been around for longer than inf_timestamp
                        print $outhandle "**** Skipping $unicode_subfile\n" if ($verbosity >3);
                        next;
                    }
                    else {
                        # Remove old folder in archives (might hash to something different)
                        # *** should be doing this on a Del one as well
                        # but leave folder name?? and ensure hashs to
                        # same again??

                        # Then let through as new doc??

                        # mark to doc-oids that rely on it for re-indexing
                    }
                }
            }
        }

        # Recursively read each $subfile
        print $outhandle "DirectoryPlugin recurring: $unicode_subfile\n" if ($verbosity > 2);

        $count += &plugin::read ($pluginfo, $this_file_base_dir,
                                 $raw_file_subfile, $block_hash,
                                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}
641
# Hook with an empty body: $stats is unpacked but nothing is added to it.
sub compile_stats {
    my $self = shift;
    my ($stats) = @_;
}
646
6471;
Note: See TracBrowser for help on using the repository browser.