root/main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm @ 24829

Revision 24829, 21.5 KB (checked in by ak19, 8 years ago)

Changes to bat files and perl code to deal with brackets in (Windows) filepath. Also checked winmake.bat files to see if changes were needed there. These changes go together with the commits 24826 to 24828 for gems.bat, and commit 24820 on makegs2.bat.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# DirectoryPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# DirectoryPlugin is a plugin which recurses through directories processing
27# each file it finds - which basically means passing it down the plugin
28# pipeline
29
30package DirectoryPlugin;
31
32use PrintInfo;
33use plugin;
34use util;
35use metadatautil;
36
37use File::Basename;
38use strict;
39no strict 'refs';
40no strict 'subs';
41
42use Encode;
43
# DirectoryPlugin inherits from PrintInfo; @ISA is filled in at compile time.
BEGIN {
    @DirectoryPlugin::ISA = ('PrintInfo');
}

# Descriptions of the arguments this plugin accepts.
my $arguments = [
    {
        'name' => "block_exp",
        'desc' => "{BasePlugin.block_exp}",
        'type' => "regexp",
        'deft' => &get_default_block_exp(),
        'reqd' => "no",
    },

    # This option has been deprecated.  It is left here for now so that
    # new() can warn people not to use it.
    {
        'name'      => "use_metadata_files",
        'desc'      => "{DirectoryPlugin.use_metadata_files}",
        'type'      => "flag",
        'reqd'      => "no",
        'hiddengli' => "yes",
    },

    {
        'name' => "recheck_directories",
        'desc' => "{DirectoryPlugin.recheck_directories}",
        'type' => "flag",
        'reqd' => "no",
    },
];

# Description of the plugin itself; new() appends it to the "OptList".
my $options = {
    'name'     => "DirectoryPlugin",
    'desc'     => "{DirectoryPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
70
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; the name of this class is appended to it
#   $inputargs       - passed through unchanged to the PrintInfo constructor
#   $hashArgOptLists - hash ref; this plugin's argument descriptions are
#                      appended to its "ArgList" array and its option
#                      description to its "OptList" array
#
# Returns the object built by PrintInfo, re-blessed into $class.  When the
# object has 'info_only' set, no further initialisation is done.  Dies if
# the deprecated -use_metadata_files option was supplied.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call.  The indirect-object form
    # ("new PrintInfo(...)") relies on parser heuristics, which is fragile
    # in a package that itself defines a sub called "new".
    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options or initialisations etc
        return bless $self, $class;
    }

    # we have left this option in so we can warn people who are still using it
    if ($self->{'use_metadata_files'}) {
        die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
    }

    # Counters kept by this plugin.
    $self->{'num_processed'} = 0;
    $self->{'num_not_processed'} = 0;
    $self->{'num_blocked'} = 0;
    $self->{'num_archives'} = 0;

    # Extra metadata destined for sub-directories, keyed by directory
    # name (filled in and consumed by read()).
    $self->{'subdir_extrametakeys'} = {};

    return bless $self, $class;
}
100
# Called once, at the start of processing.
#
#   $verbosity  - verbosity level, passed through from the processor
#   $outhandle  - handle for normal output; stored only when defined
#   $failhandle - handle for failure output; always stored
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    $self->{'verbosity'} = $verbosity;

    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }
    $self->{'failhandle'} = $failhandle;
}
114
# Called once, after all passes have finished.
# This plugin has nothing to tear down, so the hook is intentionally empty.
sub deinit {
    my ($self) = @_;
}
120
# Called at the beginning of each plugin pass (import has one, building
# has many).
#
# When the processor's class name does not end in "buildproc" and the
# 'incremental' setting is 1, the "archiveinf-doc" info database file in the
# processor's output directory is located and, if it exists, its -M age is
# remembered in $self->{'inf_timestamp'}.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    # Only lookup timestamp info for import.pl, and only if incremental is set
    my $is_import_pass = (ref($processor) !~ /buildproc$/);
    if ($is_import_pass && $self->{'incremental'} == 1) {
        # Get the infodbtype value for this collection from the arcinfo object
        my $output_info = $processor->getoutputinfo();
        my $infodbtype  = $output_info->{'infodbtype'};
        if ($infodbtype eq "gdbm-txtgz") {
            # in archives, cannot use txtgz version
            $infodbtype = "gdbm";
        }

        my $archives_inf = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc",
                                                         $processor->getoutputdir());

        if (-e $archives_inf) {
            $self->{'inf_timestamp'} = -M $archives_inf;
        }
    }
}
140
# Hook with the same argument list as begin(); this plugin does nothing here.
sub remove_all {
    my $self = shift;
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
145
146
# Hook taking ($file, $oids, $archivedir).  Always returns undef: this will
# never be called for directories (will it??)
sub remove_one {
    my ($self, $file, $oids, $archivedir) = @_;

    return undef;
}
153
154
# Called at the end of each plugin pass.
# Nothing needs doing for this plugin, so the hook is intentionally empty.
sub end {
    my ($self) = shift;
}
160
161
162
# Return 1 if this class might recurse using $pluginfo.
# DirectoryPlugin always answers 1.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
169
# Return the default value of the block_exp argument: a case-insensitive
# pattern matching names that end in CVS, .svn, Thumbs.db, OIDcount or "~".
# Also called as a plain function (no object) while the argument table is
# built, so $self may be undefined.
sub get_default_block_exp {
    my ($self) = @_;

    my $default_block_exp = '(?i)(CVS|\.svn|Thumbs\.db|OIDcount|~)$';
    return $default_block_exp;
}
175
# Decide whether $dirname is a directory this plugin should descend into.
#
# Returns:
#   undef - $dirname is not a directory
#   0     - the directory must be skipped: it matches block_exp, looks like
#           the archives/index directory of a Greenstone collection, is too
#           deeply nested, or looks like a recursive import loop
#   1     - the directory is OK to process
#
# A message is printed to $self->{'outhandle'} for every rejection except
# the block_exp one.
sub check_directory_path {

    my $self = shift(@_);
    my ($dirname) = @_;

    return undef unless (-d $dirname);

    return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);

    my $outhandle = $self->{'outhandle'};

    # check to make sure we're not reading the archives or index directory
    my $gsdlhome = quotemeta($ENV{'GSDLHOME'});
    if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
        print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    # The nesting depth is measured by counting "/" separators.  The
    # previous test used the pattern (/.*){,41}: before Perl 5.34 "{,41}"
    # is literal text, so the test never fired; from 5.34 on it is the
    # quantifier {0,41}, which matches every string and so rejected every
    # directory.
    # NOTE(review): only "/" is counted, as in the original pattern, so
    # paths written with "\" separators are not covered by this check.
    my $max_directory_depth = 40;
    my $separator_count = ($dirname =~ tr{/}{});
    if ($separator_count > $max_directory_depth) {
        print $outhandle "DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    # (the same component appearing after two successive "import/" parts)
    if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
        print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
        return 0;
    }

    return 1;
}
208
# Convert the 'shared_fileroot' records collected in $block_hash into file
# blocks and pending associated-file metadata.  This may be called more
# than once.
#
# Each record is keyed by a filename prefix and holds a 'tie_to' extension
# and an 'exts' hash.  When 'tie_to' is defined and 'exts' is not empty,
# every "$prefix$ext" file is blocked through util::block_filename and an
# entry "$prefix$ext:$mime_type:" is appended to the 'gsdlassocfile_tobe'
# list kept in $self->{'assocfile_info'} under the base file
# "$prefix$tie_to".  Finally 'shared_fileroot' is reset to an empty hash.
sub sort_out_associated_files {
    my ($self, $block_hash) = @_;

    # Nothing gathered since the last call: leave everything untouched.
    return if (scalar (keys %{$block_hash->{'shared_fileroot'}}) == 0);

    if (!defined $self->{'assocfile_info'}) {
        $self->{'assocfile_info'} = {};
    }
    my $assoc_info = $self->{'assocfile_info'};

    foreach my $prefix (keys %{$block_hash->{'shared_fileroot'}}) {
        my $record = $block_hash->{'shared_fileroot'}->{$prefix};

        my $tie_to = $record->{'tie_to'};
        my $exts   = $record->{'exts'};

        next unless (defined $tie_to);
        next unless (scalar (keys %$exts) > 0);

        # set up fileblocks and assocfile_tobe
        my $base_file = "$prefix$tie_to";
        if (!defined $assoc_info->{$base_file}) {
            $assoc_info->{$base_file} = {};
        }
        my $base_info = $assoc_info->{$base_file};

        if (!defined $base_info->{'gsdlassocfile_tobe'}) {
            $base_info->{'gsdlassocfile_tobe'} = [];
        }
        my $pending = $base_info->{'gsdlassocfile_tobe'};

        foreach my $ext (keys %$exts) {
            my $assoc_file = "$prefix$ext";

            # block the file
            &util::block_filename($block_hash, $assoc_file);

            # set up as an associated file
            print STDERR "  $self->{'plugin_type'}: Associating $prefix$ext with $tie_to version\n";
            my $mime_type = ""; # let system auto detect this
            push(@$pending, "$assoc_file:$mime_type:");
        }
    }

    # Start afresh for the next directory.
    $block_hash->{'shared_fileroot'} = {};
}
250
251
# Decide whether a file has been blocked.
# do block exp OR special blocking ???
#
# A file counts as blocked when its full path is a key of
# $block_hash->{'file_blocks'} (looked up in lower case when GSDLOS is
# "windows"), or when the path matches this plugin's own non-empty
# block_exp.  Returns 1 and increments $self->{'num_blocked'} when
# blocked, otherwise returns 0.
sub file_is_blocked {
    my ($self, $block_hash, $filename_full_path) = @_;

    $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);
    ### print STDERR "*** DirectoryPlugin::file_is_blocked $filename_full_path\n";

    # on windows, all block paths are lowercased.
    my $lookup_key = $filename_full_path;
    if ($ENV{'GSDLOS'} =~ m/^windows$/) {
        $lookup_key = lc ($filename_full_path);
    }

    my $is_blocked = 0;
    if (defined $block_hash->{'file_blocks'}->{$lookup_key}) {
        $is_blocked = 1;
    }
    elsif ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) {
        # check Directory plugin's own block_exp
        $is_blocked = 1;
    }

    return 0 unless $is_blocked;

    $self->{'num_blocked'} ++;
    return 1;
}
282
283
284
# Global file scan over a directory: every entry is handed to
# plugin::file_block_read so that the plugins can register file blocks.
#
#   $pluginfo   - passed through to plugin::file_block_read
#   $base_dir   - base directory; joined with $file when it contains a
#                 word character
#   $file       - directory to scan (relative to $base_dir)
#   $block_hash - hash ref; its 'all_files', 'metadata_files',
#                 'file_blocks' and 'shared_fileroot' tables are created
#                 if missing
#   $metadata   - passed through to plugin::file_block_read
#   $gli        - when true, an XML error line is also printed to STDERR
#                 if the directory cannot be read
#
# Returns whatever check_directory_path returned (undef or 0) when the
# directory is not acceptable, -1 when it cannot be opened, and 1 otherwise.
sub file_block_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname = $file;
    $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    print $outhandle "Global file scan checking directory: $dirname\n";

    foreach my $table ('all_files', 'metadata_files', 'file_blocks', 'shared_fileroot') {
        $block_hash->{$table} = {} unless defined $block_hash->{$table};
    }

    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # A lexical handle is used rather than the package-global bareword DIR
    # so that nothing else sharing that name can interfere with the scan.
    my $dirhandle;
    if (!opendir ($dirhandle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = readdir ($dirhandle);
    closedir ($dirhandle);

    # Recur over directory contents.
    foreach my $raw_subfile (@dir) {
        # skip the "." and ".." entries (and only those)
        next if ($raw_subfile =~ m/\A\.\.?\z/);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2);

        &plugin::file_block_read ($pluginfo, $base_dir,
                                  $raw_file_subfile,
                                  $block_hash, $metadata, $gli);
    }

    $self->sort_out_associated_files($block_hash);
    return 1;
}
347
# We don't do metadata_read: directories carry no metadata of their own,
# so this always returns undef.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}
357
358
# Process a directory: run a metadata pass over its entries, then pass each
# entry down the plugin pipeline with the metadata that applies to it.
#
# Returns the number of files processed.  Returns whatever
# check_directory_path returned (undef or 0) when the directory cannot be
# processed, and -1 when it cannot be opened.
# Note that $base_dir might be "" and that $file might
# include directories
#
# This function passes around metadata hash structures.  Metadata hash
# structures are hashes that map from a (scalar) key (the metadata element
# name) to either a scalar metadata value or a reference to an array of
# such values.
#
#   $pluginfo    - passed through to plugin::metadata_read / plugin::read
#   $base_dir    - base directory
#   $file        - directory to read, relative to $base_dir ("" means
#                  $base_dir itself)
#   $block_hash  - hash ref consulted for blocked, new and reindex files
#   $in_metadata - metadata hash structure inherited from the caller
#   $processor   - passed through to the plugins
#   $maxdocs     - stop once this many documents are reached (-1: no limit)
#   $total_count - number of documents already counted by the caller
#   $gli         - when true, an XML error line is also printed to STDERR
#                  if the directory cannot be read
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname;
    if ($file eq "") {
        $dirname = $base_dir;
    } else {
        $dirname = $file;
        $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    }

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
        print $outhandle "DirectoryPlugin: metadata passed in: ",
            join(", ", keys %$in_metadata), "\n";
    }

    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # A lexical handle is used rather than the package-global bareword DIR
    # so that nothing else sharing that name can interfere with the scan.
    my $dirhandle;
    if (!opendir ($dirhandle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = readdir ($dirhandle);
    closedir ($dirhandle);

    # From here on the entries of @dir are url-encoded names.
    foreach my $entry (@dir) {
        $entry = &unicode::raw_filename_to_url_encoded($entry);
    }

    # Re-order the files in the list so any directories ending with .all are moved to the end
    for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
        my $entry_path = &util::filename_cat($dirname, $dir[$i]);
        if (-d $entry_path && $dir[$i] =~ /\.all$/) {
            push(@dir, splice(@dir, $i, 1));
        }
    }

    # setup the metadata structures. we do a metadata_read pass to see if there is any additional metadata, then pass it to read

    my $additionalmetadata = 0;      # is there extra metadata available?
    my %extrametadata;               # maps from filespec to extra metadata keys
    my %extrametafile;               # maps from filespec to the metadata.xml (or similar) file it came from
    my @extrametakeys;               # keys of %extrametadata in order read

    # Work out the name of this directory relative to $base_dir.
    # NOTE(review): $base_dir is interpolated into the pattern without
    # escaping, so regex metacharacters in the path (e.g. brackets) are
    # interpreted as regex syntax - confirm this is what callers expect.
    my $os_dirsep = &util::get_os_dirsep();
    my $dirsep    = &util::get_dirsep();
    my $base_dir_regexp = $base_dir;
    $base_dir_regexp =~ s/\//$os_dirsep/g;
    my $local_dirname = $dirname;

    $local_dirname =~ s/^$base_dir_regexp($os_dirsep)*//;
    # if we are in import folder, then local_dirname will be empty
    if ($local_dirname ne "") {
        # look for extra metadata passed down from higher folders
        $local_dirname .= $dirsep;
        if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
            my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
            foreach my $ek (@$extrakeys) {
                my $extrakeys_re  = $ek->{'re'};
                my $extrakeys_md  = $ek->{'md'};
                my $extrakeys_mf  = $ek->{'mf'};
                push(@extrametakeys, $extrakeys_re);
                $extrametadata{$extrakeys_re} = $extrakeys_md;
                $extrametafile{$extrakeys_re} = $extrakeys_mf;
            }
            delete($self->{'subdir_extrametakeys'}->{$local_dirname});
        }
    }

    # apply metadata pass for each of the files in the directory -- ignore
    # maxdocs here
    my $num_files = scalar(@dir);
    foreach my $subfile (@dir) {
        next if ($subfile =~ m/^\.\.?$/);

        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename = &util::filename_cat($base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2);
            next;
        }

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2);

        &plugin::metadata_read ($pluginfo, $base_dir,
                                $raw_file_subfile, $block_hash,
                                \@extrametakeys, \%extrametadata,
                                \%extrametafile,
                                $processor, $gli);
        $additionalmetadata = 1;
    }

    # filter out any extrametakeys that mention subdirectories and store
    # for later use (i.e. when that sub-directory is being processed)
    foreach my $ek (@extrametakeys) {
        my ($subdir_re, $extrakey_dir) = &File::Basename::fileparse($ek);
        $extrakey_dir = &util::unregex_filename($extrakey_dir);

        my $dirsep_re = &util::get_re_dirsep();

        my $ek_non_re = &util::unregex_filename($ek);
        if ($ek_non_re =~ m/$dirsep_re/) { # specifies at least one directory
            my $md = $extrametadata{$ek};
            my $mf = $extrametafile{$ek};

            my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};

            my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };

            # when its looked up, it must be relative to the base dir
            push(@{$subdir_extrametakeys->{"$local_dirname$extrakey_dir"}}, $subdir_rec);
        }
    }

    # import each of the files in the directory
    my $count = 0;
    for (my $i = 0; $i <= scalar(@dir); $i++) {
        # When every file in the directory has been done, pause for a moment (figuratively!)
        # If the -recheck_directories argument hasn't been provided, stop now (default)
        # Otherwise, re-read the contents of the directory to check for new files
        #   Any new files are added to the @dir list and are processed as normal
        #   This is necessary when documents to be indexed are specified in bibliographic DBs
        #   These files are copied/downloaded and stored in a new folder at import time
        if ($i == $num_files) {
            last unless $self->{'recheck_directories'};

            # Re-read the files in the directory to see if there are any new files
            my $recheck_handle;
            last if (!opendir ($recheck_handle, $dirname));
            my @dirnow = readdir ($recheck_handle);
            closedir ($recheck_handle);
            foreach my $entry (@dirnow) {
                $entry = &unicode::raw_filename_to_url_encoded($entry);
            }

            # We're only interested if there are more files than there were before
            last if (scalar(@dirnow) <= scalar(@dir));

            # Any new files are added to the end of @dir to get processed by the loop
            my %already_listed = map { $_ => 1 } @dir;
            foreach my $subfilenow (@dirnow) {
                push(@dir, $subfilenow) unless $already_listed{$subfilenow};
            }

            # When the new files have been processed, check again
            $num_files = scalar(@dir);
        }

        my $subfile = $dir[$i];
        last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
        next if ($subfile =~ /^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename
            = &util::filename_cat($this_file_base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2);
            next;
        }

        # Follow Windows shortcuts
        if ($raw_subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) {
            require Win32::Shortcut;
            my $shortcut = Win32::Shortcut->new(&util::filename_cat($dirname, $raw_subfile));
            if ($shortcut) {
                # The file to be processed is now the target of the shortcut.
                # Only this iteration's values are redirected: $file itself
                # must stay intact, because the remaining entries of this
                # directory are still resolved relative to it.  The path
                # handed to plugin::read is recomputed so that the target,
                # not the .lnk file, is what gets read.
                # $raw_full_filename deliberately keeps naming the .lnk
                # file, which is the path used as the lookup key below.
                $this_file_base_dir = "";
                $raw_subfile = $shortcut->Path;
                $raw_file_subfile = &util::filename_cat("", $raw_subfile);
            }
        }

        # check for a symlink pointing back to a leading directory
        if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") {
            # readlink gives a "fatal error" on systems that don't implement
            # symlinks. This assumes the the -l test above would fail on those.
            my $linkdest = readlink "$dirname/$raw_subfile";
            if (!defined ($linkdest)) {
                # system error - file not found?
                warn "DirectoryPlugin: symlink problem - $!";
            } else {
                # see if link points to current or a parent directory
                if ($linkdest =~ m@^[\./\\]+$@ ||
                    index($dirname, $linkdest) != -1) {
                    warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
                    next;
                }
            }
        }

        print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);

        # Make a copy of $in_metadata to pass to $raw_subfile
        my $out_metadata = {};
        &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);

        # check the assocfile_info
        if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
            &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
        }

        # $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8

        # Next add metadata read in XML files (if it is supplied)
        if ($additionalmetadata == 1) {
            foreach my $filespec (@extrametakeys) {
                ## use the url-encoded filename to do the filename comparison

                if ($subfile =~ /^$filespec$/) {
                    print $outhandle "File \"$subfile\" matches filespec \"$filespec\"\n"
                        if ($verbosity > 2);
                    my $mdref = $extrametadata{$filespec};
                    my $mfref = $extrametafile{$filespec};

                    # Add the list files where the metadata came from
                    # into the metadata table so we can track this
                    # This mechanism is similar to how gsdlassocfile works

                    my @metafile_pair = ();
                    foreach my $l (keys %$mfref) {
                        my $f = $mfref->{$l};
                        push (@metafile_pair, "$f : $l");
                    }

                    $mdref->{'gsdlmetafile'} = \@metafile_pair;

                    &metadatautil::combine_metadata_structures($out_metadata, $mdref);
                }
            }
        }

        if (defined $self->{'inf_timestamp'}) {
            # Look to see if it's a completely new file

            if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
                # Not a new file, must be an existing file
                # Let' see if it's newer than the last import.pl

                if (! -d $raw_full_filename) {
                    if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
                        # filename has been around for longer than inf_timestamp
                        print $outhandle "**** Skipping $subfile\n" if ($verbosity > 3);
                        next;
                    }
                    else {
                        # Remove old folder in archives (might hash to something different)
                        # *** should be doing this on a Del one as well
                        # but leave folder name?? and ensure hashs to
                        # same again??

                        # Then let through as new doc??

                        # mark to doc-oids that rely on it for re-indexing
                    }
                }
            }
        }

        # Recursively read each $subfile
        print $outhandle "DirectoryPlugin recurring: $subfile\n" if ($verbosity > 2);

        $count += &plugin::read ($pluginfo, $this_file_base_dir,
                                 $raw_file_subfile, $block_hash,
                                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}
664
# Hook taking a $stats argument; this plugin contributes nothing to it,
# so the body is intentionally empty.
sub compile_stats {
    my $self = shift;
    my ($stats) = @_;
}
669
6701;
Note: See TracBrowser for help on using the browser.