root/main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm @ 24932

Revision 24932, 21.7 KB (checked in by ak19, 9 years ago)

Diego noticed that the metadata in a top-level metadata.xml, which specifies metadata for files in import's subfolders, was not being attached to those files on Windows, although this worked on Linux. The cause was the difference between the file separators used by the OS and the URL-style slashes used in the metadata.xml Diego had constructed. This has now been fixed; Dr Bainbridge came up with a tidier solution, a new method in util.pm that handles the details.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# DirectoryPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# DirectoryPlugin is a plugin which recurses through directories processing
27# each file it finds - which basically means passing it down the plugin
28# pipeline
29
30package DirectoryPlugin;
31
32use PrintInfo;
33use plugin;
34use util;
35use metadatautil;
36
37use File::Basename;
38use strict;
39no strict 'refs';
40no strict 'subs';
41
42use Encode;
43
# DirectoryPlugin inherits from PrintInfo.
BEGIN {
    @DirectoryPlugin::ISA = ('PrintInfo');
}

# Descriptions of the arguments this plugin accepts. The list is appended
# to the shared "ArgList" by new().
my $arguments = [
    {
        name => "block_exp",
        desc => "{BasePlugin.block_exp}",
        type => "regexp",
        deft => &get_default_block_exp(),
        reqd => "no",
    },

    # This option has been deprecated. It is still declared so that new()
    # can detect it and tell people to stop using it.
    {
        name      => "use_metadata_files",
        desc      => "{DirectoryPlugin.use_metadata_files}",
        type      => "flag",
        reqd      => "no",
        hiddengli => "yes",
    },

    {
        name => "recheck_directories",
        desc => "{DirectoryPlugin.recheck_directories}",
        type => "flag",
        reqd => "no",
    },
];

# Plugin-level description record; appended to the shared "OptList" by new().
my $options = {
    name     => "DirectoryPlugin",
    desc     => "{DirectoryPlugin.desc}",
    abstract => "no",
    inherits => "yes",
    args     => $arguments,
};
70
# Constructor.
#
# Parameters:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed straight through to the PrintInfo constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugin's argument and option descriptions
#
# Returns the new object. Dies if the deprecated -use_metadata_files
# option is set.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

    # Register this class and what it accepts before building the object.
    push(@$pluginlist, $class);
    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    # When only plugin information is wanted, skip all option checks and
    # initialisation.
    if (!$self->{'info_only'}) {

        # The option is still declared so that people using it get told.
        if ($self->{'use_metadata_files'}) {
            die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
        }

        foreach my $counter ('num_processed', 'num_not_processed',
                             'num_blocked', 'num_archives') {
            $self->{$counter} = 0;
        }

        # Metadata specifications waiting for a sub-directory to be read,
        # filled in and consumed by read().
        $self->{'subdir_extrametakeys'} = {};
    }

    return bless $self, $class;
}
100
# Called once, at the start of processing.
#
# Stores the verbosity and the fail handle handed down by the processor.
# The out handle is only replaced when one is actually supplied.
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    $self->{'verbosity'} = $verbosity;

    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }

    $self->{'failhandle'} = $failhandle;
}
114
# Called once, after all passes have finished.
# DirectoryPlugin has nothing to tidy up.
sub deinit {
    my $self = shift (@_);

}
120
# Called at the beginning of each plugin pass (import has one, building has
# many).
#
# For a processor whose class name does not end in "buildproc", and only
# when the 'incremental' setting is 1, this records the -M age of the
# archives info database in $self->{'inf_timestamp'}, provided that file
# exists.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my $proc_package_name = ref($processor);
    my $is_import_pass    = ($proc_package_name !~ /buildproc$/);

    if ($is_import_pass && $self->{'incremental'} == 1) {
        # The infodbtype for this collection comes from the arcinfo object.
        my $infodbtype = $processor->getoutputinfo()->{'infodbtype'};
        if ($infodbtype eq "gdbm-txtgz") {
            # in archives, cannot use txtgz version
            $infodbtype = "gdbm";
        }

        my $output_dir   = $processor->getoutputdir();
        my $archives_inf = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $output_dir);

        $self->{'inf_timestamp'} = -M $archives_inf if (-e $archives_inf);
    }
}
140
# Hook with the same argument list as begin(); DirectoryPlugin does
# nothing here.
sub remove_all {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
145
146
# Hook for removing a single file. Always returns undef: this is not
# expected to be called for directories.
sub remove_one {
    my ($self, $file, $oids, $archivedir) = @_;

    return undef;
}
153
154
# Called at the end of each plugin pass.
# DirectoryPlugin has no per-pass clean-up to do.
sub end {
    my $self = shift (@_);

}
160
161
162
# Return 1 if this class might recurse using $pluginfo.
# DirectoryPlugin always answers 1.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
169
# Default value for the block_exp argument: a case-insensitive pattern
# matching names that end in CVS, .svn, Thumbs.db, OIDcount or "~".
# Also called with no arguments while the argument list is being built,
# so $self may be undef.
sub get_default_block_exp {
    my ($self) = @_;

    my $default_block_exp = '(?i)(CVS|\.svn|Thumbs\.db|OIDcount|~)$';
    return $default_block_exp;
}
175
# Decide whether $dirname is a directory this plugin should descend into.
#
# Returns:
#   undef - $dirname is not a directory
#   0     - it is a directory but must be skipped (matches block_exp, looks
#           like a collection's archives/index directory, is too deep, or
#           looks like a recursive import path)
#   1     - it is safe to process
sub check_directory_path {

    my $self = shift(@_);
    my ($dirname) = @_;

    return undef unless (-d $dirname);

    return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);

    my $outhandle = $self->{'outhandle'};

    # check to make sure we're not reading the archives or index directory
    my $gsdlhome = quotemeta(defined $ENV{'GSDLHOME'} ? $ENV{'GSDLHOME'} : "");
    if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
        print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
        return 0;
    }

    # Check that the path is not suspiciously deep (a likely cyclic path).
    # This used to be the pattern m%(/.*){,41}%. Before Perl 5.34 "{,41}" is
    # literal text, so the check never fired; from 5.34 on it means {0,41},
    # which matches every path and rejects every directory. Counting the
    # separators states the intent directly.
    my $max_separators  = 40;
    my $separator_count = ($dirname =~ tr{/}{});
    if ($separator_count > $max_separators) {
        print $outhandle "DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
        return 0;
    }

    # Check for an import directory nested inside itself:
    # .../import/X/import/X...
    if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
        print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
        return 0;
    }

    return 1;
}
208
# Turn the 'shared_fileroot' records collected in $block_hash into blocked
# files plus 'gsdlassocfile_tobe' entries kept in $self->{'assocfile_info'}.
# This may be called more than once; the 'shared_fileroot' table is emptied
# each time it has been dealt with.
sub sort_out_associated_files {
    my ($self, $block_hash) = @_;

    # Nothing collected, nothing to do.
    return if (scalar(keys %{$block_hash->{'shared_fileroot'}}) == 0);

    if (!defined $self->{'assocfile_info'}) {
        $self->{'assocfile_info'} = {};
    }
    my $assoc_info = $self->{'assocfile_info'};

    foreach my $prefix (keys %{$block_hash->{'shared_fileroot'}}) {
        my $record = $block_hash->{'shared_fileroot'}->{$prefix};
        my $tie_to = $record->{'tie_to'};
        my $exts   = $record->{'exts'};

        # Only records that name a file to tie to, and have at least one
        # extension, produce associated files.
        next unless defined $tie_to;
        next unless (scalar(keys %$exts) > 0);

        # The file that the others get attached to.
        my $base_file = "$prefix$tie_to";
        if (!defined $assoc_info->{$base_file}) {
            $assoc_info->{$base_file} = {};
        }
        my $base_info = $assoc_info->{$base_file};

        if (!defined $base_info->{'gsdlassocfile_tobe'}) {
            $base_info->{'gsdlassocfile_tobe'} = [];
        }
        my $assoc_tobe = $base_info->{'gsdlassocfile_tobe'};

        foreach my $ext (keys %$exts) {
            my $assoc_file = $prefix . $ext;

            # Block the file itself ...
            &util::block_filename($block_hash, $assoc_file);

            # ... and record it as an associated file of the base file.
            print STDERR "  $self->{'plugin_type'}: Associating $assoc_file with $tie_to version\n";
            my $mime_type = ""; # let system auto detect this
            push(@$assoc_tobe, $assoc_file . ":" . $mime_type . ":");
        }
    }

    # Start the next round with an empty table.
    $block_hash->{'shared_fileroot'} = {};

}
250
251
# Decide whether a file must be skipped.
#
# A file is blocked when its full path is a key of
# $block_hash->{'file_blocks'} (looked up in lower case when GSDLOS is
# "windows", where all block paths are lowercased), or when the path
# matches this plugin's own block_exp.
#
# Returns 1 if blocked, counting it in $self->{'num_blocked'}; 0 otherwise.
sub file_is_blocked {
    my ($self, $block_hash, $filename_full_path) = @_;

    $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);

    my $lookup_path = $filename_full_path;
    if ($ENV{'GSDLOS'} =~ m/^windows$/) {
        $lookup_path = lc ($filename_full_path);
    }

    my $is_blocked = 0;
    if (defined $block_hash->{'file_blocks'}->{$lookup_path}) {
        $is_blocked = 1;
    }
    elsif ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) {
        # caught by DirectoryPlugin's own block_exp
        $is_blocked = 1;
    }

    if ($is_blocked) {
        $self->{'num_blocked'} ++;
        return 1;
    }
    return 0;
}
282
283
284
# Global file scan for one directory: hand every entry of the directory to
# plugin::file_block_read so that $block_hash gets filled in, then turn any
# collected 'shared_fileroot' records into associated files.
#
# Parameters:
#   $pluginfo   - passed on unchanged to plugin::file_block_read
#   $base_dir   - base directory; prepended to $file when it contains a
#                 word character
#   $file       - directory to scan, relative to $base_dir
#   $block_hash - hash ref of blocking tables; missing tables are created
#   $metadata   - passed on unchanged to plugin::file_block_read
#   $gli        - when true, also report read errors on STDERR as a
#                 <ProcessingError> element
#
# Returns whatever check_directory_path returned (undef or 0) when the
# directory is rejected, -1 when the directory cannot be read, 1 otherwise.
sub file_block_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname = $file;
    $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    print $outhandle "Global file scan checking directory: $dirname\n";

    # Make sure all the tables the scan writes to exist.
    foreach my $table ('all_files', 'metadata_files', 'file_blocks', 'shared_fileroot') {
        $block_hash->{$table} = {} unless defined $block_hash->{$table};
    }

    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);

    # Find all the files in the directory. A lexical handle is used because
    # this method is re-entered for sub-directories; a package-global
    # bareword handle would be shared by every level of the recursion.
    my $dirhandle;
    if (!opendir ($dirhandle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = readdir ($dirhandle);
    closedir ($dirhandle);

    # Recur over directory contents.
    foreach my $raw_subfile (@dir) {
        next if ($raw_subfile =~ m/^\.\.?$/);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);

        print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2);

        &plugin::file_block_read ($pluginfo, $base_dir,
                                  $raw_file_subfile,
                                  $block_hash, $metadata, $gli);
    }

    $self->sort_out_associated_files($block_hash);
    return 1;

}
347
# DirectoryPlugin does not take part in the metadata_read pass itself:
# whatever it is given, it answers undef.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}
357
358
# Process one directory: run the metadata_read pass over its entries, then
# pass each entry down the plugin pipeline with the metadata that applies
# to it.
#
# Returns the number of files processed. Returns whatever
# check_directory_path returned (undef or 0) when the directory is rejected,
# and -1 when the directory cannot be read.
#
# Note that $base_dir might be "" and that $file might include directories.
#
# This function passes around metadata hash structures.  Metadata hash
# structures are hashes that map from a (scalar) key (the metadata element
# name) to either a scalar metadata value or a reference to an array of
# such values.
#
# Parameters:
#   $pluginfo    - passed on to plugin::metadata_read and plugin::read
#   $base_dir    - base directory
#   $file        - directory to read, relative to $base_dir ("" means
#                  $base_dir itself)
#   $block_hash  - blocking tables built by the global file scan
#   $in_metadata - metadata hash structure inherited from above
#   $processor   - passed on to plugin::metadata_read and plugin::read
#   $maxdocs     - stop once this many documents are done (-1 = no limit)
#   $total_count - number of documents already processed by callers
#   $gli         - when true, also report read errors on STDERR as a
#                  <ProcessingError> element
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname;
    if ($file eq "") {
        $dirname = $base_dir;
    } else {
        $dirname = $file;
        $dirname = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    }

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
        print $outhandle "DirectoryPlugin: metadata passed in: ",
            join(", ", keys %$in_metadata), "\n";
    }

    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);

    # Find all the files in the directory. A lexical handle is used because
    # this method is re-entered for sub-directories; a package-global
    # bareword handle would be shared by every level of the recursion.
    my $dirhandle;
    if (!opendir ($dirhandle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    # From here on @dir holds url-encoded names.
    my @dir = map { &unicode::raw_filename_to_url_encoded($_) } readdir ($dirhandle);
    closedir ($dirhandle);

    # Re-order the files in the list so any directories ending with .all are
    # moved to the end. The cheap name test runs first; the -d test is done
    # on the raw filename, because the url-encoded form need not exist on
    # disk.
    for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
        next unless ($dir[$i] =~ /\.all$/);
        my $raw_name = &unicode::url_encoded_to_raw_filename($dir[$i]);
        if (-d &util::filename_cat($dirname, $raw_name)) {
            push(@dir, splice(@dir, $i, 1));
        }
    }

    # Set up the metadata structures. We do a metadata_read pass to see if
    # there is any additional metadata, then pass it to read.
    my $additionalmetadata = 0;      # is there extra metadata available?
    my %extrametadata;               # maps from filespec to extra metadata keys
    my %extrametafile;               # maps from filespec to the metadata.xml (or similar) file it came from
    my @extrametakeys;               # keys of %extrametadata in order read

    # Want to get relative path of local_dirname within the base_directory
    # but with URL style slashes.
    my $local_dirname = &util::filename_within_directory_url_format($dirname, $base_dir);

    # if we are in import folder, then local_dirname will be empty
    if ($local_dirname ne "") {
        # look for extra metadata passed down from higher folders
        $local_dirname .= "/"; # closing slash must be URL type slash also and not the OS one
        if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
            my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
            foreach my $ek (@$extrakeys) {
                my $extrakeys_re = $ek->{'re'};
                push(@extrametakeys, $extrakeys_re);
                $extrametadata{$extrakeys_re} = $ek->{'md'};
                $extrametafile{$extrakeys_re} = $ek->{'mf'};
            }
            delete($self->{'subdir_extrametakeys'}->{$local_dirname});
        }
    }

    # apply metadata pass for each of the files in the directory -- ignore
    # maxdocs here
    foreach my $subfile (@dir) {
        next if ($subfile =~ m/^\.\.?$/);

        my $raw_subfile       = &unicode::url_encoded_to_raw_filename($subfile);
        my $raw_file_subfile  = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename = &util::filename_cat($base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash,$raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2);
            next;
        }

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2);

        &plugin::metadata_read ($pluginfo, $base_dir,
                                $raw_file_subfile,$block_hash,
                                \@extrametakeys, \%extrametadata,
                                \%extrametafile,
                                $processor, $gli);
        $additionalmetadata = 1;
    }

    # Find the extrametakeys that mention subdirectories and store a copy
    # for later use (i.e. when that sub-directory is being processed)
    my $dirsep_re = &util::get_re_dirsep();
    foreach my $ek (@extrametakeys) {
        my ($subdir_re,$extrakey_dir) = &File::Basename::fileparse($ek);
        $extrakey_dir = &util::unregex_filename($extrakey_dir);

        my $ek_non_re = &util::unregex_filename($ek);
        next unless ($ek_non_re =~ m/$dirsep_re/); # specifies at least one directory

        my $subdir_rec = { 're' => $subdir_re,
                           'md' => $extrametadata{$ek},
                           'mf' => $extrametafile{$ek} };

        # when its looked up, it must be relative to the base dir
        push(@{$self->{'subdir_extrametakeys'}->{"$local_dirname$extrakey_dir"}}, $subdir_rec);
    }

    # import each of the files in the directory
    my $num_files = scalar(@dir);
    my $count = 0;
    for (my $i = 0; $i <= scalar(@dir); $i++) {
        # When every file in the directory has been done, pause for a moment (figuratively!)
        # If the -recheck_directories argument hasn't been provided, stop now (default)
        # Otherwise, re-read the contents of the directory to check for new files
        #   Any new files are added to the @dir list and are processed as normal
        #   This is necessary when documents to be indexed are specified in bibliographic DBs
        #   These files are copied/downloaded and stored in a new folder at import time
        if ($i == $num_files) {
            last unless $self->{'recheck_directories'};

            # Re-read the files in the directory to see if there are any new files
            my $recheck_handle;
            last if (!opendir ($recheck_handle, $dirname));
            my @dirnow = map { &unicode::raw_filename_to_url_encoded($_) } readdir ($recheck_handle);
            closedir ($recheck_handle);

            # We're only interested if there are more files than there were before
            last if (scalar(@dirnow) <= scalar(@dir));

            # Any new files are added to the end of @dir to get processed by the loop
            my %already_listed = map { $_ => 1 } @dir;
            foreach my $subfilenow (@dirnow) {
                push(@dir, $subfilenow) unless $already_listed{$subfilenow};
            }

            # When the new files have been processed, check again
            $num_files = scalar(@dir);
        }

        my $subfile = $dir[$i];
        last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
        next if ($subfile =~ /^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &util::filename_cat($file, $raw_subfile);
        my $raw_full_filename
            = &util::filename_cat($this_file_base_dir,$raw_file_subfile);

        if ($self->file_is_blocked($block_hash,$raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2);
            next;
        }

        # Follow Windows shortcuts
        if ($raw_subfile =~ /(?i)\.lnk$/ && $ENV{'GSDLOS'} =~ /^windows$/i) {
            require Win32::Shortcut;
            my $shortcut = Win32::Shortcut->new(&util::filename_cat($dirname, $raw_subfile));
            if ($shortcut) {
                # The file to be processed is now the target of the shortcut.
                # Only this entry's variables are redirected. $file is left
                # alone so the remaining entries of this directory keep
                # their correct relative paths, and the path handed to
                # plugin::read is recomputed so that the target really is
                # what gets read. $raw_full_filename stays the shortcut's
                # own path, which is how the file is known to $block_hash.
                $this_file_base_dir = "";
                $raw_subfile = $shortcut->Path;
                $raw_file_subfile = &util::filename_cat("", $raw_subfile);
            }
        }

        # check for a symlink pointing back to a leading directory
        my $link_candidate = "$dirname/$raw_subfile";
        if (-d $link_candidate && -l $link_candidate) {
            # readlink gives a "fatal error" on systems that don't implement
            # symlinks. This assumes the -l test above would fail on those.
            my $linkdest = readlink $link_candidate;
            if (!defined ($linkdest)) {
                # system error - file not found?
                warn "DirectoryPlugin: symlink problem - $!";
            }
            elsif ($linkdest =~ m@^[\./\\]+$@ || index($dirname, $linkdest) != -1) {
                # link points to current or a parent directory
                warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
                next;
            }
        }

        print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);

        # Make a copy of $in_metadata to pass to $raw_subfile
        my $out_metadata = {};
        &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);

        # check the assocfile_info
        if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
            &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
        }

        # $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8

        # Next add metadata read in XML files (if it is supplied)
        if ($additionalmetadata == 1) {
            foreach my $filespec (@extrametakeys) {
                # use the url-encoded filename to do the filename comparison
                next unless ($subfile =~ /^$filespec$/);

                print $outhandle "File \"$subfile\" matches filespec \"$filespec\"\n"
                    if ($verbosity > 2);
                my $mdref = $extrametadata{$filespec};
                my $mfref = $extrametafile{$filespec};

                # Add the list files where the metadata came from
                # into the metadata table so we can track this
                # This mechanism is similar to how gsdlassocfile works
                my @metafile_pair = ();
                foreach my $mf_key (keys %$mfref) {
                    my $mf_value = $mfref->{$mf_key};
                    push (@metafile_pair, "$mf_value : $mf_key");
                }

                $mdref->{'gsdlmetafile'} = \@metafile_pair;

                &metadatautil::combine_metadata_structures($out_metadata, $mdref);
            }
        }

        if (defined $self->{'inf_timestamp'}) {
            # Look to see if it's a completely new file
            if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
                # Not a new file, must be an existing file
                # Let's see if it's newer than the last import.pl
                if (! -d $raw_full_filename) {
                    if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
                        # filename has been around for longer than inf_timestamp
                        print $outhandle "**** Skipping $subfile\n" if ($verbosity >3);
                        next;
                    }
                    # Otherwise the file is let through for re-indexing.
                    # Still to do here (notes carried over from the
                    # original code):
                    #   Remove old folder in archives (might hash to something different)
                    #   *** should be doing this on a Del one as well
                    #   but leave folder name?? and ensure hashs to
                    #   same again??
                    #   Then let through as new doc??
                    #   mark to doc-oids that rely on it for re-indexing
                }
            }
        }

        # Recursively read each $subfile
        print $outhandle "DirectoryPlugin recurring: $subfile\n" if ($verbosity > 2);

        $count += &plugin::read ($pluginfo, $this_file_base_dir,
                                 $raw_file_subfile, $block_hash,
                                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}
666
# Hook that is handed a statistics object; DirectoryPlugin adds nothing
# to it.
sub compile_stats {
    my ($self, $stats) = @_;
}
671
6721;
Note: See TracBrowser for help on using the browser.