source: main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm@ 34130

Last change on this file since 34130 was 33721, checked in by ak19, 5 years ago

Inactive but committing to svn: Newer Locale.pm file, and introducing Alias.pm, and change to import Alias.pm in DirectoryPlugin.pm. These changes are for supporting perl to run in unicode (or perl to run perl code written in unicode), when using Windows in a non-English locale like Chinese. These changes were required and tested on Windows 10 in Chinese locale and other region (time) and display settings set for China.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.6 KB
Line 
1###########################################################################
2#
3# DirectoryPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
# DirectoryPlugin is a plugin which recurses through directories, processing
# each file it finds - which basically means passing it down the plugin
# pipeline.
29
30package DirectoryPlugin;
31
32use extrametautil;
33use CommonUtil;
34use plugin;
35use util;
36use FileUtils;
37use metadatautil;
38
39use File::Basename;
40use strict;
41no strict 'refs';
42no strict 'subs';
43
44#use Encode::Alias;
45use Encode::Locale;
46use Encode;
47use Unicode::Normalize;
48
# Set up inheritance at compile time: DirectoryPlugin derives from CommonUtil.
BEGIN {
    @DirectoryPlugin::ISA = qw(CommonUtil);
}
52
# Descriptions of the arguments this plugin accepts. new() appends these to
# the caller-supplied "ArgList".
my $arguments = [
    {
        'name' => "block_exp",
        'desc' => "{CommonUtil.block_exp}",
        'type' => "regexp",
        'deft' => &get_default_block_exp(),
        'reqd' => "no",
    },

    # This option has been deprecated. Leave it here for now so we can warn
    # people not to use it.
    {
        'name'      => "use_metadata_files",
        'desc'      => "{DirectoryPlugin.use_metadata_files}",
        'type'      => "flag",
        'reqd'      => "no",
        'hiddengli' => "yes",
    },

    {
        'name' => "recheck_directories",
        'desc' => "{DirectoryPlugin.recheck_directories}",
        'type' => "flag",
        'reqd' => "no",
    },
];

# Plugin-level option record. new() appends it to the caller-supplied "OptList".
my $options = {
    'name'     => "DirectoryPlugin",
    'desc'     => "{DirectoryPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
75
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; the class name is appended to it
#   $inputargs       - handed unchanged to the CommonUtil constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugin's argument and option descriptions
#
# Returns the object created by CommonUtil, blessed into $class.
# Dies if the deprecated -use_metadata_files option is set.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Use an explicit method call: the indirect-object form "new CommonUtil(...)"
    # relies on parser heuristics and is ambiguous in a package that defines
    # its own sub named "new".
    my $self = CommonUtil->new($pluginlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options or initialisations etc
        return bless $self, $class;
    }

    # we have left this option in so we can warn people who are still using it
    if ($self->{'use_metadata_files'}) {
        die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
    }

    # Counters, all starting at zero.
    $self->{'num_processed'} = 0;
    $self->{'num_not_processed'} = 0;
    $self->{'num_blocked'} = 0;
    $self->{'num_archives'} = 0;

    # Extra-metadata records that name a sub-directory are parked here by
    # read() until that sub-directory is itself read.
    $self->{'subdir_extrametakeys'} = {};

    return bless $self, $class;
}
105
# Called once, at the start of processing.
# Stores the verbosity level and the output/failure handles on the object.
# An undefined $outhandle leaves any existing 'outhandle' untouched, whereas
# 'failhandle' is always overwritten.
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    # verbosity is passed through from the processor
    $self->{'verbosity'} = $verbosity;

    # as are the outhandle and failhandle
    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }
    $self->{'failhandle'} = $failhandle;
}
119
# Called once, after all passes have finished.
# DirectoryPlugin has nothing to tear down, so this is a no-op.
sub deinit {
    my ($plugin) = @_;
}
125
# Called at the beginning of each plugin pass (import has one, building has many).
# When the processor's class name does not end in "buildproc" and
# $self->{'incremental'} is 1, records the -M age of the existing
# "archiveinf-doc" info database in $self->{'inf_timestamp'}.
sub begin {
    my ($self, $pluginfo, $base_dir, $processor, $maxdocs) = @_;

    # Only lookup timestamp info for import.pl, and only if incremental is set
    my $processor_class = ref $processor;
    if ($processor_class !~ /buildproc$/ && $self->{'incremental'} == 1) {
        # Get the infodbtype value for this collection from the arcinfo object
        my $infodbtype = $processor->getoutputinfo()->{'infodbtype'};
        if ($infodbtype eq "gdbm-txtgz") {
            # in archives, cannot use txtgz version
            $infodbtype = "gdbm";
        }

        my $archives_inf = &dbutil::get_infodb_file_path(
            $infodbtype, "archiveinf-doc", $processor->getoutputdir());

        $self->{'inf_timestamp'} = -M $archives_inf if (-e $archives_inf);
    }
}
145
# Pipeline hook; DirectoryPlugin does nothing here.
sub remove_all {
    my $self = shift @_;
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
150
151
# Pipeline hook; always returns undef.
sub remove_one {
    my ($self, $file, $oids, $archivedir) = @_;

    # this will never be called for directories (will it??)
    return undef;
}
158
159
# Called at the end of each plugin pass.
# DirectoryPlugin has no per-pass cleanup to perform.
sub end {
    my ($plugin) = shift @_;
}
165
166
167
# Return 1 if this class might recurse using $pluginfo.
# DirectoryPlugin always does.
sub is_recursive {
    my ($self) = @_;
    return 1;
}
174
# Default value for the block_exp argument: a case-insensitive pattern
# matching names that end in CVS, .svn, Thumbs.db, OIDcount, .DS_Store or "~".
sub get_default_block_exp {
    my ($self) = @_;

    my $default_block_exp = '(?i)(CVS|\.svn|Thumbs\.db|OIDcount|\.DS_Store|~)$';
    return $default_block_exp;
}
180
# Decide whether $dirname should be read.
#
# Returns undef when $dirname is not a directory, 0 when the directory must
# be skipped (it matches block_exp, looks like another collection's archives or
# index directory, is too deeply nested, or looks like a recursive import
# path) and 1 when it is fine to read. All skip cases except the block_exp
# match also print an explanation to $self->{'outhandle'}.
sub check_directory_path {
    my ($self, $dirname) = @_;

    if (!-d $dirname) {
        return undef;
    }

    if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/) {
        return 0;
    }

    my $outhandle = $self->{'outhandle'};

    # Report why a directory is being skipped and yield the "skip" result.
    my $skip_because = sub {
        my ($message) = @_;
        print $outhandle $message;
        return 0;
    };

    # check to make sure we're not reading the archives or index directory
    my $gsdlhome = quotemeta($ENV{'GSDLHOME'});
    return $skip_because->("DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n")
        if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/);

    # check to see we haven't got a cyclic path: too many path components...
    return $skip_because->("DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n")
        if ($dirname =~ m%(/.*){41}%);

    # ...or the same sub-path repeated after a second "import" component
    return $skip_because->("DirectoryPlugin: $dirname appears to be in a recursive loop...\n")
        if ($dirname =~ m%.*?import/(.+?)/import/\1.*%);

    return 1;
}
213
# Convert the records in $block_hash->{'shared_fileroot'} into blocked files
# plus 'gsdlassocfile_tobe' entries under $self->{'assocfile_info'}, then
# reset 'shared_fileroot' to an empty hash.
#
# Each record is keyed by a filename prefix and may carry 'tie_to' (the
# suffix of the file the others are attached to) and 'exts' (a hash whose
# keys are the suffixes of the files to attach). Records without a 'tie_to'
# or without any 'exts' are ignored.
#
# This may be called more than once.
sub sort_out_associated_files {
    my ($self, $block_hash) = @_;

    # Nothing collected: leave everything as it is.
    return if (!scalar (keys %{$block_hash->{'shared_fileroot'}}));

    if (!defined $self->{'assocfile_info'}) {
        $self->{'assocfile_info'} = {};
    }
    my $assocfile_info = $self->{'assocfile_info'};

    foreach my $prefix (keys %{$block_hash->{'shared_fileroot'}}) {
        my $record = $block_hash->{'shared_fileroot'}->{$prefix};
        my ($tie_to, $exts) = ($record->{'tie_to'}, $record->{'exts'});

        next unless (defined $tie_to);
        next unless (scalar (keys %$exts) > 0);

        # set up fileblocks and assocfile_tobe
        my $base_file = "$prefix$tie_to";
        if (!defined $assocfile_info->{$base_file}) {
            $assocfile_info->{$base_file} = {};
        }
        my $base_file_info = $assocfile_info->{$base_file};

        if (!defined $base_file_info->{'gsdlassocfile_tobe'}) {
            $base_file_info->{'gsdlassocfile_tobe'} = [];
        }
        my $assoc_tobe = $base_file_info->{'gsdlassocfile_tobe'};

        foreach my $e (keys %$exts) {
            # block the file
            $self->block_filename($block_hash, "$prefix$e");

            # set up as an associated file
            print STDERR " $self->{'plugin_type'}: Associating $prefix$e with $tie_to version\n";
            my $mime_type = ""; # let system auto detect this
            push(@$assoc_tobe, "$prefix$e:$mime_type:");
        }
    }

    # Start afresh for the next call.
    $block_hash->{'shared_fileroot'} = {};
}
255
256
257
258
# Global file-scan pass for one directory: make sure the bookkeeping tables
# in $block_hash exist, hand every directory entry to
# plugin::file_block_read, then fold the collected 'shared_fileroot' records
# into associated-file information.
#
# Arguments (after $self):
#   $pluginfo   - passed through to plugin::file_block_read
#   $base_dir   - base directory; prepended to $file when it contains a word character
#   $file       - directory to scan
#   $block_hash - hash ref holding the 'all_files', 'metadata_files',
#                 'file_blocks' and 'shared_fileroot' tables
#   $metadata   - passed through to plugin::file_block_read
#   $gli        - when true, a read failure is also reported on STDERR as a
#                 <ProcessingError> element
#
# Returns the result of check_directory_path() when that is not 1 (undef or
# 0), -1 when the directory cannot be opened, and 1 otherwise.
sub file_block_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname = $file;
    $dirname = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\w/;

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    print $outhandle "Global file scan checking directory: $dirname\n" if ($verbosity > 2);

    foreach my $table ('all_files', 'metadata_files', 'file_blocks', 'shared_fileroot') {
        $block_hash->{$table} = {} unless defined $block_hash->{$table};
    }

    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);

    # Find all the files in the directory. A lexical handle is used because
    # this routine is re-entered (via plugin::file_block_read) for every
    # sub-directory; a shared bareword handle would be a global.
    my $dirhandle;
    if (!opendir ($dirhandle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = sort readdir ($dirhandle);
    closedir ($dirhandle);

    # Recur over directory contents.
    foreach my $entry (@dir) {
        my $raw_subfile = $entry;
        next if ($raw_subfile =~ m/^\.\.?$/);

        my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2);

        &plugin::file_block_read ($pluginfo, $base_dir,
                                  $raw_file_subfile,
                                  $block_hash, $metadata, $gli);
    }

    $self->sort_out_associated_files($block_hash);
    return 1;
}
320
# We don't do metadata_read: directories carry no metadata of their own, so
# this always returns undef.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}
330
331
# Import one directory: run a metadata pass over its entries, then hand each
# entry to plugin::read together with the metadata that applies to it.
#
# Arguments (after $self):
#   $pluginfo    - passed through to plugin::metadata_read and plugin::read
#   $base_dir    - base directory; might be ""
#   $file        - directory to read; might be "" (then $base_dir itself is
#                  read) and might include directories
#   $block_hash  - hash ref consulted for blocked files and, on incremental
#                  imports, for its 'new_files' and 'reindex_files' tables
#   $in_metadata - metadata hash structure inherited by every entry
#   $processor   - passed through to plugin::metadata_read and plugin::read
#   $maxdocs     - stop once $total_count plus the running count reaches
#                  this value; -1 means no limit
#   $total_count - count accumulated by the caller so far
#   $gli         - when true, a read failure is also reported on STDERR as a
#                  <ProcessingError> element
#
# Returns the number of files processed (the sum of the plugin::read results),
# the result of check_directory_path() when that is not 1 (undef or 0), or
# -1 when the directory cannot be opened.
#
# This function passes around metadata hash structures. Metadata hash
# structures are hashes that map from a (scalar) key (the metadata element
# name) to either a scalar metadata value or a reference to an array of
# such values.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname;
    if ($file eq "") {
        $dirname = $base_dir;
    } else {
        $dirname = $file;
        $dirname = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\w/;
    }

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
        print $outhandle "DirectoryPlugin: metadata passed in: ",
            join(", ", keys %$in_metadata), "\n";
    }

    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);

    # Find all the files in the directory. A lexical handle is used because
    # this routine is re-entered (via plugin::read) for every sub-directory.
    my $dirhandle;
    if (!opendir ($dirhandle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = sort readdir ($dirhandle);
    closedir ($dirhandle);
    foreach my $entry (@dir) {
        $entry = &unicode::raw_filename_to_url_encoded($entry);
    }

    # Re-order the files in the list so any directories ending with .all are moved to the end
    for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
        if (-d &FileUtils::filenameConcatenate($dirname, $dir[$i]) && $dir[$i] =~ /\.all$/) {
            push(@dir, splice(@dir, $i, 1));
        }
    }

    # setup the metadata structures. we do a metadata_read pass to see if there is any additional metadata, then pass it to read

    my $additionalmetadata = 0; # is there extra metadata available?
    my %extrametadata;          # maps from filespec to extra metadata keys
    my %extrametafile;          # maps from filespec to the metadata.xml (or similar) file it came from
    my @extrametakeys;          # keys of %extrametadata in order read

    # Want to get relative path of local_dirname within the base_directory
    # but with URL style slashes.
    my $local_dirname = &util::filename_within_directory_url_format($dirname, $base_dir);

    # if we are in import folder, then local_dirname will be empty
    if ($local_dirname ne "") {
        # convert to perl unicode
        $local_dirname = $self->raw_filename_to_unicode($local_dirname);

        # look for extra metadata passed down from higher folders
        $local_dirname .= "/"; # closing slash must be URL type slash also and not $dirsep;
        if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
            my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
            foreach my $ek (@$extrakeys) {
                my $extrakeys_re = $ek->{'re'};
                my $extrakeys_md = $ek->{'md'};
                my $extrakeys_mf = $ek->{'mf'};
                &extrametautil::addmetakey(\@extrametakeys, $extrakeys_re);
                &extrametautil::setmetadata(\%extrametadata, $extrakeys_re, $extrakeys_md);
                &extrametautil::setmetafile(\%extrametafile, $extrakeys_re, $extrakeys_mf);
            }
            delete($self->{'subdir_extrametakeys'}->{$local_dirname});
        }
    }

    # apply metadata pass for each of the files in the directory -- ignore
    # maxdocs here
    for my $i (0 .. $#dir) {
        my $subfile = $dir[$i];
        next if ($subfile =~ m/^\.\.?$/);

        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
        my $raw_full_filename = &FileUtils::filenameConcatenate($base_dir, $raw_file_subfile);
        if ($self->raw_file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2);
            next;
        }

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2);

        &plugin::metadata_read ($pluginfo, $base_dir,
                                $raw_file_subfile, $block_hash,
                                \@extrametakeys, \%extrametadata,
                                \%extrametafile,
                                $processor, $gli);
        $additionalmetadata = 1;
    }

    # filter out any extrametakeys that mention subdirectories and store
    # for later use (i.e. when that sub-directory is being processed)
    foreach my $ek (@extrametakeys) { # where each Extrametakey (which is a filename) is stored as a url-style regex

        my ($subdir_re, $extrakey_dir) = &util::url_fileparse($ek);
        if ($extrakey_dir ne "") {
            # a subdir was specified
            my $md = &extrametautil::getmetadata(\%extrametadata, $ek);
            my $mf = &extrametautil::getmetafile(\%extrametafile, $ek);

            my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};
            my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };

            # when it's looked up, it must be relative to the base dir
            push(@{$subdir_extrametakeys->{"$local_dirname$extrakey_dir"}}, $subdir_rec);
        }
    }

    # import each of the files in the directory
    my $count = 0;
    my $num_files = scalar(@dir);
    for (my $i = 0; $i <= scalar(@dir); $i++) {
        # When every file in the directory has been done, pause for a moment (figuratively!)
        # If the -recheck_directories argument hasn't been provided, stop now (default)
        # Otherwise, re-read the contents of the directory to check for new files
        # Any new files are added to the @dir list and are processed as normal
        # This is necessary when documents to be indexed are specified in bibliographic DBs
        # These files are copied/downloaded and stored in a new folder at import time
        if ($i == $num_files) {
            last unless $self->{'recheck_directories'};

            # Re-read the files in the directory to see if there are any new files
            my $recheck_handle;
            last if (!opendir ($recheck_handle, $dirname));
            my @dirnow = sort readdir ($recheck_handle);
            closedir ($recheck_handle);
            foreach my $entry (@dirnow) {
                $entry = &unicode::raw_filename_to_url_encoded($entry);
            }

            # We're only interested if there are more files than there were before
            last if (scalar(@dirnow) <= scalar(@dir));

            # Any new files are added to the end of @dir to get processed by the loop
            my %already_listed = map { $_ => 1 } @dir;
            push(@dir, grep { !$already_listed{$_} } @dirnow);

            # When the new files have been processed, check again
            $num_files = scalar(@dir);
        }

        my $subfile = $dir[$i];
        last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
        next if ($subfile =~ /^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);
        # get the canonical unicode version of the filename. This may not match
        # the filename on the file system. We will use it to compare to regex
        # in the metadata table.
        my $unicode_subfile = &util::raw_filename_to_unicode($dirname, $raw_subfile);
        my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
        my $raw_full_filename
            = &FileUtils::filenameConcatenate($this_file_base_dir, $raw_file_subfile);
        my $full_unicode_file = $self->raw_filename_to_unicode($raw_full_filename);

        if ($self->file_is_blocked($block_hash, $full_unicode_file)) {
            next;
        }
        if ($self->file_is_blocked($block_hash, $raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2);
            next;
        }

        # Follow Windows shortcuts
        if ($raw_subfile =~ m/(?i)\.lnk$/ && (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin"))) {
            require Win32::Shortcut;
            # Explicit method call: the class is only loaded at run time, so
            # the indirect-object form "new Win32::Shortcut(...)" could be
            # parsed as a call to this package's own new().
            my $shortcut = Win32::Shortcut->new(&FileUtils::filenameConcatenate($dirname, $raw_subfile));
            if ($shortcut) {
                # The file to be processed is now the target of the shortcut.
                # Only this iteration's variables are redirected; $file must
                # stay intact for the remaining entries of the directory.
                $this_file_base_dir = "";
                $raw_subfile = $shortcut->Path;
                $raw_file_subfile = $raw_subfile;

                # Only close a shortcut that was actually created.
                $shortcut->Close(); # see http://cpansearch.perl.org/src/JDB/Win32-Shortcut-0.08/docs/reference.html
            }
        }

        # check for a symlink pointing back to a leading directory
        if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") {
            # readlink gives a "fatal error" on systems that don't implement
            # symlinks. This assumes that the -l test above would fail on those.
            my $linkdest = readlink "$dirname/$raw_subfile";
            if (!defined ($linkdest)) {
                # system error - file not found?
                warn "DirectoryPlugin: symlink problem - $!";
            } else {
                # see if link points to current or a parent directory
                if ($linkdest =~ m@^[\./\\]+$@ ||
                    index($dirname, $linkdest) != -1) {
                    warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
                    next;
                }
            }
        }

        print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);

        # Make a copy of $in_metadata to pass to $raw_subfile
        my $out_metadata = {};
        &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);

        # check the assocfile_info
        if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
            &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
        }

        ### Now we need to look up the metadata table to see if there is any
        # extra metadata for us. We need the canonical unicode version here.
        if ($additionalmetadata == 1) {
            foreach my $filespec (@extrametakeys) {
                if ($unicode_subfile =~ /^$filespec$/) {
                    print $outhandle "File \"$unicode_subfile\" matches filespec \"$filespec\"\n"
                        if ($verbosity > 2);
                    my $mdref = &extrametautil::getmetadata(\%extrametadata, $filespec);
                    my $mfref = &extrametautil::getmetafile(\%extrametafile, $filespec);

                    # Add the list files where the metadata came from
                    # into the metadata table so we can track this
                    # This mechanism is similar to how gsdlassocfile works

                    my @metafile_pair = ();
                    foreach my $mf_key (keys %$mfref) {
                        my $mf_value = $mfref->{$mf_key};
                        push (@metafile_pair, "$mf_value : $mf_key");
                    }

                    $mdref->{'gsdlmetafile'} = \@metafile_pair;

                    &metadatautil::combine_metadata_structures($out_metadata, $mdref);
                }
            }
        }

        if (defined $self->{'inf_timestamp'}) {
            # Look to see if it's a completely new file

            if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
                # Not a new file, must be an existing file
                # Let's see if it's newer than the last import.pl

                if (! -d $raw_full_filename) {
                    if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
                        # filename has been around for longer than inf_timestamp
                        print $outhandle "**** Skipping $unicode_subfile\n" if ($verbosity > 3);
                        next;
                    }
                    else {
                        # Remove old folder in archives (might hash to something different)
                        # *** should be doing this on a Del one as well
                        # but leave folder name?? and ensure hashs to
                        # same again??

                        # Then let through as new doc??

                        # mark to doc-oids that rely on it for re-indexing
                    }
                }
            }
        }

        # Recursively read each $subfile
        print $outhandle "DirectoryPlugin recurring: $unicode_subfile\n" if ($verbosity > 2);

        $count += &plugin::read ($pluginfo, $this_file_base_dir,
                                 $raw_file_subfile, $block_hash,
                                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}
638
# Statistics hook; DirectoryPlugin adds nothing to $stats.
sub compile_stats {
    my $self = shift @_;
    my ($stats) = @_;
}
643
6441;
Note: See TracBrowser for help on using the repository browser.