source: main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm@ 29745

Last change on this file since 29745 was 29745, checked in by kjdon, 9 years ago

using Encode::decode to make the filenames 'unicode aware'. For file_block_read(), only done so that the print statement looks nice. But for read(), we are using the filename to look up in the extrametadata hash thing for metadata coming from metadata.xml. This is unicode aware, so our lookup name needs to be so too. Some debug and extra stuff left in, needs to be removed once windows testing done

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 22.5 KB
Line 
1###########################################################################
2#
3# DirectoryPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# DirectoryPlugin is a plugin which recurses through directories processing
27# each file it finds - which basically means passing it down the plugin
28# pipeline
29
30package DirectoryPlugin;
31
32use extrametautil;
33use PrintInfo;
34use plugin;
35use util;
36use FileUtils;
37use metadatautil;
38
39use File::Basename;
40use strict;
41no strict 'refs';
42no strict 'subs';
43
44use Encode;
45
# Set up inheritance at compile time: DirectoryPlugin derives from PrintInfo.
BEGIN {
    @DirectoryPlugin::ISA = qw(PrintInfo);
}
49
# Descriptions of the command-line arguments this plugin accepts.
# Each entry gives the argument name, a description key, its type and
# whether it is required.
my $arguments = [
    {
        'name' => "block_exp",
        'desc' => "{BasePlugin.block_exp}",
        'type' => "regexp",
        'deft' => &get_default_block_exp(),
        'reqd' => "no",
    },

    # this option has been deprecated. leave it here for now so we can
    # warn people not to use it
    {
        'name'      => "use_metadata_files",
        'desc'      => "{DirectoryPlugin.use_metadata_files}",
        'type'      => "flag",
        'reqd'      => "no",
        'hiddengli' => "yes",
    },

    {
        'name' => "recheck_directories",
        'desc' => "{DirectoryPlugin.recheck_directories}",
        'type' => "flag",
        'reqd' => "no",
    },
];

# Plugin-level option record handed to the PrintInfo constructor together
# with the argument list above.
my $options = {
    'name'     => "DirectoryPlugin",
    'desc'     => "{DirectoryPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
72
# Constructor.
#
# Arguments: the class name, followed by the plugin list, the input
# arguments and the hash of argument/option lists; the latter three are
# forwarded to the PrintInfo constructor.
#
# Returns the new object blessed into $class.  Dies if the deprecated
# -use_metadata_files option was supplied.
sub new {
    my $class = shift @_;
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    # Register this class and its argument/option descriptions.
    push @{$pluginlist}, $class;
    push @{ $hashArgOptLists->{"ArgList"} }, @{$arguments};
    push @{ $hashArgOptLists->{"OptList"} }, $options;

    my $self = PrintInfo->new($pluginlist, $inputargs, $hashArgOptLists);

    # don't worry about any options or initialisations etc when the
    # object is only wanted for its info
    return bless($self, $class) if $self->{'info_only'};

    # we have left this option in so we can warn people who are still using it
    if ($self->{'use_metadata_files'}) {
        die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
    }

    # Counters start at zero.
    foreach my $counter ('num_processed', 'num_not_processed', 'num_blocked', 'num_archives') {
        $self->{$counter} = 0;
    }

    # Extra metadata keys that refer to sub-directories, stored until the
    # sub-directory in question is read.
    $self->{'subdir_extrametakeys'} = {};

    return bless($self, $class);
}
102
# Called once, at the start of processing.
#
# Stores the verbosity and the fail handle on the object.  The out handle
# is only stored when one is actually supplied, so an existing handle is
# not overwritten with undef.
sub init {
    my ($self, $verbosity, $outhandle, $failhandle) = @_;

    # verbosity is passed through from the processor
    $self->{'verbosity'} = $verbosity;

    # as are the outhandle and failhandle
    if (defined $outhandle) {
        $self->{'outhandle'} = $outhandle;
    }
    $self->{'failhandle'} = $failhandle;
}
116
# Called once, after all passes have finished.
# This plugin has nothing to clean up, so the body is intentionally empty.
sub deinit {
    my ($self) = @_;
}
122
# Called at the beginning of each plugin pass (import has one, building
# has many).
#
# When the processor's class name does not end in "buildproc" and the
# 'incremental' setting is 1, this records the -M age of the
# archiveinf-doc database in $self->{'inf_timestamp'}, provided that file
# exists.
sub begin {
    my $self = shift @_;
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    # Only lookup timestamp info for import.pl, and only if incremental is set
    my $processor_class = ref $processor;
    if ($processor_class !~ /buildproc$/ && $self->{'incremental'} == 1) {
        # Get the infodbtype value for this collection from the arcinfo object
        my $output_info = $processor->getoutputinfo();
        my $infodbtype  = $output_info->{'infodbtype'};
        if ($infodbtype eq "gdbm-txtgz") {
            # in archives, cannot use txtgz version
            $infodbtype = "gdbm";
        }

        my $output_dir = $processor->getoutputdir();
        my $archives_inf = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $output_dir);

        $self->{'inf_timestamp'} = -M $archives_inf if (-e $archives_inf);
    }
}
142
# Part of the plugin interface; this plugin has nothing to remove, so the
# arguments are unpacked and otherwise ignored.
sub remove_all {
    my $self = shift @_;
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
}
147
148
# Part of the plugin interface.  Always returns undef.
sub remove_one {
    my ($self, $file, $oids, $archivedir) = @_;

    # this will never be called for directories (will it??)
    return undef;
}
155
156
# Called at the end of each plugin pass.
# There is no per-pass clean-up to do, so the body is intentionally empty.
sub end {
    my ($self) = shift @_;
}
162
163
164
# Returns 1 if this class might recurse using $pluginfo.
# DirectoryPlugin always does.
sub is_recursive {
    my ($self) = @_;
    return 1;
}
171
# Returns the default block expression: a case-insensitive regular
# expression (as a string) matching names that end in CVS, .svn,
# Thumbs.db, OIDcount, .DS_Store or a tilde.
sub get_default_block_exp {
    my ($self) = @_;

    my $default_block_exp = '(?i)(CVS|\.svn|Thumbs\.db|OIDcount|\.DS_Store|~)$';
    return $default_block_exp;
}
177
# Decide whether $dirname is a directory this plugin should descend into.
#
# Returns:
#   undef  if $dirname is not a directory;
#   0      if the directory is blocked: it matches block_exp, looks like
#          the archives/index directory of a Greenstone collection, is too
#          deeply nested, or appears to be in a recursive import loop;
#   1      if the directory is fine to process.
sub check_directory_path {

    my $self = shift(@_);
    my ($dirname) = @_;

    return undef unless (-d $dirname);

    return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);

    my $outhandle = $self->{'outhandle'};

    # check to make sure we're not reading the archives or index directory
    my $gsdlhome = quotemeta($ENV{'GSDLHOME'});
    if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
        print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    # The depth is measured by counting '/' separators.  The previous
    # pattern used the quantifier {,41}: before Perl 5.34 that is literal
    # text (so the check never fired), and from 5.34 on it means {0,41},
    # which matches every path (so every directory was rejected).
    my $max_depth = 40;
    my $depth = ($dirname =~ tr{/}{});    # counts only, does not modify
    if ($depth > $max_depth) {
        print $outhandle "DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
        return 0;
    }

    # check to see we haven't got a cyclic path...
    # (the same sub-path appearing again after a second "import/")
    if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
        print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
        return 0;
    }

    return 1;
}
210
# Turn the 'shared_fileroot' records collected in $block_hash into
# blocked files and pending associated-file metadata.
#
# For every file root that has a 'tie_to' extension and at least one
# other extension in 'exts', each "$prefix$ext" file is blocked and is
# queued under 'gsdlassocfile_tobe' in $self->{'assocfile_info'} for the
# "$prefix$tie_to" file.  The 'shared_fileroot' table is emptied
# afterwards.
#
# this may be called more than once
sub sort_out_associated_files {
    my ($self, $block_hash) = @_;

    # Nothing collected: nothing to do.
    return if (!scalar(keys %{$block_hash->{'shared_fileroot'}}));

    $self->{'assocfile_info'} = {} unless defined $self->{'assocfile_info'};
    my $assoc_info = $self->{'assocfile_info'};
    my $fileroots  = $block_hash->{'shared_fileroot'};

    foreach my $prefix (keys %{$fileroots}) {
        my $entry  = $fileroots->{$prefix};
        my $tie_to = $entry->{'tie_to'};
        my $exts   = $entry->{'exts'};

        # Only roots with a primary file and something to tie to it matter.
        next unless (defined $tie_to);
        next unless (scalar(keys %$exts) > 0);

        # set up fileblocks and assocfile_tobe
        my $primary_file = "$prefix$tie_to";
        $assoc_info->{$primary_file} = {} unless defined $assoc_info->{$primary_file};
        my $primary_metadata = $assoc_info->{$primary_file};

        $primary_metadata->{'gsdlassocfile_tobe'} = []
            unless defined $primary_metadata->{'gsdlassocfile_tobe'};
        my $pending = $primary_metadata->{'gsdlassocfile_tobe'};

        foreach my $e (keys %$exts) {
            # block the file
            &util::block_filename($block_hash, "$prefix$e");

            # set up as an associated file
            print STDERR " $self->{'plugin_type'}: Associating $prefix$e with $tie_to version\n";
            my $mime_type = ""; # let system auto detect this
            push(@{$pending}, "$prefix$e:$mime_type:");
        }
    }

    # Start again with an empty table.
    $block_hash->{'shared_fileroot'} = {};
}
252
253
# Report whether a file is blocked, either explicitly through
# $block_hash->{'file_blocks'} or by this plugin's own block_exp.
#
# Returns 1 (and increments $self->{'num_blocked'}) when the file is
# blocked, 0 otherwise.
#
# do block exp OR special blocking ???
sub file_is_blocked {
    my ($self, $block_hash, $filename_full_path) = @_;

    $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);

    # on windows, all block paths are lowercased.
    my $on_native_windows = ($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin");
    my $lookup_key = $on_native_windows ? lc ($filename_full_path) : $filename_full_path;

    my $block_exp = $self->{'block_exp'};

    # The explicit block list is consulted first, then Directory plugin's
    # own block_exp.
    if (defined $block_hash->{'file_blocks'}->{$lookup_key}
        || ($block_exp ne "" && $filename_full_path =~ /$block_exp/)) {
        $self->{'num_blocked'} ++;
        return 1; # blocked
    }

    return 0;
}
283
284
285
# Global file scan over one directory: hand every entry of the directory
# to &plugin::file_block_read so plugins can register the files they want
# blocked, then resolve any associated files that were collected.
#
# Returns:
#   undef  if the path is not a directory;
#   0      if the directory is rejected by check_directory_path;
#   -1     if the directory cannot be opened;
#   1      once the directory has been scanned.
sub file_block_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname = $file;
    $dirname = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\w/;

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    print $outhandle "Global file scan checking directory: $dirname\n";

    # Make sure the tables filled in during the scan exist.
    foreach my $table ('all_files', 'metadata_files', 'file_blocks', 'shared_fileroot') {
        $block_hash->{$table} = {} unless defined $block_hash->{$table};
    }

    print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # A lexical handle is used so that recursive calls (and read()) never
    # share one package-global directory handle.
    my $dirhandle;
    if (!opendir ($dirhandle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = sort readdir ($dirhandle);
    closedir ($dirhandle);

    # Recur over directory contents.
    foreach my $raw_subfile (@dir) {
        next if ($raw_subfile =~ m/^\.\.?$/);

        my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);

        # Recursively read each $raw_subfile
        # (the name is decoded only so that the message prints nicely)
        print $outhandle "DirectoryPlugin block recurring: ". Encode::decode("utf8", $raw_file_subfile) ."\n" if ($verbosity > 2);

        &plugin::file_block_read ($pluginfo, $base_dir,
                                  $raw_file_subfile,
                                  $block_hash, $metadata, $gli);
    }

    $self->sort_out_associated_files($block_hash);

    return 1;
}
348
# We don't do metadata_read: directories carry no metadata of their own,
# so this always returns undef.
sub metadata_read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    return undef;
}
358
359
# Process one directory: run a metadata_read pass over its entries, then
# pass every non-blocked entry down the plugin pipeline via &plugin::read.
#
# Returns the number of files processed (the sum of what &plugin::read
# returned), undef if the path is not a directory, 0 if the directory is
# rejected by check_directory_path, or -1 if it cannot be opened.
#
# Note that $base_dir might be "" and that $file might include
# directories.
#
# This function passes around metadata hash structures. Metadata hash
# structures are hashes that map from a (scalar) key (the metadata element
# name) to either a scalar metadata value or a reference to an array of
# such values.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # Calculate the directory name and ensure it is a directory and
    # that it is not explicitly blocked.
    my $dirname;
    if ($file eq "") {
        $dirname = $base_dir;
    } else {
        $dirname = $file;
        $dirname = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\w/;
    }

    my $directory_ok = $self->check_directory_path($dirname);
    return $directory_ok unless (defined $directory_ok && $directory_ok == 1);

    if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
        print $outhandle "DirectoryPlugin: metadata passed in: ",
            join(", ", keys %$in_metadata), "\n";
    }

    print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);

    # find all the files in the directory
    # A lexical handle is used so that the recursive calls made further
    # down never share one package-global directory handle.
    my $dirhandle;
    if (!opendir ($dirhandle, $dirname)) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
        }
        print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
        return -1; # error in processing
    }
    my @dir = sort readdir ($dirhandle);
    closedir ($dirhandle);

    # From here on the entries of @dir are held url-encoded.
    foreach my $entry (@dir) {
        $entry = &unicode::raw_filename_to_url_encoded($entry);
    }

    # Re-order the files in the list so any directories ending with .all are moved to the end
    for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
        next unless ($dir[$i] =~ /\.all$/);

        # The directory test has to be made on the raw file name, not on
        # the url-encoded form held in @dir.
        my $raw_entry = &unicode::url_encoded_to_raw_filename($dir[$i]);
        if (-d &FileUtils::filenameConcatenate($dirname, $raw_entry)) {
            push(@dir, splice(@dir, $i, 1));
        }
    }

    # setup the metadata structures. we do a metadata_read pass to see if there is any additional metadata, then pass it to read

    my $additionalmetadata = 0; # is there extra metadata available?
    my %extrametadata;          # maps from filespec to extra metadata keys
    my %extrametafile;          # maps from filespec to the metadata.xml (or similar) file it came from
    my @extrametakeys;          # keys of %extrametadata in order read

    # Want to get relative path of local_dirname within the base_directory
    # but with URL style slashes.
    my $local_dirname = &util::filename_within_directory_url_format($dirname, $base_dir);

    # if we are in import folder, then local_dirname will be empty
    if ($local_dirname ne "") {
        # look for extra metadata passed down from higher folders
        $local_dirname .= "/"; # closing slash must be URL type slash also and not $dirsep;
        if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
            my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
            foreach my $ek (@$extrakeys) {
                my $extrakeys_re = $ek->{'re'};
                my $extrakeys_md = $ek->{'md'};
                my $extrakeys_mf = $ek->{'mf'};
                &extrametautil::addmetakey(\@extrametakeys, $extrakeys_re);
                &extrametautil::setmetadata(\%extrametadata, $extrakeys_re, $extrakeys_md);
                &extrametautil::setmetafile(\%extrametafile, $extrakeys_re, $extrakeys_mf);
            }
            delete($self->{'subdir_extrametakeys'}->{$local_dirname});
        }
    }

    # apply metadata pass for each of the files in the directory -- ignore
    # maxdocs here
    my $num_files = scalar(@dir);
    foreach my $subfile (@dir) {
        next if ($subfile =~ m/^\.\.?$/);

        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
        my $raw_full_filename = &FileUtils::filenameConcatenate($base_dir, $raw_file_subfile);

        if ($self->file_is_blocked($block_hash,$raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2);
            next;
        }

        # Recursively read each $raw_subfile
        print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2);

        &plugin::metadata_read ($pluginfo, $base_dir,
                                $raw_file_subfile,$block_hash,
                                \@extrametakeys, \%extrametadata,
                                \%extrametafile,
                                $processor, $gli);
        $additionalmetadata = 1;
    }

    # filter out any extrametakeys that mention subdirectories and store
    # for later use (i.e. when that sub-directory is being processed)
    foreach my $ek (@extrametakeys) { # where each Extrametakey (which is a filename) is stored as a url-style regex

        my ($subdir_re,$extrakey_dir) = &util::url_fileparse($ek);

        if ($extrakey_dir ne "") {
            # a subdir was specified
            my $md = &extrametautil::getmetadata(\%extrametadata, $ek);
            my $mf = &extrametautil::getmetafile(\%extrametafile, $ek);

            my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};
            my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };

            # when it's looked up, it must be relative to the base dir
            push(@{$subdir_extrametakeys->{"$local_dirname$extrakey_dir"}},$subdir_rec);
        }
    }

    # import each of the files in the directory
    my $count=0;
    for (my $i = 0; $i <= scalar(@dir); $i++) {
        # When every file in the directory has been done, pause for a moment (figuratively!)
        # If the -recheck_directories argument hasn't been provided, stop now (default)
        # Otherwise, re-read the contents of the directory to check for new files
        # Any new files are added to the @dir list and are processed as normal
        # This is necessary when documents to be indexed are specified in bibliographic DBs
        # These files are copied/downloaded and stored in a new folder at import time
        if ($i == $num_files) {
            last unless $self->{'recheck_directories'};

            # Re-read the files in the directory to see if there are any new files
            my $recheck_handle;
            last if (!opendir ($recheck_handle, $dirname));
            my @dirnow = sort readdir ($recheck_handle);
            closedir ($recheck_handle);
            foreach my $entry (@dirnow) {
                $entry = &unicode::raw_filename_to_url_encoded($entry);
            }

            # We're only interested if there are more files than there were before
            last if (scalar(@dirnow) <= scalar(@dir));

            # Any new files are added to the end of @dir to get processed by the loop
            my $j;
            foreach my $subfilenow (@dirnow) {
                for ($j = 0; $j < $num_files; $j++) {
                    last if ($subfilenow eq $dir[$j]);
                }
                if ($j == $num_files) {
                    # New file
                    push(@dir, $subfilenow);
                }
            }
            # When the new files have been processed, check again
            $num_files = scalar(@dir);
        }

        my $subfile = $dir[$i];
        last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
        next if ($subfile =~ /^\.\.?$/);

        my $this_file_base_dir = $base_dir;
        my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);

        # Metadata read from metadata.xml is unicode aware, so the name
        # used to look it up has to be a decoded (unicode) string too.
        my $unicode_subfile = &Encode::decode("utf8", $raw_subfile);

        my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
        my $raw_full_filename
            = &FileUtils::filenameConcatenate($this_file_base_dir,$raw_file_subfile);

        if ($self->file_is_blocked($block_hash,$raw_full_filename)) {
            print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2);
            next;
        }

        # Follow Windows shortcuts
        # NOTE(review): $raw_file_subfile and $raw_full_filename were
        # computed above, before the shortcut is resolved, and $file is
        # overwritten for every later entry of this directory as well.
        # This looks unintended - confirm on Windows before relying on it.
        if ($raw_subfile =~ m/(?i)\.lnk$/ && (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin"))) {
            require Win32::Shortcut;
            my $shortcut = Win32::Shortcut->new(&FileUtils::filenameConcatenate($dirname, $raw_subfile));
            if ($shortcut) {
                # The file to be processed is now the target of the shortcut
                $this_file_base_dir = "";
                $file = "";
                $raw_subfile = $shortcut->Path;
            }
        }

        # check for a symlink pointing back to a leading directory
        if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") {
            # readlink gives a "fatal error" on systems that don't implement
            # symlinks. This assumes the -l test above would fail on those.
            my $linkdest=readlink "$dirname/$raw_subfile";
            if (!defined ($linkdest)) {
                # system error - file not found?
                warn "DirectoryPlugin: symlink problem - $!";
            } else {
                # see if link points to current or a parent directory
                if ($linkdest =~ m@^[\./\\]+$@ ||
                    index($dirname, $linkdest) != -1) {
                    warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
                    next;
                }
            }
        }

        print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);

        # Make a copy of $in_metadata to pass to $raw_subfile
        my $out_metadata = {};
        &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);

        # check the assocfile_info
        if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
            &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
        }

        # Next add metadata read in XML files (if it is supplied)
        if ($additionalmetadata == 1) {
            foreach my $filespec (@extrametakeys) {
                # the unicode aware file name is compared against each filespec
                if ($unicode_subfile =~ /^$filespec$/) {
                    print $outhandle "File \"$unicode_subfile\" matches filespec \"$filespec\"\n"
                        if ($verbosity > 2);
                    my $mdref = &extrametautil::getmetadata(\%extrametadata, $filespec);
                    my $mfref = &extrametautil::getmetafile(\%extrametafile, $filespec);

                    # Add the list files where the metadata came from
                    # into the metadata table so we can track this
                    # This mechanism is similar to how gsdlassocfile works

                    my @metafile_pair = ();
                    foreach my $l (keys %$mfref) {
                        my $f = $mfref->{$l};
                        push (@metafile_pair, "$f : $l");
                    }

                    $mdref->{'gsdlmetafile'} = \@metafile_pair;

                    &metadatautil::combine_metadata_structures($out_metadata, $mdref);
                }
            }
        }

        if (defined $self->{'inf_timestamp'}) {
            # Look to see if it's a completely new file

            if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
                # Not a new file, must be an existing file
                # Let's see if it's newer than the last import.pl

                if (! -d $raw_full_filename) {
                    if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
                        # filename has been around for longer than inf_timestamp
                        print $outhandle "**** Skipping $unicode_subfile\n" if ($verbosity >3);
                        next;
                    }
                    else {
                        # Remove old folder in archives (might hash to something different)
                        # *** should be doing this on a Del one as well
                        # but leave folder name?? and ensure it hashes to
                        # same again??

                        # Then let through as new doc??

                        # mark to doc-oids that rely on it for re-indexing
                    }
                }
            }
        }

        # Recursively read each $subfile
        print $outhandle "DirectoryPlugin recurring: $unicode_subfile\n" if ($verbosity > 2);

        $count += &plugin::read ($pluginfo, $this_file_base_dir,
                                 $raw_file_subfile, $block_hash,
                                 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}
670
# Part of the plugin interface; this plugin adds nothing to $stats, so the
# arguments are unpacked and otherwise ignored.
sub compile_stats {
    my $self = shift @_;
    my ($stats) = @_;
}
675
6761;
Note: See TracBrowser for help on using the repository browser.