source: main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm@ 31284

Last change on this file since 31284 was 30681, checked in by ak19, 8 years ago

3 new strings introduced by Kathy contained the :, which is used as a separator in the properties file. Although Kathy tried to escape it with a backslash, it broke GTI because GTI doesn't recognise the backslash as a separator and all kinds of weird things happened from then on, so that the Gujarati translator kept having to translte the current date rather than a real GS3 interface string. Modified the gti.pl code (to be committed) and the new strings that Kathy introduced, so that hopefully, GTI can now handle it. Property names and values will be split at the right-most separator character now (= or :) and any on the left should not be escaped.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 22.4 KB
Line 
1###########################################################################
2#
3# DirectoryPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# DirectoryPlugin is a plugin which recurses through directories processing
27# each file it finds - which basically means passing it down the plugin
28# pipeline
29
30package DirectoryPlugin;
31
32use extrametautil;
33use PrintInfo;
34use plugin;
35use util;
36use FileUtils;
37use metadatautil;
38
39use File::Basename;
40use strict;
41no strict 'refs';
42no strict 'subs';
43
44use Encode::Locale;
45use Encode;
46use Unicode::Normalize;
47
48BEGIN {
49 @DirectoryPlugin::ISA = ('PrintInfo');
50}
51
52my $arguments =
53 [ { 'name' => "block_exp",
54 'desc' => "{BasePlugin.block_exp}",
55 'type' => "regexp",
56 'deft' => &get_default_block_exp(),
57 'reqd' => "no" },
58 # this option has been deprecated. leave it here for now so we can warn people not to use it
59 { 'name' => "use_metadata_files",
60 'desc' => "{DirectoryPlugin.use_metadata_files}",
61 'type' => "flag",
62 'reqd' => "no",
63 'hiddengli' => "yes" },
64 { 'name' => "recheck_directories",
65 'desc' => "{DirectoryPlugin.recheck_directories}",
66 'type' => "flag",
67 'reqd' => "no" } ];
68
69my $options = { 'name' => "DirectoryPlugin",
70 'desc' => "{DirectoryPlugin.desc}",
71 'abstract' => "no",
72 'inherits' => "yes",
73 'args' => $arguments };
74
75sub new {
76 my ($class) = shift (@_);
77 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
78 push(@$pluginlist, $class);
79
80 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
81 push(@{$hashArgOptLists->{"OptList"}},$options);
82
83 my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists);
84
85 if ($self->{'info_only'}) {
86 # don't worry about any options or initialisations etc
87 return bless $self, $class;
88 }
89
90 # we have left this option in so we can warn people who are still using it
91 if ($self->{'use_metadata_files'}) {
92 die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
93 }
94
95 $self->{'num_processed'} = 0;
96 $self->{'num_not_processed'} = 0;
97 $self->{'num_blocked'} = 0;
98 $self->{'num_archives'} = 0;
99
100 $self->{'subdir_extrametakeys'} = {};
101
102 return bless $self, $class;
103}
104
105# called once, at the start of processing
106sub init {
107 my $self = shift (@_);
108 my ($verbosity, $outhandle, $failhandle) = @_;
109
110 # verbosity is passed through from the processor
111 $self->{'verbosity'} = $verbosity;
112
113 # as are the outhandle and failhandle
114 $self->{'outhandle'} = $outhandle if defined $outhandle;
115 $self->{'failhandle'} = $failhandle;
116
117}
118
119# called once, after all passes have finished
120sub deinit {
121 my ($self) = @_;
122
123}
124
125# called at the beginning of each plugin pass (import has one, building has many)
126sub begin {
127 my $self = shift (@_);
128 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
129
130 # Only lookup timestamp info for import.pl, and only if incremental is set
131 my $proc_package_name = ref $processor;
132 if ($proc_package_name !~ /buildproc$/ && $self->{'incremental'} == 1) {
133 # Get the infodbtype value for this collection from the arcinfo object
134 my $infodbtype = $processor->getoutputinfo()->{'infodbtype'};
135 $infodbtype = "gdbm" if $infodbtype eq "gdbm-txtgz"; # in archives, cannot use txtgz version
136 my $output_dir = $processor->getoutputdir();
137 my $archives_inf = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $output_dir);
138
139 if ( -e $archives_inf ) {
140 $self->{'inf_timestamp'} = -M $archives_inf;
141 }
142 }
143}
144
145sub remove_all {
146 my $self = shift (@_);
147 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
148}
149
150
151sub remove_one {
152 my $self = shift (@_);
153 my ($file, $oids, $archivedir) = @_;
154 return undef; # this will never be called for directories (will it??)
155
156}
157
158
159# called at the end of each plugin pass
160sub end {
161 my ($self) = shift (@_);
162
163}
164
165
166
167# return 1 if this class might recurse using $pluginfo
168sub is_recursive {
169 my $self = shift (@_);
170
171 return 1;
172}
173
174sub get_default_block_exp {
175 my $self = shift (@_);
176
177 return '(?i)(CVS|\.svn|Thumbs\.db|OIDcount|\.DS_Store|~)$';
178}
179
180sub check_directory_path {
181
182 my $self = shift(@_);
183 my ($dirname) = @_;
184
185 return undef unless (-d $dirname);
186
187 return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);
188
189 my $outhandle = $self->{'outhandle'};
190
191 # check to make sure we're not reading the archives or index directory
192 my $gsdlhome = quotemeta($ENV{'GSDLHOME'});
193 if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
194 print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
195 return 0;
196 }
197
198 # check to see we haven't got a cyclic path...
199 if ($dirname =~ m%(/.*){,41}%) {
200 print $outhandle "DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
201 return 0;
202 }
203
204 # check to see we haven't got a cyclic path...
205 if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
206 print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
207 return 0;
208 }
209
210 return 1;
211}
212
213# this may be called more than once
214sub sort_out_associated_files {
215
216 my $self = shift (@_);
217 my ($block_hash) = @_;
218 if (!scalar (keys %{$block_hash->{'shared_fileroot'}})) {
219 return;
220 }
221
222 $self->{'assocfile_info'} = {} unless defined $self->{'assocfile_info'};
223 my $metadata = $self->{'assocfile_info'};
224 foreach my $prefix (keys %{$block_hash->{'shared_fileroot'}}) {
225 my $record = $block_hash->{'shared_fileroot'}->{$prefix};
226
227 my $tie_to = $record->{'tie_to'};
228 my $exts = $record->{'exts'};
229
230 if ((defined $tie_to) && (scalar (keys %$exts) > 0)) {
231 # set up fileblocks and assocfile_tobe
232 my $base_file = "$prefix$tie_to";
233 $metadata->{$base_file} = {} unless defined $metadata->{$base_file};
234 my $base_file_metadata = $metadata->{$base_file};
235
236 $base_file_metadata->{'gsdlassocfile_tobe'} = [] unless defined $base_file_metadata->{'gsdlassocfile_tobe'};
237 my $assoc_tobe = $base_file_metadata->{'gsdlassocfile_tobe'};
238 foreach my $e (keys %$exts) {
239 # block the file
240 &util::block_filename($block_hash,"$prefix$e");
241 # set up as an associatd file
242 print STDERR " $self->{'plugin_type'}: Associating $prefix$e with $tie_to version\n";
243 my $mime_type = ""; # let system auto detect this
244 push(@$assoc_tobe,"$prefix$e:$mime_type:");
245
246 }
247 }
248 } # foreach record
249
250 $block_hash->{'shared_fileroot'} = undef;
251 $block_hash->{'shared_fileroot'} = {};
252
253}
254
255
256# do block exp OR special blocking ???
257
258sub file_is_blocked {
259 my $self = shift (@_);
260 my ($block_hash, $filename_full_path) = @_;
261
262 $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);
263
264 if (($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin")) {
265 # on windows, all block paths are lowercased.
266 my $lower_filename = lc ($filename_full_path);
267 if (defined $block_hash->{'file_blocks'}->{$lower_filename}) {
268 $self->{'num_blocked'} ++;
269 return 1;
270 }
271 }
272 else {
273 if (defined $block_hash->{'file_blocks'}->{$filename_full_path}) {
274 $self->{'num_blocked'} ++;
275 return 1;
276 }
277 }
278 # check Directory plugin's own block_exp
279 if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) {
280 $self->{'num_blocked'} ++;
281 return 1; # blocked
282 }
283 return 0;
284}
285
286
287
288sub file_block_read {
289 my $self = shift (@_);
290 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;
291
292 my $outhandle = $self->{'outhandle'};
293 my $verbosity = $self->{'verbosity'};
294
295 # Calculate the directory name and ensure it is a directory and
296 # that it is not explicitly blocked.
297 my $dirname = $file;
298 $dirname = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\w/;
299
300 my $directory_ok = $self->check_directory_path($dirname);
301 return $directory_ok unless (defined $directory_ok && $directory_ok == 1);
302
303 print $outhandle "Global file scan checking directory: $dirname\n";
304
305 $block_hash->{'all_files'} = {} unless defined $block_hash->{'all_files'};
306 $block_hash->{'metadata_files'} = {} unless defined $block_hash->{'metadata_files'};
307
308 $block_hash->{'file_blocks'} = {} unless defined $block_hash->{'file_blocks'};
309 $block_hash->{'shared_fileroot'} = {} unless defined $block_hash->{'shared_fileroot'};
310
311 # Recur over directory contents.
312 my (@dir, $subfile);
313 #my $count = 0;
314
315 print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);
316
317 # find all the files in the directory
318 if (!opendir (DIR, $dirname)) {
319 if ($gli) {
320 print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
321 }
322 print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
323 return -1; # error in processing
324 }
325 @dir = sort readdir (DIR);
326 closedir (DIR);
327
328 for (my $i = 0; $i < scalar(@dir); $i++) {
329 my $raw_subfile = $dir[$i];
330 next if ($raw_subfile =~ m/^\.\.?$/);
331
332 my $this_file_base_dir = $base_dir;
333 my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
334
335 # Recursively read each $raw_subfile
336 print $outhandle "DirectoryPlugin block recurring: ". Encode::decode("utf8", $raw_file_subfile) ."\n" if ($verbosity > 2);
337 print $outhandle "DirectoryPlugin block recurring: ". Encode::decode(locale =>$raw_file_subfile) ."\n" if ($verbosity > 2);
338
339 #$count += &plugin::file_block_read ($pluginfo, $this_file_base_dir,
340
341 &plugin::file_block_read ($pluginfo, $this_file_base_dir,
342 $raw_file_subfile,
343 $block_hash, $metadata, $gli);
344
345 }
346 $self->sort_out_associated_files($block_hash);
347 #return $count;
348 return 1;
349
350}
351
352# We don't do metadata_read
353sub metadata_read {
354 my $self = shift (@_);
355 my ($pluginfo, $base_dir, $file, $block_hash,
356 $extrametakeys, $extrametadata, $extrametafile,
357 $processor, $gli, $aux) = @_;
358
359 return undef;
360}
361
362
363# return number of files processed, undef if can't process
364# Note that $base_dir might be "" and that $file might
365# include directories
366
367# This function passes around metadata hash structures. Metadata hash
368# structures are hashes that map from a (scalar) key (the metadata element
369# name) to either a scalar metadata value or a reference to an array of
370# such values.
371
372sub read {
373 my $self = shift (@_);
374 my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;
375
376 my $outhandle = $self->{'outhandle'};
377 my $verbosity = $self->{'verbosity'};
378
379 # Calculate the directory name and ensure it is a directory and
380 # that it is not explicitly blocked.
381 my $dirname;
382 if ($file eq "") {
383 $dirname = $base_dir;
384 } else {
385 $dirname = $file;
386 $dirname = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\w/;
387 }
388
389 my $directory_ok = $self->check_directory_path($dirname);
390 return $directory_ok unless (defined $directory_ok && $directory_ok == 1);
391
392 if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
393 print $outhandle "DirectoryPlugin: metadata passed in: ",
394 join(", ", keys %$in_metadata), "\n";
395 }
396
397
398 # Recur over directory contents.
399 my (@dir, $subfile);
400
401 print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);
402
403 # find all the files in the directory
404 if (!opendir (DIR, $dirname)) {
405 if ($gli) {
406 print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
407 }
408 print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
409 return -1; # error in processing
410 }
411 @dir = sort readdir (DIR);
412 map { $_ = &unicode::raw_filename_to_url_encoded($_); } @dir;
413 closedir (DIR);
414 # Re-order the files in the list so any directories ending with .all are moved to the end
415 for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
416 if (-d &FileUtils::filenameConcatenate($dirname, $dir[$i]) && $dir[$i] =~ /\.all$/) {
417 push(@dir, splice(@dir, $i, 1));
418 }
419 }
420
421 # setup the metadata structures. we do a metadata_read pass to see if there is any additional metadata, then pass it to read
422
423 my $additionalmetadata = 0; # is there extra metadata available?
424 my %extrametadata; # maps from filespec to extra metadata keys
425 my %extrametafile; # maps from filespec to the metadata.xml (or similar) file it came from
426 my @extrametakeys; # keys of %extrametadata in order read
427
428
429 my $os_dirsep = &util::get_os_dirsep();
430 my $dirsep = &util::get_dirsep();
431 my $base_dir_regexp = $base_dir;
432 $base_dir_regexp =~ s/\//$os_dirsep/g;
433
434 # Want to get relative path of local_dirname within the base_directory
435 # but with URL style slashes.
436 my $local_dirname = &util::filename_within_directory_url_format($dirname, $base_dir);
437
438 # if we are in import folder, then local_dirname will be empty
439 if ($local_dirname ne "") {
440 # look for extra metadata passed down from higher folders
441 $local_dirname .= "/"; # closing slash must be URL type slash also and not $dirsep;
442 if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
443 my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
444 foreach my $ek (@$extrakeys) {
445 my $extrakeys_re = $ek->{'re'};
446 my $extrakeys_md = $ek->{'md'};
447 my $extrakeys_mf = $ek->{'mf'};
448 &extrametautil::addmetakey(\@extrametakeys, $extrakeys_re);
449 &extrametautil::setmetadata(\%extrametadata, $extrakeys_re, $extrakeys_md);
450 &extrametautil::setmetafile(\%extrametafile, $extrakeys_re, $extrakeys_mf);
451 }
452 delete($self->{'subdir_extrametakeys'}->{$local_dirname});
453 }
454 }
455 # apply metadata pass for each of the files in the directory -- ignore
456 # maxdocs here
457 my $num_files = scalar(@dir);
458 for (my $i = 0; $i < scalar(@dir); $i++) {
459 my $subfile = $dir[$i];
460 next if ($subfile =~ m/^\.\.?$/);
461
462 my $this_file_base_dir = $base_dir;
463 my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);
464
465 my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
466 my $raw_full_filename = &FileUtils::filenameConcatenate($this_file_base_dir, $raw_file_subfile);
467
468 if ($self->file_is_blocked($block_hash,$raw_full_filename)) {
469 print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2);
470 next;
471 }
472
473 # Recursively read each $raw_subfile
474 print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2);
475
476 &plugin::metadata_read ($pluginfo, $this_file_base_dir,
477 $raw_file_subfile,$block_hash,
478 \@extrametakeys, \%extrametadata,
479 \%extrametafile,
480 $processor, $gli);
481 $additionalmetadata = 1;
482 }
483
484 # filter out any extrametakeys that mention subdirectories and store
485 # for later use (i.e. when that sub-directory is being processed)
486 foreach my $ek (@extrametakeys) { # where each Extrametakey (which is a filename) is stored as a url-style regex
487
488 my ($subdir_re,$extrakey_dir) = &util::url_fileparse($ek);
489
490 if ($extrakey_dir ne "") {
491 # a subdir was specified
492 my $md = &extrametautil::getmetadata(\%extrametadata, $ek);
493 my $mf = &extrametautil::getmetafile(\%extrametafile, $ek);
494
495 my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};
496 my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };
497
498 # when it's looked up, it must be relative to the base dir
499 push(@{$subdir_extrametakeys->{"$local_dirname$extrakey_dir"}},$subdir_rec);
500 }
501 }
502
503 # import each of the files in the directory
504 my $count=0;
505 for (my $i = 0; $i <= scalar(@dir); $i++) {
506 # When every file in the directory has been done, pause for a moment (figuratively!)
507 # If the -recheck_directories argument hasn't been provided, stop now (default)
508 # Otherwise, re-read the contents of the directory to check for new files
509 # Any new files are added to the @dir list and are processed as normal
510 # This is necessary when documents to be indexed are specified in bibliographic DBs
511 # These files are copied/downloaded and stored in a new folder at import time
512 if ($i == $num_files) {
513 last unless $self->{'recheck_directories'};
514
515 # Re-read the files in the directory to see if there are any new files
516 last if (!opendir (DIR, $dirname));
517 my @dirnow = sort readdir (DIR);
518 map { $_ = &unicode::raw_filename_to_url_encoded($_) } @dirnow;
519 closedir (DIR);
520
521 # We're only interested if there are more files than there were before
522 last if (scalar(@dirnow) <= scalar(@dir));
523
524 # Any new files are added to the end of @dir to get processed by the loop
525 my $j;
526 foreach my $subfilenow (@dirnow) {
527 for ($j = 0; $j < $num_files; $j++) {
528 last if ($subfilenow eq $dir[$j]);
529 }
530 if ($j == $num_files) {
531 # New file
532 push(@dir, $subfilenow);
533 }
534 }
535 # When the new files have been processed, check again
536 $num_files = scalar(@dir);
537 }
538
539 my $subfile = $dir[$i];
540 last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
541 next if ($subfile =~ /^\.\.?$/);
542
543 my $this_file_base_dir = $base_dir;
544 my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);
545 # get the canonical unicode version of the filename. This may not match
546 # the filename on the file system. We will use it to compare to regex
547 # in the metadata table.
548 my $unicode_subfile = &util::raw_filename_to_unicode($dirname, $raw_subfile);
549 my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
550 my $raw_full_filename
551 = &FileUtils::filenameConcatenate($this_file_base_dir,$raw_file_subfile);
552
553 if ($self->file_is_blocked($block_hash,$raw_full_filename)) {
554 print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2);
555 next;
556 }
557 print STDERR "** DirectoryPlugin processing $raw_full_filename\n";
558 # Follow Windows shortcuts
559 if ($raw_subfile =~ m/(?i)\.lnk$/ && (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin"))) {
560 require Win32::Shortcut;
561 my $shortcut = new Win32::Shortcut(&FileUtils::filenameConcatenate($dirname, $raw_subfile));
562 if ($shortcut) {
563 # The file to be processed is now the target of the shortcut
564 $this_file_base_dir = "";
565 $file = "";
566 $raw_subfile = $shortcut->Path;
567 }
568 $shortcut->Close(); # see http://cpansearch.perl.org/src/JDB/Win32-Shortcut-0.08/docs/reference.html
569 }
570
571 # check for a symlink pointing back to a leading directory
572 if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") {
573 # readlink gives a "fatal error" on systems that don't implement
574 # symlinks. This assumes the the -l test above would fail on those.
575 my $linkdest=readlink "$dirname/$raw_subfile";
576 if (!defined ($linkdest)) {
577 # system error - file not found?
578 warn "DirectoryPlugin: symlink problem - $!";
579 } else {
580 # see if link points to current or a parent directory
581 if ($linkdest =~ m@^[\./\\]+$@ ||
582 index($dirname, $linkdest) != -1) {
583 warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
584 next;
585 ;
586 }
587 }
588 }
589
590 print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);
591
592 # Make a copy of $in_metadata to pass to $raw_subfile
593 my $out_metadata = {};
594 &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);
595
596 # check the assocfile_info
597 if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
598 &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
599 }
600
601 ### Now we need to look up the metadata table to see if there is any
602 # extra metadata for us. We need the canonical unicode version here.
603 if ($additionalmetadata == 1) {
604 foreach my $filespec (@extrametakeys) {
605 if ($unicode_subfile =~ /^$filespec$/) {
606 print $outhandle "File \"$unicode_subfile\" matches filespec \"$filespec\"\n"
607 if ($verbosity > 2);
608 my $mdref = &extrametautil::getmetadata(\%extrametadata, $filespec);
609 my $mfref = &extrametautil::getmetafile(\%extrametafile, $filespec);
610
611 # Add the list files where the metadata came from
612 # into the metadata table so we can track this
613 # This mechanism is similar to how gsdlassocfile works
614
615 my @metafile_pair = ();
616 foreach my $l (keys %$mfref) {
617 my $f = $mfref->{$l};
618 push (@metafile_pair, "$f : $l");
619 }
620
621 $mdref->{'gsdlmetafile'} = \@metafile_pair;
622
623 &metadatautil::combine_metadata_structures($out_metadata, $mdref);
624 }
625 }
626 }
627
628 if (defined $self->{'inf_timestamp'}) {
629 # Look to see if it's a completely new file
630
631 if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
632 # Not a new file, must be an existing file
633 # Let' see if it's newer than the last import.pl
634
635
636 if (! -d $raw_full_filename) {
637 if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
638 # filename has been around for longer than inf_timestamp
639 print $outhandle "**** Skipping $unicode_subfile\n" if ($verbosity >3);
640 next;
641 }
642 else {
643 # Remove old folder in archives (might hash to something different)
644 # *** should be doing this on a Del one as well
645 # but leave folder name?? and ensure hashs to
646 # same again??
647
648 # Then let through as new doc??
649
650 # mark to doc-oids that rely on it for re-indexing
651 }
652 }
653 }
654 }
655
656 # Recursively read each $subfile
657 print $outhandle "DirectoryPlugin recurring: $unicode_subfile\n" if ($verbosity > 2);
658
659 $count += &plugin::read ($pluginfo, $this_file_base_dir,
660 $raw_file_subfile, $block_hash,
661 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
662 }
663
664 return $count;
665}
666
667sub compile_stats {
668 my $self = shift(@_);
669 my ($stats) = @_;
670}
671
6721;
Note: See TracBrowser for help on using the repository browser.