source: main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm@ 29763

Last change on this file since 29763 was 29763, checked in by ak19, 9 years ago

on macos, accented chars in filenames are in decomposed form, eg the letter plus the accent. Convert to canonical composed form for looking up in the extra metadata table.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 22.9 KB
Line 
1###########################################################################
2#
3# DirectoryPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# DirectoryPlugin is a plugin which recurses through directories processing
27# each file it finds - which basically means passing it down the plugin
28# pipeline
29
30package DirectoryPlugin;
31
32use extrametautil;
33use PrintInfo;
34use plugin;
35use util;
36use FileUtils;
37use metadatautil;
38
39use File::Basename;
40use strict;
41no strict 'refs';
42no strict 'subs';
43
44use Encode::Locale;
45use Encode;
46use Unicode::Normalize;
47
48BEGIN {
49 @DirectoryPlugin::ISA = ('PrintInfo');
50}
51
52my $arguments =
53 [ { 'name' => "block_exp",
54 'desc' => "{BasePlugin.block_exp}",
55 'type' => "regexp",
56 'deft' => &get_default_block_exp(),
57 'reqd' => "no" },
58 # this option has been deprecated. leave it here for now so we can warn people not to use it
59 { 'name' => "use_metadata_files",
60 'desc' => "{DirectoryPlugin.use_metadata_files}",
61 'type' => "flag",
62 'reqd' => "no",
63 'hiddengli' => "yes" },
64 { 'name' => "recheck_directories",
65 'desc' => "{DirectoryPlugin.recheck_directories}",
66 'type' => "flag",
67 'reqd' => "no" } ];
68
69my $options = { 'name' => "DirectoryPlugin",
70 'desc' => "{DirectoryPlugin.desc}",
71 'abstract' => "no",
72 'inherits' => "yes",
73 'args' => $arguments };
74
75sub new {
76 my ($class) = shift (@_);
77 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
78 push(@$pluginlist, $class);
79
80 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
81 push(@{$hashArgOptLists->{"OptList"}},$options);
82
83 my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists);
84
85 if ($self->{'info_only'}) {
86 # don't worry about any options or initialisations etc
87 return bless $self, $class;
88 }
89
90 # we have left this option in so we can warn people who are still using it
91 if ($self->{'use_metadata_files'}) {
92 die "ERROR: DirectoryPlugin -use_metadata_files option has been deprecated. Please remove the option and add MetadataXMLPlug to your plugin list instead!\n";
93 }
94
95 $self->{'num_processed'} = 0;
96 $self->{'num_not_processed'} = 0;
97 $self->{'num_blocked'} = 0;
98 $self->{'num_archives'} = 0;
99
100 $self->{'subdir_extrametakeys'} = {};
101
102 return bless $self, $class;
103}
104
105# called once, at the start of processing
106sub init {
107 my $self = shift (@_);
108 my ($verbosity, $outhandle, $failhandle) = @_;
109
110 # verbosity is passed through from the processor
111 $self->{'verbosity'} = $verbosity;
112
113 # as are the outhandle and failhandle
114 $self->{'outhandle'} = $outhandle if defined $outhandle;
115 $self->{'failhandle'} = $failhandle;
116
117}
118
119# called once, after all passes have finished
120sub deinit {
121 my ($self) = @_;
122
123}
124
125# called at the beginning of each plugin pass (import has one, building has many)
126sub begin {
127 my $self = shift (@_);
128 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
129
130 # Only lookup timestamp info for import.pl, and only if incremental is set
131 my $proc_package_name = ref $processor;
132 if ($proc_package_name !~ /buildproc$/ && $self->{'incremental'} == 1) {
133 # Get the infodbtype value for this collection from the arcinfo object
134 my $infodbtype = $processor->getoutputinfo()->{'infodbtype'};
135 $infodbtype = "gdbm" if $infodbtype eq "gdbm-txtgz"; # in archives, cannot use txtgz version
136 my $output_dir = $processor->getoutputdir();
137 my $archives_inf = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $output_dir);
138
139 if ( -e $archives_inf ) {
140 $self->{'inf_timestamp'} = -M $archives_inf;
141 }
142 }
143}
144
145sub remove_all {
146 my $self = shift (@_);
147 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
148}
149
150
151sub remove_one {
152 my $self = shift (@_);
153 my ($file, $oids, $archivedir) = @_;
154 return undef; # this will never be called for directories (will it??)
155
156}
157
158
159# called at the end of each plugin pass
160sub end {
161 my ($self) = shift (@_);
162
163}
164
165
166
167# return 1 if this class might recurse using $pluginfo
168sub is_recursive {
169 my $self = shift (@_);
170
171 return 1;
172}
173
174sub get_default_block_exp {
175 my $self = shift (@_);
176
177 return '(?i)(CVS|\.svn|Thumbs\.db|OIDcount|\.DS_Store|~)$';
178}
179
180sub check_directory_path {
181
182 my $self = shift(@_);
183 my ($dirname) = @_;
184
185 return undef unless (-d $dirname);
186
187 return 0 if ($self->{'block_exp'} ne "" && $dirname =~ /$self->{'block_exp'}/);
188
189 my $outhandle = $self->{'outhandle'};
190
191 # check to make sure we're not reading the archives or index directory
192 my $gsdlhome = quotemeta($ENV{'GSDLHOME'});
193 if ($dirname =~ m/^$gsdlhome\/.*?\/import.*?\/(archives|index)$/) {
194 print $outhandle "DirectoryPlugin: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
195 return 0;
196 }
197
198 # check to see we haven't got a cyclic path...
199 if ($dirname =~ m%(/.*){,41}%) {
200 print $outhandle "DirectoryPlugin: $dirname is 40 directories deep, is this a recursive path? if not increase constant in DirectoryPlugin.pm.\n";
201 return 0;
202 }
203
204 # check to see we haven't got a cyclic path...
205 if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
206 print $outhandle "DirectoryPlugin: $dirname appears to be in a recursive loop...\n";
207 return 0;
208 }
209
210 return 1;
211}
212
213# this may be called more than once
214sub sort_out_associated_files {
215
216 my $self = shift (@_);
217 my ($block_hash) = @_;
218 if (!scalar (keys %{$block_hash->{'shared_fileroot'}})) {
219 return;
220 }
221
222 $self->{'assocfile_info'} = {} unless defined $self->{'assocfile_info'};
223 my $metadata = $self->{'assocfile_info'};
224 foreach my $prefix (keys %{$block_hash->{'shared_fileroot'}}) {
225 my $record = $block_hash->{'shared_fileroot'}->{$prefix};
226
227 my $tie_to = $record->{'tie_to'};
228 my $exts = $record->{'exts'};
229
230 if ((defined $tie_to) && (scalar (keys %$exts) > 0)) {
231 # set up fileblocks and assocfile_tobe
232 my $base_file = "$prefix$tie_to";
233 $metadata->{$base_file} = {} unless defined $metadata->{$base_file};
234 my $base_file_metadata = $metadata->{$base_file};
235
236 $base_file_metadata->{'gsdlassocfile_tobe'} = [] unless defined $base_file_metadata->{'gsdlassocfile_tobe'};
237 my $assoc_tobe = $base_file_metadata->{'gsdlassocfile_tobe'};
238 foreach my $e (keys %$exts) {
239 # block the file
240 &util::block_filename($block_hash,"$prefix$e");
241 # set up as an associatd file
242 print STDERR " $self->{'plugin_type'}: Associating $prefix$e with $tie_to version\n";
243 my $mime_type = ""; # let system auto detect this
244 push(@$assoc_tobe,"$prefix$e:$mime_type:");
245
246 }
247 }
248 } # foreach record
249
250 $block_hash->{'shared_fileroot'} = undef;
251 $block_hash->{'shared_fileroot'} = {};
252
253}
254
255
256# do block exp OR special blocking ???
257
258sub file_is_blocked {
259 my $self = shift (@_);
260 my ($block_hash, $filename_full_path) = @_;
261
262 $filename_full_path = &util::upgrade_if_dos_filename($filename_full_path);
263
264 if (($ENV{'GSDLOS'} =~ m/^windows$/) && ($^O ne "cygwin")) {
265 # on windows, all block paths are lowercased.
266 my $lower_filename = lc ($filename_full_path);
267 if (defined $block_hash->{'file_blocks'}->{$lower_filename}) {
268 $self->{'num_blocked'} ++;
269 return 1;
270 }
271 }
272 else {
273 if (defined $block_hash->{'file_blocks'}->{$filename_full_path}) {
274 $self->{'num_blocked'} ++;
275 return 1;
276 }
277 }
278 # check Directory plugin's own block_exp
279 if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) {
280 $self->{'num_blocked'} ++;
281 return 1; # blocked
282 }
283 return 0;
284}
285
286
287
288sub file_block_read {
289 my $self = shift (@_);
290 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;
291
292 my $outhandle = $self->{'outhandle'};
293 my $verbosity = $self->{'verbosity'};
294
295 # Calculate the directory name and ensure it is a directory and
296 # that it is not explicitly blocked.
297 my $dirname = $file;
298 $dirname = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\w/;
299
300 my $directory_ok = $self->check_directory_path($dirname);
301 return $directory_ok unless (defined $directory_ok && $directory_ok == 1);
302
303 print $outhandle "Global file scan checking directory: $dirname\n";
304
305 $block_hash->{'all_files'} = {} unless defined $block_hash->{'all_files'};
306 $block_hash->{'metadata_files'} = {} unless defined $block_hash->{'metadata_files'};
307
308 $block_hash->{'file_blocks'} = {} unless defined $block_hash->{'file_blocks'};
309 $block_hash->{'shared_fileroot'} = {} unless defined $block_hash->{'shared_fileroot'};
310
311 # Recur over directory contents.
312 my (@dir, $subfile);
313 #my $count = 0;
314
315 print $outhandle "DirectoryPlugin block: getting directory $dirname\n" if ($verbosity > 2);
316
317 # find all the files in the directory
318 if (!opendir (DIR, $dirname)) {
319 if ($gli) {
320 print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
321 }
322 print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
323 return -1; # error in processing
324 }
325 @dir = sort readdir (DIR);
326 closedir (DIR);
327
328 for (my $i = 0; $i < scalar(@dir); $i++) {
329 my $raw_subfile = $dir[$i];
330 next if ($raw_subfile =~ m/^\.\.?$/);
331
332 my $this_file_base_dir = $base_dir;
333 my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
334
335 # Recursively read each $raw_subfile
336 print $outhandle "DirectoryPlugin block recurring: ". Encode::decode("utf8", $raw_file_subfile) ."\n" if ($verbosity > 2);
337 print $outhandle "DirectoryPlugin block recurring: ". Encode::decode(locale =>$raw_file_subfile) ."\n" if ($verbosity > 2);
338
339 #$count += &plugin::file_block_read ($pluginfo, $this_file_base_dir,
340
341 &plugin::file_block_read ($pluginfo, $this_file_base_dir,
342 $raw_file_subfile,
343 $block_hash, $metadata, $gli);
344
345 }
346 $self->sort_out_associated_files($block_hash);
347 #return $count;
348 return 1;
349
350}
351
352# We don't do metadata_read
353sub metadata_read {
354 my $self = shift (@_);
355 my ($pluginfo, $base_dir, $file, $block_hash,
356 $extrametakeys, $extrametadata, $extrametafile,
357 $processor, $gli, $aux) = @_;
358
359 return undef;
360}
361
362
363# return number of files processed, undef if can't process
364# Note that $base_dir might be "" and that $file might
365# include directories
366
367# This function passes around metadata hash structures. Metadata hash
368# structures are hashes that map from a (scalar) key (the metadata element
369# name) to either a scalar metadata value or a reference to an array of
370# such values.
371
372sub read {
373 my $self = shift (@_);
374 my ($pluginfo, $base_dir, $file, $block_hash, $in_metadata, $processor, $maxdocs, $total_count, $gli) = @_;
375
376 my $outhandle = $self->{'outhandle'};
377 my $verbosity = $self->{'verbosity'};
378
379 # Calculate the directory name and ensure it is a directory and
380 # that it is not explicitly blocked.
381 my $dirname;
382 if ($file eq "") {
383 $dirname = $base_dir;
384 } else {
385 $dirname = $file;
386 $dirname = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\w/;
387 }
388
389 my $directory_ok = $self->check_directory_path($dirname);
390 return $directory_ok unless (defined $directory_ok && $directory_ok == 1);
391
392 if (($verbosity > 2) && ((scalar keys %$in_metadata) > 0)) {
393 print $outhandle "DirectoryPlugin: metadata passed in: ",
394 join(", ", keys %$in_metadata), "\n";
395 }
396
397
398 # Recur over directory contents.
399 my (@dir, $subfile);
400
401 print $outhandle "DirectoryPlugin read: getting directory $dirname\n" if ($verbosity > 2);
402
403 # find all the files in the directory
404 if (!opendir (DIR, $dirname)) {
405 if ($gli) {
406 print STDERR "<ProcessingError n='$file' r='Could not read directory $dirname'>\n";
407 }
408 print $outhandle "DirectoryPlugin: WARNING - couldn't read directory $dirname\n";
409 return -1; # error in processing
410 }
411 @dir = sort readdir (DIR);
412 map { $_ = &unicode::raw_filename_to_url_encoded($_); print STDERR "****$_\n"; } @dir;
413 closedir (DIR);
414
415 # Re-order the files in the list so any directories ending with .all are moved to the end
416 for (my $i = scalar(@dir) - 1; $i >= 0; $i--) {
417 if (-d &FileUtils::filenameConcatenate($dirname, $dir[$i]) && $dir[$i] =~ /\.all$/) {
418 push(@dir, splice(@dir, $i, 1));
419 }
420 }
421
422 # setup the metadata structures. we do a metadata_read pass to see if there is any additional metadata, then pass it to read
423
424 my $additionalmetadata = 0; # is there extra metadata available?
425 my %extrametadata; # maps from filespec to extra metadata keys
426 my %extrametafile; # maps from filespec to the metadata.xml (or similar) file it came from
427 my @extrametakeys; # keys of %extrametadata in order read
428
429
430 my $os_dirsep = &util::get_os_dirsep();
431 my $dirsep = &util::get_dirsep();
432 my $base_dir_regexp = $base_dir;
433 $base_dir_regexp =~ s/\//$os_dirsep/g;
434
435 # Want to get relative path of local_dirname within the base_directory
436 # but with URL style slashes.
437 my $local_dirname = &util::filename_within_directory_url_format($dirname, $base_dir);
438
439 # if we are in import folder, then local_dirname will be empty
440 if ($local_dirname ne "") {
441 # look for extra metadata passed down from higher folders
442 $local_dirname .= "/"; # closing slash must be URL type slash also and not $dirsep;
443 if (defined $self->{'subdir_extrametakeys'}->{$local_dirname}) {
444 my $extrakeys = $self->{'subdir_extrametakeys'}->{$local_dirname};
445 foreach my $ek (@$extrakeys) {
446 my $extrakeys_re = $ek->{'re'};
447 my $extrakeys_md = $ek->{'md'};
448 my $extrakeys_mf = $ek->{'mf'};
449 &extrametautil::addmetakey(\@extrametakeys, $extrakeys_re);
450 &extrametautil::setmetadata(\%extrametadata, $extrakeys_re, $extrakeys_md);
451 &extrametautil::setmetafile(\%extrametafile, $extrakeys_re, $extrakeys_mf);
452 }
453 delete($self->{'subdir_extrametakeys'}->{$local_dirname});
454 }
455 }
456 # apply metadata pass for each of the files in the directory -- ignore
457 # maxdocs here
458 my $num_files = scalar(@dir);
459 for (my $i = 0; $i < scalar(@dir); $i++) {
460 my $subfile = $dir[$i];
461 next if ($subfile =~ m/^\.\.?$/);
462
463 my $this_file_base_dir = $base_dir;
464 my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);
465
466 my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
467 my $raw_full_filename = &FileUtils::filenameConcatenate($this_file_base_dir, $raw_file_subfile);
468
469 if ($self->file_is_blocked($block_hash,$raw_full_filename)) {
470 print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for metadata_read\n" if ($verbosity > 2);
471 next;
472 }
473
474 # Recursively read each $raw_subfile
475 print $outhandle "DirectoryPlugin metadata recurring: $raw_subfile\n" if ($verbosity > 2);
476
477 &plugin::metadata_read ($pluginfo, $this_file_base_dir,
478 $raw_file_subfile,$block_hash,
479 \@extrametakeys, \%extrametadata,
480 \%extrametafile,
481 $processor, $gli);
482 $additionalmetadata = 1;
483 }
484
485 # filter out any extrametakeys that mention subdirectories and store
486 # for later use (i.e. when that sub-directory is being processed)
487 foreach my $ek (@extrametakeys) { # where each Extrametakey (which is a filename) is stored as a url-style regex
488
489 my ($subdir_re,$extrakey_dir) = &util::url_fileparse($ek);
490
491 if ($extrakey_dir ne "") {
492 # a subdir was specified
493 my $md = &extrametautil::getmetadata(\%extrametadata, $ek);
494 my $mf = &extrametautil::getmetafile(\%extrametafile, $ek);
495
496 my $subdir_extrametakeys = $self->{'subdir_extrametakeys'};
497 my $subdir_rec = { 're' => $subdir_re, 'md' => $md, 'mf' => $mf };
498
499 # when it's looked up, it must be relative to the base dir
500 push(@{$subdir_extrametakeys->{"$local_dirname$extrakey_dir"}},$subdir_rec);
501 }
502 }
503
504 # import each of the files in the directory
505 my $count=0;
506 for (my $i = 0; $i <= scalar(@dir); $i++) {
507 # When every file in the directory has been done, pause for a moment (figuratively!)
508 # If the -recheck_directories argument hasn't been provided, stop now (default)
509 # Otherwise, re-read the contents of the directory to check for new files
510 # Any new files are added to the @dir list and are processed as normal
511 # This is necessary when documents to be indexed are specified in bibliographic DBs
512 # These files are copied/downloaded and stored in a new folder at import time
513 if ($i == $num_files) {
514 last unless $self->{'recheck_directories'};
515
516 # Re-read the files in the directory to see if there are any new files
517 last if (!opendir (DIR, $dirname));
518 my @dirnow = sort readdir (DIR);
519 map { $_ = &unicode::raw_filename_to_url_encoded($_) } @dirnow;
520 closedir (DIR);
521
522 # We're only interested if there are more files than there were before
523 last if (scalar(@dirnow) <= scalar(@dir));
524
525 # Any new files are added to the end of @dir to get processed by the loop
526 my $j;
527 foreach my $subfilenow (@dirnow) {
528 for ($j = 0; $j < $num_files; $j++) {
529 last if ($subfilenow eq $dir[$j]);
530 }
531 if ($j == $num_files) {
532 # New file
533 push(@dir, $subfilenow);
534 }
535 }
536 # When the new files have been processed, check again
537 $num_files = scalar(@dir);
538 }
539
540 my $subfile = $dir[$i];
541 last if ($maxdocs != -1 && ($count + $total_count) >= $maxdocs);
542 next if ($subfile =~ /^\.\.?$/);
543
544 my $this_file_base_dir = $base_dir;
545 my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);
546 #my $unicode_subfile = &Encode::decode("utf8", $raw_subfile);
547 my $unicode_subfile = &Encode::decode(locale => $raw_subfile);
548
549 # Normalise the filename to canonical composition - on mac, filenames use decopmposed form for accented chars
550 $unicode_subfile = NFC($unicode_subfile);
551
552 my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
553 my $raw_full_filename
554 = &FileUtils::filenameConcatenate($this_file_base_dir,$raw_file_subfile);
555
556 if ($self->file_is_blocked($block_hash,$raw_full_filename)) {
557 print STDERR "DirectoryPlugin: file $raw_full_filename was blocked for read\n" if ($verbosity > 2);
558 next;
559 }
560 print STDERR "processing $raw_full_filename\n";
561 # Follow Windows shortcuts
562 if ($raw_subfile =~ m/(?i)\.lnk$/ && (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin"))) {
563 require Win32::Shortcut;
564 my $shortcut = new Win32::Shortcut(&FileUtils::filenameConcatenate($dirname, $raw_subfile));
565 if ($shortcut) {
566 # The file to be processed is now the target of the shortcut
567 $this_file_base_dir = "";
568 $file = "";
569 $raw_subfile = $shortcut->Path;
570 }
571 }
572
573 # check for a symlink pointing back to a leading directory
574 if (-d "$dirname/$raw_subfile" && -l "$dirname/$raw_subfile") {
575 # readlink gives a "fatal error" on systems that don't implement
576 # symlinks. This assumes the the -l test above would fail on those.
577 my $linkdest=readlink "$dirname/$raw_subfile";
578 if (!defined ($linkdest)) {
579 # system error - file not found?
580 warn "DirectoryPlugin: symlink problem - $!";
581 } else {
582 # see if link points to current or a parent directory
583 if ($linkdest =~ m@^[\./\\]+$@ ||
584 index($dirname, $linkdest) != -1) {
585 warn "DirectoryPlugin: Ignoring recursive symlink ($dirname/$raw_subfile -> $linkdest)\n";
586 next;
587 ;
588 }
589 }
590 }
591
592 print $outhandle "DirectoryPlugin: preparing metadata for $raw_subfile\n" if ($verbosity > 2);
593
594 # Make a copy of $in_metadata to pass to $raw_subfile
595 my $out_metadata = {};
596 &metadatautil::combine_metadata_structures($out_metadata, $in_metadata);
597
598 # check the assocfile_info
599 if (defined $self->{'assocfile_info'}->{$raw_full_filename}) {
600 &metadatautil::combine_metadata_structures($out_metadata, $self->{'assocfile_info'}->{$raw_full_filename});
601 }
602
603 ### $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8
604 print STDERR "****** subfile = $subfile, raw_subfile = $raw_subfile\n";
605 print STDERR &unicode::debug_unicode_string("subfile = $subfile, raw_subfile = $raw_subfile\n");
606 # instead of using the subfile, we need unicode aware string
607 ###my $lookup_name = decode("utf8", $raw_subfile);
608 ####print STDERR "lookup nmae = $lookup_name\n";
609 # Next add metadata read in XML files (if it is supplied)
610 if ($additionalmetadata == 1) {
611 foreach my $filespec (@extrametakeys) {
612 ## use the url-encoded filename to do the filename comparison
613 print STDERR "### comparing against filespec $filespec (";
614 print STDERR &unicode::debug_unicode_string("$filespec");
615 print STDERR ")\n";
616 if ($unicode_subfile =~ /^$filespec$/) {
617 ###if ($lookup_name =~ /^$filespec$/) {
618 print $outhandle "File \"$unicode_subfile\" matches filespec \"$filespec\"\n"
619 if ($verbosity > 2);
620 my $mdref = &extrametautil::getmetadata(\%extrametadata, $filespec);
621 my $mfref = &extrametautil::getmetafile(\%extrametafile, $filespec);
622
623 # Add the list files where the metadata came from
624 # into the metadata table so we can track this
625 # This mechanism is similar to how gsdlassocfile works
626
627 my @metafile_pair = ();
628 foreach my $l (keys %$mfref) {
629 my $f = $mfref->{$l};
630 push (@metafile_pair, "$f : $l");
631 }
632
633 $mdref->{'gsdlmetafile'} = \@metafile_pair;
634
635 &metadatautil::combine_metadata_structures($out_metadata, $mdref);
636 }
637 }
638 }
639
640 if (defined $self->{'inf_timestamp'}) {
641 # Look to see if it's a completely new file
642
643 if (!$block_hash->{'new_files'}->{$raw_full_filename}) {
644 # Not a new file, must be an existing file
645 # Let' see if it's newer than the last import.pl
646
647
648 if (! -d $raw_full_filename) {
649 if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
650 # filename has been around for longer than inf_timestamp
651 print $outhandle "**** Skipping $unicode_subfile\n" if ($verbosity >3);
652 next;
653 }
654 else {
655 # Remove old folder in archives (might hash to something different)
656 # *** should be doing this on a Del one as well
657 # but leave folder name?? and ensure hashs to
658 # same again??
659
660 # Then let through as new doc??
661
662 # mark to doc-oids that rely on it for re-indexing
663 }
664 }
665 }
666 }
667
668 # Recursively read each $subfile
669 print $outhandle "DirectoryPlugin recurring: $unicode_subfile\n" if ($verbosity > 2);
670
671 $count += &plugin::read ($pluginfo, $this_file_base_dir,
672 $raw_file_subfile, $block_hash,
673 $out_metadata, $processor, $maxdocs, ($total_count + $count), $gli);
674 }
675
676 return $count;
677}
678
679sub compile_stats {
680 my $self = shift(@_);
681 my ($stats) = @_;
682}
683
6841;
Note: See TracBrowser for help on using the repository browser.