source: trunk/gsdl/perllib/plugins/RecPlug.pm@ 1917

Last change on this file since 1917 was 1810, checked in by sjboddie, 23 years ago

Fixed a bug that showed up when using Perl 5.6 on windows

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.6 KB
RevLine 
[537]1###########################################################################
2#
3# RecPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
[4]26# plugin which recurses through directories processing
27# each file it finds
28
29package RecPlug;
30
31use BasPlug;
32use plugin;
[136]33use util;
[4]34
35
36BEGIN {
37 @ISA = ('BasPlug');
38}
39
40sub new {
41 my ($class) = @_;
[1244]42 my $self = new BasPlug ("RecPlug", @_);
[4]43
[620]44 $self->{'exclude_tail_dirs'} = []; # empty by default
45
[4]46 return bless $self, $class;
47}
48
49# return 1 if this class might recurse using $pluginfo
50sub is_recursive {
51 my $self = shift (@_);
52
53 return 1;
54}
55
[317]56
57# return number of files processed, undef if can't process
[4]58# Note that $base_dir might be "" and that $file might
59# include directories
60sub read {
61 my $self = shift (@_);
[317]62 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
[1424]63 my $outhandle = $self->{'outhandle'};
[4]64
[1244]65 foreach my $etd ( @{$self->{'exclude_tail_dirs'}} )
[620]66 {
67 return 0 if ($file =~ m/$etd/);
68 }
69
[4]70 my (@dir, $subfile);
[317]71 my $count = 0;
72
[4]73 # see if this is a directory
[1244]74 my $dirname = &util::filename_cat ($base_dir, $file);
[1755]75
76 # check to make sure we're not reading our own archives
77 # or index directory
[1810]78 my $gsdlhome = quotemeta ($ENV{'GSDLHOME'});
79 if ($dirname =~ m%^${gsdlhome}/.*?/import.*?/(archives|index)$%) {
[1755]80 print $outhandle "RecPlug: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
81 return 1;
82 }
83
84 # check to see we haven't got a cyclic path...
85 if ($dirname =~ m%(/.*){,41}%) {
86 print $outhandle "RecPlug: $dirname is 40 directories deep, is this a recursive path? if not increase constant in RecPlug.pm.\n";
87 return 1;
88 }
89
90 # check to see we haven't got a cyclic path...
91 if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
92 print $outhandle "RecPlug: $dirname appears to a recursive loop ...\n";
93 return 1;
94 }
95
96
[136]97 if (-d $dirname) {
[4]98
[1755]99 if ($dirname =~ m|/CVS$|) {
100 print $outhandle "RecPlug: $dirname is a CVS directory, skipping.\n";
101 return 1;
102 }
[4]103 # read all the files in the directory
[593]104 if (!opendir (DIR, $dirname))
105 {
[1424]106 print $outhandle "RecPlug: WARNING - couldn't read directory $dirname\n";
[593]107 return;
108 }
109
[4]110 @dir = readdir (DIR);
111 closedir (DIR);
112
[1424]113 print $outhandle "RecPlug: getting directory $dirname\n";
[317]114
[4]115 # process each file
116 foreach $subfile (@dir) {
[809]117 last if ($maxdocs != -1 && $count >= $maxdocs);
[317]118
[4]119 if ($subfile !~ /^\.\.?$/) {
120 # note: metadata is not carried on to the next level
[317]121 $count += &plugin::read ($pluginfo, $base_dir, &util::filename_cat($file, $subfile),
122 {}, $processor, $maxdocs);
[4]123 }
124 }
[317]125 return $count;
[4]126 }
127
128 # wasn't a directory, someone else will have to process it
[317]129 return undef;
[4]130}
131
132
1331;
Note: See TracBrowser for help on using the repository browser.