source: trunk/gsdl/perllib/plugins/RecPlug.pm@ 1917

Last change on this file since 1917 was 1810, checked in by sjboddie, 23 years ago

Fixed a bug that showed up when using Perl 5.6 on windows

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.6 KB
Line 
1###########################################################################
2#
3# RecPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which recurses through directories processing
27# each file it finds
28
29package RecPlug;
30
31use BasPlug;
32use plugin;
33use util;
34
35
36BEGIN {
37 @ISA = ('BasPlug');
38}
39
40sub new {
41 my ($class) = @_;
42 my $self = new BasPlug ("RecPlug", @_);
43
44 $self->{'exclude_tail_dirs'} = []; # empty by default
45
46 return bless $self, $class;
47}
48
49# return 1 if this class might recurse using $pluginfo
50sub is_recursive {
51 my $self = shift (@_);
52
53 return 1;
54}
55
56
57# return number of files processed, undef if can't process
58# Note that $base_dir might be "" and that $file might
59# include directories
60sub read {
61 my $self = shift (@_);
62 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
63 my $outhandle = $self->{'outhandle'};
64
65 foreach my $etd ( @{$self->{'exclude_tail_dirs'}} )
66 {
67 return 0 if ($file =~ m/$etd/);
68 }
69
70 my (@dir, $subfile);
71 my $count = 0;
72
73 # see if this is a directory
74 my $dirname = &util::filename_cat ($base_dir, $file);
75
76 # check to make sure we're not reading our own archives
77 # or index directory
78 my $gsdlhome = quotemeta ($ENV{'GSDLHOME'});
79 if ($dirname =~ m%^${gsdlhome}/.*?/import.*?/(archives|index)$%) {
80 print $outhandle "RecPlug: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
81 return 1;
82 }
83
84 # check to see we haven't got a cyclic path...
85 if ($dirname =~ m%(/.*){,41}%) {
86 print $outhandle "RecPlug: $dirname is 40 directories deep, is this a recursive path? if not increase constant in RecPlug.pm.\n";
87 return 1;
88 }
89
90 # check to see we haven't got a cyclic path...
91 if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
92 print $outhandle "RecPlug: $dirname appears to a recursive loop ...\n";
93 return 1;
94 }
95
96
97 if (-d $dirname) {
98
99 if ($dirname =~ m|/CVS$|) {
100 print $outhandle "RecPlug: $dirname is a CVS directory, skipping.\n";
101 return 1;
102 }
103 # read all the files in the directory
104 if (!opendir (DIR, $dirname))
105 {
106 print $outhandle "RecPlug: WARNING - couldn't read directory $dirname\n";
107 return;
108 }
109
110 @dir = readdir (DIR);
111 closedir (DIR);
112
113 print $outhandle "RecPlug: getting directory $dirname\n";
114
115 # process each file
116 foreach $subfile (@dir) {
117 last if ($maxdocs != -1 && $count >= $maxdocs);
118
119 if ($subfile !~ /^\.\.?$/) {
120 # note: metadata is not carried on to the next level
121 $count += &plugin::read ($pluginfo, $base_dir, &util::filename_cat($file, $subfile),
122 {}, $processor, $maxdocs);
123 }
124 }
125 return $count;
126 }
127
128 # wasn't a directory, someone else will have to process it
129 return undef;
130}
131
132
1331;
Note: See TracBrowser for help on using the repository browser.