source: trunk/gsdl/perllib/plugins/RecPlug.pm@ 1755

Last change on this file since 1755 was 1755, checked in by say1, 23 years ago

added better cycle detection (but still not perfect)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.6 KB
Line 
1###########################################################################
2#
3# RecPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which recurses through directories processing
27# each file it finds
28
29package RecPlug;
30
31use BasPlug;
32use plugin;
33use util;
34
35
36BEGIN {
37 @ISA = ('BasPlug');
38}
39
40sub new {
41 my ($class) = @_;
42 my $self = new BasPlug ("RecPlug", @_);
43
44 $self->{'exclude_tail_dirs'} = []; # empty by default
45
46 return bless $self, $class;
47}
48
49# return 1 if this class might recurse using $pluginfo
50sub is_recursive {
51 my $self = shift (@_);
52
53 return 1;
54}
55
56
57# return number of files processed, undef if can't process
58# Note that $base_dir might be "" and that $file might
59# include directories
60sub read {
61 my $self = shift (@_);
62 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
63 my $outhandle = $self->{'outhandle'};
64
65 foreach my $etd ( @{$self->{'exclude_tail_dirs'}} )
66 {
67 return 0 if ($file =~ m/$etd/);
68 }
69
70 my (@dir, $subfile);
71 my $count = 0;
72
73 # see if this is a directory
74 my $dirname = &util::filename_cat ($base_dir, $file);
75
76 # check to make sure we're not reading our own archives
77 # or index directory
78 if ($dirname =~ m%^$ENV{'GSDLHOME'}/.*/import.*/(archives|index)$%) {
79 print $outhandle "RecPlug: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
80 return 1;
81 }
82
83 # check to see we haven't got a cyclic path...
84 if ($dirname =~ m%(/.*){,41}%) {
85 print $outhandle "RecPlug: $dirname is 40 directories deep, is this a recursive path? if not increase constant in RecPlug.pm.\n";
86 return 1;
87 }
88
89 # check to see we haven't got a cyclic path...
90 if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
91 print $outhandle "RecPlug: $dirname appears to a recursive loop ...\n";
92 return 1;
93 }
94
95
96 if (-d $dirname) {
97
98 if ($dirname =~ m|/CVS$|) {
99 print $outhandle "RecPlug: $dirname is a CVS directory, skipping.\n";
100 return 1;
101 }
102 # read all the files in the directory
103 if (!opendir (DIR, $dirname))
104 {
105 print $outhandle "RecPlug: WARNING - couldn't read directory $dirname\n";
106 return;
107 }
108
109 @dir = readdir (DIR);
110 closedir (DIR);
111
112 print $outhandle "RecPlug: getting directory $dirname\n";
113
114 # process each file
115 foreach $subfile (@dir) {
116 last if ($maxdocs != -1 && $count >= $maxdocs);
117
118 if ($subfile !~ /^\.\.?$/) {
119 # note: metadata is not carried on to the next level
120 $count += &plugin::read ($pluginfo, $base_dir, &util::filename_cat($file, $subfile),
121 {}, $processor, $maxdocs);
122 }
123 }
124 return $count;
125 }
126
127 # wasn't a directory, someone else will have to process it
128 return undef;
129}
130
131
1321;
Note: See TracBrowser for help on using the repository browser.