source: main/trunk/greenstone2/perllib/cpan/File/Listing.pm@ 27181

Last change on this file since 27181 was 27181, checked in by davidb, 11 years ago

Latest libwww-perl (v6x) isn't as self-sufficeint as earlier (v5.x) in terms of supporting Perl modules. Dropping back to to this earlier version so activate.pl runs smoothly when system-installed Perl on Unix system does not have the LWP and related modules installed

File size: 9.4 KB
Line 
1package File::Listing;
2
3sub Version { $VERSION; }
4$VERSION = "5.837";
5
6require Exporter;
7@ISA = qw(Exporter);
8@EXPORT = qw(parse_dir);
9
10use strict;
11
12use Carp ();
13use HTTP::Date qw(str2time);
14
15
16
17sub parse_dir ($;$$$)
18{
19 my($dir, $tz, $fstype, $error) = @_;
20
21 $fstype ||= 'unix';
22 $fstype = "File::Listing::" . lc $fstype;
23
24 my @args = $_[0];
25 push(@args, $tz) if(@_ >= 2);
26 push(@args, $error) if(@_ >= 4);
27
28 $fstype->parse(@args);
29}
30
31
32sub line { Carp::croak("Not implemented yet"); }
33sub init { } # Dummy sub
34
35
36sub file_mode ($)
37{
38 # This routine was originally borrowed from Graham Barr's
39 # Net::FTP package.
40
41 local $_ = shift;
42 my $mode = 0;
43 my($type,$ch);
44
45 s/^(.)// and $type = $1;
46
47 while (/(.)/g) {
48 $mode <<= 1;
49 $mode |= 1 if $1 ne "-" &&
50 $1 ne 'S' &&
51 $1 ne 't' &&
52 $1 ne 'T';
53 }
54
55 $type eq "d" and $mode |= 0040000 or # Directory
56 $type eq "l" and $mode |= 0120000 or # Symbolic Link
57 $mode |= 0100000; # Regular File
58
59 $mode |= 0004000 if /^...s....../i;
60 $mode |= 0002000 if /^......s.../i;
61 $mode |= 0001000 if /^.........t/i;
62
63 $mode;
64}
65
66
67sub parse
68{
69 my($pkg, $dir, $tz, $error) = @_;
70
71 # First let's try to determine what kind of dir parameter we have
72 # received. We allow both listings, reference to arrays and
73 # file handles to read from.
74
75 if (ref($dir) eq 'ARRAY') {
76 # Already splitted up
77 }
78 elsif (ref($dir) eq 'GLOB') {
79 # A file handle
80 }
81 elsif (ref($dir)) {
82 Carp::croak("Illegal argument to parse_dir()");
83 }
84 elsif ($dir =~ /^\*\w+(::\w+)+$/) {
85 # This scalar looks like a file handle, so we assume it is
86 }
87 else {
88 # A normal scalar listing
89 $dir = [ split(/\n/, $dir) ];
90 }
91
92 $pkg->init();
93
94 my @files = ();
95 if (ref($dir) eq 'ARRAY') {
96 for (@$dir) {
97 push(@files, $pkg->line($_, $tz, $error));
98 }
99 }
100 else {
101 local($_);
102 while (<$dir>) {
103 chomp;
104 push(@files, $pkg->line($_, $tz, $error));
105 }
106 }
107 wantarray ? @files : \@files;
108}
109
110
111
112package File::Listing::unix;
113
114use HTTP::Date qw(str2time);
115
116# A place to remember current directory from last line parsed.
117use vars qw($curdir @ISA);
118
119@ISA = qw(File::Listing);
120
121
122
123sub init
124{
125 $curdir = '';
126}
127
128
129sub line
130{
131 shift; # package name
132 local($_) = shift;
133 my($tz, $error) = @_;
134
135 s/\015//g;
136 #study;
137
138 my ($kind, $size, $date, $name);
139 if (($kind, $size, $date, $name) =
140 /^([\-FlrwxsStTdD]{10}) # Type and permission bits
141 .* # Graps
142 \D(\d+) # File size
143 \s+ # Some space
144 (\w{3}\s+\d+\s+(?:\d{1,2}:\d{2}|\d{4})|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}) # Date
145 \s+ # Some more space
146 (.*)$ # File name
147 /x )
148
149 {
150 return if $name eq '.' || $name eq '..';
151 $name = "$curdir/$name" if length $curdir;
152 my $type = '?';
153 if ($kind =~ /^l/ && $name =~ /(.*) -> (.*)/ ) {
154 $name = $1;
155 $type = "l $2";
156 }
157 elsif ($kind =~ /^[\-F]/) { # (hopefully) a regular file
158 $type = 'f';
159 }
160 elsif ($kind =~ /^[dD]/) {
161 $type = 'd';
162 $size = undef; # Don't believe the reported size
163 }
164 return [$name, $type, $size, str2time($date, $tz),
165 File::Listing::file_mode($kind)];
166
167 }
168 elsif (/^(.+):$/ && !/^[dcbsp].*\s.*\s.*:$/ ) {
169 my $dir = $1;
170 return () if $dir eq '.';
171 $curdir = $dir;
172 return ();
173 }
174 elsif (/^[Tt]otal\s+(\d+)$/ || /^\s*$/) {
175 return ();
176 }
177 elsif (/not found/ || # OSF1, HPUX, and SunOS return
178 # "$file not found"
179 /No such file/ || # IRIX returns
180 # "UX:ls: ERROR: Cannot access $file: No such file or directory"
181 # Solaris returns
182 # "$file: No such file or directory"
183 /cannot find/ # Windows NT returns
184 # "The system cannot find the path specified."
185 ) {
186 return () unless defined $error;
187 &$error($_) if ref($error) eq 'CODE';
188 warn "Error: $_\n" if $error eq 'warn';
189 return ();
190 }
191 elsif ($_ eq '') { # AIX, and Linux return nothing
192 return () unless defined $error;
193 &$error("No such file or directory") if ref($error) eq 'CODE';
194 warn "Warning: No such file or directory\n" if $error eq 'warn';
195 return ();
196 }
197 else {
198 # parse failed, check if the dosftp parse understands it
199 File::Listing::dosftp->init();
200 return(File::Listing::dosftp->line($_,$tz,$error));
201 }
202
203}
204
205
206
207package File::Listing::dosftp;
208
209use HTTP::Date qw(str2time);
210
211# A place to remember current directory from last line parsed.
212use vars qw($curdir @ISA);
213
214@ISA = qw(File::Listing);
215
216
217
218sub init
219{
220 $curdir = '';
221}
222
223
224sub line
225{
226 shift; # package name
227 local($_) = shift;
228 my($tz, $error) = @_;
229
230 s/\015//g;
231
232 my ($date, $size_or_dir, $name, $size);
233
234 # 02-05-96 10:48AM 1415 src.slf
235 # 09-10-96 09:18AM <DIR> sl_util
236 if (($date, $size_or_dir, $name) =
237 /^(\d\d-\d\d-\d\d\s+\d\d:\d\d\wM) # Date and time info
238 \s+ # Some space
239 (<\w{3}>|\d+) # Dir or Size
240 \s+ # Some more space
241 (.+)$ # File name
242 /x )
243 {
244 return if $name eq '.' || $name eq '..';
245 $name = "$curdir/$name" if length $curdir;
246 my $type = '?';
247 if ($size_or_dir eq '<DIR>') {
248 $type = "d";
249 $size = ""; # directories have no size in the pc listing
250 }
251 else {
252 $type = 'f';
253 $size = $size_or_dir;
254 }
255 return [$name, $type, $size, str2time($date, $tz), undef];
256 }
257 else {
258 return () unless defined $error;
259 &$error($_) if ref($error) eq 'CODE';
260 warn "Can't parse: $_\n" if $error eq 'warn';
261 return ();
262 }
263
264}
265
266
267
268package File::Listing::vms;
269@File::Listing::vms::ISA = qw(File::Listing);
270
271package File::Listing::netware;
272@File::Listing::netware::ISA = qw(File::Listing);
273
274
275
276package File::Listing::apache;
277
278use vars qw(@ISA);
279
280@ISA = qw(File::Listing);
281
282
283sub init { }
284
285
286sub line {
287 shift; # package name
288 local($_) = shift;
289 my($tz, $error) = @_; # ignored for now...
290
291 if (m!<A\s+HREF=\"([^\"]+)\">.*</A>.*?(\d+)-([a-zA-Z]+|\d+)-(\d+)\s+(\d+):(\d+)\s+(?:([\d\.]+[kMG]?|-))!i) {
292 my($filename, $filesize) = ($1, $7);
293 my($d,$m,$y, $H,$M) = ($2,$3,$4,$5,$6);
294 if ($m =~ /^\d+$/) {
295 ($d,$y) = ($y,$d) # iso date
296 }
297 else {
298 $m = _monthabbrev_number($m);
299 }
300
301 $filesize = 0 if $filesize eq '-';
302 if ($filesize =~ s/k$//i) {
303 $filesize *= 1024;
304 }
305 elsif ($filesize =~ s/M$//) {
306 $filesize *= 1024*1024;
307 }
308 elsif ($filesize =~ s/G$//) {
309 $filesize *= 1024*1024*1024;
310 }
311 $filesize = int $filesize;
312
313 require Time::Local;
314 my $filetime = Time::Local::timelocal(0,$M,$H,$d,$m-1,_guess_year($y)-1900);
315 my $filetype = ($filename =~ s|/$|| ? "d" : "f");
316 return [$filename, $filetype, $filesize, $filetime, undef];
317 }
318
319 return ();
320}
321
322
323sub _guess_year {
324 my $y = shift;
325 if ($y >= 90) {
326 $y = 1900+$y;
327 }
328 elsif ($y < 100) {
329 $y = 2000+$y;
330 }
331 $y;
332}
333
334
335sub _monthabbrev_number {
336 my $mon = shift;
337 +{'Jan' => 1,
338 'Feb' => 2,
339 'Mar' => 3,
340 'Apr' => 4,
341 'May' => 5,
342 'Jun' => 6,
343 'Jul' => 7,
344 'Aug' => 8,
345 'Sep' => 9,
346 'Oct' => 10,
347 'Nov' => 11,
348 'Dec' => 12,
349 }->{$mon};
350}
351
352
3531;
354
355__END__
356
357=head1 NAME
358
359File::Listing - parse directory listing
360
361=head1 SYNOPSIS
362
363 use File::Listing qw(parse_dir);
364 $ENV{LANG} = "C"; # dates in non-English locales not supported
365 for (parse_dir(`ls -l`)) {
366 ($name, $type, $size, $mtime, $mode) = @$_;
367 next if $type ne 'f'; # plain file
368 #...
369 }
370
371 # directory listing can also be read from a file
372 open(LISTING, "zcat ls-lR.gz|");
373 $dir = parse_dir(\*LISTING, '+0000');
374
375=head1 DESCRIPTION
376
377This module exports a single function called parse_dir(), which can be
378used to parse directory listings.
379
380The first parameter to parse_dir() is the directory listing to parse.
381It can be a scalar, a reference to an array of directory lines or a
382glob representing a filehandle to read the directory listing from.
383
384The second parameter is the time zone to use when parsing time stamps
385in the listing. If this value is undefined, then the local time zone is
386assumed.
387
388The third parameter is the type of listing to assume. Currently
389supported formats are 'unix', 'apache' and 'dosftp'. The default
390value 'unix'. Ideally, the listing type should be determined
391automatically.
392
393The fourth parameter specifies how unparseable lines should be treated.
394Values can be 'ignore', 'warn' or a code reference. Warn means that
395the perl warn() function will be called. If a code reference is
396passed, then this routine will be called and the return value from it
397will be incorporated in the listing. The default is 'ignore'.
398
399Only the first parameter is mandatory.
400
401The return value from parse_dir() is a list of directory entries. In
402a scalar context the return value is a reference to the list. The
403directory entries are represented by an array consisting of [
404$filename, $filetype, $filesize, $filetime, $filemode ]. The
405$filetype value is one of the letters 'f', 'd', 'l' or '?'. The
406$filetime value is the seconds since Jan 1, 1970. The
407$filemode is a bitmask like the mode returned by stat().
408
409=head1 CREDITS
410
411Based on lsparse.pl (from Lee McLoughlin's ftp mirror package) and
412Net::FTP's parse_dir (Graham Barr).
Note: See TracBrowser for help on using the repository browser.