1 | package File::Listing;
|
---|
2 |
|
---|
3 | sub Version { $VERSION; }
|
---|
4 | $VERSION = "5.837";
|
---|
5 |
|
---|
6 | require Exporter;
|
---|
7 | @ISA = qw(Exporter);
|
---|
8 | @EXPORT = qw(parse_dir);
|
---|
9 |
|
---|
10 | use strict;
|
---|
11 |
|
---|
12 | use Carp ();
|
---|
13 | use HTTP::Date qw(str2time);
|
---|
14 |
|
---|
15 |
|
---|
16 |
|
---|
17 | sub parse_dir ($;$$$)
|
---|
18 | {
|
---|
19 | my($dir, $tz, $fstype, $error) = @_;
|
---|
20 |
|
---|
21 | $fstype ||= 'unix';
|
---|
22 | $fstype = "File::Listing::" . lc $fstype;
|
---|
23 |
|
---|
24 | my @args = $_[0];
|
---|
25 | push(@args, $tz) if(@_ >= 2);
|
---|
26 | push(@args, $error) if(@_ >= 4);
|
---|
27 |
|
---|
28 | $fstype->parse(@args);
|
---|
29 | }
|
---|
30 |
|
---|
31 |
|
---|
32 | sub line { Carp::croak("Not implemented yet"); }
|
---|
33 | sub init { } # Dummy sub
|
---|
34 |
|
---|
35 |
|
---|
36 | sub file_mode ($)
|
---|
37 | {
|
---|
38 | # This routine was originally borrowed from Graham Barr's
|
---|
39 | # Net::FTP package.
|
---|
40 |
|
---|
41 | local $_ = shift;
|
---|
42 | my $mode = 0;
|
---|
43 | my($type,$ch);
|
---|
44 |
|
---|
45 | s/^(.)// and $type = $1;
|
---|
46 |
|
---|
47 | while (/(.)/g) {
|
---|
48 | $mode <<= 1;
|
---|
49 | $mode |= 1 if $1 ne "-" &&
|
---|
50 | $1 ne 'S' &&
|
---|
51 | $1 ne 't' &&
|
---|
52 | $1 ne 'T';
|
---|
53 | }
|
---|
54 |
|
---|
55 | $type eq "d" and $mode |= 0040000 or # Directory
|
---|
56 | $type eq "l" and $mode |= 0120000 or # Symbolic Link
|
---|
57 | $mode |= 0100000; # Regular File
|
---|
58 |
|
---|
59 | $mode |= 0004000 if /^...s....../i;
|
---|
60 | $mode |= 0002000 if /^......s.../i;
|
---|
61 | $mode |= 0001000 if /^.........t/i;
|
---|
62 |
|
---|
63 | $mode;
|
---|
64 | }
|
---|
65 |
|
---|
66 |
|
---|
67 | sub parse
|
---|
68 | {
|
---|
69 | my($pkg, $dir, $tz, $error) = @_;
|
---|
70 |
|
---|
71 | # First let's try to determine what kind of dir parameter we have
|
---|
72 | # received. We allow both listings, reference to arrays and
|
---|
73 | # file handles to read from.
|
---|
74 |
|
---|
75 | if (ref($dir) eq 'ARRAY') {
|
---|
76 | # Already splitted up
|
---|
77 | }
|
---|
78 | elsif (ref($dir) eq 'GLOB') {
|
---|
79 | # A file handle
|
---|
80 | }
|
---|
81 | elsif (ref($dir)) {
|
---|
82 | Carp::croak("Illegal argument to parse_dir()");
|
---|
83 | }
|
---|
84 | elsif ($dir =~ /^\*\w+(::\w+)+$/) {
|
---|
85 | # This scalar looks like a file handle, so we assume it is
|
---|
86 | }
|
---|
87 | else {
|
---|
88 | # A normal scalar listing
|
---|
89 | $dir = [ split(/\n/, $dir) ];
|
---|
90 | }
|
---|
91 |
|
---|
92 | $pkg->init();
|
---|
93 |
|
---|
94 | my @files = ();
|
---|
95 | if (ref($dir) eq 'ARRAY') {
|
---|
96 | for (@$dir) {
|
---|
97 | push(@files, $pkg->line($_, $tz, $error));
|
---|
98 | }
|
---|
99 | }
|
---|
100 | else {
|
---|
101 | local($_);
|
---|
102 | while (<$dir>) {
|
---|
103 | chomp;
|
---|
104 | push(@files, $pkg->line($_, $tz, $error));
|
---|
105 | }
|
---|
106 | }
|
---|
107 | wantarray ? @files : \@files;
|
---|
108 | }
|
---|
109 |
|
---|
110 |
|
---|
111 |
|
---|
112 | package File::Listing::unix;
|
---|
113 |
|
---|
114 | use HTTP::Date qw(str2time);
|
---|
115 |
|
---|
116 | # A place to remember current directory from last line parsed.
|
---|
117 | use vars qw($curdir @ISA);
|
---|
118 |
|
---|
119 | @ISA = qw(File::Listing);
|
---|
120 |
|
---|
121 |
|
---|
122 |
|
---|
123 | sub init
|
---|
124 | {
|
---|
125 | $curdir = '';
|
---|
126 | }
|
---|
127 |
|
---|
128 |
|
---|
129 | sub line
|
---|
130 | {
|
---|
131 | shift; # package name
|
---|
132 | local($_) = shift;
|
---|
133 | my($tz, $error) = @_;
|
---|
134 |
|
---|
135 | s/\015//g;
|
---|
136 | #study;
|
---|
137 |
|
---|
138 | my ($kind, $size, $date, $name);
|
---|
139 | if (($kind, $size, $date, $name) =
|
---|
140 | /^([\-FlrwxsStTdD]{10}) # Type and permission bits
|
---|
141 | .* # Graps
|
---|
142 | \D(\d+) # File size
|
---|
143 | \s+ # Some space
|
---|
144 | (\w{3}\s+\d+\s+(?:\d{1,2}:\d{2}|\d{4})|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}) # Date
|
---|
145 | \s+ # Some more space
|
---|
146 | (.*)$ # File name
|
---|
147 | /x )
|
---|
148 |
|
---|
149 | {
|
---|
150 | return if $name eq '.' || $name eq '..';
|
---|
151 | $name = "$curdir/$name" if length $curdir;
|
---|
152 | my $type = '?';
|
---|
153 | if ($kind =~ /^l/ && $name =~ /(.*) -> (.*)/ ) {
|
---|
154 | $name = $1;
|
---|
155 | $type = "l $2";
|
---|
156 | }
|
---|
157 | elsif ($kind =~ /^[\-F]/) { # (hopefully) a regular file
|
---|
158 | $type = 'f';
|
---|
159 | }
|
---|
160 | elsif ($kind =~ /^[dD]/) {
|
---|
161 | $type = 'd';
|
---|
162 | $size = undef; # Don't believe the reported size
|
---|
163 | }
|
---|
164 | return [$name, $type, $size, str2time($date, $tz),
|
---|
165 | File::Listing::file_mode($kind)];
|
---|
166 |
|
---|
167 | }
|
---|
168 | elsif (/^(.+):$/ && !/^[dcbsp].*\s.*\s.*:$/ ) {
|
---|
169 | my $dir = $1;
|
---|
170 | return () if $dir eq '.';
|
---|
171 | $curdir = $dir;
|
---|
172 | return ();
|
---|
173 | }
|
---|
174 | elsif (/^[Tt]otal\s+(\d+)$/ || /^\s*$/) {
|
---|
175 | return ();
|
---|
176 | }
|
---|
177 | elsif (/not found/ || # OSF1, HPUX, and SunOS return
|
---|
178 | # "$file not found"
|
---|
179 | /No such file/ || # IRIX returns
|
---|
180 | # "UX:ls: ERROR: Cannot access $file: No such file or directory"
|
---|
181 | # Solaris returns
|
---|
182 | # "$file: No such file or directory"
|
---|
183 | /cannot find/ # Windows NT returns
|
---|
184 | # "The system cannot find the path specified."
|
---|
185 | ) {
|
---|
186 | return () unless defined $error;
|
---|
187 | &$error($_) if ref($error) eq 'CODE';
|
---|
188 | warn "Error: $_\n" if $error eq 'warn';
|
---|
189 | return ();
|
---|
190 | }
|
---|
191 | elsif ($_ eq '') { # AIX, and Linux return nothing
|
---|
192 | return () unless defined $error;
|
---|
193 | &$error("No such file or directory") if ref($error) eq 'CODE';
|
---|
194 | warn "Warning: No such file or directory\n" if $error eq 'warn';
|
---|
195 | return ();
|
---|
196 | }
|
---|
197 | else {
|
---|
198 | # parse failed, check if the dosftp parse understands it
|
---|
199 | File::Listing::dosftp->init();
|
---|
200 | return(File::Listing::dosftp->line($_,$tz,$error));
|
---|
201 | }
|
---|
202 |
|
---|
203 | }
|
---|
204 |
|
---|
205 |
|
---|
206 |
|
---|
207 | package File::Listing::dosftp;
|
---|
208 |
|
---|
209 | use HTTP::Date qw(str2time);
|
---|
210 |
|
---|
211 | # A place to remember current directory from last line parsed.
|
---|
212 | use vars qw($curdir @ISA);
|
---|
213 |
|
---|
214 | @ISA = qw(File::Listing);
|
---|
215 |
|
---|
216 |
|
---|
217 |
|
---|
218 | sub init
|
---|
219 | {
|
---|
220 | $curdir = '';
|
---|
221 | }
|
---|
222 |
|
---|
223 |
|
---|
224 | sub line
|
---|
225 | {
|
---|
226 | shift; # package name
|
---|
227 | local($_) = shift;
|
---|
228 | my($tz, $error) = @_;
|
---|
229 |
|
---|
230 | s/\015//g;
|
---|
231 |
|
---|
232 | my ($date, $size_or_dir, $name, $size);
|
---|
233 |
|
---|
234 | # 02-05-96 10:48AM 1415 src.slf
|
---|
235 | # 09-10-96 09:18AM <DIR> sl_util
|
---|
236 | if (($date, $size_or_dir, $name) =
|
---|
237 | /^(\d\d-\d\d-\d\d\s+\d\d:\d\d\wM) # Date and time info
|
---|
238 | \s+ # Some space
|
---|
239 | (<\w{3}>|\d+) # Dir or Size
|
---|
240 | \s+ # Some more space
|
---|
241 | (.+)$ # File name
|
---|
242 | /x )
|
---|
243 | {
|
---|
244 | return if $name eq '.' || $name eq '..';
|
---|
245 | $name = "$curdir/$name" if length $curdir;
|
---|
246 | my $type = '?';
|
---|
247 | if ($size_or_dir eq '<DIR>') {
|
---|
248 | $type = "d";
|
---|
249 | $size = ""; # directories have no size in the pc listing
|
---|
250 | }
|
---|
251 | else {
|
---|
252 | $type = 'f';
|
---|
253 | $size = $size_or_dir;
|
---|
254 | }
|
---|
255 | return [$name, $type, $size, str2time($date, $tz), undef];
|
---|
256 | }
|
---|
257 | else {
|
---|
258 | return () unless defined $error;
|
---|
259 | &$error($_) if ref($error) eq 'CODE';
|
---|
260 | warn "Can't parse: $_\n" if $error eq 'warn';
|
---|
261 | return ();
|
---|
262 | }
|
---|
263 |
|
---|
264 | }
|
---|
265 |
|
---|
266 |
|
---|
267 |
|
---|
268 | package File::Listing::vms;
|
---|
269 | @File::Listing::vms::ISA = qw(File::Listing);
|
---|
270 |
|
---|
271 | package File::Listing::netware;
|
---|
272 | @File::Listing::netware::ISA = qw(File::Listing);
|
---|
273 |
|
---|
274 |
|
---|
275 |
|
---|
276 | package File::Listing::apache;
|
---|
277 |
|
---|
278 | use vars qw(@ISA);
|
---|
279 |
|
---|
280 | @ISA = qw(File::Listing);
|
---|
281 |
|
---|
282 |
|
---|
283 | sub init { }
|
---|
284 |
|
---|
285 |
|
---|
286 | sub line {
|
---|
287 | shift; # package name
|
---|
288 | local($_) = shift;
|
---|
289 | my($tz, $error) = @_; # ignored for now...
|
---|
290 |
|
---|
291 | if (m!<A\s+HREF=\"([^\"]+)\">.*</A>.*?(\d+)-([a-zA-Z]+|\d+)-(\d+)\s+(\d+):(\d+)\s+(?:([\d\.]+[kMG]?|-))!i) {
|
---|
292 | my($filename, $filesize) = ($1, $7);
|
---|
293 | my($d,$m,$y, $H,$M) = ($2,$3,$4,$5,$6);
|
---|
294 | if ($m =~ /^\d+$/) {
|
---|
295 | ($d,$y) = ($y,$d) # iso date
|
---|
296 | }
|
---|
297 | else {
|
---|
298 | $m = _monthabbrev_number($m);
|
---|
299 | }
|
---|
300 |
|
---|
301 | $filesize = 0 if $filesize eq '-';
|
---|
302 | if ($filesize =~ s/k$//i) {
|
---|
303 | $filesize *= 1024;
|
---|
304 | }
|
---|
305 | elsif ($filesize =~ s/M$//) {
|
---|
306 | $filesize *= 1024*1024;
|
---|
307 | }
|
---|
308 | elsif ($filesize =~ s/G$//) {
|
---|
309 | $filesize *= 1024*1024*1024;
|
---|
310 | }
|
---|
311 | $filesize = int $filesize;
|
---|
312 |
|
---|
313 | require Time::Local;
|
---|
314 | my $filetime = Time::Local::timelocal(0,$M,$H,$d,$m-1,_guess_year($y)-1900);
|
---|
315 | my $filetype = ($filename =~ s|/$|| ? "d" : "f");
|
---|
316 | return [$filename, $filetype, $filesize, $filetime, undef];
|
---|
317 | }
|
---|
318 |
|
---|
319 | return ();
|
---|
320 | }
|
---|
321 |
|
---|
322 |
|
---|
323 | sub _guess_year {
|
---|
324 | my $y = shift;
|
---|
325 | if ($y >= 90) {
|
---|
326 | $y = 1900+$y;
|
---|
327 | }
|
---|
328 | elsif ($y < 100) {
|
---|
329 | $y = 2000+$y;
|
---|
330 | }
|
---|
331 | $y;
|
---|
332 | }
|
---|
333 |
|
---|
334 |
|
---|
335 | sub _monthabbrev_number {
|
---|
336 | my $mon = shift;
|
---|
337 | +{'Jan' => 1,
|
---|
338 | 'Feb' => 2,
|
---|
339 | 'Mar' => 3,
|
---|
340 | 'Apr' => 4,
|
---|
341 | 'May' => 5,
|
---|
342 | 'Jun' => 6,
|
---|
343 | 'Jul' => 7,
|
---|
344 | 'Aug' => 8,
|
---|
345 | 'Sep' => 9,
|
---|
346 | 'Oct' => 10,
|
---|
347 | 'Nov' => 11,
|
---|
348 | 'Dec' => 12,
|
---|
349 | }->{$mon};
|
---|
350 | }
|
---|
351 |
|
---|
352 |
|
---|
353 | 1;
|
---|
354 |
|
---|
355 | __END__
|
---|
356 |
|
---|
357 | =head1 NAME
|
---|
358 |
|
---|
359 | File::Listing - parse directory listing
|
---|
360 |
|
---|
361 | =head1 SYNOPSIS
|
---|
362 |
|
---|
363 | use File::Listing qw(parse_dir);
|
---|
364 | $ENV{LANG} = "C"; # dates in non-English locales not supported
|
---|
365 | for (parse_dir(`ls -l`)) {
|
---|
366 | ($name, $type, $size, $mtime, $mode) = @$_;
|
---|
367 | next if $type ne 'f'; # plain file
|
---|
368 | #...
|
---|
369 | }
|
---|
370 |
|
---|
371 | # directory listing can also be read from a file
|
---|
372 | open(LISTING, "zcat ls-lR.gz|");
|
---|
373 | $dir = parse_dir(\*LISTING, '+0000');
|
---|
374 |
|
---|
375 | =head1 DESCRIPTION
|
---|
376 |
|
---|
377 | This module exports a single function called parse_dir(), which can be
|
---|
378 | used to parse directory listings.
|
---|
379 |
|
---|
380 | The first parameter to parse_dir() is the directory listing to parse.
|
---|
381 | It can be a scalar, a reference to an array of directory lines or a
|
---|
382 | glob representing a filehandle to read the directory listing from.
|
---|
383 |
|
---|
384 | The second parameter is the time zone to use when parsing time stamps
|
---|
385 | in the listing. If this value is undefined, then the local time zone is
|
---|
386 | assumed.
|
---|
387 |
|
---|
388 | The third parameter is the type of listing to assume. Currently
|
---|
389 | supported formats are 'unix', 'apache' and 'dosftp'. The default
|
---|
390 | value 'unix'. Ideally, the listing type should be determined
|
---|
391 | automatically.
|
---|
392 |
|
---|
393 | The fourth parameter specifies how unparseable lines should be treated.
|
---|
394 | Values can be 'ignore', 'warn' or a code reference. Warn means that
|
---|
395 | the perl warn() function will be called. If a code reference is
|
---|
396 | passed, then this routine will be called and the return value from it
|
---|
397 | will be incorporated in the listing. The default is 'ignore'.
|
---|
398 |
|
---|
399 | Only the first parameter is mandatory.
|
---|
400 |
|
---|
401 | The return value from parse_dir() is a list of directory entries. In
|
---|
402 | a scalar context the return value is a reference to the list. The
|
---|
403 | directory entries are represented by an array consisting of [
|
---|
404 | $filename, $filetype, $filesize, $filetime, $filemode ]. The
|
---|
405 | $filetype value is one of the letters 'f', 'd', 'l' or '?'. The
|
---|
406 | $filetime value is the seconds since Jan 1, 1970. The
|
---|
407 | $filemode is a bitmask like the mode returned by stat().
|
---|
408 |
|
---|
409 | =head1 CREDITS
|
---|
410 |
|
---|
411 | Based on lsparse.pl (from Lee McLoughlin's ftp mirror package) and
|
---|
412 | Net::FTP's parse_dir (Graham Barr).
|
---|