[27181] | 1 | package File::Listing;
|
---|
| 2 |
|
---|
| 3 | sub Version { $VERSION; }
|
---|
| 4 | $VERSION = "5.837";
|
---|
| 5 |
|
---|
| 6 | require Exporter;
|
---|
| 7 | @ISA = qw(Exporter);
|
---|
| 8 | @EXPORT = qw(parse_dir);
|
---|
| 9 |
|
---|
| 10 | use strict;
|
---|
| 11 |
|
---|
| 12 | use Carp ();
|
---|
| 13 | use HTTP::Date qw(str2time);
|
---|
| 14 |
|
---|
| 15 |
|
---|
| 16 |
|
---|
| 17 | sub parse_dir ($;$$$)
|
---|
| 18 | {
|
---|
| 19 | my($dir, $tz, $fstype, $error) = @_;
|
---|
| 20 |
|
---|
| 21 | $fstype ||= 'unix';
|
---|
| 22 | $fstype = "File::Listing::" . lc $fstype;
|
---|
| 23 |
|
---|
| 24 | my @args = $_[0];
|
---|
| 25 | push(@args, $tz) if(@_ >= 2);
|
---|
| 26 | push(@args, $error) if(@_ >= 4);
|
---|
| 27 |
|
---|
| 28 | $fstype->parse(@args);
|
---|
| 29 | }
|
---|
| 30 |
|
---|
| 31 |
|
---|
| 32 | sub line { Carp::croak("Not implemented yet"); }
|
---|
| 33 | sub init { } # Dummy sub
|
---|
| 34 |
|
---|
| 35 |
|
---|
| 36 | sub file_mode ($)
|
---|
| 37 | {
|
---|
| 38 | # This routine was originally borrowed from Graham Barr's
|
---|
| 39 | # Net::FTP package.
|
---|
| 40 |
|
---|
| 41 | local $_ = shift;
|
---|
| 42 | my $mode = 0;
|
---|
| 43 | my($type,$ch);
|
---|
| 44 |
|
---|
| 45 | s/^(.)// and $type = $1;
|
---|
| 46 |
|
---|
| 47 | while (/(.)/g) {
|
---|
| 48 | $mode <<= 1;
|
---|
| 49 | $mode |= 1 if $1 ne "-" &&
|
---|
| 50 | $1 ne 'S' &&
|
---|
| 51 | $1 ne 't' &&
|
---|
| 52 | $1 ne 'T';
|
---|
| 53 | }
|
---|
| 54 |
|
---|
| 55 | $type eq "d" and $mode |= 0040000 or # Directory
|
---|
| 56 | $type eq "l" and $mode |= 0120000 or # Symbolic Link
|
---|
| 57 | $mode |= 0100000; # Regular File
|
---|
| 58 |
|
---|
| 59 | $mode |= 0004000 if /^...s....../i;
|
---|
| 60 | $mode |= 0002000 if /^......s.../i;
|
---|
| 61 | $mode |= 0001000 if /^.........t/i;
|
---|
| 62 |
|
---|
| 63 | $mode;
|
---|
| 64 | }
|
---|
| 65 |
|
---|
| 66 |
|
---|
| 67 | sub parse
|
---|
| 68 | {
|
---|
| 69 | my($pkg, $dir, $tz, $error) = @_;
|
---|
| 70 |
|
---|
| 71 | # First let's try to determine what kind of dir parameter we have
|
---|
| 72 | # received. We allow both listings, reference to arrays and
|
---|
| 73 | # file handles to read from.
|
---|
| 74 |
|
---|
| 75 | if (ref($dir) eq 'ARRAY') {
|
---|
| 76 | # Already splitted up
|
---|
| 77 | }
|
---|
| 78 | elsif (ref($dir) eq 'GLOB') {
|
---|
| 79 | # A file handle
|
---|
| 80 | }
|
---|
| 81 | elsif (ref($dir)) {
|
---|
| 82 | Carp::croak("Illegal argument to parse_dir()");
|
---|
| 83 | }
|
---|
| 84 | elsif ($dir =~ /^\*\w+(::\w+)+$/) {
|
---|
| 85 | # This scalar looks like a file handle, so we assume it is
|
---|
| 86 | }
|
---|
| 87 | else {
|
---|
| 88 | # A normal scalar listing
|
---|
| 89 | $dir = [ split(/\n/, $dir) ];
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 | $pkg->init();
|
---|
| 93 |
|
---|
| 94 | my @files = ();
|
---|
| 95 | if (ref($dir) eq 'ARRAY') {
|
---|
| 96 | for (@$dir) {
|
---|
| 97 | push(@files, $pkg->line($_, $tz, $error));
|
---|
| 98 | }
|
---|
| 99 | }
|
---|
| 100 | else {
|
---|
| 101 | local($_);
|
---|
| 102 | while (<$dir>) {
|
---|
| 103 | chomp;
|
---|
| 104 | push(@files, $pkg->line($_, $tz, $error));
|
---|
| 105 | }
|
---|
| 106 | }
|
---|
| 107 | wantarray ? @files : \@files;
|
---|
| 108 | }
|
---|
| 109 |
|
---|
| 110 |
|
---|
| 111 |
|
---|
| 112 | package File::Listing::unix;
|
---|
| 113 |
|
---|
| 114 | use HTTP::Date qw(str2time);
|
---|
| 115 |
|
---|
| 116 | # A place to remember current directory from last line parsed.
|
---|
| 117 | use vars qw($curdir @ISA);
|
---|
| 118 |
|
---|
| 119 | @ISA = qw(File::Listing);
|
---|
| 120 |
|
---|
| 121 |
|
---|
| 122 |
|
---|
| 123 | sub init
|
---|
| 124 | {
|
---|
| 125 | $curdir = '';
|
---|
| 126 | }
|
---|
| 127 |
|
---|
| 128 |
|
---|
| 129 | sub line
|
---|
| 130 | {
|
---|
| 131 | shift; # package name
|
---|
| 132 | local($_) = shift;
|
---|
| 133 | my($tz, $error) = @_;
|
---|
| 134 |
|
---|
| 135 | s/\015//g;
|
---|
| 136 | #study;
|
---|
| 137 |
|
---|
| 138 | my ($kind, $size, $date, $name);
|
---|
| 139 | if (($kind, $size, $date, $name) =
|
---|
| 140 | /^([\-FlrwxsStTdD]{10}) # Type and permission bits
|
---|
| 141 | .* # Graps
|
---|
| 142 | \D(\d+) # File size
|
---|
| 143 | \s+ # Some space
|
---|
| 144 | (\w{3}\s+\d+\s+(?:\d{1,2}:\d{2}|\d{4})|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}) # Date
|
---|
| 145 | \s+ # Some more space
|
---|
| 146 | (.*)$ # File name
|
---|
| 147 | /x )
|
---|
| 148 |
|
---|
| 149 | {
|
---|
| 150 | return if $name eq '.' || $name eq '..';
|
---|
| 151 | $name = "$curdir/$name" if length $curdir;
|
---|
| 152 | my $type = '?';
|
---|
| 153 | if ($kind =~ /^l/ && $name =~ /(.*) -> (.*)/ ) {
|
---|
| 154 | $name = $1;
|
---|
| 155 | $type = "l $2";
|
---|
| 156 | }
|
---|
| 157 | elsif ($kind =~ /^[\-F]/) { # (hopefully) a regular file
|
---|
| 158 | $type = 'f';
|
---|
| 159 | }
|
---|
| 160 | elsif ($kind =~ /^[dD]/) {
|
---|
| 161 | $type = 'd';
|
---|
| 162 | $size = undef; # Don't believe the reported size
|
---|
| 163 | }
|
---|
| 164 | return [$name, $type, $size, str2time($date, $tz),
|
---|
| 165 | File::Listing::file_mode($kind)];
|
---|
| 166 |
|
---|
| 167 | }
|
---|
| 168 | elsif (/^(.+):$/ && !/^[dcbsp].*\s.*\s.*:$/ ) {
|
---|
| 169 | my $dir = $1;
|
---|
| 170 | return () if $dir eq '.';
|
---|
| 171 | $curdir = $dir;
|
---|
| 172 | return ();
|
---|
| 173 | }
|
---|
| 174 | elsif (/^[Tt]otal\s+(\d+)$/ || /^\s*$/) {
|
---|
| 175 | return ();
|
---|
| 176 | }
|
---|
| 177 | elsif (/not found/ || # OSF1, HPUX, and SunOS return
|
---|
| 178 | # "$file not found"
|
---|
| 179 | /No such file/ || # IRIX returns
|
---|
| 180 | # "UX:ls: ERROR: Cannot access $file: No such file or directory"
|
---|
| 181 | # Solaris returns
|
---|
| 182 | # "$file: No such file or directory"
|
---|
| 183 | /cannot find/ # Windows NT returns
|
---|
| 184 | # "The system cannot find the path specified."
|
---|
| 185 | ) {
|
---|
| 186 | return () unless defined $error;
|
---|
| 187 | &$error($_) if ref($error) eq 'CODE';
|
---|
| 188 | warn "Error: $_\n" if $error eq 'warn';
|
---|
| 189 | return ();
|
---|
| 190 | }
|
---|
| 191 | elsif ($_ eq '') { # AIX, and Linux return nothing
|
---|
| 192 | return () unless defined $error;
|
---|
| 193 | &$error("No such file or directory") if ref($error) eq 'CODE';
|
---|
| 194 | warn "Warning: No such file or directory\n" if $error eq 'warn';
|
---|
| 195 | return ();
|
---|
| 196 | }
|
---|
| 197 | else {
|
---|
| 198 | # parse failed, check if the dosftp parse understands it
|
---|
| 199 | File::Listing::dosftp->init();
|
---|
| 200 | return(File::Listing::dosftp->line($_,$tz,$error));
|
---|
| 201 | }
|
---|
| 202 |
|
---|
| 203 | }
|
---|
| 204 |
|
---|
| 205 |
|
---|
| 206 |
|
---|
| 207 | package File::Listing::dosftp;
|
---|
| 208 |
|
---|
| 209 | use HTTP::Date qw(str2time);
|
---|
| 210 |
|
---|
| 211 | # A place to remember current directory from last line parsed.
|
---|
| 212 | use vars qw($curdir @ISA);
|
---|
| 213 |
|
---|
| 214 | @ISA = qw(File::Listing);
|
---|
| 215 |
|
---|
| 216 |
|
---|
| 217 |
|
---|
| 218 | sub init
|
---|
| 219 | {
|
---|
| 220 | $curdir = '';
|
---|
| 221 | }
|
---|
| 222 |
|
---|
| 223 |
|
---|
| 224 | sub line
|
---|
| 225 | {
|
---|
| 226 | shift; # package name
|
---|
| 227 | local($_) = shift;
|
---|
| 228 | my($tz, $error) = @_;
|
---|
| 229 |
|
---|
| 230 | s/\015//g;
|
---|
| 231 |
|
---|
| 232 | my ($date, $size_or_dir, $name, $size);
|
---|
| 233 |
|
---|
| 234 | # 02-05-96 10:48AM 1415 src.slf
|
---|
| 235 | # 09-10-96 09:18AM <DIR> sl_util
|
---|
| 236 | if (($date, $size_or_dir, $name) =
|
---|
| 237 | /^(\d\d-\d\d-\d\d\s+\d\d:\d\d\wM) # Date and time info
|
---|
| 238 | \s+ # Some space
|
---|
| 239 | (<\w{3}>|\d+) # Dir or Size
|
---|
| 240 | \s+ # Some more space
|
---|
| 241 | (.+)$ # File name
|
---|
| 242 | /x )
|
---|
| 243 | {
|
---|
| 244 | return if $name eq '.' || $name eq '..';
|
---|
| 245 | $name = "$curdir/$name" if length $curdir;
|
---|
| 246 | my $type = '?';
|
---|
| 247 | if ($size_or_dir eq '<DIR>') {
|
---|
| 248 | $type = "d";
|
---|
| 249 | $size = ""; # directories have no size in the pc listing
|
---|
| 250 | }
|
---|
| 251 | else {
|
---|
| 252 | $type = 'f';
|
---|
| 253 | $size = $size_or_dir;
|
---|
| 254 | }
|
---|
| 255 | return [$name, $type, $size, str2time($date, $tz), undef];
|
---|
| 256 | }
|
---|
| 257 | else {
|
---|
| 258 | return () unless defined $error;
|
---|
| 259 | &$error($_) if ref($error) eq 'CODE';
|
---|
| 260 | warn "Can't parse: $_\n" if $error eq 'warn';
|
---|
| 261 | return ();
|
---|
| 262 | }
|
---|
| 263 |
|
---|
| 264 | }
|
---|
| 265 |
|
---|
| 266 |
|
---|
| 267 |
|
---|
| 268 | package File::Listing::vms;
|
---|
| 269 | @File::Listing::vms::ISA = qw(File::Listing);
|
---|
| 270 |
|
---|
| 271 | package File::Listing::netware;
|
---|
| 272 | @File::Listing::netware::ISA = qw(File::Listing);
|
---|
| 273 |
|
---|
| 274 |
|
---|
| 275 |
|
---|
| 276 | package File::Listing::apache;
|
---|
| 277 |
|
---|
| 278 | use vars qw(@ISA);
|
---|
| 279 |
|
---|
| 280 | @ISA = qw(File::Listing);
|
---|
| 281 |
|
---|
| 282 |
|
---|
| 283 | sub init { }
|
---|
| 284 |
|
---|
| 285 |
|
---|
| 286 | sub line {
|
---|
| 287 | shift; # package name
|
---|
| 288 | local($_) = shift;
|
---|
| 289 | my($tz, $error) = @_; # ignored for now...
|
---|
| 290 |
|
---|
| 291 | if (m!<A\s+HREF=\"([^\"]+)\">.*</A>.*?(\d+)-([a-zA-Z]+|\d+)-(\d+)\s+(\d+):(\d+)\s+(?:([\d\.]+[kMG]?|-))!i) {
|
---|
| 292 | my($filename, $filesize) = ($1, $7);
|
---|
| 293 | my($d,$m,$y, $H,$M) = ($2,$3,$4,$5,$6);
|
---|
| 294 | if ($m =~ /^\d+$/) {
|
---|
| 295 | ($d,$y) = ($y,$d) # iso date
|
---|
| 296 | }
|
---|
| 297 | else {
|
---|
| 298 | $m = _monthabbrev_number($m);
|
---|
| 299 | }
|
---|
| 300 |
|
---|
| 301 | $filesize = 0 if $filesize eq '-';
|
---|
| 302 | if ($filesize =~ s/k$//i) {
|
---|
| 303 | $filesize *= 1024;
|
---|
| 304 | }
|
---|
| 305 | elsif ($filesize =~ s/M$//) {
|
---|
| 306 | $filesize *= 1024*1024;
|
---|
| 307 | }
|
---|
| 308 | elsif ($filesize =~ s/G$//) {
|
---|
| 309 | $filesize *= 1024*1024*1024;
|
---|
| 310 | }
|
---|
| 311 | $filesize = int $filesize;
|
---|
| 312 |
|
---|
| 313 | require Time::Local;
|
---|
| 314 | my $filetime = Time::Local::timelocal(0,$M,$H,$d,$m-1,_guess_year($y)-1900);
|
---|
| 315 | my $filetype = ($filename =~ s|/$|| ? "d" : "f");
|
---|
| 316 | return [$filename, $filetype, $filesize, $filetime, undef];
|
---|
| 317 | }
|
---|
| 318 |
|
---|
| 319 | return ();
|
---|
| 320 | }
|
---|
| 321 |
|
---|
| 322 |
|
---|
| 323 | sub _guess_year {
|
---|
| 324 | my $y = shift;
|
---|
| 325 | if ($y >= 90) {
|
---|
| 326 | $y = 1900+$y;
|
---|
| 327 | }
|
---|
| 328 | elsif ($y < 100) {
|
---|
| 329 | $y = 2000+$y;
|
---|
| 330 | }
|
---|
| 331 | $y;
|
---|
| 332 | }
|
---|
| 333 |
|
---|
| 334 |
|
---|
| 335 | sub _monthabbrev_number {
|
---|
| 336 | my $mon = shift;
|
---|
| 337 | +{'Jan' => 1,
|
---|
| 338 | 'Feb' => 2,
|
---|
| 339 | 'Mar' => 3,
|
---|
| 340 | 'Apr' => 4,
|
---|
| 341 | 'May' => 5,
|
---|
| 342 | 'Jun' => 6,
|
---|
| 343 | 'Jul' => 7,
|
---|
| 344 | 'Aug' => 8,
|
---|
| 345 | 'Sep' => 9,
|
---|
| 346 | 'Oct' => 10,
|
---|
| 347 | 'Nov' => 11,
|
---|
| 348 | 'Dec' => 12,
|
---|
| 349 | }->{$mon};
|
---|
| 350 | }
|
---|
| 351 |
|
---|
| 352 |
|
---|
| 353 | 1;
|
---|
| 354 |
|
---|
| 355 | __END__
|
---|
| 356 |
|
---|
| 357 | =head1 NAME
|
---|
| 358 |
|
---|
| 359 | File::Listing - parse directory listing
|
---|
| 360 |
|
---|
| 361 | =head1 SYNOPSIS
|
---|
| 362 |
|
---|
| 363 | use File::Listing qw(parse_dir);
|
---|
| 364 | $ENV{LANG} = "C"; # dates in non-English locales not supported
|
---|
| 365 | for (parse_dir(`ls -l`)) {
|
---|
| 366 | ($name, $type, $size, $mtime, $mode) = @$_;
|
---|
| 367 | next if $type ne 'f'; # plain file
|
---|
| 368 | #...
|
---|
| 369 | }
|
---|
| 370 |
|
---|
| 371 | # directory listing can also be read from a file
|
---|
| 372 | open(LISTING, "zcat ls-lR.gz|");
|
---|
| 373 | $dir = parse_dir(\*LISTING, '+0000');
|
---|
| 374 |
|
---|
| 375 | =head1 DESCRIPTION
|
---|
| 376 |
|
---|
| 377 | This module exports a single function called parse_dir(), which can be
|
---|
| 378 | used to parse directory listings.
|
---|
| 379 |
|
---|
| 380 | The first parameter to parse_dir() is the directory listing to parse.
|
---|
| 381 | It can be a scalar, a reference to an array of directory lines or a
|
---|
| 382 | glob representing a filehandle to read the directory listing from.
|
---|
| 383 |
|
---|
| 384 | The second parameter is the time zone to use when parsing time stamps
|
---|
| 385 | in the listing. If this value is undefined, then the local time zone is
|
---|
| 386 | assumed.
|
---|
| 387 |
|
---|
| 388 | The third parameter is the type of listing to assume. Currently
|
---|
| 389 | supported formats are 'unix', 'apache' and 'dosftp'. The default
|
---|
| 390 | value 'unix'. Ideally, the listing type should be determined
|
---|
| 391 | automatically.
|
---|
| 392 |
|
---|
| 393 | The fourth parameter specifies how unparseable lines should be treated.
|
---|
| 394 | Values can be 'ignore', 'warn' or a code reference. Warn means that
|
---|
| 395 | the perl warn() function will be called. If a code reference is
|
---|
| 396 | passed, then this routine will be called and the return value from it
|
---|
| 397 | will be incorporated in the listing. The default is 'ignore'.
|
---|
| 398 |
|
---|
| 399 | Only the first parameter is mandatory.
|
---|
| 400 |
|
---|
| 401 | The return value from parse_dir() is a list of directory entries. In
|
---|
| 402 | a scalar context the return value is a reference to the list. The
|
---|
| 403 | directory entries are represented by an array consisting of [
|
---|
| 404 | $filename, $filetype, $filesize, $filetime, $filemode ]. The
|
---|
| 405 | $filetype value is one of the letters 'f', 'd', 'l' or '?'. The
|
---|
| 406 | $filetime value is the seconds since Jan 1, 1970. The
|
---|
| 407 | $filemode is a bitmask like the mode returned by stat().
|
---|
| 408 |
|
---|
| 409 | =head1 CREDITS
|
---|
| 410 |
|
---|
| 411 | Based on lsparse.pl (from Lee McLoughlin's ftp mirror package) and
|
---|
| 412 | Net::FTP's parse_dir (Graham Barr).
|
---|