source: main/trunk/greenstone2/perllib/cpan/LWP/Protocol/ftp.pm@ 27174

Last change on this file since 27174 was 27174, checked in by davidb, 11 years ago

Perl modules from CPAN that are used in supporting activate.pl, but not part of the Perl core. Only PMs included.

File size: 16.4 KB
Line 
1package LWP::Protocol::ftp;
2
3# Implementation of the ftp protocol (RFC 959). We let the Net::FTP
4# package do all the dirty work.
5
6use Carp ();
7
8use HTTP::Status ();
9use HTTP::Negotiate ();
10use HTTP::Response ();
11use LWP::MediaTypes ();
12use File::Listing ();
13
14require LWP::Protocol;
15@ISA = qw(LWP::Protocol);
16
17use strict;
18eval {
19 package LWP::Protocol::MyFTP;
20
21 require Net::FTP;
22 Net::FTP->require_version(2.00);
23
24 use vars qw(@ISA);
25 @ISA=qw(Net::FTP);
26
27 sub new {
28 my $class = shift;
29
30 my $self = $class->SUPER::new(@_) || return undef;
31
32 my $mess = $self->message; # welcome message
33 $mess =~ s|\n.*||s; # only first line left
34 $mess =~ s|\s*ready\.?$||;
35 # Make the version number more HTTP like
36 $mess =~ s|\s*\(Version\s*|/| and $mess =~ s|\)$||;
37 ${*$self}{myftp_server} = $mess;
38 #$response->header("Server", $mess);
39
40 $self;
41 }
42
43 sub http_server {
44 my $self = shift;
45 ${*$self}{myftp_server};
46 }
47
48 sub home {
49 my $self = shift;
50 my $old = ${*$self}{myftp_home};
51 if (@_) {
52 ${*$self}{myftp_home} = shift;
53 }
54 $old;
55 }
56
57 sub go_home {
58 my $self = shift;
59 $self->cwd(${*$self}{myftp_home});
60 }
61
62 sub request_count {
63 my $self = shift;
64 ++${*$self}{myftp_reqcount};
65 }
66
67 sub ping {
68 my $self = shift;
69 return $self->go_home;
70 }
71
72};
73my $init_failed = $@;
74
75
76sub _connect {
77 my($self, $host, $port, $user, $account, $password, $timeout) = @_;
78
79 my $key;
80 my $conn_cache = $self->{ua}{conn_cache};
81 if ($conn_cache) {
82 $key = "$host:$port:$user";
83 $key .= ":$account" if defined($account);
84 if (my $ftp = $conn_cache->withdraw("ftp", $key)) {
85 if ($ftp->ping) {
86 # save it again
87 $conn_cache->deposit("ftp", $key, $ftp);
88 return $ftp;
89 }
90 }
91 }
92
93 # try to make a connection
94 my $ftp = LWP::Protocol::MyFTP->new($host,
95 Port => $port,
96 Timeout => $timeout,
97 LocalAddr => $self->{ua}{local_address},
98 );
99 # XXX Should be some what to pass on 'Passive' (header??)
100 unless ($ftp) {
101 $@ =~ s/^Net::FTP: //;
102 return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, $@);
103 }
104
105 unless ($ftp->login($user, $password, $account)) {
106 # Unauthorized. Let's fake a RC_UNAUTHORIZED response
107 my $mess = scalar($ftp->message);
108 $mess =~ s/\n$//;
109 my $res = HTTP::Response->new(&HTTP::Status::RC_UNAUTHORIZED, $mess);
110 $res->header("Server", $ftp->http_server);
111 $res->header("WWW-Authenticate", qq(Basic Realm="FTP login"));
112 return $res;
113 }
114
115 my $home = $ftp->pwd;
116 $ftp->home($home);
117
118 $conn_cache->deposit("ftp", $key, $ftp) if $conn_cache;
119
120 return $ftp;
121}
122
123
124sub request
125{
126 my($self, $request, $proxy, $arg, $size, $timeout) = @_;
127
128 $size = 4096 unless $size;
129
130 # check proxy
131 if (defined $proxy)
132 {
133 return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
134 'You can not proxy through the ftp');
135 }
136
137 my $url = $request->uri;
138 if ($url->scheme ne 'ftp') {
139 my $scheme = $url->scheme;
140 return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
141 "LWP::Protocol::ftp::request called for '$scheme'");
142 }
143
144 # check method
145 my $method = $request->method;
146
147 unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'PUT') {
148 return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
149 'Library does not allow method ' .
150 "$method for 'ftp:' URLs");
151 }
152
153 if ($init_failed) {
154 return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
155 $init_failed);
156 }
157
158 my $host = $url->host;
159 my $port = $url->port;
160 my $user = $url->user;
161 my $password = $url->password;
162
163 # If a basic autorization header is present than we prefer these over
164 # the username/password specified in the URL.
165 {
166 my($u,$p) = $request->authorization_basic;
167 if (defined $u) {
168 $user = $u;
169 $password = $p;
170 }
171 }
172
173 # We allow the account to be specified in the "Account" header
174 my $account = $request->header('Account');
175
176 my $ftp = $self->_connect($host, $port, $user, $account, $password, $timeout);
177 return $ftp if ref($ftp) eq "HTTP::Response"; # ugh!
178
179 # Create an initial response object
180 my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
181 $response->header(Server => $ftp->http_server);
182 $response->header('Client-Request-Num' => $ftp->request_count);
183 $response->request($request);
184
185 # Get & fix the path
186 my @path = grep { length } $url->path_segments;
187 my $remote_file = pop(@path);
188 $remote_file = '' unless defined $remote_file;
189
190 my $type;
191 if (ref $remote_file) {
192 my @params;
193 ($remote_file, @params) = @$remote_file;
194 for (@params) {
195 $type = $_ if s/^type=//;
196 }
197 }
198
199 if ($type && $type eq 'a') {
200 $ftp->ascii;
201 }
202 else {
203 $ftp->binary;
204 }
205
206 for (@path) {
207 unless ($ftp->cwd($_)) {
208 return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND,
209 "Can't chdir to $_");
210 }
211 }
212
213 if ($method eq 'GET' || $method eq 'HEAD') {
214 if (my $mod_time = $ftp->mdtm($remote_file)) {
215 $response->last_modified($mod_time);
216 if (my $ims = $request->if_modified_since) {
217 if ($mod_time <= $ims) {
218 $response->code(&HTTP::Status::RC_NOT_MODIFIED);
219 $response->message("Not modified");
220 return $response;
221 }
222 }
223 }
224
225 # We'll use this later to abort the transfer if necessary.
226 # if $max_size is defined, we need to abort early. Otherwise, it's
227 # a normal transfer
228 my $max_size = undef;
229
230 # Set resume location, if the client requested it
231 if ($request->header('Range') && $ftp->supported('REST'))
232 {
233 my $range_info = $request->header('Range');
234
235 # Change bytes=2772992-6781209 to just 2772992
236 my ($start_byte,$end_byte) = $range_info =~ /.*=\s*(\d+)-(\d+)?/;
237 if ( defined $start_byte && !defined $end_byte ) {
238
239 # open range -- only the start is specified
240
241 $ftp->restart( $start_byte );
242 # don't define $max_size, we don't want to abort early
243 }
244 elsif ( defined $start_byte && defined $end_byte &&
245 $start_byte >= 0 && $end_byte >= $start_byte ) {
246
247 $ftp->restart( $start_byte );
248 $max_size = $end_byte - $start_byte;
249 }
250 else {
251
252 return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
253 'Incorrect syntax for Range request');
254 }
255 }
256 elsif ($request->header('Range') && !$ftp->supported('REST'))
257 {
258 return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED,
259 "Server does not support resume.");
260 }
261
262 my $data; # the data handle
263 if (length($remote_file) and $data = $ftp->retr($remote_file)) {
264 my($type, @enc) = LWP::MediaTypes::guess_media_type($remote_file);
265 $response->header('Content-Type', $type) if $type;
266 for (@enc) {
267 $response->push_header('Content-Encoding', $_);
268 }
269 my $mess = $ftp->message;
270 if ($mess =~ /\((\d+)\s+bytes\)/) {
271 $response->header('Content-Length', "$1");
272 }
273
274 if ($method ne 'HEAD') {
275 # Read data from server
276 $response = $self->collect($arg, $response, sub {
277 my $content = '';
278 my $result = $data->read($content, $size);
279
280 # Stop early if we need to.
281 if (defined $max_size)
282 {
283 # We need an interface to Net::FTP::dataconn for getting
284 # the number of bytes already read
285 my $bytes_received = $data->bytes_read();
286
287 # We were already over the limit. (Should only happen
288 # once at the end.)
289 if ($bytes_received - length($content) > $max_size)
290 {
291 $content = '';
292 }
293 # We just went over the limit
294 elsif ($bytes_received > $max_size)
295 {
296 # Trim content
297 $content = substr($content, 0,
298 $max_size - ($bytes_received - length($content)) );
299 }
300 # We're under the limit
301 else
302 {
303 }
304 }
305
306 return \$content;
307 } );
308 }
309 # abort is needed for HEAD, it's == close if the transfer has
310 # already completed.
311 unless ($data->abort) {
312 # Something did not work too well. Note that we treat
313 # responses to abort() with code 0 in case of HEAD as ok
314 # (at least wu-ftpd 2.6.1(1) does that).
315 if ($method ne 'HEAD' || $ftp->code != 0) {
316 $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR);
317 $response->message("FTP close response: " . $ftp->code .
318 " " . $ftp->message);
319 }
320 }
321 }
322 elsif (!length($remote_file) || ( $ftp->code >= 400 && $ftp->code < 600 )) {
323 # not a plain file, try to list instead
324 if (length($remote_file) && !$ftp->cwd($remote_file)) {
325 return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND,
326 "File '$remote_file' not found");
327 }
328
329 # It should now be safe to try to list the directory
330 my @lsl = $ftp->dir;
331
332 # Try to figure out if the user want us to convert the
333 # directory listing to HTML.
334 my @variants =
335 (
336 ['html', 0.60, 'text/html' ],
337 ['dir', 1.00, 'text/ftp-dir-listing' ]
338 );
339 #$HTTP::Negotiate::DEBUG=1;
340 my $prefer = HTTP::Negotiate::choose(\@variants, $request);
341
342 my $content = '';
343
344 if (!defined($prefer)) {
345 return HTTP::Response->new(&HTTP::Status::RC_NOT_ACCEPTABLE,
346 "Neither HTML nor directory listing wanted");
347 }
348 elsif ($prefer eq 'html') {
349 $response->header('Content-Type' => 'text/html');
350 $content = "<HEAD><TITLE>File Listing</TITLE>\n";
351 my $base = $request->uri->clone;
352 my $path = $base->path;
353 $base->path("$path/") unless $path =~ m|/$|;
354 $content .= qq(<BASE HREF="$base">\n</HEAD>\n);
355 $content .= "<BODY>\n<UL>\n";
356 for (File::Listing::parse_dir(\@lsl, 'GMT')) {
357 my($name, $type, $size, $mtime, $mode) = @$_;
358 $content .= qq( <LI> <a href="$name">$name</a>);
359 $content .= " $size bytes" if $type eq 'f';
360 $content .= "\n";
361 }
362 $content .= "</UL></body>\n";
363 }
364 else {
365 $response->header('Content-Type', 'text/ftp-dir-listing');
366 $content = join("\n", @lsl, '');
367 }
368
369 $response->header('Content-Length', length($content));
370
371 if ($method ne 'HEAD') {
372 $response = $self->collect_once($arg, $response, $content);
373 }
374 }
375 else {
376 my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
377 "FTP return code " . $ftp->code);
378 $res->content_type("text/plain");
379 $res->content($ftp->message);
380 return $res;
381 }
382 }
383 elsif ($method eq 'PUT') {
384 # method must be PUT
385 unless (length($remote_file)) {
386 return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
387 "Must have a file name to PUT to");
388 }
389 my $data;
390 if ($data = $ftp->stor($remote_file)) {
391 my $content = $request->content;
392 my $bytes = 0;
393 if (defined $content) {
394 if (ref($content) eq 'SCALAR') {
395 $bytes = $data->write($$content, length($$content));
396 }
397 elsif (ref($content) eq 'CODE') {
398 my($buf, $n);
399 while (length($buf = &$content)) {
400 $n = $data->write($buf, length($buf));
401 last unless $n;
402 $bytes += $n;
403 }
404 }
405 elsif (!ref($content)) {
406 if (defined $content && length($content)) {
407 $bytes = $data->write($content, length($content));
408 }
409 }
410 else {
411 die "Bad content";
412 }
413 }
414 $data->close;
415
416 $response->code(&HTTP::Status::RC_CREATED);
417 $response->header('Content-Type', 'text/plain');
418 $response->content("$bytes bytes stored as $remote_file on $host\n")
419
420 }
421 else {
422 my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
423 "FTP return code " . $ftp->code);
424 $res->content_type("text/plain");
425 $res->content($ftp->message);
426 return $res;
427 }
428 }
429 else {
430 return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
431 "Illegal method $method");
432 }
433
434 $response;
435}
436
4371;
438
439__END__
440
441# This is what RFC 1738 has to say about FTP access:
442# --------------------------------------------------
443#
444# 3.2. FTP
445#
446# The FTP URL scheme is used to designate files and directories on
447# Internet hosts accessible using the FTP protocol (RFC959).
448#
449# A FTP URL follow the syntax described in Section 3.1. If :<port> is
450# omitted, the port defaults to 21.
451#
452# 3.2.1. FTP Name and Password
453#
454# A user name and password may be supplied; they are used in the ftp
455# "USER" and "PASS" commands after first making the connection to the
456# FTP server. If no user name or password is supplied and one is
457# requested by the FTP server, the conventions for "anonymous" FTP are
458# to be used, as follows:
459#
460# The user name "anonymous" is supplied.
461#
462# The password is supplied as the Internet e-mail address
463# of the end user accessing the resource.
464#
465# If the URL supplies a user name but no password, and the remote
466# server requests a password, the program interpreting the FTP URL
467# should request one from the user.
468#
469# 3.2.2. FTP url-path
470#
471# The url-path of a FTP URL has the following syntax:
472#
473# <cwd1>/<cwd2>/.../<cwdN>/<name>;type=<typecode>
474#
475# Where <cwd1> through <cwdN> and <name> are (possibly encoded) strings
476# and <typecode> is one of the characters "a", "i", or "d". The part
477# ";type=<typecode>" may be omitted. The <cwdx> and <name> parts may be
478# empty. The whole url-path may be omitted, including the "/"
479# delimiting it from the prefix containing user, password, host, and
480# port.
481#
482# The url-path is interpreted as a series of FTP commands as follows:
483#
484# Each of the <cwd> elements is to be supplied, sequentially, as the
485# argument to a CWD (change working directory) command.
486#
487# If the typecode is "d", perform a NLST (name list) command with
488# <name> as the argument, and interpret the results as a file
489# directory listing.
490#
491# Otherwise, perform a TYPE command with <typecode> as the argument,
492# and then access the file whose name is <name> (for example, using
493# the RETR command.)
494#
495# Within a name or CWD component, the characters "/" and ";" are
496# reserved and must be encoded. The components are decoded prior to
497# their use in the FTP protocol. In particular, if the appropriate FTP
498# sequence to access a particular file requires supplying a string
499# containing a "/" as an argument to a CWD or RETR command, it is
500# necessary to encode each "/".
501#
502# For example, the URL <URL:ftp://[email protected]/%2Fetc/motd> is
503# interpreted by FTP-ing to "host.dom", logging in as "myname"
504# (prompting for a password if it is asked for), and then executing
505# "CWD /etc" and then "RETR motd". This has a different meaning from
506# <URL:ftp://[email protected]/etc/motd> which would "CWD etc" and then
507# "RETR motd"; the initial "CWD" might be executed relative to the
508# default directory for "myname". On the other hand,
509# <URL:ftp://[email protected]//etc/motd>, would "CWD " with a null
510# argument, then "CWD etc", and then "RETR motd".
511#
512# FTP URLs may also be used for other operations; for example, it is
513# possible to update a file on a remote file server, or infer
514# information about it from the directory listings. The mechanism for
515# doing so is not spelled out here.
516#
517# 3.2.3. FTP Typecode is Optional
518#
519# The entire ;type=<typecode> part of a FTP URL is optional. If it is
520# omitted, the client program interpreting the URL must guess the
521# appropriate mode to use. In general, the data content type of a file
522# can only be guessed from the name, e.g., from the suffix of the name;
523# the appropriate type code to be used for transfer of the file can
524# then be deduced from the data content of the file.
525#
526# 3.2.4 Hierarchy
527#
528# For some file systems, the "/" used to denote the hierarchical
529# structure of the URL corresponds to the delimiter used to construct a
530# file name hierarchy, and thus, the filename will look similar to the
531# URL path. This does NOT mean that the URL is a Unix filename.
532#
533# 3.2.5. Optimization
534#
535# Clients accessing resources via FTP may employ additional heuristics
536# to optimize the interaction. For some FTP servers, for example, it
537# may be reasonable to keep the control connection open while accessing
538# multiple URLs from the same server. However, there is no common
539# hierarchical model to the FTP protocol, so if a directory change
540# command has been given, it is impossible in general to deduce what
541# sequence should be given to navigate to another directory for a
542# second retrieval, if the paths are different. The only reliable
543# algorithm is to disconnect and reestablish the control connection.
Note: See TracBrowser for help on using the repository browser.