source: main/trunk/greenstone2/perllib/cpan/LWP/Protocol/ftp.pm@ 27183

Last change on this file since 27183 was 27183, checked in by davidb, 11 years ago

Changing to using installed version of LWP that comes from libwww-perl, which is more self-contained than v6.x

File size: 16.4 KB
Line 
1package LWP::Protocol::ftp;
2
3# Implementation of the ftp protocol (RFC 959). We let the Net::FTP
4# package do all the dirty work.
5
6use Carp ();
7
8use HTTP::Status ();
9use HTTP::Negotiate ();
10use HTTP::Response ();
11use LWP::MediaTypes ();
12use File::Listing ();
13
14require LWP::Protocol;
15@ISA = qw(LWP::Protocol);
16
17use strict;
18eval {
19 package LWP::Protocol::MyFTP;
20
21 require Net::FTP;
22 Net::FTP->require_version(2.00);
23
24 use vars qw(@ISA);
25 @ISA=qw(Net::FTP);
26
27 sub new {
28 my $class = shift;
29
30 my $self = $class->SUPER::new(@_) || return undef;
31
32 my $mess = $self->message; # welcome message
33 $mess =~ s|\n.*||s; # only first line left
34 $mess =~ s|\s*ready\.?$||;
35 # Make the version number more HTTP like
36 $mess =~ s|\s*\(Version\s*|/| and $mess =~ s|\)$||;
37 ${*$self}{myftp_server} = $mess;
38 #$response->header("Server", $mess);
39
40 $self;
41 }
42
43 sub http_server {
44 my $self = shift;
45 ${*$self}{myftp_server};
46 }
47
48 sub home {
49 my $self = shift;
50 my $old = ${*$self}{myftp_home};
51 if (@_) {
52 ${*$self}{myftp_home} = shift;
53 }
54 $old;
55 }
56
57 sub go_home {
58 my $self = shift;
59 $self->cwd(${*$self}{myftp_home});
60 }
61
62 sub request_count {
63 my $self = shift;
64 ++${*$self}{myftp_reqcount};
65 }
66
67 sub ping {
68 my $self = shift;
69 return $self->go_home;
70 }
71
72};
73my $init_failed = $@;
74
75
76sub _connect {
77 my($self, $host, $port, $user, $account, $password, $timeout) = @_;
78
79 my $key;
80 my $conn_cache = $self->{ua}{conn_cache};
81 if ($conn_cache) {
82 $key = "$host:$port:$user";
83 $key .= ":$account" if defined($account);
84 if (my $ftp = $conn_cache->withdraw("ftp", $key)) {
85 if ($ftp->ping) {
86 # save it again
87 $conn_cache->deposit("ftp", $key, $ftp);
88 return $ftp;
89 }
90 }
91 }
92
93 # try to make a connection
94 my $ftp = LWP::Protocol::MyFTP->new($host,
95 Port => $port,
96 Timeout => $timeout,
97 LocalAddr => $self->{ua}{local_address},
98 );
99 # XXX Should be some what to pass on 'Passive' (header??)
100 unless ($ftp) {
101 $@ =~ s/^Net::FTP: //;
102 return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, $@);
103 }
104
105 unless ($ftp->login($user, $password, $account)) {
106 # Unauthorized. Let's fake a RC_UNAUTHORIZED response
107 my $mess = scalar($ftp->message);
108 $mess =~ s/\n$//;
109 my $res = HTTP::Response->new(&HTTP::Status::RC_UNAUTHORIZED, $mess);
110 $res->header("Server", $ftp->http_server);
111 $res->header("WWW-Authenticate", qq(Basic Realm="FTP login"));
112 return $res;
113 }
114
115 my $home = $ftp->pwd;
116 $ftp->home($home);
117
118 $conn_cache->deposit("ftp", $key, $ftp) if $conn_cache;
119
120 return $ftp;
121}
122
123
124sub request
125{
126 my($self, $request, $proxy, $arg, $size, $timeout) = @_;
127
128 $size = 4096 unless $size;
129
130 # check proxy
131 if (defined $proxy)
132 {
133 return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
134 'You can not proxy through the ftp');
135 }
136
137 my $url = $request->uri;
138 if ($url->scheme ne 'ftp') {
139 my $scheme = $url->scheme;
140 return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
141 "LWP::Protocol::ftp::request called for '$scheme'");
142 }
143
144 # check method
145 my $method = $request->method;
146
147 unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'PUT') {
148 return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
149 'Library does not allow method ' .
150 "$method for 'ftp:' URLs");
151 }
152
153 if ($init_failed) {
154 return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
155 $init_failed);
156 }
157
158 my $host = $url->host;
159 my $port = $url->port;
160 my $user = $url->user;
161 my $password = $url->password;
162
163 # If a basic autorization header is present than we prefer these over
164 # the username/password specified in the URL.
165 {
166 my($u,$p) = $request->authorization_basic;
167 if (defined $u) {
168 $user = $u;
169 $password = $p;
170 }
171 }
172
173 # We allow the account to be specified in the "Account" header
174 my $account = $request->header('Account');
175
176 my $ftp = $self->_connect($host, $port, $user, $account, $password, $timeout);
177 return $ftp if ref($ftp) eq "HTTP::Response"; # ugh!
178
179 # Create an initial response object
180 my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
181 $response->header(Server => $ftp->http_server);
182 $response->header('Client-Request-Num' => $ftp->request_count);
183 $response->request($request);
184
185 # Get & fix the path
186 my @path = grep { length } $url->path_segments;
187 my $remote_file = pop(@path);
188 $remote_file = '' unless defined $remote_file;
189
190 my $type;
191 if (ref $remote_file) {
192 my @params;
193 ($remote_file, @params) = @$remote_file;
194 for (@params) {
195 $type = $_ if s/^type=//;
196 }
197 }
198
199 if ($type && $type eq 'a') {
200 $ftp->ascii;
201 }
202 else {
203 $ftp->binary;
204 }
205
206 for (@path) {
207 unless ($ftp->cwd($_)) {
208 return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND,
209 "Can't chdir to $_");
210 }
211 }
212
213 if ($method eq 'GET' || $method eq 'HEAD') {
214 if (my $mod_time = $ftp->mdtm($remote_file)) {
215 $response->last_modified($mod_time);
216 if (my $ims = $request->if_modified_since) {
217 if ($mod_time <= $ims) {
218 $response->code(&HTTP::Status::RC_NOT_MODIFIED);
219 $response->message("Not modified");
220 return $response;
221 }
222 }
223 }
224
225 # We'll use this later to abort the transfer if necessary.
226 # if $max_size is defined, we need to abort early. Otherwise, it's
227 # a normal transfer
228 my $max_size = undef;
229
230 # Set resume location, if the client requested it
231 if ($request->header('Range') && $ftp->supported('REST'))
232 {
233 my $range_info = $request->header('Range');
234
235 # Change bytes=2772992-6781209 to just 2772992
236 my ($start_byte,$end_byte) = $range_info =~ /.*=\s*(\d+)-(\d+)?/;
237 if ( defined $start_byte && !defined $end_byte ) {
238
239 # open range -- only the start is specified
240
241 $ftp->restart( $start_byte );
242 # don't define $max_size, we don't want to abort early
243 }
244 elsif ( defined $start_byte && defined $end_byte &&
245 $start_byte >= 0 && $end_byte >= $start_byte ) {
246
247 $ftp->restart( $start_byte );
248 $max_size = $end_byte - $start_byte;
249 }
250 else {
251
252 return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
253 'Incorrect syntax for Range request');
254 }
255 }
256 elsif ($request->header('Range') && !$ftp->supported('REST'))
257 {
258 return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED,
259 "Server does not support resume.");
260 }
261
262 my $data; # the data handle
263 if (length($remote_file) and $data = $ftp->retr($remote_file)) {
264 my($type, @enc) = LWP::MediaTypes::guess_media_type($remote_file);
265 $response->header('Content-Type', $type) if $type;
266 for (@enc) {
267 $response->push_header('Content-Encoding', $_);
268 }
269 my $mess = $ftp->message;
270 if ($mess =~ /\((\d+)\s+bytes\)/) {
271 $response->header('Content-Length', "$1");
272 }
273
274 if ($method ne 'HEAD') {
275 # Read data from server
276 $response = $self->collect($arg, $response, sub {
277 my $content = '';
278 my $result = $data->read($content, $size);
279
280 # Stop early if we need to.
281 if (defined $max_size)
282 {
283 # We need an interface to Net::FTP::dataconn for getting
284 # the number of bytes already read
285 my $bytes_received = $data->bytes_read();
286
287 # We were already over the limit. (Should only happen
288 # once at the end.)
289 if ($bytes_received - length($content) > $max_size)
290 {
291 $content = '';
292 }
293 # We just went over the limit
294 elsif ($bytes_received > $max_size)
295 {
296 # Trim content
297 $content = substr($content, 0,
298 $max_size - ($bytes_received - length($content)) );
299 }
300 # We're under the limit
301 else
302 {
303 }
304 }
305
306 return \$content;
307 } );
308 }
309 # abort is needed for HEAD, it's == close if the transfer has
310 # already completed.
311 unless ($data->abort) {
312 # Something did not work too well. Note that we treat
313 # responses to abort() with code 0 in case of HEAD as ok
314 # (at least wu-ftpd 2.6.1(1) does that).
315 if ($method ne 'HEAD' || $ftp->code != 0) {
316 $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR);
317 $response->message("FTP close response: " . $ftp->code .
318 " " . $ftp->message);
319 }
320 }
321 }
322 elsif (!length($remote_file) || ( $ftp->code >= 400 && $ftp->code < 600 )) {
323 # not a plain file, try to list instead
324 if (length($remote_file) && !$ftp->cwd($remote_file)) {
325 return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND,
326 "File '$remote_file' not found");
327 }
328
329 # It should now be safe to try to list the directory
330 my @lsl = $ftp->dir;
331
332 # Try to figure out if the user want us to convert the
333 # directory listing to HTML.
334 my @variants =
335 (
336 ['html', 0.60, 'text/html' ],
337 ['dir', 1.00, 'text/ftp-dir-listing' ]
338 );
339 #$HTTP::Negotiate::DEBUG=1;
340 my $prefer = HTTP::Negotiate::choose(\@variants, $request);
341
342 my $content = '';
343
344 if (!defined($prefer)) {
345 return HTTP::Response->new(&HTTP::Status::RC_NOT_ACCEPTABLE,
346 "Neither HTML nor directory listing wanted");
347 }
348 elsif ($prefer eq 'html') {
349 $response->header('Content-Type' => 'text/html');
350 $content = "<HEAD><TITLE>File Listing</TITLE>\n";
351 my $base = $request->uri->clone;
352 my $path = $base->path;
353 $base->path("$path/") unless $path =~ m|/$|;
354 $content .= qq(<BASE HREF="$base">\n</HEAD>\n);
355 $content .= "<BODY>\n<UL>\n";
356 for (File::Listing::parse_dir(\@lsl, 'GMT')) {
357 my($name, $type, $size, $mtime, $mode) = @$_;
358 $content .= qq( <LI> <a href="$name">$name</a>);
359 $content .= " $size bytes" if $type eq 'f';
360 $content .= "\n";
361 }
362 $content .= "</UL></body>\n";
363 }
364 else {
365 $response->header('Content-Type', 'text/ftp-dir-listing');
366 $content = join("\n", @lsl, '');
367 }
368
369 $response->header('Content-Length', length($content));
370
371 if ($method ne 'HEAD') {
372 $response = $self->collect_once($arg, $response, $content);
373 }
374 }
375 else {
376 my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
377 "FTP return code " . $ftp->code);
378 $res->content_type("text/plain");
379 $res->content($ftp->message);
380 return $res;
381 }
382 }
383 elsif ($method eq 'PUT') {
384 # method must be PUT
385 unless (length($remote_file)) {
386 return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
387 "Must have a file name to PUT to");
388 }
389 my $data;
390 if ($data = $ftp->stor($remote_file)) {
391 my $content = $request->content;
392 my $bytes = 0;
393 if (defined $content) {
394 if (ref($content) eq 'SCALAR') {
395 $bytes = $data->write($$content, length($$content));
396 }
397 elsif (ref($content) eq 'CODE') {
398 my($buf, $n);
399 while (length($buf = &$content)) {
400 $n = $data->write($buf, length($buf));
401 last unless $n;
402 $bytes += $n;
403 }
404 }
405 elsif (!ref($content)) {
406 if (defined $content && length($content)) {
407 $bytes = $data->write($content, length($content));
408 }
409 }
410 else {
411 die "Bad content";
412 }
413 }
414 $data->close;
415
416 $response->code(&HTTP::Status::RC_CREATED);
417 $response->header('Content-Type', 'text/plain');
418 $response->content("$bytes bytes stored as $remote_file on $host\n")
419
420 }
421 else {
422 my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
423 "FTP return code " . $ftp->code);
424 $res->content_type("text/plain");
425 $res->content($ftp->message);
426 return $res;
427 }
428 }
429 else {
430 return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
431 "Illegal method $method");
432 }
433
434 $response;
435}
436
4371;
438
439__END__
440
441# This is what RFC 1738 has to say about FTP access:
442# --------------------------------------------------
443#
444# 3.2. FTP
445#
446# The FTP URL scheme is used to designate files and directories on
447# Internet hosts accessible using the FTP protocol (RFC959).
448#
449# A FTP URL follow the syntax described in Section 3.1. If :<port> is
450# omitted, the port defaults to 21.
451#
452# 3.2.1. FTP Name and Password
453#
454# A user name and password may be supplied; they are used in the ftp
455# "USER" and "PASS" commands after first making the connection to the
456# FTP server. If no user name or password is supplied and one is
457# requested by the FTP server, the conventions for "anonymous" FTP are
458# to be used, as follows:
459#
460# The user name "anonymous" is supplied.
461#
462# The password is supplied as the Internet e-mail address
463# of the end user accessing the resource.
464#
465# If the URL supplies a user name but no password, and the remote
466# server requests a password, the program interpreting the FTP URL
467# should request one from the user.
468#
469# 3.2.2. FTP url-path
470#
471# The url-path of a FTP URL has the following syntax:
472#
473# <cwd1>/<cwd2>/.../<cwdN>/<name>;type=<typecode>
474#
475# Where <cwd1> through <cwdN> and <name> are (possibly encoded) strings
476# and <typecode> is one of the characters "a", "i", or "d". The part
477# ";type=<typecode>" may be omitted. The <cwdx> and <name> parts may be
478# empty. The whole url-path may be omitted, including the "/"
479# delimiting it from the prefix containing user, password, host, and
480# port.
481#
482# The url-path is interpreted as a series of FTP commands as follows:
483#
484# Each of the <cwd> elements is to be supplied, sequentially, as the
485# argument to a CWD (change working directory) command.
486#
487# If the typecode is "d", perform a NLST (name list) command with
488# <name> as the argument, and interpret the results as a file
489# directory listing.
490#
491# Otherwise, perform a TYPE command with <typecode> as the argument,
492# and then access the file whose name is <name> (for example, using
493# the RETR command.)
494#
495# Within a name or CWD component, the characters "/" and ";" are
496# reserved and must be encoded. The components are decoded prior to
497# their use in the FTP protocol. In particular, if the appropriate FTP
498# sequence to access a particular file requires supplying a string
499# containing a "/" as an argument to a CWD or RETR command, it is
500# necessary to encode each "/".
501#
502# For example, the URL <URL:ftp://[email protected]/%2Fetc/motd> is
503# interpreted by FTP-ing to "host.dom", logging in as "myname"
504# (prompting for a password if it is asked for), and then executing
505# "CWD /etc" and then "RETR motd". This has a different meaning from
506# <URL:ftp://[email protected]/etc/motd> which would "CWD etc" and then
507# "RETR motd"; the initial "CWD" might be executed relative to the
508# default directory for "myname". On the other hand,
509# <URL:ftp://[email protected]//etc/motd>, would "CWD " with a null
510# argument, then "CWD etc", and then "RETR motd".
511#
512# FTP URLs may also be used for other operations; for example, it is
513# possible to update a file on a remote file server, or infer
514# information about it from the directory listings. The mechanism for
515# doing so is not spelled out here.
516#
517# 3.2.3. FTP Typecode is Optional
518#
519# The entire ;type=<typecode> part of a FTP URL is optional. If it is
520# omitted, the client program interpreting the URL must guess the
521# appropriate mode to use. In general, the data content type of a file
522# can only be guessed from the name, e.g., from the suffix of the name;
523# the appropriate type code to be used for transfer of the file can
524# then be deduced from the data content of the file.
525#
526# 3.2.4 Hierarchy
527#
528# For some file systems, the "/" used to denote the hierarchical
529# structure of the URL corresponds to the delimiter used to construct a
530# file name hierarchy, and thus, the filename will look similar to the
531# URL path. This does NOT mean that the URL is a Unix filename.
532#
533# 3.2.5. Optimization
534#
535# Clients accessing resources via FTP may employ additional heuristics
536# to optimize the interaction. For some FTP servers, for example, it
537# may be reasonable to keep the control connection open while accessing
538# multiple URLs from the same server. However, there is no common
539# hierarchical model to the FTP protocol, so if a directory change
540# command has been given, it is impossible in general to deduce what
541# sequence should be given to navigate to another directory for a
542# second retrieval, if the paths are different. The only reliable
543# algorithm is to disconnect and reestablish the control connection.
Note: See TracBrowser for help on using the repository browser.