1 | package LWP::UserAgent;
|
---|
2 |
|
---|
3 | use strict;
|
---|
4 | use vars qw(@ISA $VERSION);
|
---|
5 |
|
---|
6 | require LWP::MemberMixin;
|
---|
7 | @ISA = qw(LWP::MemberMixin);
|
---|
8 | $VERSION = "6.05";
|
---|
9 |
|
---|
10 | use HTTP::Request ();
|
---|
11 | use HTTP::Response ();
|
---|
12 | use HTTP::Date ();
|
---|
13 |
|
---|
14 | use LWP ();
|
---|
15 | use LWP::Protocol ();
|
---|
16 |
|
---|
17 | use Carp ();
|
---|
18 |
|
---|
19 |
|
---|
20 | sub new
|
---|
21 | {
|
---|
22 | # Check for common user mistake
|
---|
23 | Carp::croak("Options to LWP::UserAgent should be key/value pairs, not hash reference")
|
---|
24 | if ref($_[1]) eq 'HASH';
|
---|
25 |
|
---|
26 | my($class, %cnf) = @_;
|
---|
27 |
|
---|
28 | my $agent = delete $cnf{agent};
|
---|
29 | my $from = delete $cnf{from};
|
---|
30 | my $def_headers = delete $cnf{default_headers};
|
---|
31 | my $timeout = delete $cnf{timeout};
|
---|
32 | $timeout = 3*60 unless defined $timeout;
|
---|
33 | my $local_address = delete $cnf{local_address};
|
---|
34 | my $ssl_opts = delete $cnf{ssl_opts} || {};
|
---|
35 | unless (exists $ssl_opts->{verify_hostname}) {
|
---|
36 | # The processing of HTTPS_CA_* below is for compatibility with Crypt::SSLeay
|
---|
37 | if (exists $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME}) {
|
---|
38 | $ssl_opts->{verify_hostname} = $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME};
|
---|
39 | }
|
---|
40 | elsif ($ENV{HTTPS_CA_FILE} || $ENV{HTTPS_CA_DIR}) {
|
---|
41 | # Crypt-SSLeay compatibility (verify peer certificate; but not the hostname)
|
---|
42 | $ssl_opts->{verify_hostname} = 0;
|
---|
43 | $ssl_opts->{SSL_verify_mode} = 1;
|
---|
44 | }
|
---|
45 | else {
|
---|
46 | $ssl_opts->{verify_hostname} = 1;
|
---|
47 | }
|
---|
48 | }
|
---|
49 | unless (exists $ssl_opts->{SSL_ca_file}) {
|
---|
50 | if (my $ca_file = $ENV{PERL_LWP_SSL_CA_FILE} || $ENV{HTTPS_CA_FILE}) {
|
---|
51 | $ssl_opts->{SSL_ca_file} = $ca_file;
|
---|
52 | }
|
---|
53 | }
|
---|
54 | unless (exists $ssl_opts->{SSL_ca_path}) {
|
---|
55 | if (my $ca_path = $ENV{PERL_LWP_SSL_CA_PATH} || $ENV{HTTPS_CA_DIR}) {
|
---|
56 | $ssl_opts->{SSL_ca_path} = $ca_path;
|
---|
57 | }
|
---|
58 | }
|
---|
59 | my $use_eval = delete $cnf{use_eval};
|
---|
60 | $use_eval = 1 unless defined $use_eval;
|
---|
61 | my $parse_head = delete $cnf{parse_head};
|
---|
62 | $parse_head = 1 unless defined $parse_head;
|
---|
63 | my $show_progress = delete $cnf{show_progress};
|
---|
64 | my $max_size = delete $cnf{max_size};
|
---|
65 | my $max_redirect = delete $cnf{max_redirect};
|
---|
66 | $max_redirect = 7 unless defined $max_redirect;
|
---|
67 | my $env_proxy = exists $cnf{env_proxy} ? delete $cnf{env_proxy} : $ENV{PERL_LWP_ENV_PROXY};
|
---|
68 |
|
---|
69 | my $cookie_jar = delete $cnf{cookie_jar};
|
---|
70 | my $conn_cache = delete $cnf{conn_cache};
|
---|
71 | my $keep_alive = delete $cnf{keep_alive};
|
---|
72 |
|
---|
73 | Carp::croak("Can't mix conn_cache and keep_alive")
|
---|
74 | if $conn_cache && $keep_alive;
|
---|
75 |
|
---|
76 | my $protocols_allowed = delete $cnf{protocols_allowed};
|
---|
77 | my $protocols_forbidden = delete $cnf{protocols_forbidden};
|
---|
78 |
|
---|
79 | my $requests_redirectable = delete $cnf{requests_redirectable};
|
---|
80 | $requests_redirectable = ['GET', 'HEAD']
|
---|
81 | unless defined $requests_redirectable;
|
---|
82 |
|
---|
83 | # Actually ""s are just as good as 0's, but for concision we'll just say:
|
---|
84 | Carp::croak("protocols_allowed has to be an arrayref or 0, not \"$protocols_allowed\"!")
|
---|
85 | if $protocols_allowed and ref($protocols_allowed) ne 'ARRAY';
|
---|
86 | Carp::croak("protocols_forbidden has to be an arrayref or 0, not \"$protocols_forbidden\"!")
|
---|
87 | if $protocols_forbidden and ref($protocols_forbidden) ne 'ARRAY';
|
---|
88 | Carp::croak("requests_redirectable has to be an arrayref or 0, not \"$requests_redirectable\"!")
|
---|
89 | if $requests_redirectable and ref($requests_redirectable) ne 'ARRAY';
|
---|
90 |
|
---|
91 |
|
---|
92 | if (%cnf && $^W) {
|
---|
93 | Carp::carp("Unrecognized LWP::UserAgent options: @{[sort keys %cnf]}");
|
---|
94 | }
|
---|
95 |
|
---|
96 | my $self = bless {
|
---|
97 | def_headers => $def_headers,
|
---|
98 | timeout => $timeout,
|
---|
99 | local_address => $local_address,
|
---|
100 | ssl_opts => $ssl_opts,
|
---|
101 | use_eval => $use_eval,
|
---|
102 | show_progress=> $show_progress,
|
---|
103 | max_size => $max_size,
|
---|
104 | max_redirect => $max_redirect,
|
---|
105 | proxy => {},
|
---|
106 | no_proxy => [],
|
---|
107 | protocols_allowed => $protocols_allowed,
|
---|
108 | protocols_forbidden => $protocols_forbidden,
|
---|
109 | requests_redirectable => $requests_redirectable,
|
---|
110 | }, $class;
|
---|
111 |
|
---|
112 | $self->agent(defined($agent) ? $agent : $class->_agent)
|
---|
113 | if defined($agent) || !$def_headers || !$def_headers->header("User-Agent");
|
---|
114 | $self->from($from) if $from;
|
---|
115 | $self->cookie_jar($cookie_jar) if $cookie_jar;
|
---|
116 | $self->parse_head($parse_head);
|
---|
117 | $self->env_proxy if $env_proxy;
|
---|
118 |
|
---|
119 | $self->protocols_allowed( $protocols_allowed ) if $protocols_allowed;
|
---|
120 | $self->protocols_forbidden($protocols_forbidden) if $protocols_forbidden;
|
---|
121 |
|
---|
122 | if ($keep_alive) {
|
---|
123 | $conn_cache ||= { total_capacity => $keep_alive };
|
---|
124 | }
|
---|
125 | $self->conn_cache($conn_cache) if $conn_cache;
|
---|
126 |
|
---|
127 | return $self;
|
---|
128 | }
|
---|
129 |
|
---|
130 |
|
---|
131 | sub send_request
|
---|
132 | {
|
---|
133 | my($self, $request, $arg, $size) = @_;
|
---|
134 | my($method, $url) = ($request->method, $request->uri);
|
---|
135 | my $scheme = $url->scheme;
|
---|
136 |
|
---|
137 | local($SIG{__DIE__}); # protect against user defined die handlers
|
---|
138 |
|
---|
139 | $self->progress("begin", $request);
|
---|
140 |
|
---|
141 | my $response = $self->run_handlers("request_send", $request);
|
---|
142 |
|
---|
143 | unless ($response) {
|
---|
144 | my $protocol;
|
---|
145 |
|
---|
146 | {
|
---|
147 | # Honor object-specific restrictions by forcing protocol objects
|
---|
148 | # into class LWP::Protocol::nogo.
|
---|
149 | my $x;
|
---|
150 | if($x = $self->protocols_allowed) {
|
---|
151 | if (grep lc($_) eq $scheme, @$x) {
|
---|
152 | }
|
---|
153 | else {
|
---|
154 | require LWP::Protocol::nogo;
|
---|
155 | $protocol = LWP::Protocol::nogo->new;
|
---|
156 | }
|
---|
157 | }
|
---|
158 | elsif ($x = $self->protocols_forbidden) {
|
---|
159 | if(grep lc($_) eq $scheme, @$x) {
|
---|
160 | require LWP::Protocol::nogo;
|
---|
161 | $protocol = LWP::Protocol::nogo->new;
|
---|
162 | }
|
---|
163 | }
|
---|
164 | # else fall thru and create the protocol object normally
|
---|
165 | }
|
---|
166 |
|
---|
167 | # Locate protocol to use
|
---|
168 | my $proxy = $request->{proxy};
|
---|
169 | if ($proxy) {
|
---|
170 | $scheme = $proxy->scheme;
|
---|
171 | }
|
---|
172 |
|
---|
173 | unless ($protocol) {
|
---|
174 | $protocol = eval { LWP::Protocol::create($scheme, $self) };
|
---|
175 | if ($@) {
|
---|
176 | $@ =~ s/ at .* line \d+.*//s; # remove file/line number
|
---|
177 | $response = _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
|
---|
178 | if ($scheme eq "https") {
|
---|
179 | $response->message($response->message . " (LWP::Protocol::https not installed)");
|
---|
180 | $response->content_type("text/plain");
|
---|
181 | $response->content(<<EOT);
|
---|
182 | LWP will support https URLs if the LWP::Protocol::https module
|
---|
183 | is installed.
|
---|
184 | EOT
|
---|
185 | }
|
---|
186 | }
|
---|
187 | }
|
---|
188 |
|
---|
189 | if (!$response && $self->{use_eval}) {
|
---|
190 | # we eval, and turn dies into responses below
|
---|
191 | eval {
|
---|
192 | $response = $protocol->request($request, $proxy, $arg, $size, $self->{timeout}) ||
|
---|
193 | die "No response returned by $protocol";
|
---|
194 | };
|
---|
195 | if ($@) {
|
---|
196 | if (UNIVERSAL::isa($@, "HTTP::Response")) {
|
---|
197 | $response = $@;
|
---|
198 | $response->request($request);
|
---|
199 | }
|
---|
200 | else {
|
---|
201 | my $full = $@;
|
---|
202 | (my $status = $@) =~ s/\n.*//s;
|
---|
203 | $status =~ s/ at .* line \d+.*//s; # remove file/line number
|
---|
204 | my $code = ($status =~ s/^(\d\d\d)\s+//) ? $1 : &HTTP::Status::RC_INTERNAL_SERVER_ERROR;
|
---|
205 | $response = _new_response($request, $code, $status, $full);
|
---|
206 | }
|
---|
207 | }
|
---|
208 | }
|
---|
209 | elsif (!$response) {
|
---|
210 | $response = $protocol->request($request, $proxy,
|
---|
211 | $arg, $size, $self->{timeout});
|
---|
212 | # XXX: Should we die unless $response->is_success ???
|
---|
213 | }
|
---|
214 | }
|
---|
215 |
|
---|
216 | $response->request($request); # record request for reference
|
---|
217 | $response->header("Client-Date" => HTTP::Date::time2str(time));
|
---|
218 |
|
---|
219 | $self->run_handlers("response_done", $response);
|
---|
220 |
|
---|
221 | $self->progress("end", $response);
|
---|
222 | return $response;
|
---|
223 | }
|
---|
224 |
|
---|
225 |
|
---|
226 | sub prepare_request
|
---|
227 | {
|
---|
228 | my($self, $request) = @_;
|
---|
229 | die "Method missing" unless $request->method;
|
---|
230 | my $url = $request->uri;
|
---|
231 | die "URL missing" unless $url;
|
---|
232 | die "URL must be absolute" unless $url->scheme;
|
---|
233 |
|
---|
234 | $self->run_handlers("request_preprepare", $request);
|
---|
235 |
|
---|
236 | if (my $def_headers = $self->{def_headers}) {
|
---|
237 | for my $h ($def_headers->header_field_names) {
|
---|
238 | $request->init_header($h => [$def_headers->header($h)]);
|
---|
239 | }
|
---|
240 | }
|
---|
241 |
|
---|
242 | $self->run_handlers("request_prepare", $request);
|
---|
243 |
|
---|
244 | return $request;
|
---|
245 | }
|
---|
246 |
|
---|
247 |
|
---|
248 | sub simple_request
|
---|
249 | {
|
---|
250 | my($self, $request, $arg, $size) = @_;
|
---|
251 |
|
---|
252 | # sanity check the request passed in
|
---|
253 | if (defined $request) {
|
---|
254 | if (ref $request) {
|
---|
255 | Carp::croak("You need a request object, not a " . ref($request) . " object")
|
---|
256 | if ref($request) eq 'ARRAY' or ref($request) eq 'HASH' or
|
---|
257 | !$request->can('method') or !$request->can('uri');
|
---|
258 | }
|
---|
259 | else {
|
---|
260 | Carp::croak("You need a request object, not '$request'");
|
---|
261 | }
|
---|
262 | }
|
---|
263 | else {
|
---|
264 | Carp::croak("No request object passed in");
|
---|
265 | }
|
---|
266 |
|
---|
267 | eval {
|
---|
268 | $request = $self->prepare_request($request);
|
---|
269 | };
|
---|
270 | if ($@) {
|
---|
271 | $@ =~ s/ at .* line \d+.*//s; # remove file/line number
|
---|
272 | return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, $@);
|
---|
273 | }
|
---|
274 | return $self->send_request($request, $arg, $size);
|
---|
275 | }
|
---|
276 |
|
---|
277 |
|
---|
278 | sub request
|
---|
279 | {
|
---|
280 | my($self, $request, $arg, $size, $previous) = @_;
|
---|
281 |
|
---|
282 | my $response = $self->simple_request($request, $arg, $size);
|
---|
283 | $response->previous($previous) if $previous;
|
---|
284 |
|
---|
285 | if ($response->redirects >= $self->{max_redirect}) {
|
---|
286 | $response->header("Client-Warning" =>
|
---|
287 | "Redirect loop detected (max_redirect = $self->{max_redirect})");
|
---|
288 | return $response;
|
---|
289 | }
|
---|
290 |
|
---|
291 | if (my $req = $self->run_handlers("response_redirect", $response)) {
|
---|
292 | return $self->request($req, $arg, $size, $response);
|
---|
293 | }
|
---|
294 |
|
---|
295 | my $code = $response->code;
|
---|
296 |
|
---|
297 | if ($code == &HTTP::Status::RC_MOVED_PERMANENTLY or
|
---|
298 | $code == &HTTP::Status::RC_FOUND or
|
---|
299 | $code == &HTTP::Status::RC_SEE_OTHER or
|
---|
300 | $code == &HTTP::Status::RC_TEMPORARY_REDIRECT)
|
---|
301 | {
|
---|
302 | my $referral = $request->clone;
|
---|
303 |
|
---|
304 | # These headers should never be forwarded
|
---|
305 | $referral->remove_header('Host', 'Cookie');
|
---|
306 |
|
---|
307 | if ($referral->header('Referer') &&
|
---|
308 | $request->uri->scheme eq 'https' &&
|
---|
309 | $referral->uri->scheme eq 'http')
|
---|
310 | {
|
---|
311 | # RFC 2616, section 15.1.3.
|
---|
312 | # https -> http redirect, suppressing Referer
|
---|
313 | $referral->remove_header('Referer');
|
---|
314 | }
|
---|
315 |
|
---|
316 | if ($code == &HTTP::Status::RC_SEE_OTHER ||
|
---|
317 | $code == &HTTP::Status::RC_FOUND)
|
---|
318 | {
|
---|
319 | my $method = uc($referral->method);
|
---|
320 | unless ($method eq "GET" || $method eq "HEAD") {
|
---|
321 | $referral->method("GET");
|
---|
322 | $referral->content("");
|
---|
323 | $referral->remove_content_headers;
|
---|
324 | }
|
---|
325 | }
|
---|
326 |
|
---|
327 | # And then we update the URL based on the Location:-header.
|
---|
328 | my $referral_uri = $response->header('Location');
|
---|
329 | {
|
---|
330 | # Some servers erroneously return a relative URL for redirects,
|
---|
331 | # so make it absolute if it not already is.
|
---|
332 | local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1;
|
---|
333 | my $base = $response->base;
|
---|
334 | $referral_uri = "" unless defined $referral_uri;
|
---|
335 | $referral_uri = $HTTP::URI_CLASS->new($referral_uri, $base)
|
---|
336 | ->abs($base);
|
---|
337 | }
|
---|
338 | $referral->uri($referral_uri);
|
---|
339 |
|
---|
340 | return $response unless $self->redirect_ok($referral, $response);
|
---|
341 | return $self->request($referral, $arg, $size, $response);
|
---|
342 |
|
---|
343 | }
|
---|
344 | elsif ($code == &HTTP::Status::RC_UNAUTHORIZED ||
|
---|
345 | $code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED
|
---|
346 | )
|
---|
347 | {
|
---|
348 | my $proxy = ($code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED);
|
---|
349 | my $ch_header = $proxy ? "Proxy-Authenticate" : "WWW-Authenticate";
|
---|
350 | my @challenge = $response->header($ch_header);
|
---|
351 | unless (@challenge) {
|
---|
352 | $response->header("Client-Warning" =>
|
---|
353 | "Missing Authenticate header");
|
---|
354 | return $response;
|
---|
355 | }
|
---|
356 |
|
---|
357 | require HTTP::Headers::Util;
|
---|
358 | CHALLENGE: for my $challenge (@challenge) {
|
---|
359 | $challenge =~ tr/,/;/; # "," is used to separate auth-params!!
|
---|
360 | ($challenge) = HTTP::Headers::Util::split_header_words($challenge);
|
---|
361 | my $scheme = shift(@$challenge);
|
---|
362 | shift(@$challenge); # no value
|
---|
363 | $challenge = { @$challenge }; # make rest into a hash
|
---|
364 |
|
---|
365 | unless ($scheme =~ /^([a-z]+(?:-[a-z]+)*)$/) {
|
---|
366 | $response->header("Client-Warning" =>
|
---|
367 | "Bad authentication scheme '$scheme'");
|
---|
368 | return $response;
|
---|
369 | }
|
---|
370 | $scheme = $1; # untainted now
|
---|
371 | my $class = "LWP::Authen::\u$scheme";
|
---|
372 | $class =~ s/-/_/g;
|
---|
373 |
|
---|
374 | no strict 'refs';
|
---|
375 | unless (%{"$class\::"}) {
|
---|
376 | # try to load it
|
---|
377 | eval "require $class";
|
---|
378 | if ($@) {
|
---|
379 | if ($@ =~ /^Can\'t locate/) {
|
---|
380 | $response->header("Client-Warning" =>
|
---|
381 | "Unsupported authentication scheme '$scheme'");
|
---|
382 | }
|
---|
383 | else {
|
---|
384 | $response->header("Client-Warning" => $@);
|
---|
385 | }
|
---|
386 | next CHALLENGE;
|
---|
387 | }
|
---|
388 | }
|
---|
389 | unless ($class->can("authenticate")) {
|
---|
390 | $response->header("Client-Warning" =>
|
---|
391 | "Unsupported authentication scheme '$scheme'");
|
---|
392 | next CHALLENGE;
|
---|
393 | }
|
---|
394 | return $class->authenticate($self, $proxy, $challenge, $response,
|
---|
395 | $request, $arg, $size);
|
---|
396 | }
|
---|
397 | return $response;
|
---|
398 | }
|
---|
399 | return $response;
|
---|
400 | }
|
---|
401 |
|
---|
402 |
|
---|
403 | #
|
---|
404 | # Now the shortcuts...
|
---|
405 | #
|
---|
406 | sub get {
|
---|
407 | require HTTP::Request::Common;
|
---|
408 | my($self, @parameters) = @_;
|
---|
409 | my @suff = $self->_process_colonic_headers(\@parameters,1);
|
---|
410 | return $self->request( HTTP::Request::Common::GET( @parameters ), @suff );
|
---|
411 | }
|
---|
412 |
|
---|
413 |
|
---|
414 | sub post {
|
---|
415 | require HTTP::Request::Common;
|
---|
416 | my($self, @parameters) = @_;
|
---|
417 | my @suff = $self->_process_colonic_headers(\@parameters, (ref($parameters[1]) ? 2 : 1));
|
---|
418 | return $self->request( HTTP::Request::Common::POST( @parameters ), @suff );
|
---|
419 | }
|
---|
420 |
|
---|
421 |
|
---|
422 | sub head {
|
---|
423 | require HTTP::Request::Common;
|
---|
424 | my($self, @parameters) = @_;
|
---|
425 | my @suff = $self->_process_colonic_headers(\@parameters,1);
|
---|
426 | return $self->request( HTTP::Request::Common::HEAD( @parameters ), @suff );
|
---|
427 | }
|
---|
428 |
|
---|
429 |
|
---|
430 | sub put {
|
---|
431 | require HTTP::Request::Common;
|
---|
432 | my($self, @parameters) = @_;
|
---|
433 | my @suff = $self->_process_colonic_headers(\@parameters, (ref($parameters[1]) ? 2 : 1));
|
---|
434 | return $self->request( HTTP::Request::Common::PUT( @parameters ), @suff );
|
---|
435 | }
|
---|
436 |
|
---|
437 |
|
---|
438 | sub delete {
|
---|
439 | require HTTP::Request::Common;
|
---|
440 | my($self, @parameters) = @_;
|
---|
441 | my @suff = $self->_process_colonic_headers(\@parameters,1);
|
---|
442 | return $self->request( HTTP::Request::Common::DELETE( @parameters ), @suff );
|
---|
443 | }
|
---|
444 |
|
---|
445 |
|
---|
446 | sub _process_colonic_headers {
|
---|
447 | # Process :content_cb / :content_file / :read_size_hint headers.
|
---|
448 | my($self, $args, $start_index) = @_;
|
---|
449 |
|
---|
450 | my($arg, $size);
|
---|
451 | for(my $i = $start_index; $i < @$args; $i += 2) {
|
---|
452 | next unless defined $args->[$i];
|
---|
453 |
|
---|
454 | #printf "Considering %s => %s\n", $args->[$i], $args->[$i + 1];
|
---|
455 |
|
---|
456 | if($args->[$i] eq ':content_cb') {
|
---|
457 | # Some sanity-checking...
|
---|
458 | $arg = $args->[$i + 1];
|
---|
459 | Carp::croak("A :content_cb value can't be undef") unless defined $arg;
|
---|
460 | Carp::croak("A :content_cb value must be a coderef")
|
---|
461 | unless ref $arg and UNIVERSAL::isa($arg, 'CODE');
|
---|
462 |
|
---|
463 | }
|
---|
464 | elsif ($args->[$i] eq ':content_file') {
|
---|
465 | $arg = $args->[$i + 1];
|
---|
466 |
|
---|
467 | # Some sanity-checking...
|
---|
468 | Carp::croak("A :content_file value can't be undef")
|
---|
469 | unless defined $arg;
|
---|
470 | Carp::croak("A :content_file value can't be a reference")
|
---|
471 | if ref $arg;
|
---|
472 | Carp::croak("A :content_file value can't be \"\"")
|
---|
473 | unless length $arg;
|
---|
474 |
|
---|
475 | }
|
---|
476 | elsif ($args->[$i] eq ':read_size_hint') {
|
---|
477 | $size = $args->[$i + 1];
|
---|
478 | # Bother checking it?
|
---|
479 |
|
---|
480 | }
|
---|
481 | else {
|
---|
482 | next;
|
---|
483 | }
|
---|
484 | splice @$args, $i, 2;
|
---|
485 | $i -= 2;
|
---|
486 | }
|
---|
487 |
|
---|
488 | # And return a suitable suffix-list for request(REQ,...)
|
---|
489 |
|
---|
490 | return unless defined $arg;
|
---|
491 | return $arg, $size if defined $size;
|
---|
492 | return $arg;
|
---|
493 | }
|
---|
494 |
|
---|
495 |
|
---|
496 | sub is_online {
|
---|
497 | my $self = shift;
|
---|
498 | return 1 if $self->get("http://www.msftncsi.com/ncsi.txt")->content eq "Microsoft NCSI";
|
---|
499 | return 1 if $self->get("http://www.apple.com")->content =~ m,<title>Apple</title>,;
|
---|
500 | return 0;
|
---|
501 | }
|
---|
502 |
|
---|
503 |
|
---|
504 | my @ANI = qw(- \ | /);
|
---|
505 |
|
---|
506 | sub progress {
|
---|
507 | my($self, $status, $m) = @_;
|
---|
508 | return unless $self->{show_progress};
|
---|
509 |
|
---|
510 | local($,, $\);
|
---|
511 | if ($status eq "begin") {
|
---|
512 | print STDERR "** ", $m->method, " ", $m->uri, " ==> ";
|
---|
513 | $self->{progress_start} = time;
|
---|
514 | $self->{progress_lastp} = "";
|
---|
515 | $self->{progress_ani} = 0;
|
---|
516 | }
|
---|
517 | elsif ($status eq "end") {
|
---|
518 | delete $self->{progress_lastp};
|
---|
519 | delete $self->{progress_ani};
|
---|
520 | print STDERR $m->status_line;
|
---|
521 | my $t = time - delete $self->{progress_start};
|
---|
522 | print STDERR " (${t}s)" if $t;
|
---|
523 | print STDERR "\n";
|
---|
524 | }
|
---|
525 | elsif ($status eq "tick") {
|
---|
526 | print STDERR "$ANI[$self->{progress_ani}++]\b";
|
---|
527 | $self->{progress_ani} %= @ANI;
|
---|
528 | }
|
---|
529 | else {
|
---|
530 | my $p = sprintf "%3.0f%%", $status * 100;
|
---|
531 | return if $p eq $self->{progress_lastp};
|
---|
532 | print STDERR "$p\b\b\b\b";
|
---|
533 | $self->{progress_lastp} = $p;
|
---|
534 | }
|
---|
535 | STDERR->flush;
|
---|
536 | }
|
---|
537 |
|
---|
538 |
|
---|
539 | #
|
---|
540 | # This whole allow/forbid thing is based on man 1 at's way of doing things.
|
---|
541 | #
|
---|
542 | sub is_protocol_supported
|
---|
543 | {
|
---|
544 | my($self, $scheme) = @_;
|
---|
545 | if (ref $scheme) {
|
---|
546 | # assume we got a reference to an URI object
|
---|
547 | $scheme = $scheme->scheme;
|
---|
548 | }
|
---|
549 | else {
|
---|
550 | Carp::croak("Illegal scheme '$scheme' passed to is_protocol_supported")
|
---|
551 | if $scheme =~ /\W/;
|
---|
552 | $scheme = lc $scheme;
|
---|
553 | }
|
---|
554 |
|
---|
555 | my $x;
|
---|
556 | if(ref($self) and $x = $self->protocols_allowed) {
|
---|
557 | return 0 unless grep lc($_) eq $scheme, @$x;
|
---|
558 | }
|
---|
559 | elsif (ref($self) and $x = $self->protocols_forbidden) {
|
---|
560 | return 0 if grep lc($_) eq $scheme, @$x;
|
---|
561 | }
|
---|
562 |
|
---|
563 | local($SIG{__DIE__}); # protect against user defined die handlers
|
---|
564 | $x = LWP::Protocol::implementor($scheme);
|
---|
565 | return 1 if $x and $x ne 'LWP::Protocol::nogo';
|
---|
566 | return 0;
|
---|
567 | }
|
---|
568 |
|
---|
569 |
|
---|
570 | sub protocols_allowed { shift->_elem('protocols_allowed' , @_) }
|
---|
571 | sub protocols_forbidden { shift->_elem('protocols_forbidden' , @_) }
|
---|
572 | sub requests_redirectable { shift->_elem('requests_redirectable', @_) }
|
---|
573 |
|
---|
574 |
|
---|
575 | sub redirect_ok
|
---|
576 | {
|
---|
577 | # RFC 2616, section 10.3.2 and 10.3.3 say:
|
---|
578 | # If the 30[12] status code is received in response to a request other
|
---|
579 | # than GET or HEAD, the user agent MUST NOT automatically redirect the
|
---|
580 | # request unless it can be confirmed by the user, since this might
|
---|
581 | # change the conditions under which the request was issued.
|
---|
582 |
|
---|
583 | # Note that this routine used to be just:
|
---|
584 | # return 0 if $_[1]->method eq "POST"; return 1;
|
---|
585 |
|
---|
586 | my($self, $new_request, $response) = @_;
|
---|
587 | my $method = $response->request->method;
|
---|
588 | return 0 unless grep $_ eq $method,
|
---|
589 | @{ $self->requests_redirectable || [] };
|
---|
590 |
|
---|
591 | if ($new_request->uri->scheme eq 'file') {
|
---|
592 | $response->header("Client-Warning" =>
|
---|
593 | "Can't redirect to a file:// URL!");
|
---|
594 | return 0;
|
---|
595 | }
|
---|
596 |
|
---|
597 | # Otherwise it's apparently okay...
|
---|
598 | return 1;
|
---|
599 | }
|
---|
600 |
|
---|
601 |
|
---|
602 | sub credentials
|
---|
603 | {
|
---|
604 | my $self = shift;
|
---|
605 | my $netloc = lc(shift);
|
---|
606 | my $realm = shift || "";
|
---|
607 | my $old = $self->{basic_authentication}{$netloc}{$realm};
|
---|
608 | if (@_) {
|
---|
609 | $self->{basic_authentication}{$netloc}{$realm} = [@_];
|
---|
610 | }
|
---|
611 | return unless $old;
|
---|
612 | return @$old if wantarray;
|
---|
613 | return join(":", @$old);
|
---|
614 | }
|
---|
615 |
|
---|
616 |
|
---|
617 | sub get_basic_credentials
|
---|
618 | {
|
---|
619 | my($self, $realm, $uri, $proxy) = @_;
|
---|
620 | return if $proxy;
|
---|
621 | return $self->credentials($uri->host_port, $realm);
|
---|
622 | }
|
---|
623 |
|
---|
624 |
|
---|
625 | sub timeout { shift->_elem('timeout', @_); }
|
---|
626 | sub local_address{ shift->_elem('local_address',@_); }
|
---|
627 | sub max_size { shift->_elem('max_size', @_); }
|
---|
628 | sub max_redirect { shift->_elem('max_redirect', @_); }
|
---|
629 | sub show_progress{ shift->_elem('show_progress', @_); }
|
---|
630 |
|
---|
631 | sub ssl_opts {
|
---|
632 | my $self = shift;
|
---|
633 | if (@_ == 1) {
|
---|
634 | my $k = shift;
|
---|
635 | return $self->{ssl_opts}{$k};
|
---|
636 | }
|
---|
637 | if (@_) {
|
---|
638 | my $old;
|
---|
639 | while (@_) {
|
---|
640 | my($k, $v) = splice(@_, 0, 2);
|
---|
641 | $old = $self->{ssl_opts}{$k} unless @_;
|
---|
642 | if (defined $v) {
|
---|
643 | $self->{ssl_opts}{$k} = $v;
|
---|
644 | }
|
---|
645 | else {
|
---|
646 | delete $self->{ssl_opts}{$k};
|
---|
647 | }
|
---|
648 | }
|
---|
649 | %{$self->{ssl_opts}} = (%{$self->{ssl_opts}}, @_);
|
---|
650 | return $old;
|
---|
651 | }
|
---|
652 |
|
---|
653 | return keys %{$self->{ssl_opts}};
|
---|
654 | }
|
---|
655 |
|
---|
656 | sub parse_head {
|
---|
657 | my $self = shift;
|
---|
658 | if (@_) {
|
---|
659 | my $flag = shift;
|
---|
660 | my $parser;
|
---|
661 | my $old = $self->set_my_handler("response_header", $flag ? sub {
|
---|
662 | my($response, $ua) = @_;
|
---|
663 | require HTML::HeadParser;
|
---|
664 | $parser = HTML::HeadParser->new;
|
---|
665 | $parser->xml_mode(1) if $response->content_is_xhtml;
|
---|
666 | $parser->utf8_mode(1) if $] >= 5.008 && $HTML::Parser::VERSION >= 3.40;
|
---|
667 |
|
---|
668 | push(@{$response->{handlers}{response_data}}, {
|
---|
669 | callback => sub {
|
---|
670 | return unless $parser;
|
---|
671 | unless ($parser->parse($_[3])) {
|
---|
672 | my $h = $parser->header;
|
---|
673 | my $r = $_[0];
|
---|
674 | for my $f ($h->header_field_names) {
|
---|
675 | $r->init_header($f, [$h->header($f)]);
|
---|
676 | }
|
---|
677 | undef($parser);
|
---|
678 | }
|
---|
679 | },
|
---|
680 | });
|
---|
681 |
|
---|
682 | } : undef,
|
---|
683 | m_media_type => "html",
|
---|
684 | );
|
---|
685 | return !!$old;
|
---|
686 | }
|
---|
687 | else {
|
---|
688 | return !!$self->get_my_handler("response_header");
|
---|
689 | }
|
---|
690 | }
|
---|
691 |
|
---|
692 | sub cookie_jar {
|
---|
693 | my $self = shift;
|
---|
694 | my $old = $self->{cookie_jar};
|
---|
695 | if (@_) {
|
---|
696 | my $jar = shift;
|
---|
697 | if (ref($jar) eq "HASH") {
|
---|
698 | require HTTP::Cookies;
|
---|
699 | $jar = HTTP::Cookies->new(%$jar);
|
---|
700 | }
|
---|
701 | $self->{cookie_jar} = $jar;
|
---|
702 | $self->set_my_handler("request_prepare",
|
---|
703 | $jar ? sub { $jar->add_cookie_header($_[0]); } : undef,
|
---|
704 | );
|
---|
705 | $self->set_my_handler("response_done",
|
---|
706 | $jar ? sub { $jar->extract_cookies($_[0]); } : undef,
|
---|
707 | );
|
---|
708 | }
|
---|
709 | $old;
|
---|
710 | }
|
---|
711 |
|
---|
712 | sub default_headers {
|
---|
713 | my $self = shift;
|
---|
714 | my $old = $self->{def_headers} ||= HTTP::Headers->new;
|
---|
715 | if (@_) {
|
---|
716 | Carp::croak("default_headers not set to HTTP::Headers compatible object")
|
---|
717 | unless @_ == 1 && $_[0]->can("header_field_names");
|
---|
718 | $self->{def_headers} = shift;
|
---|
719 | }
|
---|
720 | return $old;
|
---|
721 | }
|
---|
722 |
|
---|
723 | sub default_header {
|
---|
724 | my $self = shift;
|
---|
725 | return $self->default_headers->header(@_);
|
---|
726 | }
|
---|
727 |
|
---|
728 | sub _agent { "libwww-perl/$LWP::VERSION" }
|
---|
729 |
|
---|
730 | sub agent {
|
---|
731 | my $self = shift;
|
---|
732 | if (@_) {
|
---|
733 | my $agent = shift;
|
---|
734 | if ($agent) {
|
---|
735 | $agent .= $self->_agent if $agent =~ /\s+$/;
|
---|
736 | }
|
---|
737 | else {
|
---|
738 | undef($agent)
|
---|
739 | }
|
---|
740 | return $self->default_header("User-Agent", $agent);
|
---|
741 | }
|
---|
742 | return $self->default_header("User-Agent");
|
---|
743 | }
|
---|
744 |
|
---|
745 | sub from { # legacy
|
---|
746 | my $self = shift;
|
---|
747 | return $self->default_header("From", @_);
|
---|
748 | }
|
---|
749 |
|
---|
750 |
|
---|
751 | sub conn_cache {
|
---|
752 | my $self = shift;
|
---|
753 | my $old = $self->{conn_cache};
|
---|
754 | if (@_) {
|
---|
755 | my $cache = shift;
|
---|
756 | if (ref($cache) eq "HASH") {
|
---|
757 | require LWP::ConnCache;
|
---|
758 | $cache = LWP::ConnCache->new(%$cache);
|
---|
759 | }
|
---|
760 | $self->{conn_cache} = $cache;
|
---|
761 | }
|
---|
762 | $old;
|
---|
763 | }
|
---|
764 |
|
---|
765 |
|
---|
766 | sub add_handler {
|
---|
767 | my($self, $phase, $cb, %spec) = @_;
|
---|
768 | $spec{line} ||= join(":", (caller)[1,2]);
|
---|
769 | my $conf = $self->{handlers}{$phase} ||= do {
|
---|
770 | require HTTP::Config;
|
---|
771 | HTTP::Config->new;
|
---|
772 | };
|
---|
773 | $conf->add(%spec, callback => $cb);
|
---|
774 | }
|
---|
775 |
|
---|
776 | sub set_my_handler {
|
---|
777 | my($self, $phase, $cb, %spec) = @_;
|
---|
778 | $spec{owner} = (caller(1))[3] unless exists $spec{owner};
|
---|
779 | $self->remove_handler($phase, %spec);
|
---|
780 | $spec{line} ||= join(":", (caller)[1,2]);
|
---|
781 | $self->add_handler($phase, $cb, %spec) if $cb;
|
---|
782 | }
|
---|
783 |
|
---|
784 | sub get_my_handler {
|
---|
785 | my $self = shift;
|
---|
786 | my $phase = shift;
|
---|
787 | my $init = pop if @_ % 2;
|
---|
788 | my %spec = @_;
|
---|
789 | my $conf = $self->{handlers}{$phase};
|
---|
790 | unless ($conf) {
|
---|
791 | return unless $init;
|
---|
792 | require HTTP::Config;
|
---|
793 | $conf = $self->{handlers}{$phase} = HTTP::Config->new;
|
---|
794 | }
|
---|
795 | $spec{owner} = (caller(1))[3] unless exists $spec{owner};
|
---|
796 | my @h = $conf->find(%spec);
|
---|
797 | if (!@h && $init) {
|
---|
798 | if (ref($init) eq "CODE") {
|
---|
799 | $init->(\%spec);
|
---|
800 | }
|
---|
801 | elsif (ref($init) eq "HASH") {
|
---|
802 | while (my($k, $v) = each %$init) {
|
---|
803 | $spec{$k} = $v;
|
---|
804 | }
|
---|
805 | }
|
---|
806 | $spec{callback} ||= sub {};
|
---|
807 | $spec{line} ||= join(":", (caller)[1,2]);
|
---|
808 | $conf->add(\%spec);
|
---|
809 | return \%spec;
|
---|
810 | }
|
---|
811 | return wantarray ? @h : $h[0];
|
---|
812 | }
|
---|
813 |
|
---|
814 | sub remove_handler {
|
---|
815 | my($self, $phase, %spec) = @_;
|
---|
816 | if ($phase) {
|
---|
817 | my $conf = $self->{handlers}{$phase} || return;
|
---|
818 | my @h = $conf->remove(%spec);
|
---|
819 | delete $self->{handlers}{$phase} if $conf->empty;
|
---|
820 | return @h;
|
---|
821 | }
|
---|
822 |
|
---|
823 | return unless $self->{handlers};
|
---|
824 | return map $self->remove_handler($_), sort keys %{$self->{handlers}};
|
---|
825 | }
|
---|
826 |
|
---|
827 | sub handlers {
|
---|
828 | my($self, $phase, $o) = @_;
|
---|
829 | my @h;
|
---|
830 | if ($o->{handlers} && $o->{handlers}{$phase}) {
|
---|
831 | push(@h, @{$o->{handlers}{$phase}});
|
---|
832 | }
|
---|
833 | if (my $conf = $self->{handlers}{$phase}) {
|
---|
834 | push(@h, $conf->matching($o));
|
---|
835 | }
|
---|
836 | return @h;
|
---|
837 | }
|
---|
838 |
|
---|
839 | sub run_handlers {
|
---|
840 | my($self, $phase, $o) = @_;
|
---|
841 | if (defined(wantarray)) {
|
---|
842 | for my $h ($self->handlers($phase, $o)) {
|
---|
843 | my $ret = $h->{callback}->($o, $self, $h);
|
---|
844 | return $ret if $ret;
|
---|
845 | }
|
---|
846 | return undef;
|
---|
847 | }
|
---|
848 |
|
---|
849 | for my $h ($self->handlers($phase, $o)) {
|
---|
850 | $h->{callback}->($o, $self, $h);
|
---|
851 | }
|
---|
852 | }
|
---|
853 |
|
---|
854 |
|
---|
855 | # deprecated
|
---|
856 | sub use_eval { shift->_elem('use_eval', @_); }
|
---|
857 | sub use_alarm
|
---|
858 | {
|
---|
859 | Carp::carp("LWP::UserAgent->use_alarm(BOOL) is a no-op")
|
---|
860 | if @_ > 1 && $^W;
|
---|
861 | "";
|
---|
862 | }
|
---|
863 |
|
---|
864 |
|
---|
865 | sub clone
|
---|
866 | {
|
---|
867 | my $self = shift;
|
---|
868 | my $copy = bless { %$self }, ref $self; # copy most fields
|
---|
869 |
|
---|
870 | delete $copy->{handlers};
|
---|
871 | delete $copy->{conn_cache};
|
---|
872 |
|
---|
873 | # copy any plain arrays and hashes; known not to need recursive copy
|
---|
874 | for my $k (qw(proxy no_proxy requests_redirectable ssl_opts)) {
|
---|
875 | next unless $copy->{$k};
|
---|
876 | if (ref($copy->{$k}) eq "ARRAY") {
|
---|
877 | $copy->{$k} = [ @{$copy->{$k}} ];
|
---|
878 | }
|
---|
879 | elsif (ref($copy->{$k}) eq "HASH") {
|
---|
880 | $copy->{$k} = { %{$copy->{$k}} };
|
---|
881 | }
|
---|
882 | }
|
---|
883 |
|
---|
884 | if ($self->{def_headers}) {
|
---|
885 | $copy->{def_headers} = $self->{def_headers}->clone;
|
---|
886 | }
|
---|
887 |
|
---|
888 | # re-enable standard handlers
|
---|
889 | $copy->parse_head($self->parse_head);
|
---|
890 |
|
---|
891 | # no easy way to clone the cookie jar; so let's just remove it for now
|
---|
892 | $copy->cookie_jar(undef);
|
---|
893 |
|
---|
894 | $copy;
|
---|
895 | }
|
---|
896 |
|
---|
897 |
|
---|
898 | sub mirror
|
---|
899 | {
|
---|
900 | my($self, $url, $file) = @_;
|
---|
901 |
|
---|
902 | my $request = HTTP::Request->new('GET', $url);
|
---|
903 |
|
---|
904 | # If the file exists, add a cache-related header
|
---|
905 | if ( -e $file ) {
|
---|
906 | my ($mtime) = ( stat($file) )[9];
|
---|
907 | if ($mtime) {
|
---|
908 | $request->header( 'If-Modified-Since' => HTTP::Date::time2str($mtime) );
|
---|
909 | }
|
---|
910 | }
|
---|
911 | my $tmpfile = "$file-$$";
|
---|
912 |
|
---|
913 | my $response = $self->request($request, $tmpfile);
|
---|
914 | if ( $response->header('X-Died') ) {
|
---|
915 | die $response->header('X-Died');
|
---|
916 | }
|
---|
917 |
|
---|
918 | # Only fetching a fresh copy of the would be considered success.
|
---|
919 | # If the file was not modified, "304" would returned, which
|
---|
920 | # is considered by HTTP::Status to be a "redirect", /not/ "success"
|
---|
921 | if ( $response->is_success ) {
|
---|
922 | my @stat = stat($tmpfile) or die "Could not stat tmpfile '$tmpfile': $!";
|
---|
923 | my $file_length = $stat[7];
|
---|
924 | my ($content_length) = $response->header('Content-length');
|
---|
925 |
|
---|
926 | if ( defined $content_length and $file_length < $content_length ) {
|
---|
927 | unlink($tmpfile);
|
---|
928 | die "Transfer truncated: " . "only $file_length out of $content_length bytes received\n";
|
---|
929 | }
|
---|
930 | elsif ( defined $content_length and $file_length > $content_length ) {
|
---|
931 | unlink($tmpfile);
|
---|
932 | die "Content-length mismatch: " . "expected $content_length bytes, got $file_length\n";
|
---|
933 | }
|
---|
934 | # The file was the expected length.
|
---|
935 | else {
|
---|
936 | # Replace the stale file with a fresh copy
|
---|
937 | if ( -e $file ) {
|
---|
938 | # Some dosish systems fail to rename if the target exists
|
---|
939 | chmod 0777, $file;
|
---|
940 | unlink $file;
|
---|
941 | }
|
---|
942 | rename( $tmpfile, $file )
|
---|
943 | or die "Cannot rename '$tmpfile' to '$file': $!\n";
|
---|
944 |
|
---|
945 | # make sure the file has the same last modification time
|
---|
946 | if ( my $lm = $response->last_modified ) {
|
---|
947 | utime $lm, $lm, $file;
|
---|
948 | }
|
---|
949 | }
|
---|
950 | }
|
---|
951 | # The local copy is fresh enough, so just delete the temp file
|
---|
952 | else {
|
---|
953 | unlink($tmpfile);
|
---|
954 | }
|
---|
955 | return $response;
|
---|
956 | }
|
---|
957 |
|
---|
958 |
|
---|
959 | sub _need_proxy {
|
---|
960 | my($req, $ua) = @_;
|
---|
961 | return if exists $req->{proxy};
|
---|
962 | my $proxy = $ua->{proxy}{$req->uri->scheme} || return;
|
---|
963 | if ($ua->{no_proxy}) {
|
---|
964 | if (my $host = eval { $req->uri->host }) {
|
---|
965 | for my $domain (@{$ua->{no_proxy}}) {
|
---|
966 | if ($host =~ /\Q$domain\E$/) {
|
---|
967 | return;
|
---|
968 | }
|
---|
969 | }
|
---|
970 | }
|
---|
971 | }
|
---|
972 | $req->{proxy} = $HTTP::URI_CLASS->new($proxy);
|
---|
973 | }
|
---|
974 |
|
---|
975 |
|
---|
976 | sub proxy
|
---|
977 | {
|
---|
978 | my $self = shift;
|
---|
979 | my $key = shift;
|
---|
980 | return map $self->proxy($_, @_), @$key if ref $key;
|
---|
981 |
|
---|
982 | Carp::croak("'$key' is not a valid URI scheme") unless $key =~ /^$URI::scheme_re\z/;
|
---|
983 | my $old = $self->{'proxy'}{$key};
|
---|
984 | if (@_) {
|
---|
985 | my $url = shift;
|
---|
986 | if (defined($url) && length($url)) {
|
---|
987 | Carp::croak("Proxy must be specified as absolute URI; '$url' is not") unless $url =~ /^$URI::scheme_re:/;
|
---|
988 | Carp::croak("Bad http proxy specification '$url'") if $url =~ /^https?:/ && $url !~ m,^https?://\w,;
|
---|
989 | }
|
---|
990 | $self->{proxy}{$key} = $url;
|
---|
991 | $self->set_my_handler("request_preprepare", \&_need_proxy)
|
---|
992 | }
|
---|
993 | return $old;
|
---|
994 | }
|
---|
995 |
|
---|
996 |
|
---|
997 | sub env_proxy {
|
---|
998 | my ($self) = @_;
|
---|
999 | require Encode;
|
---|
1000 | require Encode::Locale;
|
---|
1001 | my($k,$v);
|
---|
1002 | while(($k, $v) = each %ENV) {
|
---|
1003 | if ($ENV{REQUEST_METHOD}) {
|
---|
1004 | # Need to be careful when called in the CGI environment, as
|
---|
1005 | # the HTTP_PROXY variable is under control of that other guy.
|
---|
1006 | next if $k =~ /^HTTP_/;
|
---|
1007 | $k = "HTTP_PROXY" if $k eq "CGI_HTTP_PROXY";
|
---|
1008 | }
|
---|
1009 | $k = lc($k);
|
---|
1010 | next unless $k =~ /^(.*)_proxy$/;
|
---|
1011 | $k = $1;
|
---|
1012 | if ($k eq 'no') {
|
---|
1013 | $self->no_proxy(split(/\s*,\s*/, $v));
|
---|
1014 | }
|
---|
1015 | else {
|
---|
1016 | # Ignore random _proxy variables, allow only valid schemes
|
---|
1017 | next unless $k =~ /^$URI::scheme_re\z/;
|
---|
1018 | # Ignore xxx_proxy variables if xxx isn't a supported protocol
|
---|
1019 | next unless LWP::Protocol::implementor($k);
|
---|
1020 | $self->proxy($k, Encode::decode(locale => $v));
|
---|
1021 | }
|
---|
1022 | }
|
---|
1023 | }
|
---|
1024 |
|
---|
1025 |
|
---|
1026 | sub no_proxy {
|
---|
1027 | my($self, @no) = @_;
|
---|
1028 | if (@no) {
|
---|
1029 | push(@{ $self->{'no_proxy'} }, @no);
|
---|
1030 | }
|
---|
1031 | else {
|
---|
1032 | $self->{'no_proxy'} = [];
|
---|
1033 | }
|
---|
1034 | }
|
---|
1035 |
|
---|
1036 |
|
---|
1037 | sub _new_response {
|
---|
1038 | my($request, $code, $message, $content) = @_;
|
---|
1039 | $message ||= HTTP::Status::status_message($code);
|
---|
1040 | my $response = HTTP::Response->new($code, $message);
|
---|
1041 | $response->request($request);
|
---|
1042 | $response->header("Client-Date" => HTTP::Date::time2str(time));
|
---|
1043 | $response->header("Client-Warning" => "Internal response");
|
---|
1044 | $response->header("Content-Type" => "text/plain");
|
---|
1045 | $response->content($content || "$code $message\n");
|
---|
1046 | return $response;
|
---|
1047 | }
|
---|
1048 |
|
---|
1049 |
|
---|
1050 | 1;
|
---|
1051 |
|
---|
1052 | __END__
|
---|
1053 |
|
---|
1054 | =head1 NAME
|
---|
1055 |
|
---|
1056 | LWP::UserAgent - Web user agent class
|
---|
1057 |
|
---|
1058 | =head1 SYNOPSIS
|
---|
1059 |
|
---|
1060 | require LWP::UserAgent;
|
---|
1061 |
|
---|
1062 | my $ua = LWP::UserAgent->new;
|
---|
1063 | $ua->timeout(10);
|
---|
1064 | $ua->env_proxy;
|
---|
1065 |
|
---|
1066 | my $response = $ua->get('http://search.cpan.org/');
|
---|
1067 |
|
---|
1068 | if ($response->is_success) {
|
---|
1069 | print $response->decoded_content; # or whatever
|
---|
1070 | }
|
---|
1071 | else {
|
---|
1072 | die $response->status_line;
|
---|
1073 | }
|
---|
1074 |
|
---|
1075 | =head1 DESCRIPTION
|
---|
1076 |
|
---|
1077 | The C<LWP::UserAgent> is a class implementing a web user agent.
|
---|
1078 | C<LWP::UserAgent> objects can be used to dispatch web requests.
|
---|
1079 |
|
---|
1080 | In normal use the application creates an C<LWP::UserAgent> object, and
|
---|
1081 | then configures it with values for timeouts, proxies, name, etc. It
|
---|
1082 | then creates an instance of C<HTTP::Request> for the request that
|
---|
1083 | needs to be performed. This request is then passed to one of the
|
---|
1084 | request method the UserAgent, which dispatches it using the relevant
|
---|
1085 | protocol, and returns a C<HTTP::Response> object. There are
|
---|
1086 | convenience methods for sending the most common request types: get(),
|
---|
1087 | head(), post(), put() and delete(). When using these methods then the
|
---|
1088 | creation of the request object is hidden as shown in the synopsis above.
|
---|
1089 |
|
---|
1090 | The basic approach of the library is to use HTTP style communication
|
---|
1091 | for all protocol schemes. This means that you will construct
|
---|
1092 | C<HTTP::Request> objects and receive C<HTTP::Response> objects even
|
---|
1093 | for non-HTTP resources like I<gopher> and I<ftp>. In order to achieve
|
---|
1094 | even more similarity to HTTP style communications, gopher menus and
|
---|
1095 | file directories are converted to HTML documents.
|
---|
1096 |
|
---|
1097 | =head1 CONSTRUCTOR METHODS
|
---|
1098 |
|
---|
1099 | The following constructor methods are available:
|
---|
1100 |
|
---|
1101 | =over 4
|
---|
1102 |
|
---|
1103 | =item $ua = LWP::UserAgent->new( %options )
|
---|
1104 |
|
---|
1105 | This method constructs a new C<LWP::UserAgent> object and returns it.
|
---|
1106 | Key/value pair arguments may be provided to set up the initial state.
|
---|
1107 | The following options correspond to attribute methods described below:
|
---|
1108 |
|
---|
1109 | KEY DEFAULT
|
---|
1110 | ----------- --------------------
|
---|
1111 | agent "libwww-perl/#.###"
|
---|
1112 | from undef
|
---|
1113 | conn_cache undef
|
---|
1114 | cookie_jar undef
|
---|
1115 | default_headers HTTP::Headers->new
|
---|
1116 | local_address undef
|
---|
1117 | ssl_opts { verify_hostname => 1 }
|
---|
1118 | max_size undef
|
---|
1119 | max_redirect 7
|
---|
1120 | parse_head 1
|
---|
1121 | protocols_allowed undef
|
---|
1122 | protocols_forbidden undef
|
---|
1123 | requests_redirectable ['GET', 'HEAD']
|
---|
1124 | timeout 180
|
---|
1125 |
|
---|
1126 | The following additional options are also accepted: If the C<env_proxy> option
|
---|
1127 | is passed in with a TRUE value, then proxy settings are read from environment
|
---|
1128 | variables (see env_proxy() method below). If C<env_proxy> isn't provided the
|
---|
1129 | C<PERL_LWP_ENV_PROXY> environment variable controls if env_proxy() is called
|
---|
1130 | during initialization. If the C<keep_alive> option is passed in, then a
|
---|
1131 | C<LWP::ConnCache> is set up (see conn_cache() method below). The C<keep_alive>
|
---|
1132 | value is passed on as the C<total_capacity> for the connection cache.
|
---|
1133 |
|
---|
1134 | =item $ua->clone
|
---|
1135 |
|
---|
1136 | Returns a copy of the LWP::UserAgent object.
|
---|
1137 |
|
---|
1138 | =back
|
---|
1139 |
|
---|
1140 | =head1 ATTRIBUTES
|
---|
1141 |
|
---|
1142 | The settings of the configuration attributes modify the behaviour of the
|
---|
1143 | C<LWP::UserAgent> when it dispatches requests. Most of these can also
|
---|
1144 | be initialized by options passed to the constructor method.
|
---|
1145 |
|
---|
1146 | The following attribute methods are provided. The attribute value is
|
---|
1147 | left unchanged if no argument is given. The return value from each
|
---|
1148 | method is the old attribute value.
|
---|
1149 |
|
---|
1150 | =over
|
---|
1151 |
|
---|
1152 | =item $ua->agent
|
---|
1153 |
|
---|
1154 | =item $ua->agent( $product_id )
|
---|
1155 |
|
---|
1156 | Get/set the product token that is used to identify the user agent on
|
---|
1157 | the network. The agent value is sent as the "User-Agent" header in
|
---|
1158 | the requests. The default is the string returned by the _agent()
|
---|
1159 | method (see below).
|
---|
1160 |
|
---|
1161 | If the $product_id ends with space then the _agent() string is
|
---|
1162 | appended to it.
|
---|
1163 |
|
---|
1164 | The user agent string should be one or more simple product identifiers
|
---|
1165 | with an optional version number separated by the "/" character.
|
---|
1166 | Examples are:
|
---|
1167 |
|
---|
1168 | $ua->agent('Checkbot/0.4 ' . $ua->_agent);
|
---|
1169 | $ua->agent('Checkbot/0.4 '); # same as above
|
---|
1170 | $ua->agent('Mozilla/5.0');
|
---|
1171 | $ua->agent(""); # don't identify
|
---|
1172 |
|
---|
1173 | =item $ua->_agent
|
---|
1174 |
|
---|
1175 | Returns the default agent identifier. This is a string of the form
|
---|
1176 | "libwww-perl/#.###", where "#.###" is substituted with the version number
|
---|
1177 | of this library.
|
---|
1178 |
|
---|
1179 | =item $ua->from
|
---|
1180 |
|
---|
1181 | =item $ua->from( $email_address )
|
---|
1182 |
|
---|
1183 | Get/set the e-mail address for the human user who controls
|
---|
1184 | the requesting user agent. The address should be machine-usable, as
|
---|
1185 | defined in RFC 822. The C<from> value is send as the "From" header in
|
---|
1186 | the requests. Example:
|
---|
1187 |
|
---|
1188 | $ua->from('[email protected]');
|
---|
1189 |
|
---|
1190 | The default is to not send a "From" header. See the default_headers()
|
---|
1191 | method for the more general interface that allow any header to be defaulted.
|
---|
1192 |
|
---|
1193 | =item $ua->cookie_jar
|
---|
1194 |
|
---|
1195 | =item $ua->cookie_jar( $cookie_jar_obj )
|
---|
1196 |
|
---|
1197 | Get/set the cookie jar object to use. The only requirement is that
|
---|
1198 | the cookie jar object must implement the extract_cookies($request) and
|
---|
1199 | add_cookie_header($response) methods. These methods will then be
|
---|
1200 | invoked by the user agent as requests are sent and responses are
|
---|
1201 | received. Normally this will be a C<HTTP::Cookies> object or some
|
---|
1202 | subclass.
|
---|
1203 |
|
---|
1204 | The default is to have no cookie_jar, i.e. never automatically add
|
---|
1205 | "Cookie" headers to the requests.
|
---|
1206 |
|
---|
1207 | Shortcut: If a reference to a plain hash is passed in as the
|
---|
1208 | $cookie_jar_object, then it is replaced with an instance of
|
---|
1209 | C<HTTP::Cookies> that is initialized based on the hash. This form also
|
---|
1210 | automatically loads the C<HTTP::Cookies> module. It means that:
|
---|
1211 |
|
---|
1212 | $ua->cookie_jar({ file => "$ENV{HOME}/.cookies.txt" });
|
---|
1213 |
|
---|
1214 | is really just a shortcut for:
|
---|
1215 |
|
---|
1216 | require HTTP::Cookies;
|
---|
1217 | $ua->cookie_jar(HTTP::Cookies->new(file => "$ENV{HOME}/.cookies.txt"));
|
---|
1218 |
|
---|
1219 | =item $ua->default_headers
|
---|
1220 |
|
---|
1221 | =item $ua->default_headers( $headers_obj )
|
---|
1222 |
|
---|
1223 | Get/set the headers object that will provide default header values for
|
---|
1224 | any requests sent. By default this will be an empty C<HTTP::Headers>
|
---|
1225 | object.
|
---|
1226 |
|
---|
1227 | =item $ua->default_header( $field )
|
---|
1228 |
|
---|
1229 | =item $ua->default_header( $field => $value )
|
---|
1230 |
|
---|
1231 | This is just a short-cut for $ua->default_headers->header( $field =>
|
---|
1232 | $value ). Example:
|
---|
1233 |
|
---|
1234 | $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable());
|
---|
1235 | $ua->default_header('Accept-Language' => "no, en");
|
---|
1236 |
|
---|
1237 | =item $ua->conn_cache
|
---|
1238 |
|
---|
1239 | =item $ua->conn_cache( $cache_obj )
|
---|
1240 |
|
---|
1241 | Get/set the C<LWP::ConnCache> object to use. See L<LWP::ConnCache>
|
---|
1242 | for details.
|
---|
1243 |
|
---|
1244 | =item $ua->credentials( $netloc, $realm )
|
---|
1245 |
|
---|
1246 | =item $ua->credentials( $netloc, $realm, $uname, $pass )
|
---|
1247 |
|
---|
1248 | Get/set the user name and password to be used for a realm.
|
---|
1249 |
|
---|
1250 | The $netloc is a string of the form "<host>:<port>". The username and
|
---|
1251 | password will only be passed to this server. Example:
|
---|
1252 |
|
---|
1253 | $ua->credentials("www.example.com:80", "Some Realm", "foo", "secret");
|
---|
1254 |
|
---|
1255 | =item $ua->local_address
|
---|
1256 |
|
---|
1257 | =item $ua->local_address( $address )
|
---|
1258 |
|
---|
1259 | Get/set the local interface to bind to for network connections. The interface
|
---|
1260 | can be specified as a hostname or an IP address. This value is passed as the
|
---|
1261 | C<LocalAddr> argument to L<IO::Socket::INET>.
|
---|
1262 |
|
---|
1263 | =item $ua->max_size
|
---|
1264 |
|
---|
1265 | =item $ua->max_size( $bytes )
|
---|
1266 |
|
---|
1267 | Get/set the size limit for response content. The default is C<undef>,
|
---|
1268 | which means that there is no limit. If the returned response content
|
---|
1269 | is only partial, because the size limit was exceeded, then a
|
---|
1270 | "Client-Aborted" header will be added to the response. The content
|
---|
1271 | might end up longer than C<max_size> as we abort once appending a
|
---|
1272 | chunk of data makes the length exceed the limit. The "Content-Length"
|
---|
1273 | header, if present, will indicate the length of the full content and
|
---|
1274 | will normally not be the same as C<< length($res->content) >>.
|
---|
1275 |
|
---|
1276 | =item $ua->max_redirect
|
---|
1277 |
|
---|
1278 | =item $ua->max_redirect( $n )
|
---|
1279 |
|
---|
1280 | This reads or sets the object's limit of how many times it will obey
|
---|
1281 | redirection responses in a given request cycle.
|
---|
1282 |
|
---|
1283 | By default, the value is 7. This means that if you call request()
|
---|
1284 | method and the response is a redirect elsewhere which is in turn a
|
---|
1285 | redirect, and so on seven times, then LWP gives up after that seventh
|
---|
1286 | request.
|
---|
1287 |
|
---|
1288 | =item $ua->parse_head
|
---|
1289 |
|
---|
1290 | =item $ua->parse_head( $boolean )
|
---|
1291 |
|
---|
1292 | Get/set a value indicating whether we should initialize response
|
---|
1293 | headers from the E<lt>head> section of HTML documents. The default is
|
---|
1294 | TRUE. Do not turn this off, unless you know what you are doing.
|
---|
1295 |
|
---|
1296 | =item $ua->protocols_allowed
|
---|
1297 |
|
---|
1298 | =item $ua->protocols_allowed( \@protocols )
|
---|
1299 |
|
---|
1300 | This reads (or sets) this user agent's list of protocols that the
|
---|
1301 | request methods will exclusively allow. The protocol names are case
|
---|
1302 | insensitive.
|
---|
1303 |
|
---|
1304 | For example: C<$ua-E<gt>protocols_allowed( [ 'http', 'https'] );>
|
---|
1305 | means that this user agent will I<allow only> those protocols,
|
---|
1306 | and attempts to use this user agent to access URLs with any other
|
---|
1307 | schemes (like "ftp://...") will result in a 500 error.
|
---|
1308 |
|
---|
1309 | To delete the list, call: C<$ua-E<gt>protocols_allowed(undef)>
|
---|
1310 |
|
---|
1311 | By default, an object has neither a C<protocols_allowed> list, nor a
|
---|
1312 | C<protocols_forbidden> list.
|
---|
1313 |
|
---|
1314 | Note that having a C<protocols_allowed> list causes any
|
---|
1315 | C<protocols_forbidden> list to be ignored.
|
---|
1316 |
|
---|
1317 | =item $ua->protocols_forbidden
|
---|
1318 |
|
---|
1319 | =item $ua->protocols_forbidden( \@protocols )
|
---|
1320 |
|
---|
1321 | This reads (or sets) this user agent's list of protocols that the
|
---|
1322 | request method will I<not> allow. The protocol names are case
|
---|
1323 | insensitive.
|
---|
1324 |
|
---|
1325 | For example: C<$ua-E<gt>protocols_forbidden( [ 'file', 'mailto'] );>
|
---|
1326 | means that this user agent will I<not> allow those protocols, and
|
---|
1327 | attempts to use this user agent to access URLs with those schemes
|
---|
1328 | will result in a 500 error.
|
---|
1329 |
|
---|
1330 | To delete the list, call: C<$ua-E<gt>protocols_forbidden(undef)>
|
---|
1331 |
|
---|
1332 | =item $ua->requests_redirectable
|
---|
1333 |
|
---|
1334 | =item $ua->requests_redirectable( \@requests )
|
---|
1335 |
|
---|
1336 | This reads or sets the object's list of request names that
|
---|
1337 | C<$ua-E<gt>redirect_ok(...)> will allow redirection for. By
|
---|
1338 | default, this is C<['GET', 'HEAD']>, as per RFC 2616. To
|
---|
1339 | change to include 'POST', consider:
|
---|
1340 |
|
---|
1341 | push @{ $ua->requests_redirectable }, 'POST';
|
---|
1342 |
|
---|
1343 | =item $ua->show_progress
|
---|
1344 |
|
---|
1345 | =item $ua->show_progress( $boolean )
|
---|
1346 |
|
---|
1347 | Get/set a value indicating whether a progress bar should be displayed
|
---|
1348 | on on the terminal as requests are processed. The default is FALSE.
|
---|
1349 |
|
---|
1350 | =item $ua->timeout
|
---|
1351 |
|
---|
1352 | =item $ua->timeout( $secs )
|
---|
1353 |
|
---|
1354 | Get/set the timeout value in seconds. The default timeout() value is
|
---|
1355 | 180 seconds, i.e. 3 minutes.
|
---|
1356 |
|
---|
1357 | The requests is aborted if no activity on the connection to the server
|
---|
1358 | is observed for C<timeout> seconds. This means that the time it takes
|
---|
1359 | for the complete transaction and the request() method to actually
|
---|
1360 | return might be longer.
|
---|
1361 |
|
---|
1362 | =item $ua->ssl_opts
|
---|
1363 |
|
---|
1364 | =item $ua->ssl_opts( $key )
|
---|
1365 |
|
---|
1366 | =item $ua->ssl_opts( $key => $value )
|
---|
1367 |
|
---|
1368 | Get/set the options for SSL connections. Without argument return the list
|
---|
1369 | of options keys currently set. With a single argument return the current
|
---|
1370 | value for the given option. With 2 arguments set the option value and return
|
---|
1371 | the old. Setting an option to the value C<undef> removes this option.
|
---|
1372 |
|
---|
1373 | The options that LWP relates to are:
|
---|
1374 |
|
---|
1375 | =over
|
---|
1376 |
|
---|
1377 | =item C<verify_hostname> => $bool
|
---|
1378 |
|
---|
1379 | When TRUE LWP will for secure protocol schemes ensure it connects to servers
|
---|
1380 | that have a valid certificate matching the expected hostname. If FALSE no
|
---|
1381 | checks are made and you can't be sure that you communicate with the expected peer.
|
---|
1382 | The no checks behaviour was the default for libwww-perl-5.837 and earlier releases.
|
---|
1383 |
|
---|
1384 | This option is initialized from the L<PERL_LWP_SSL_VERIFY_HOSTNAME> environment
|
---|
1385 | variable. If this environment variable isn't set; then C<verify_hostname>
|
---|
1386 | defaults to 1.
|
---|
1387 |
|
---|
1388 | =item C<SSL_ca_file> => $path
|
---|
1389 |
|
---|
1390 | The path to a file containing Certificate Authority certificates.
|
---|
1391 | A default setting for this option is provided by checking the environment
|
---|
1392 | variables C<PERL_LWP_SSL_CA_FILE> and C<HTTPS_CA_FILE> in order.
|
---|
1393 |
|
---|
1394 | =item C<SSL_ca_path> => $path
|
---|
1395 |
|
---|
1396 | The path to a directory containing files containing Certificate Authority
|
---|
1397 | certificates.
|
---|
1398 | A default setting for this option is provided by checking the environment
|
---|
1399 | variables C<PERL_LWP_SSL_CA_PATH> and C<HTTPS_CA_DIR> in order.
|
---|
1400 |
|
---|
1401 | =back
|
---|
1402 |
|
---|
1403 | Other options can be set and are processed directly by the SSL Socket implementation
|
---|
1404 | in use. See L<IO::Socket::SSL> or L<Net::SSL> for details.
|
---|
1405 |
|
---|
1406 | The libwww-perl core no longer bundles protocol plugins for SSL. You will need
|
---|
1407 | to install L<LWP::Protocol::https> separately to enable support for processing
|
---|
1408 | https-URLs.
|
---|
1409 |
|
---|
1410 | =back
|
---|
1411 |
|
---|
1412 | =head2 Proxy attributes
|
---|
1413 |
|
---|
1414 | The following methods set up when requests should be passed via a
|
---|
1415 | proxy server.
|
---|
1416 |
|
---|
1417 | =over
|
---|
1418 |
|
---|
1419 | =item $ua->proxy(\@schemes, $proxy_url)
|
---|
1420 |
|
---|
1421 | =item $ua->proxy($scheme, $proxy_url)
|
---|
1422 |
|
---|
1423 | Set/retrieve proxy URL for a scheme:
|
---|
1424 |
|
---|
1425 | $ua->proxy(['http', 'ftp'], 'http://proxy.sn.no:8001/');
|
---|
1426 | $ua->proxy('gopher', 'http://proxy.sn.no:8001/');
|
---|
1427 |
|
---|
1428 | The first form specifies that the URL is to be used for proxying of
|
---|
1429 | access methods listed in the list in the first method argument,
|
---|
1430 | i.e. 'http' and 'ftp'.
|
---|
1431 |
|
---|
1432 | The second form shows a shorthand form for specifying
|
---|
1433 | proxy URL for a single access scheme.
|
---|
1434 |
|
---|
1435 | =item $ua->no_proxy( $domain, ... )
|
---|
1436 |
|
---|
1437 | Do not proxy requests to the given domains. Calling no_proxy without
|
---|
1438 | any domains clears the list of domains. Eg:
|
---|
1439 |
|
---|
1440 | $ua->no_proxy('localhost', 'example.com');
|
---|
1441 |
|
---|
1442 | =item $ua->env_proxy
|
---|
1443 |
|
---|
1444 | Load proxy settings from *_proxy environment variables. You might
|
---|
1445 | specify proxies like this (sh-syntax):
|
---|
1446 |
|
---|
1447 | gopher_proxy=http://proxy.my.place/
|
---|
1448 | wais_proxy=http://proxy.my.place/
|
---|
1449 | no_proxy="localhost,example.com"
|
---|
1450 | export gopher_proxy wais_proxy no_proxy
|
---|
1451 |
|
---|
1452 | csh or tcsh users should use the C<setenv> command to define these
|
---|
1453 | environment variables.
|
---|
1454 |
|
---|
1455 | On systems with case insensitive environment variables there exists a
|
---|
1456 | name clash between the CGI environment variables and the C<HTTP_PROXY>
|
---|
1457 | environment variable normally picked up by env_proxy(). Because of
|
---|
1458 | this C<HTTP_PROXY> is not honored for CGI scripts. The
|
---|
1459 | C<CGI_HTTP_PROXY> environment variable can be used instead.
|
---|
1460 |
|
---|
1461 | =back
|
---|
1462 |
|
---|
1463 | =head2 Handlers
|
---|
1464 |
|
---|
1465 | Handlers are code that injected at various phases during the
|
---|
1466 | processing of requests. The following methods are provided to manage
|
---|
1467 | the active handlers:
|
---|
1468 |
|
---|
1469 | =over
|
---|
1470 |
|
---|
1471 | =item $ua->add_handler( $phase => \&cb, %matchspec )
|
---|
1472 |
|
---|
1473 | Add handler to be invoked in the given processing phase. For how to
|
---|
1474 | specify %matchspec see L<HTTP::Config/"Matching">.
|
---|
1475 |
|
---|
1476 | The possible values $phase and the corresponding callback signatures are:
|
---|
1477 |
|
---|
1478 | =over
|
---|
1479 |
|
---|
1480 | =item request_preprepare => sub { my($request, $ua, $h) = @_; ... }
|
---|
1481 |
|
---|
1482 | The handler is called before the C<request_prepare> and other standard
|
---|
1483 | initialization of of the request. This can be used to set up headers
|
---|
1484 | and attributes that the C<request_prepare> handler depends on. Proxy
|
---|
1485 | initialization should take place here; but in general don't register
|
---|
1486 | handlers for this phase.
|
---|
1487 |
|
---|
1488 | =item request_prepare => sub { my($request, $ua, $h) = @_; ... }
|
---|
1489 |
|
---|
1490 | The handler is called before the request is sent and can modify the
|
---|
1491 | request any way it see fit. This can for instance be used to add
|
---|
1492 | certain headers to specific requests.
|
---|
1493 |
|
---|
1494 | The method can assign a new request object to $_[0] to replace the
|
---|
1495 | request that is sent fully.
|
---|
1496 |
|
---|
1497 | The return value from the callback is ignored. If an exception is
|
---|
1498 | raised it will abort the request and make the request method return a
|
---|
1499 | "400 Bad request" response.
|
---|
1500 |
|
---|
1501 | =item request_send => sub { my($request, $ua, $h) = @_; ... }
|
---|
1502 |
|
---|
1503 | This handler gets a chance of handling requests before they're sent to the
|
---|
1504 | protocol handlers. It should return an HTTP::Response object if it
|
---|
1505 | wishes to terminate the processing; otherwise it should return nothing.
|
---|
1506 |
|
---|
1507 | The C<response_header> and C<response_data> handlers will not be
|
---|
1508 | invoked for this response, but the C<response_done> will be.
|
---|
1509 |
|
---|
1510 | =item response_header => sub { my($response, $ua, $h) = @_; ... }
|
---|
1511 |
|
---|
1512 | This handler is called right after the response headers have been
|
---|
1513 | received, but before any content data. The handler might set up
|
---|
1514 | handlers for data and might croak to abort the request.
|
---|
1515 |
|
---|
1516 | The handler might set the $response->{default_add_content} value to
|
---|
1517 | control if any received data should be added to the response object
|
---|
1518 | directly. This will initially be false if the $ua->request() method
|
---|
1519 | was called with a $content_file or $content_cb argument; otherwise true.
|
---|
1520 |
|
---|
1521 | =item response_data => sub { my($response, $ua, $h, $data) = @_; ... }
|
---|
1522 |
|
---|
1523 | This handler is called for each chunk of data received for the
|
---|
1524 | response. The handler might croak to abort the request.
|
---|
1525 |
|
---|
1526 | This handler needs to return a TRUE value to be called again for
|
---|
1527 | subsequent chunks for the same request.
|
---|
1528 |
|
---|
1529 | =item response_done => sub { my($response, $ua, $h) = @_; ... }
|
---|
1530 |
|
---|
1531 | The handler is called after the response has been fully received, but
|
---|
1532 | before any redirect handling is attempted. The handler can be used to
|
---|
1533 | extract information or modify the response.
|
---|
1534 |
|
---|
1535 | =item response_redirect => sub { my($response, $ua, $h) = @_; ... }
|
---|
1536 |
|
---|
1537 | The handler is called in $ua->request after C<response_done>. If the
|
---|
1538 | handler returns an HTTP::Request object we'll start over with processing
|
---|
1539 | this request instead.
|
---|
1540 |
|
---|
1541 | =back
|
---|
1542 |
|
---|
1543 | =item $ua->remove_handler( undef, %matchspec )
|
---|
1544 |
|
---|
1545 | =item $ua->remove_handler( $phase, %matchspec )
|
---|
1546 |
|
---|
1547 | Remove handlers that match the given %matchspec. If $phase is not
|
---|
1548 | provided remove handlers from all phases.
|
---|
1549 |
|
---|
1550 | Be careful as calling this function with %matchspec that is not not
|
---|
1551 | specific enough can remove handlers not owned by you. It's probably
|
---|
1552 | better to use the set_my_handler() method instead.
|
---|
1553 |
|
---|
1554 | The removed handlers are returned.
|
---|
1555 |
|
---|
1556 | =item $ua->set_my_handler( $phase, $cb, %matchspec )
|
---|
1557 |
|
---|
1558 | Set handlers private to the executing subroutine. Works by defaulting
|
---|
1559 | an C<owner> field to the %matchspec that holds the name of the called
|
---|
1560 | subroutine. You might pass an explicit C<owner> to override this.
|
---|
1561 |
|
---|
1562 | If $cb is passed as C<undef>, remove the handler.
|
---|
1563 |
|
---|
1564 | =item $ua->get_my_handler( $phase, %matchspec )
|
---|
1565 |
|
---|
1566 | =item $ua->get_my_handler( $phase, %matchspec, $init )
|
---|
1567 |
|
---|
1568 | Will retrieve the matching handler as hash ref.
|
---|
1569 |
|
---|
1570 | If C<$init> is passed passed as a TRUE value, create and add the
|
---|
1571 | handler if it's not found. If $init is a subroutine reference, then
|
---|
1572 | it's called with the created handler hash as argument. This sub might
|
---|
1573 | populate the hash with extra fields; especially the callback. If
|
---|
1574 | $init is a hash reference, merge the hashes.
|
---|
1575 |
|
---|
1576 | =item $ua->handlers( $phase, $request )
|
---|
1577 |
|
---|
1578 | =item $ua->handlers( $phase, $response )
|
---|
1579 |
|
---|
1580 | Returns the handlers that apply to the given request or response at
|
---|
1581 | the given processing phase.
|
---|
1582 |
|
---|
1583 | =back
|
---|
1584 |
|
---|
1585 | =head1 REQUEST METHODS
|
---|
1586 |
|
---|
1587 | The methods described in this section are used to dispatch requests
|
---|
1588 | via the user agent. The following request methods are provided:
|
---|
1589 |
|
---|
1590 | =over
|
---|
1591 |
|
---|
1592 | =item $ua->get( $url )
|
---|
1593 |
|
---|
1594 | =item $ua->get( $url , $field_name => $value, ... )
|
---|
1595 |
|
---|
1596 | This method will dispatch a C<GET> request on the given $url. Further
|
---|
1597 | arguments can be given to initialize the headers of the request. These
|
---|
1598 | are given as separate name/value pairs. The return value is a
|
---|
1599 | response object. See L<HTTP::Response> for a description of the
|
---|
1600 | interface it provides.
|
---|
1601 |
|
---|
1602 | There will still be a response object returned when LWP can't connect to the
|
---|
1603 | server specified in the URL or when other failures in protocol handlers occur.
|
---|
1604 | These internal responses use the standard HTTP status codes, so the responses
|
---|
1605 | can't be differentiated by testing the response status code alone. Error
|
---|
1606 | responses that LWP generates internally will have the "Client-Warning" header
|
---|
1607 | set to the value "Internal response". If you need to differentiate these
|
---|
1608 | internal responses from responses that a remote server actually generates, you
|
---|
1609 | need to test this header value.
|
---|
1610 |
|
---|
1611 | Fields names that start with ":" are special. These will not
|
---|
1612 | initialize headers of the request but will determine how the response
|
---|
1613 | content is treated. The following special field names are recognized:
|
---|
1614 |
|
---|
1615 | :content_file => $filename
|
---|
1616 | :content_cb => \&callback
|
---|
1617 | :read_size_hint => $bytes
|
---|
1618 |
|
---|
1619 | If a $filename is provided with the C<:content_file> option, then the
|
---|
1620 | response content will be saved here instead of in the response
|
---|
1621 | object. If a callback is provided with the C<:content_cb> option then
|
---|
1622 | this function will be called for each chunk of the response content as
|
---|
1623 | it is received from the server. If neither of these options are
|
---|
1624 | given, then the response content will accumulate in the response
|
---|
1625 | object itself. This might not be suitable for very large response
|
---|
1626 | bodies. Only one of C<:content_file> or C<:content_cb> can be
|
---|
1627 | specified. The content of unsuccessful responses will always
|
---|
1628 | accumulate in the response object itself, regardless of the
|
---|
1629 | C<:content_file> or C<:content_cb> options passed in.
|
---|
1630 |
|
---|
1631 | The C<:read_size_hint> option is passed to the protocol module which
|
---|
1632 | will try to read data from the server in chunks of this size. A
|
---|
1633 | smaller value for the C<:read_size_hint> will result in a higher
|
---|
1634 | number of callback invocations.
|
---|
1635 |
|
---|
1636 | The callback function is called with 3 arguments: a chunk of data, a
|
---|
1637 | reference to the response object, and a reference to the protocol
|
---|
1638 | object. The callback can abort the request by invoking die(). The
|
---|
1639 | exception message will show up as the "X-Died" header field in the
|
---|
1640 | response returned by the get() function.
|
---|
1641 |
|
---|
1642 | =item $ua->head( $url )
|
---|
1643 |
|
---|
1644 | =item $ua->head( $url , $field_name => $value, ... )
|
---|
1645 |
|
---|
1646 | This method will dispatch a C<HEAD> request on the given $url.
|
---|
1647 | Otherwise it works like the get() method described above.
|
---|
1648 |
|
---|
1649 | =item $ua->post( $url, \%form )
|
---|
1650 |
|
---|
1651 | =item $ua->post( $url, \@form )
|
---|
1652 |
|
---|
1653 | =item $ua->post( $url, \%form, $field_name => $value, ... )
|
---|
1654 |
|
---|
1655 | =item $ua->post( $url, $field_name => $value,... Content => \%form )
|
---|
1656 |
|
---|
1657 | =item $ua->post( $url, $field_name => $value,... Content => \@form )
|
---|
1658 |
|
---|
1659 | =item $ua->post( $url, $field_name => $value,... Content => $content )
|
---|
1660 |
|
---|
1661 | This method will dispatch a C<POST> request on the given $url, with
|
---|
1662 | %form or @form providing the key/value pairs for the fill-in form
|
---|
1663 | content. Additional headers and content options are the same as for
|
---|
1664 | the get() method.
|
---|
1665 |
|
---|
1666 | This method will use the POST() function from C<HTTP::Request::Common>
|
---|
1667 | to build the request. See L<HTTP::Request::Common> for a details on
|
---|
1668 | how to pass form content and other advanced features.
|
---|
1669 |
|
---|
1670 | =item $ua->put( $url, \%form )
|
---|
1671 |
|
---|
1672 | =item $ua->put( $url, \@form )
|
---|
1673 |
|
---|
1674 | =item $ua->put( $url, \%form, $field_name => $value, ... )
|
---|
1675 |
|
---|
1676 | =item $ua->put( $url, $field_name => $value,... Content => \%form )
|
---|
1677 |
|
---|
1678 | =item $ua->put( $url, $field_name => $value,... Content => \@form )
|
---|
1679 |
|
---|
1680 | =item $ua->put( $url, $field_name => $value,... Content => $content )
|
---|
1681 |
|
---|
1682 | This method will dispatch a C<PUT> request on the given $url, with
|
---|
1683 | %form or @form providing the key/value pairs for the fill-in form
|
---|
1684 | content. Additional headers and content options are the same as for
|
---|
1685 | the get() method.
|
---|
1686 |
|
---|
1687 | This method will use the PUT() function from C<HTTP::Request::Common>
|
---|
1688 | to build the request. See L<HTTP::Request::Common> for a details on
|
---|
1689 | how to pass form content and other advanced features.
|
---|
1690 |
|
---|
1691 | =item $ua->delete( $url )
|
---|
1692 |
|
---|
1693 | =item $ua->delete( $url, $field_name => $value, ... )
|
---|
1694 |
|
---|
1695 | This method will dispatch a C<DELETE> request on the given $url. Additional
|
---|
1696 | headers and content options are the same as for the get() method.
|
---|
1697 |
|
---|
1698 | This method will use the DELETE() function from C<HTTP::Request::Common>
|
---|
1699 | to build the request. See L<HTTP::Request::Common> for a details on
|
---|
1700 | how to pass form content and other advanced features.
|
---|
1701 |
|
---|
1702 | =item $ua->mirror( $url, $filename )
|
---|
1703 |
|
---|
1704 | This method will get the document identified by $url and store it in
|
---|
1705 | file called $filename. If the file already exists, then the request
|
---|
1706 | will contain an "If-Modified-Since" header matching the modification
|
---|
1707 | time of the file. If the document on the server has not changed since
|
---|
1708 | this time, then nothing happens. If the document has been updated, it
|
---|
1709 | will be downloaded again. The modification time of the file will be
|
---|
1710 | forced to match that of the server.
|
---|
1711 |
|
---|
1712 | The return value is the the response object.
|
---|
1713 |
|
---|
1714 | =item $ua->request( $request )
|
---|
1715 |
|
---|
1716 | =item $ua->request( $request, $content_file )
|
---|
1717 |
|
---|
1718 | =item $ua->request( $request, $content_cb )
|
---|
1719 |
|
---|
1720 | =item $ua->request( $request, $content_cb, $read_size_hint )
|
---|
1721 |
|
---|
1722 | This method will dispatch the given $request object. Normally this
|
---|
1723 | will be an instance of the C<HTTP::Request> class, but any object with
|
---|
1724 | a similar interface will do. The return value is a response object.
|
---|
1725 | See L<HTTP::Request> and L<HTTP::Response> for a description of the
|
---|
1726 | interface provided by these classes.
|
---|
1727 |
|
---|
1728 | The request() method will process redirects and authentication
|
---|
1729 | responses transparently. This means that it may actually send several
|
---|
1730 | simple requests via the simple_request() method described below.
|
---|
1731 |
|
---|
1732 | The request methods described above; get(), head(), post() and
|
---|
1733 | mirror(), will all dispatch the request they build via this method.
|
---|
1734 | They are convenience methods that simply hides the creation of the
|
---|
1735 | request object for you.
|
---|
1736 |
|
---|
1737 | The $content_file, $content_cb and $read_size_hint all correspond to
|
---|
1738 | options described with the get() method above.
|
---|
1739 |
|
---|
1740 | You are allowed to use a CODE reference as C<content> in the request
|
---|
1741 | object passed in. The C<content> function should return the content
|
---|
1742 | when called. The content can be returned in chunks. The content
|
---|
1743 | function will be invoked repeatedly until it return an empty string to
|
---|
1744 | signal that there is no more content.
|
---|
1745 |
|
---|
1746 | =item $ua->simple_request( $request )
|
---|
1747 |
|
---|
1748 | =item $ua->simple_request( $request, $content_file )
|
---|
1749 |
|
---|
1750 | =item $ua->simple_request( $request, $content_cb )
|
---|
1751 |
|
---|
1752 | =item $ua->simple_request( $request, $content_cb, $read_size_hint )
|
---|
1753 |
|
---|
1754 | This method dispatches a single request and returns the response
|
---|
1755 | received. Arguments are the same as for request() described above.
|
---|
1756 |
|
---|
1757 | The difference from request() is that simple_request() will not try to
|
---|
1758 | handle redirects or authentication responses. The request() method
|
---|
1759 | will in fact invoke this method for each simple request it sends.
|
---|
1760 |
|
---|
1761 | =item $ua->is_online
|
---|
1762 |
|
---|
1763 | Tries to determine if you have access to the Internet. Returns
|
---|
1764 | TRUE if the built-in heuristics determine that the user agent is
|
---|
1765 | able to access the Internet (over HTTP). See also L<LWP::Online>.
|
---|
1766 |
|
---|
1767 | =item $ua->is_protocol_supported( $scheme )
|
---|
1768 |
|
---|
1769 | You can use this method to test whether this user agent object supports the
|
---|
1770 | specified C<scheme>. (The C<scheme> might be a string (like 'http' or
|
---|
1771 | 'ftp') or it might be an URI object reference.)
|
---|
1772 |
|
---|
1773 | Whether a scheme is supported, is determined by the user agent's
|
---|
1774 | C<protocols_allowed> or C<protocols_forbidden> lists (if any), and by
|
---|
1775 | the capabilities of LWP. I.e., this will return TRUE only if LWP
|
---|
1776 | supports this protocol I<and> it's permitted for this particular
|
---|
1777 | object.
|
---|
1778 |
|
---|
1779 | =back
|
---|
1780 |
|
---|
1781 | =head2 Callback methods
|
---|
1782 |
|
---|
1783 | The following methods will be invoked as requests are processed. These
|
---|
1784 | methods are documented here because subclasses of C<LWP::UserAgent>
|
---|
1785 | might want to override their behaviour.
|
---|
1786 |
|
---|
1787 | =over
|
---|
1788 |
|
---|
1789 | =item $ua->prepare_request( $request )
|
---|
1790 |
|
---|
1791 | This method is invoked by simple_request(). Its task is to modify the
|
---|
1792 | given $request object by setting up various headers based on the
|
---|
1793 | attributes of the user agent. The return value should normally be the
|
---|
1794 | $request object passed in. If a different request object is returned
|
---|
1795 | it will be the one actually processed.
|
---|
1796 |
|
---|
1797 | The headers affected by the base implementation are; "User-Agent",
|
---|
1798 | "From", "Range" and "Cookie".
|
---|
1799 |
|
---|
1800 | =item $ua->redirect_ok( $prospective_request, $response )
|
---|
1801 |
|
---|
1802 | This method is called by request() before it tries to follow a
|
---|
1803 | redirection to the request in $response. This should return a TRUE
|
---|
1804 | value if this redirection is permissible. The $prospective_request
|
---|
1805 | will be the request to be sent if this method returns TRUE.
|
---|
1806 |
|
---|
1807 | The base implementation will return FALSE unless the method
|
---|
1808 | is in the object's C<requests_redirectable> list,
|
---|
1809 | FALSE if the proposed redirection is to a "file://..."
|
---|
1810 | URL, and TRUE otherwise.
|
---|
1811 |
|
---|
1812 | =item $ua->get_basic_credentials( $realm, $uri, $isproxy )
|
---|
1813 |
|
---|
1814 | This is called by request() to retrieve credentials for documents
|
---|
1815 | protected by Basic or Digest Authentication. The arguments passed in
|
---|
1816 | is the $realm provided by the server, the $uri requested and a boolean
|
---|
1817 | flag to indicate if this is authentication against a proxy server.
|
---|
1818 |
|
---|
1819 | The method should return a username and password. It should return an
|
---|
1820 | empty list to abort the authentication resolution attempt. Subclasses
|
---|
1821 | can override this method to prompt the user for the information. An
|
---|
1822 | example of this can be found in C<lwp-request> program distributed
|
---|
1823 | with this library.
|
---|
1824 |
|
---|
1825 | The base implementation simply checks a set of pre-stored member
|
---|
1826 | variables, set up with the credentials() method.
|
---|
1827 |
|
---|
1828 | =item $ua->progress( $status, $request_or_response )
|
---|
1829 |
|
---|
1830 | This is called frequently as the response is received regardless of
|
---|
1831 | how the content is processed. The method is called with $status
|
---|
1832 | "begin" at the start of processing the request and with $state "end"
|
---|
1833 | before the request method returns. In between these $status will be
|
---|
1834 | the fraction of the response currently received or the string "tick"
|
---|
1835 | if the fraction can't be calculated.
|
---|
1836 |
|
---|
1837 | When $status is "begin" the second argument is the request object,
|
---|
1838 | otherwise it is the response object.
|
---|
1839 |
|
---|
1840 | =back
|
---|
1841 |
|
---|
1842 | =head1 SEE ALSO
|
---|
1843 |
|
---|
1844 | See L<LWP> for a complete overview of libwww-perl5. See L<lwpcook>
|
---|
1845 | and the scripts F<lwp-request> and F<lwp-download> for examples of
|
---|
1846 | usage.
|
---|
1847 |
|
---|
1848 | See L<HTTP::Request> and L<HTTP::Response> for a description of the
|
---|
1849 | message objects dispatched and received. See L<HTTP::Request::Common>
|
---|
1850 | and L<HTML::Form> for other ways to build request objects.
|
---|
1851 |
|
---|
1852 | See L<WWW::Mechanize> and L<WWW::Search> for examples of more
|
---|
1853 | specialized user agents based on C<LWP::UserAgent>.
|
---|
1854 |
|
---|
1855 | =head1 COPYRIGHT
|
---|
1856 |
|
---|
1857 | Copyright 1995-2009 Gisle Aas.
|
---|
1858 |
|
---|
1859 | This library is free software; you can redistribute it and/or
|
---|
1860 | modify it under the same terms as Perl itself.
|
---|