source: main/trunk/greenstone2/perllib/cpan/LWP/UserAgent.pm@ 27174

Last change on this file since 27174 was 27174, checked in by davidb, 11 years ago

Perl modules from CPAN that are used in supporting activate.pl, but not part of the Perl core. Only PMs included.

File size: 58.4 KB
Line 
1package LWP::UserAgent;
2
3use strict;
4use vars qw(@ISA $VERSION);
5
6require LWP::MemberMixin;
7@ISA = qw(LWP::MemberMixin);
8$VERSION = "6.05";
9
10use HTTP::Request ();
11use HTTP::Response ();
12use HTTP::Date ();
13
14use LWP ();
15use LWP::Protocol ();
16
17use Carp ();
18
19
20sub new
21{
22 # Check for common user mistake
23 Carp::croak("Options to LWP::UserAgent should be key/value pairs, not hash reference")
24 if ref($_[1]) eq 'HASH';
25
26 my($class, %cnf) = @_;
27
28 my $agent = delete $cnf{agent};
29 my $from = delete $cnf{from};
30 my $def_headers = delete $cnf{default_headers};
31 my $timeout = delete $cnf{timeout};
32 $timeout = 3*60 unless defined $timeout;
33 my $local_address = delete $cnf{local_address};
34 my $ssl_opts = delete $cnf{ssl_opts} || {};
35 unless (exists $ssl_opts->{verify_hostname}) {
36 # The processing of HTTPS_CA_* below is for compatibility with Crypt::SSLeay
37 if (exists $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME}) {
38 $ssl_opts->{verify_hostname} = $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME};
39 }
40 elsif ($ENV{HTTPS_CA_FILE} || $ENV{HTTPS_CA_DIR}) {
41 # Crypt-SSLeay compatibility (verify peer certificate; but not the hostname)
42 $ssl_opts->{verify_hostname} = 0;
43 $ssl_opts->{SSL_verify_mode} = 1;
44 }
45 else {
46 $ssl_opts->{verify_hostname} = 1;
47 }
48 }
49 unless (exists $ssl_opts->{SSL_ca_file}) {
50 if (my $ca_file = $ENV{PERL_LWP_SSL_CA_FILE} || $ENV{HTTPS_CA_FILE}) {
51 $ssl_opts->{SSL_ca_file} = $ca_file;
52 }
53 }
54 unless (exists $ssl_opts->{SSL_ca_path}) {
55 if (my $ca_path = $ENV{PERL_LWP_SSL_CA_PATH} || $ENV{HTTPS_CA_DIR}) {
56 $ssl_opts->{SSL_ca_path} = $ca_path;
57 }
58 }
59 my $use_eval = delete $cnf{use_eval};
60 $use_eval = 1 unless defined $use_eval;
61 my $parse_head = delete $cnf{parse_head};
62 $parse_head = 1 unless defined $parse_head;
63 my $show_progress = delete $cnf{show_progress};
64 my $max_size = delete $cnf{max_size};
65 my $max_redirect = delete $cnf{max_redirect};
66 $max_redirect = 7 unless defined $max_redirect;
67 my $env_proxy = exists $cnf{env_proxy} ? delete $cnf{env_proxy} : $ENV{PERL_LWP_ENV_PROXY};
68
69 my $cookie_jar = delete $cnf{cookie_jar};
70 my $conn_cache = delete $cnf{conn_cache};
71 my $keep_alive = delete $cnf{keep_alive};
72
73 Carp::croak("Can't mix conn_cache and keep_alive")
74 if $conn_cache && $keep_alive;
75
76 my $protocols_allowed = delete $cnf{protocols_allowed};
77 my $protocols_forbidden = delete $cnf{protocols_forbidden};
78
79 my $requests_redirectable = delete $cnf{requests_redirectable};
80 $requests_redirectable = ['GET', 'HEAD']
81 unless defined $requests_redirectable;
82
83 # Actually ""s are just as good as 0's, but for concision we'll just say:
84 Carp::croak("protocols_allowed has to be an arrayref or 0, not \"$protocols_allowed\"!")
85 if $protocols_allowed and ref($protocols_allowed) ne 'ARRAY';
86 Carp::croak("protocols_forbidden has to be an arrayref or 0, not \"$protocols_forbidden\"!")
87 if $protocols_forbidden and ref($protocols_forbidden) ne 'ARRAY';
88 Carp::croak("requests_redirectable has to be an arrayref or 0, not \"$requests_redirectable\"!")
89 if $requests_redirectable and ref($requests_redirectable) ne 'ARRAY';
90
91
92 if (%cnf && $^W) {
93 Carp::carp("Unrecognized LWP::UserAgent options: @{[sort keys %cnf]}");
94 }
95
96 my $self = bless {
97 def_headers => $def_headers,
98 timeout => $timeout,
99 local_address => $local_address,
100 ssl_opts => $ssl_opts,
101 use_eval => $use_eval,
102 show_progress=> $show_progress,
103 max_size => $max_size,
104 max_redirect => $max_redirect,
105 proxy => {},
106 no_proxy => [],
107 protocols_allowed => $protocols_allowed,
108 protocols_forbidden => $protocols_forbidden,
109 requests_redirectable => $requests_redirectable,
110 }, $class;
111
112 $self->agent(defined($agent) ? $agent : $class->_agent)
113 if defined($agent) || !$def_headers || !$def_headers->header("User-Agent");
114 $self->from($from) if $from;
115 $self->cookie_jar($cookie_jar) if $cookie_jar;
116 $self->parse_head($parse_head);
117 $self->env_proxy if $env_proxy;
118
119 $self->protocols_allowed( $protocols_allowed ) if $protocols_allowed;
120 $self->protocols_forbidden($protocols_forbidden) if $protocols_forbidden;
121
122 if ($keep_alive) {
123 $conn_cache ||= { total_capacity => $keep_alive };
124 }
125 $self->conn_cache($conn_cache) if $conn_cache;
126
127 return $self;
128}
129
130
131sub send_request
132{
133 my($self, $request, $arg, $size) = @_;
134 my($method, $url) = ($request->method, $request->uri);
135 my $scheme = $url->scheme;
136
137 local($SIG{__DIE__}); # protect against user defined die handlers
138
139 $self->progress("begin", $request);
140
141 my $response = $self->run_handlers("request_send", $request);
142
143 unless ($response) {
144 my $protocol;
145
146 {
147 # Honor object-specific restrictions by forcing protocol objects
148 # into class LWP::Protocol::nogo.
149 my $x;
150 if($x = $self->protocols_allowed) {
151 if (grep lc($_) eq $scheme, @$x) {
152 }
153 else {
154 require LWP::Protocol::nogo;
155 $protocol = LWP::Protocol::nogo->new;
156 }
157 }
158 elsif ($x = $self->protocols_forbidden) {
159 if(grep lc($_) eq $scheme, @$x) {
160 require LWP::Protocol::nogo;
161 $protocol = LWP::Protocol::nogo->new;
162 }
163 }
164 # else fall thru and create the protocol object normally
165 }
166
167 # Locate protocol to use
168 my $proxy = $request->{proxy};
169 if ($proxy) {
170 $scheme = $proxy->scheme;
171 }
172
173 unless ($protocol) {
174 $protocol = eval { LWP::Protocol::create($scheme, $self) };
175 if ($@) {
176 $@ =~ s/ at .* line \d+.*//s; # remove file/line number
177 $response = _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
178 if ($scheme eq "https") {
179 $response->message($response->message . " (LWP::Protocol::https not installed)");
180 $response->content_type("text/plain");
181 $response->content(<<EOT);
182LWP will support https URLs if the LWP::Protocol::https module
183is installed.
184EOT
185 }
186 }
187 }
188
189 if (!$response && $self->{use_eval}) {
190 # we eval, and turn dies into responses below
191 eval {
192 $response = $protocol->request($request, $proxy, $arg, $size, $self->{timeout}) ||
193 die "No response returned by $protocol";
194 };
195 if ($@) {
196 if (UNIVERSAL::isa($@, "HTTP::Response")) {
197 $response = $@;
198 $response->request($request);
199 }
200 else {
201 my $full = $@;
202 (my $status = $@) =~ s/\n.*//s;
203 $status =~ s/ at .* line \d+.*//s; # remove file/line number
204 my $code = ($status =~ s/^(\d\d\d)\s+//) ? $1 : &HTTP::Status::RC_INTERNAL_SERVER_ERROR;
205 $response = _new_response($request, $code, $status, $full);
206 }
207 }
208 }
209 elsif (!$response) {
210 $response = $protocol->request($request, $proxy,
211 $arg, $size, $self->{timeout});
212 # XXX: Should we die unless $response->is_success ???
213 }
214 }
215
216 $response->request($request); # record request for reference
217 $response->header("Client-Date" => HTTP::Date::time2str(time));
218
219 $self->run_handlers("response_done", $response);
220
221 $self->progress("end", $response);
222 return $response;
223}
224
225
226sub prepare_request
227{
228 my($self, $request) = @_;
229 die "Method missing" unless $request->method;
230 my $url = $request->uri;
231 die "URL missing" unless $url;
232 die "URL must be absolute" unless $url->scheme;
233
234 $self->run_handlers("request_preprepare", $request);
235
236 if (my $def_headers = $self->{def_headers}) {
237 for my $h ($def_headers->header_field_names) {
238 $request->init_header($h => [$def_headers->header($h)]);
239 }
240 }
241
242 $self->run_handlers("request_prepare", $request);
243
244 return $request;
245}
246
247
248sub simple_request
249{
250 my($self, $request, $arg, $size) = @_;
251
252 # sanity check the request passed in
253 if (defined $request) {
254 if (ref $request) {
255 Carp::croak("You need a request object, not a " . ref($request) . " object")
256 if ref($request) eq 'ARRAY' or ref($request) eq 'HASH' or
257 !$request->can('method') or !$request->can('uri');
258 }
259 else {
260 Carp::croak("You need a request object, not '$request'");
261 }
262 }
263 else {
264 Carp::croak("No request object passed in");
265 }
266
267 eval {
268 $request = $self->prepare_request($request);
269 };
270 if ($@) {
271 $@ =~ s/ at .* line \d+.*//s; # remove file/line number
272 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, $@);
273 }
274 return $self->send_request($request, $arg, $size);
275}
276
277
278sub request
279{
280 my($self, $request, $arg, $size, $previous) = @_;
281
282 my $response = $self->simple_request($request, $arg, $size);
283 $response->previous($previous) if $previous;
284
285 if ($response->redirects >= $self->{max_redirect}) {
286 $response->header("Client-Warning" =>
287 "Redirect loop detected (max_redirect = $self->{max_redirect})");
288 return $response;
289 }
290
291 if (my $req = $self->run_handlers("response_redirect", $response)) {
292 return $self->request($req, $arg, $size, $response);
293 }
294
295 my $code = $response->code;
296
297 if ($code == &HTTP::Status::RC_MOVED_PERMANENTLY or
298 $code == &HTTP::Status::RC_FOUND or
299 $code == &HTTP::Status::RC_SEE_OTHER or
300 $code == &HTTP::Status::RC_TEMPORARY_REDIRECT)
301 {
302 my $referral = $request->clone;
303
304 # These headers should never be forwarded
305 $referral->remove_header('Host', 'Cookie');
306
307 if ($referral->header('Referer') &&
308 $request->uri->scheme eq 'https' &&
309 $referral->uri->scheme eq 'http')
310 {
311 # RFC 2616, section 15.1.3.
312 # https -> http redirect, suppressing Referer
313 $referral->remove_header('Referer');
314 }
315
316 if ($code == &HTTP::Status::RC_SEE_OTHER ||
317 $code == &HTTP::Status::RC_FOUND)
318 {
319 my $method = uc($referral->method);
320 unless ($method eq "GET" || $method eq "HEAD") {
321 $referral->method("GET");
322 $referral->content("");
323 $referral->remove_content_headers;
324 }
325 }
326
327 # And then we update the URL based on the Location:-header.
328 my $referral_uri = $response->header('Location');
329 {
330 # Some servers erroneously return a relative URL for redirects,
331 # so make it absolute if it not already is.
332 local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1;
333 my $base = $response->base;
334 $referral_uri = "" unless defined $referral_uri;
335 $referral_uri = $HTTP::URI_CLASS->new($referral_uri, $base)
336 ->abs($base);
337 }
338 $referral->uri($referral_uri);
339
340 return $response unless $self->redirect_ok($referral, $response);
341 return $self->request($referral, $arg, $size, $response);
342
343 }
344 elsif ($code == &HTTP::Status::RC_UNAUTHORIZED ||
345 $code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED
346 )
347 {
348 my $proxy = ($code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED);
349 my $ch_header = $proxy ? "Proxy-Authenticate" : "WWW-Authenticate";
350 my @challenge = $response->header($ch_header);
351 unless (@challenge) {
352 $response->header("Client-Warning" =>
353 "Missing Authenticate header");
354 return $response;
355 }
356
357 require HTTP::Headers::Util;
358 CHALLENGE: for my $challenge (@challenge) {
359 $challenge =~ tr/,/;/; # "," is used to separate auth-params!!
360 ($challenge) = HTTP::Headers::Util::split_header_words($challenge);
361 my $scheme = shift(@$challenge);
362 shift(@$challenge); # no value
363 $challenge = { @$challenge }; # make rest into a hash
364
365 unless ($scheme =~ /^([a-z]+(?:-[a-z]+)*)$/) {
366 $response->header("Client-Warning" =>
367 "Bad authentication scheme '$scheme'");
368 return $response;
369 }
370 $scheme = $1; # untainted now
371 my $class = "LWP::Authen::\u$scheme";
372 $class =~ s/-/_/g;
373
374 no strict 'refs';
375 unless (%{"$class\::"}) {
376 # try to load it
377 eval "require $class";
378 if ($@) {
379 if ($@ =~ /^Can\'t locate/) {
380 $response->header("Client-Warning" =>
381 "Unsupported authentication scheme '$scheme'");
382 }
383 else {
384 $response->header("Client-Warning" => $@);
385 }
386 next CHALLENGE;
387 }
388 }
389 unless ($class->can("authenticate")) {
390 $response->header("Client-Warning" =>
391 "Unsupported authentication scheme '$scheme'");
392 next CHALLENGE;
393 }
394 return $class->authenticate($self, $proxy, $challenge, $response,
395 $request, $arg, $size);
396 }
397 return $response;
398 }
399 return $response;
400}
401
402
403#
404# Now the shortcuts...
405#
406sub get {
407 require HTTP::Request::Common;
408 my($self, @parameters) = @_;
409 my @suff = $self->_process_colonic_headers(\@parameters,1);
410 return $self->request( HTTP::Request::Common::GET( @parameters ), @suff );
411}
412
413
414sub post {
415 require HTTP::Request::Common;
416 my($self, @parameters) = @_;
417 my @suff = $self->_process_colonic_headers(\@parameters, (ref($parameters[1]) ? 2 : 1));
418 return $self->request( HTTP::Request::Common::POST( @parameters ), @suff );
419}
420
421
422sub head {
423 require HTTP::Request::Common;
424 my($self, @parameters) = @_;
425 my @suff = $self->_process_colonic_headers(\@parameters,1);
426 return $self->request( HTTP::Request::Common::HEAD( @parameters ), @suff );
427}
428
429
430sub put {
431 require HTTP::Request::Common;
432 my($self, @parameters) = @_;
433 my @suff = $self->_process_colonic_headers(\@parameters, (ref($parameters[1]) ? 2 : 1));
434 return $self->request( HTTP::Request::Common::PUT( @parameters ), @suff );
435}
436
437
438sub delete {
439 require HTTP::Request::Common;
440 my($self, @parameters) = @_;
441 my @suff = $self->_process_colonic_headers(\@parameters,1);
442 return $self->request( HTTP::Request::Common::DELETE( @parameters ), @suff );
443}
444
445
446sub _process_colonic_headers {
447 # Process :content_cb / :content_file / :read_size_hint headers.
448 my($self, $args, $start_index) = @_;
449
450 my($arg, $size);
451 for(my $i = $start_index; $i < @$args; $i += 2) {
452 next unless defined $args->[$i];
453
454 #printf "Considering %s => %s\n", $args->[$i], $args->[$i + 1];
455
456 if($args->[$i] eq ':content_cb') {
457 # Some sanity-checking...
458 $arg = $args->[$i + 1];
459 Carp::croak("A :content_cb value can't be undef") unless defined $arg;
460 Carp::croak("A :content_cb value must be a coderef")
461 unless ref $arg and UNIVERSAL::isa($arg, 'CODE');
462
463 }
464 elsif ($args->[$i] eq ':content_file') {
465 $arg = $args->[$i + 1];
466
467 # Some sanity-checking...
468 Carp::croak("A :content_file value can't be undef")
469 unless defined $arg;
470 Carp::croak("A :content_file value can't be a reference")
471 if ref $arg;
472 Carp::croak("A :content_file value can't be \"\"")
473 unless length $arg;
474
475 }
476 elsif ($args->[$i] eq ':read_size_hint') {
477 $size = $args->[$i + 1];
478 # Bother checking it?
479
480 }
481 else {
482 next;
483 }
484 splice @$args, $i, 2;
485 $i -= 2;
486 }
487
488 # And return a suitable suffix-list for request(REQ,...)
489
490 return unless defined $arg;
491 return $arg, $size if defined $size;
492 return $arg;
493}
494
495
496sub is_online {
497 my $self = shift;
498 return 1 if $self->get("http://www.msftncsi.com/ncsi.txt")->content eq "Microsoft NCSI";
499 return 1 if $self->get("http://www.apple.com")->content =~ m,<title>Apple</title>,;
500 return 0;
501}
502
503
504my @ANI = qw(- \ | /);
505
506sub progress {
507 my($self, $status, $m) = @_;
508 return unless $self->{show_progress};
509
510 local($,, $\);
511 if ($status eq "begin") {
512 print STDERR "** ", $m->method, " ", $m->uri, " ==> ";
513 $self->{progress_start} = time;
514 $self->{progress_lastp} = "";
515 $self->{progress_ani} = 0;
516 }
517 elsif ($status eq "end") {
518 delete $self->{progress_lastp};
519 delete $self->{progress_ani};
520 print STDERR $m->status_line;
521 my $t = time - delete $self->{progress_start};
522 print STDERR " (${t}s)" if $t;
523 print STDERR "\n";
524 }
525 elsif ($status eq "tick") {
526 print STDERR "$ANI[$self->{progress_ani}++]\b";
527 $self->{progress_ani} %= @ANI;
528 }
529 else {
530 my $p = sprintf "%3.0f%%", $status * 100;
531 return if $p eq $self->{progress_lastp};
532 print STDERR "$p\b\b\b\b";
533 $self->{progress_lastp} = $p;
534 }
535 STDERR->flush;
536}
537
538
539#
540# This whole allow/forbid thing is based on man 1 at's way of doing things.
541#
542sub is_protocol_supported
543{
544 my($self, $scheme) = @_;
545 if (ref $scheme) {
546 # assume we got a reference to an URI object
547 $scheme = $scheme->scheme;
548 }
549 else {
550 Carp::croak("Illegal scheme '$scheme' passed to is_protocol_supported")
551 if $scheme =~ /\W/;
552 $scheme = lc $scheme;
553 }
554
555 my $x;
556 if(ref($self) and $x = $self->protocols_allowed) {
557 return 0 unless grep lc($_) eq $scheme, @$x;
558 }
559 elsif (ref($self) and $x = $self->protocols_forbidden) {
560 return 0 if grep lc($_) eq $scheme, @$x;
561 }
562
563 local($SIG{__DIE__}); # protect against user defined die handlers
564 $x = LWP::Protocol::implementor($scheme);
565 return 1 if $x and $x ne 'LWP::Protocol::nogo';
566 return 0;
567}
568
569
570sub protocols_allowed { shift->_elem('protocols_allowed' , @_) }
571sub protocols_forbidden { shift->_elem('protocols_forbidden' , @_) }
572sub requests_redirectable { shift->_elem('requests_redirectable', @_) }
573
574
575sub redirect_ok
576{
577 # RFC 2616, section 10.3.2 and 10.3.3 say:
578 # If the 30[12] status code is received in response to a request other
579 # than GET or HEAD, the user agent MUST NOT automatically redirect the
580 # request unless it can be confirmed by the user, since this might
581 # change the conditions under which the request was issued.
582
583 # Note that this routine used to be just:
584 # return 0 if $_[1]->method eq "POST"; return 1;
585
586 my($self, $new_request, $response) = @_;
587 my $method = $response->request->method;
588 return 0 unless grep $_ eq $method,
589 @{ $self->requests_redirectable || [] };
590
591 if ($new_request->uri->scheme eq 'file') {
592 $response->header("Client-Warning" =>
593 "Can't redirect to a file:// URL!");
594 return 0;
595 }
596
597 # Otherwise it's apparently okay...
598 return 1;
599}
600
601
602sub credentials
603{
604 my $self = shift;
605 my $netloc = lc(shift);
606 my $realm = shift || "";
607 my $old = $self->{basic_authentication}{$netloc}{$realm};
608 if (@_) {
609 $self->{basic_authentication}{$netloc}{$realm} = [@_];
610 }
611 return unless $old;
612 return @$old if wantarray;
613 return join(":", @$old);
614}
615
616
617sub get_basic_credentials
618{
619 my($self, $realm, $uri, $proxy) = @_;
620 return if $proxy;
621 return $self->credentials($uri->host_port, $realm);
622}
623
624
625sub timeout { shift->_elem('timeout', @_); }
626sub local_address{ shift->_elem('local_address',@_); }
627sub max_size { shift->_elem('max_size', @_); }
628sub max_redirect { shift->_elem('max_redirect', @_); }
629sub show_progress{ shift->_elem('show_progress', @_); }
630
631sub ssl_opts {
632 my $self = shift;
633 if (@_ == 1) {
634 my $k = shift;
635 return $self->{ssl_opts}{$k};
636 }
637 if (@_) {
638 my $old;
639 while (@_) {
640 my($k, $v) = splice(@_, 0, 2);
641 $old = $self->{ssl_opts}{$k} unless @_;
642 if (defined $v) {
643 $self->{ssl_opts}{$k} = $v;
644 }
645 else {
646 delete $self->{ssl_opts}{$k};
647 }
648 }
649 %{$self->{ssl_opts}} = (%{$self->{ssl_opts}}, @_);
650 return $old;
651 }
652
653 return keys %{$self->{ssl_opts}};
654}
655
656sub parse_head {
657 my $self = shift;
658 if (@_) {
659 my $flag = shift;
660 my $parser;
661 my $old = $self->set_my_handler("response_header", $flag ? sub {
662 my($response, $ua) = @_;
663 require HTML::HeadParser;
664 $parser = HTML::HeadParser->new;
665 $parser->xml_mode(1) if $response->content_is_xhtml;
666 $parser->utf8_mode(1) if $] >= 5.008 && $HTML::Parser::VERSION >= 3.40;
667
668 push(@{$response->{handlers}{response_data}}, {
669 callback => sub {
670 return unless $parser;
671 unless ($parser->parse($_[3])) {
672 my $h = $parser->header;
673 my $r = $_[0];
674 for my $f ($h->header_field_names) {
675 $r->init_header($f, [$h->header($f)]);
676 }
677 undef($parser);
678 }
679 },
680 });
681
682 } : undef,
683 m_media_type => "html",
684 );
685 return !!$old;
686 }
687 else {
688 return !!$self->get_my_handler("response_header");
689 }
690}
691
692sub cookie_jar {
693 my $self = shift;
694 my $old = $self->{cookie_jar};
695 if (@_) {
696 my $jar = shift;
697 if (ref($jar) eq "HASH") {
698 require HTTP::Cookies;
699 $jar = HTTP::Cookies->new(%$jar);
700 }
701 $self->{cookie_jar} = $jar;
702 $self->set_my_handler("request_prepare",
703 $jar ? sub { $jar->add_cookie_header($_[0]); } : undef,
704 );
705 $self->set_my_handler("response_done",
706 $jar ? sub { $jar->extract_cookies($_[0]); } : undef,
707 );
708 }
709 $old;
710}
711
712sub default_headers {
713 my $self = shift;
714 my $old = $self->{def_headers} ||= HTTP::Headers->new;
715 if (@_) {
716 Carp::croak("default_headers not set to HTTP::Headers compatible object")
717 unless @_ == 1 && $_[0]->can("header_field_names");
718 $self->{def_headers} = shift;
719 }
720 return $old;
721}
722
723sub default_header {
724 my $self = shift;
725 return $self->default_headers->header(@_);
726}
727
728sub _agent { "libwww-perl/$LWP::VERSION" }
729
730sub agent {
731 my $self = shift;
732 if (@_) {
733 my $agent = shift;
734 if ($agent) {
735 $agent .= $self->_agent if $agent =~ /\s+$/;
736 }
737 else {
738 undef($agent)
739 }
740 return $self->default_header("User-Agent", $agent);
741 }
742 return $self->default_header("User-Agent");
743}
744
745sub from { # legacy
746 my $self = shift;
747 return $self->default_header("From", @_);
748}
749
750
751sub conn_cache {
752 my $self = shift;
753 my $old = $self->{conn_cache};
754 if (@_) {
755 my $cache = shift;
756 if (ref($cache) eq "HASH") {
757 require LWP::ConnCache;
758 $cache = LWP::ConnCache->new(%$cache);
759 }
760 $self->{conn_cache} = $cache;
761 }
762 $old;
763}
764
765
766sub add_handler {
767 my($self, $phase, $cb, %spec) = @_;
768 $spec{line} ||= join(":", (caller)[1,2]);
769 my $conf = $self->{handlers}{$phase} ||= do {
770 require HTTP::Config;
771 HTTP::Config->new;
772 };
773 $conf->add(%spec, callback => $cb);
774}
775
776sub set_my_handler {
777 my($self, $phase, $cb, %spec) = @_;
778 $spec{owner} = (caller(1))[3] unless exists $spec{owner};
779 $self->remove_handler($phase, %spec);
780 $spec{line} ||= join(":", (caller)[1,2]);
781 $self->add_handler($phase, $cb, %spec) if $cb;
782}
783
784sub get_my_handler {
785 my $self = shift;
786 my $phase = shift;
787 my $init = pop if @_ % 2;
788 my %spec = @_;
789 my $conf = $self->{handlers}{$phase};
790 unless ($conf) {
791 return unless $init;
792 require HTTP::Config;
793 $conf = $self->{handlers}{$phase} = HTTP::Config->new;
794 }
795 $spec{owner} = (caller(1))[3] unless exists $spec{owner};
796 my @h = $conf->find(%spec);
797 if (!@h && $init) {
798 if (ref($init) eq "CODE") {
799 $init->(\%spec);
800 }
801 elsif (ref($init) eq "HASH") {
802 while (my($k, $v) = each %$init) {
803 $spec{$k} = $v;
804 }
805 }
806 $spec{callback} ||= sub {};
807 $spec{line} ||= join(":", (caller)[1,2]);
808 $conf->add(\%spec);
809 return \%spec;
810 }
811 return wantarray ? @h : $h[0];
812}
813
814sub remove_handler {
815 my($self, $phase, %spec) = @_;
816 if ($phase) {
817 my $conf = $self->{handlers}{$phase} || return;
818 my @h = $conf->remove(%spec);
819 delete $self->{handlers}{$phase} if $conf->empty;
820 return @h;
821 }
822
823 return unless $self->{handlers};
824 return map $self->remove_handler($_), sort keys %{$self->{handlers}};
825}
826
827sub handlers {
828 my($self, $phase, $o) = @_;
829 my @h;
830 if ($o->{handlers} && $o->{handlers}{$phase}) {
831 push(@h, @{$o->{handlers}{$phase}});
832 }
833 if (my $conf = $self->{handlers}{$phase}) {
834 push(@h, $conf->matching($o));
835 }
836 return @h;
837}
838
839sub run_handlers {
840 my($self, $phase, $o) = @_;
841 if (defined(wantarray)) {
842 for my $h ($self->handlers($phase, $o)) {
843 my $ret = $h->{callback}->($o, $self, $h);
844 return $ret if $ret;
845 }
846 return undef;
847 }
848
849 for my $h ($self->handlers($phase, $o)) {
850 $h->{callback}->($o, $self, $h);
851 }
852}
853
854
855# deprecated
856sub use_eval { shift->_elem('use_eval', @_); }
857sub use_alarm
858{
859 Carp::carp("LWP::UserAgent->use_alarm(BOOL) is a no-op")
860 if @_ > 1 && $^W;
861 "";
862}
863
864
865sub clone
866{
867 my $self = shift;
868 my $copy = bless { %$self }, ref $self; # copy most fields
869
870 delete $copy->{handlers};
871 delete $copy->{conn_cache};
872
873 # copy any plain arrays and hashes; known not to need recursive copy
874 for my $k (qw(proxy no_proxy requests_redirectable ssl_opts)) {
875 next unless $copy->{$k};
876 if (ref($copy->{$k}) eq "ARRAY") {
877 $copy->{$k} = [ @{$copy->{$k}} ];
878 }
879 elsif (ref($copy->{$k}) eq "HASH") {
880 $copy->{$k} = { %{$copy->{$k}} };
881 }
882 }
883
884 if ($self->{def_headers}) {
885 $copy->{def_headers} = $self->{def_headers}->clone;
886 }
887
888 # re-enable standard handlers
889 $copy->parse_head($self->parse_head);
890
891 # no easy way to clone the cookie jar; so let's just remove it for now
892 $copy->cookie_jar(undef);
893
894 $copy;
895}
896
897
898sub mirror
899{
900 my($self, $url, $file) = @_;
901
902 my $request = HTTP::Request->new('GET', $url);
903
904 # If the file exists, add a cache-related header
905 if ( -e $file ) {
906 my ($mtime) = ( stat($file) )[9];
907 if ($mtime) {
908 $request->header( 'If-Modified-Since' => HTTP::Date::time2str($mtime) );
909 }
910 }
911 my $tmpfile = "$file-$$";
912
913 my $response = $self->request($request, $tmpfile);
914 if ( $response->header('X-Died') ) {
915 die $response->header('X-Died');
916 }
917
918 # Only fetching a fresh copy of the would be considered success.
919 # If the file was not modified, "304" would returned, which
920 # is considered by HTTP::Status to be a "redirect", /not/ "success"
921 if ( $response->is_success ) {
922 my @stat = stat($tmpfile) or die "Could not stat tmpfile '$tmpfile': $!";
923 my $file_length = $stat[7];
924 my ($content_length) = $response->header('Content-length');
925
926 if ( defined $content_length and $file_length < $content_length ) {
927 unlink($tmpfile);
928 die "Transfer truncated: " . "only $file_length out of $content_length bytes received\n";
929 }
930 elsif ( defined $content_length and $file_length > $content_length ) {
931 unlink($tmpfile);
932 die "Content-length mismatch: " . "expected $content_length bytes, got $file_length\n";
933 }
934 # The file was the expected length.
935 else {
936 # Replace the stale file with a fresh copy
937 if ( -e $file ) {
938 # Some dosish systems fail to rename if the target exists
939 chmod 0777, $file;
940 unlink $file;
941 }
942 rename( $tmpfile, $file )
943 or die "Cannot rename '$tmpfile' to '$file': $!\n";
944
945 # make sure the file has the same last modification time
946 if ( my $lm = $response->last_modified ) {
947 utime $lm, $lm, $file;
948 }
949 }
950 }
951 # The local copy is fresh enough, so just delete the temp file
952 else {
953 unlink($tmpfile);
954 }
955 return $response;
956}
957
958
959sub _need_proxy {
960 my($req, $ua) = @_;
961 return if exists $req->{proxy};
962 my $proxy = $ua->{proxy}{$req->uri->scheme} || return;
963 if ($ua->{no_proxy}) {
964 if (my $host = eval { $req->uri->host }) {
965 for my $domain (@{$ua->{no_proxy}}) {
966 if ($host =~ /\Q$domain\E$/) {
967 return;
968 }
969 }
970 }
971 }
972 $req->{proxy} = $HTTP::URI_CLASS->new($proxy);
973}
974
975
976sub proxy
977{
978 my $self = shift;
979 my $key = shift;
980 return map $self->proxy($_, @_), @$key if ref $key;
981
982 Carp::croak("'$key' is not a valid URI scheme") unless $key =~ /^$URI::scheme_re\z/;
983 my $old = $self->{'proxy'}{$key};
984 if (@_) {
985 my $url = shift;
986 if (defined($url) && length($url)) {
987 Carp::croak("Proxy must be specified as absolute URI; '$url' is not") unless $url =~ /^$URI::scheme_re:/;
988 Carp::croak("Bad http proxy specification '$url'") if $url =~ /^https?:/ && $url !~ m,^https?://\w,;
989 }
990 $self->{proxy}{$key} = $url;
991 $self->set_my_handler("request_preprepare", \&_need_proxy)
992 }
993 return $old;
994}
995
996
997sub env_proxy {
998 my ($self) = @_;
999 require Encode;
1000 require Encode::Locale;
1001 my($k,$v);
1002 while(($k, $v) = each %ENV) {
1003 if ($ENV{REQUEST_METHOD}) {
1004 # Need to be careful when called in the CGI environment, as
1005 # the HTTP_PROXY variable is under control of that other guy.
1006 next if $k =~ /^HTTP_/;
1007 $k = "HTTP_PROXY" if $k eq "CGI_HTTP_PROXY";
1008 }
1009 $k = lc($k);
1010 next unless $k =~ /^(.*)_proxy$/;
1011 $k = $1;
1012 if ($k eq 'no') {
1013 $self->no_proxy(split(/\s*,\s*/, $v));
1014 }
1015 else {
1016 # Ignore random _proxy variables, allow only valid schemes
1017 next unless $k =~ /^$URI::scheme_re\z/;
1018 # Ignore xxx_proxy variables if xxx isn't a supported protocol
1019 next unless LWP::Protocol::implementor($k);
1020 $self->proxy($k, Encode::decode(locale => $v));
1021 }
1022 }
1023}
1024
1025
1026sub no_proxy {
1027 my($self, @no) = @_;
1028 if (@no) {
1029 push(@{ $self->{'no_proxy'} }, @no);
1030 }
1031 else {
1032 $self->{'no_proxy'} = [];
1033 }
1034}
1035
1036
1037sub _new_response {
1038 my($request, $code, $message, $content) = @_;
1039 $message ||= HTTP::Status::status_message($code);
1040 my $response = HTTP::Response->new($code, $message);
1041 $response->request($request);
1042 $response->header("Client-Date" => HTTP::Date::time2str(time));
1043 $response->header("Client-Warning" => "Internal response");
1044 $response->header("Content-Type" => "text/plain");
1045 $response->content($content || "$code $message\n");
1046 return $response;
1047}
1048
1049
10501;
1051
1052__END__
1053
1054=head1 NAME
1055
1056LWP::UserAgent - Web user agent class
1057
1058=head1 SYNOPSIS
1059
1060 require LWP::UserAgent;
1061
1062 my $ua = LWP::UserAgent->new;
1063 $ua->timeout(10);
1064 $ua->env_proxy;
1065
1066 my $response = $ua->get('http://search.cpan.org/');
1067
1068 if ($response->is_success) {
1069 print $response->decoded_content; # or whatever
1070 }
1071 else {
1072 die $response->status_line;
1073 }
1074
1075=head1 DESCRIPTION
1076
1077The C<LWP::UserAgent> is a class implementing a web user agent.
1078C<LWP::UserAgent> objects can be used to dispatch web requests.
1079
1080In normal use the application creates an C<LWP::UserAgent> object, and
1081then configures it with values for timeouts, proxies, name, etc. It
1082then creates an instance of C<HTTP::Request> for the request that
1083needs to be performed. This request is then passed to one of the
1084request method the UserAgent, which dispatches it using the relevant
1085protocol, and returns a C<HTTP::Response> object. There are
1086convenience methods for sending the most common request types: get(),
1087head(), post(), put() and delete(). When using these methods then the
1088creation of the request object is hidden as shown in the synopsis above.
1089
1090The basic approach of the library is to use HTTP style communication
1091for all protocol schemes. This means that you will construct
1092C<HTTP::Request> objects and receive C<HTTP::Response> objects even
1093for non-HTTP resources like I<gopher> and I<ftp>. In order to achieve
1094even more similarity to HTTP style communications, gopher menus and
1095file directories are converted to HTML documents.
1096
1097=head1 CONSTRUCTOR METHODS
1098
1099The following constructor methods are available:
1100
1101=over 4
1102
1103=item $ua = LWP::UserAgent->new( %options )
1104
1105This method constructs a new C<LWP::UserAgent> object and returns it.
1106Key/value pair arguments may be provided to set up the initial state.
1107The following options correspond to attribute methods described below:
1108
1109 KEY DEFAULT
1110 ----------- --------------------
1111 agent "libwww-perl/#.###"
1112 from undef
1113 conn_cache undef
1114 cookie_jar undef
1115 default_headers HTTP::Headers->new
1116 local_address undef
1117 ssl_opts { verify_hostname => 1 }
1118 max_size undef
1119 max_redirect 7
1120 parse_head 1
1121 protocols_allowed undef
1122 protocols_forbidden undef
1123 requests_redirectable ['GET', 'HEAD']
1124 timeout 180
1125
1126The following additional options are also accepted: If the C<env_proxy> option
1127is passed in with a TRUE value, then proxy settings are read from environment
1128variables (see env_proxy() method below). If C<env_proxy> isn't provided the
1129C<PERL_LWP_ENV_PROXY> environment variable controls if env_proxy() is called
1130during initialization. If the C<keep_alive> option is passed in, then a
1131C<LWP::ConnCache> is set up (see conn_cache() method below). The C<keep_alive>
1132value is passed on as the C<total_capacity> for the connection cache.
1133
1134=item $ua->clone
1135
1136Returns a copy of the LWP::UserAgent object.
1137
1138=back
1139
1140=head1 ATTRIBUTES
1141
1142The settings of the configuration attributes modify the behaviour of the
1143C<LWP::UserAgent> when it dispatches requests. Most of these can also
1144be initialized by options passed to the constructor method.
1145
1146The following attribute methods are provided. The attribute value is
1147left unchanged if no argument is given. The return value from each
1148method is the old attribute value.
1149
1150=over
1151
1152=item $ua->agent
1153
1154=item $ua->agent( $product_id )
1155
1156Get/set the product token that is used to identify the user agent on
1157the network. The agent value is sent as the "User-Agent" header in
1158the requests. The default is the string returned by the _agent()
1159method (see below).
1160
1161If the $product_id ends with space then the _agent() string is
1162appended to it.
1163
1164The user agent string should be one or more simple product identifiers
1165with an optional version number separated by the "/" character.
1166Examples are:
1167
1168 $ua->agent('Checkbot/0.4 ' . $ua->_agent);
1169 $ua->agent('Checkbot/0.4 '); # same as above
1170 $ua->agent('Mozilla/5.0');
1171 $ua->agent(""); # don't identify
1172
1173=item $ua->_agent
1174
1175Returns the default agent identifier. This is a string of the form
1176"libwww-perl/#.###", where "#.###" is substituted with the version number
1177of this library.
1178
1179=item $ua->from
1180
1181=item $ua->from( $email_address )
1182
1183Get/set the e-mail address for the human user who controls
1184the requesting user agent. The address should be machine-usable, as
1185defined in RFC 822. The C<from> value is send as the "From" header in
1186the requests. Example:
1187
1188 $ua->from('[email protected]');
1189
1190The default is to not send a "From" header. See the default_headers()
1191method for the more general interface that allow any header to be defaulted.
1192
1193=item $ua->cookie_jar
1194
1195=item $ua->cookie_jar( $cookie_jar_obj )
1196
1197Get/set the cookie jar object to use. The only requirement is that
1198the cookie jar object must implement the extract_cookies($request) and
1199add_cookie_header($response) methods. These methods will then be
1200invoked by the user agent as requests are sent and responses are
1201received. Normally this will be a C<HTTP::Cookies> object or some
1202subclass.
1203
1204The default is to have no cookie_jar, i.e. never automatically add
1205"Cookie" headers to the requests.
1206
1207Shortcut: If a reference to a plain hash is passed in as the
1208$cookie_jar_object, then it is replaced with an instance of
1209C<HTTP::Cookies> that is initialized based on the hash. This form also
1210automatically loads the C<HTTP::Cookies> module. It means that:
1211
1212 $ua->cookie_jar({ file => "$ENV{HOME}/.cookies.txt" });
1213
1214is really just a shortcut for:
1215
1216 require HTTP::Cookies;
1217 $ua->cookie_jar(HTTP::Cookies->new(file => "$ENV{HOME}/.cookies.txt"));
1218
1219=item $ua->default_headers
1220
1221=item $ua->default_headers( $headers_obj )
1222
1223Get/set the headers object that will provide default header values for
1224any requests sent. By default this will be an empty C<HTTP::Headers>
1225object.
1226
1227=item $ua->default_header( $field )
1228
1229=item $ua->default_header( $field => $value )
1230
1231This is just a short-cut for $ua->default_headers->header( $field =>
1232$value ). Example:
1233
1234 $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable());
1235 $ua->default_header('Accept-Language' => "no, en");
1236
1237=item $ua->conn_cache
1238
1239=item $ua->conn_cache( $cache_obj )
1240
1241Get/set the C<LWP::ConnCache> object to use. See L<LWP::ConnCache>
1242for details.
1243
1244=item $ua->credentials( $netloc, $realm )
1245
1246=item $ua->credentials( $netloc, $realm, $uname, $pass )
1247
1248Get/set the user name and password to be used for a realm.
1249
1250The $netloc is a string of the form "<host>:<port>". The username and
1251password will only be passed to this server. Example:
1252
1253 $ua->credentials("www.example.com:80", "Some Realm", "foo", "secret");
1254
1255=item $ua->local_address
1256
1257=item $ua->local_address( $address )
1258
1259Get/set the local interface to bind to for network connections. The interface
1260can be specified as a hostname or an IP address. This value is passed as the
1261C<LocalAddr> argument to L<IO::Socket::INET>.
1262
1263=item $ua->max_size
1264
1265=item $ua->max_size( $bytes )
1266
1267Get/set the size limit for response content. The default is C<undef>,
1268which means that there is no limit. If the returned response content
1269is only partial, because the size limit was exceeded, then a
1270"Client-Aborted" header will be added to the response. The content
1271might end up longer than C<max_size> as we abort once appending a
1272chunk of data makes the length exceed the limit. The "Content-Length"
1273header, if present, will indicate the length of the full content and
1274will normally not be the same as C<< length($res->content) >>.
1275
1276=item $ua->max_redirect
1277
1278=item $ua->max_redirect( $n )
1279
1280This reads or sets the object's limit of how many times it will obey
1281redirection responses in a given request cycle.
1282
1283By default, the value is 7. This means that if you call request()
1284method and the response is a redirect elsewhere which is in turn a
1285redirect, and so on seven times, then LWP gives up after that seventh
1286request.
1287
1288=item $ua->parse_head
1289
1290=item $ua->parse_head( $boolean )
1291
1292Get/set a value indicating whether we should initialize response
1293headers from the E<lt>head> section of HTML documents. The default is
1294TRUE. Do not turn this off, unless you know what you are doing.
1295
1296=item $ua->protocols_allowed
1297
1298=item $ua->protocols_allowed( \@protocols )
1299
1300This reads (or sets) this user agent's list of protocols that the
1301request methods will exclusively allow. The protocol names are case
1302insensitive.
1303
1304For example: C<$ua-E<gt>protocols_allowed( [ 'http', 'https'] );>
1305means that this user agent will I<allow only> those protocols,
1306and attempts to use this user agent to access URLs with any other
1307schemes (like "ftp://...") will result in a 500 error.
1308
1309To delete the list, call: C<$ua-E<gt>protocols_allowed(undef)>
1310
1311By default, an object has neither a C<protocols_allowed> list, nor a
1312C<protocols_forbidden> list.
1313
1314Note that having a C<protocols_allowed> list causes any
1315C<protocols_forbidden> list to be ignored.
1316
1317=item $ua->protocols_forbidden
1318
1319=item $ua->protocols_forbidden( \@protocols )
1320
1321This reads (or sets) this user agent's list of protocols that the
1322request method will I<not> allow. The protocol names are case
1323insensitive.
1324
1325For example: C<$ua-E<gt>protocols_forbidden( [ 'file', 'mailto'] );>
1326means that this user agent will I<not> allow those protocols, and
1327attempts to use this user agent to access URLs with those schemes
1328will result in a 500 error.
1329
1330To delete the list, call: C<$ua-E<gt>protocols_forbidden(undef)>
1331
1332=item $ua->requests_redirectable
1333
1334=item $ua->requests_redirectable( \@requests )
1335
1336This reads or sets the object's list of request names that
1337C<$ua-E<gt>redirect_ok(...)> will allow redirection for. By
1338default, this is C<['GET', 'HEAD']>, as per RFC 2616. To
1339change to include 'POST', consider:
1340
1341 push @{ $ua->requests_redirectable }, 'POST';
1342
1343=item $ua->show_progress
1344
1345=item $ua->show_progress( $boolean )
1346
1347Get/set a value indicating whether a progress bar should be displayed
1348on on the terminal as requests are processed. The default is FALSE.
1349
1350=item $ua->timeout
1351
1352=item $ua->timeout( $secs )
1353
1354Get/set the timeout value in seconds. The default timeout() value is
1355180 seconds, i.e. 3 minutes.
1356
1357The requests is aborted if no activity on the connection to the server
1358is observed for C<timeout> seconds. This means that the time it takes
1359for the complete transaction and the request() method to actually
1360return might be longer.
1361
1362=item $ua->ssl_opts
1363
1364=item $ua->ssl_opts( $key )
1365
1366=item $ua->ssl_opts( $key => $value )
1367
1368Get/set the options for SSL connections. Without argument return the list
1369of options keys currently set. With a single argument return the current
1370value for the given option. With 2 arguments set the option value and return
1371the old. Setting an option to the value C<undef> removes this option.
1372
1373The options that LWP relates to are:
1374
1375=over
1376
1377=item C<verify_hostname> => $bool
1378
1379When TRUE LWP will for secure protocol schemes ensure it connects to servers
1380that have a valid certificate matching the expected hostname. If FALSE no
1381checks are made and you can't be sure that you communicate with the expected peer.
1382The no checks behaviour was the default for libwww-perl-5.837 and earlier releases.
1383
1384This option is initialized from the L<PERL_LWP_SSL_VERIFY_HOSTNAME> environment
1385variable. If this environment variable isn't set; then C<verify_hostname>
1386defaults to 1.
1387
1388=item C<SSL_ca_file> => $path
1389
1390The path to a file containing Certificate Authority certificates.
1391A default setting for this option is provided by checking the environment
1392variables C<PERL_LWP_SSL_CA_FILE> and C<HTTPS_CA_FILE> in order.
1393
1394=item C<SSL_ca_path> => $path
1395
1396The path to a directory containing files containing Certificate Authority
1397certificates.
1398A default setting for this option is provided by checking the environment
1399variables C<PERL_LWP_SSL_CA_PATH> and C<HTTPS_CA_DIR> in order.
1400
1401=back
1402
1403Other options can be set and are processed directly by the SSL Socket implementation
1404in use. See L<IO::Socket::SSL> or L<Net::SSL> for details.
1405
1406The libwww-perl core no longer bundles protocol plugins for SSL. You will need
1407to install L<LWP::Protocol::https> separately to enable support for processing
1408https-URLs.
1409
1410=back
1411
1412=head2 Proxy attributes
1413
1414The following methods set up when requests should be passed via a
1415proxy server.
1416
1417=over
1418
1419=item $ua->proxy(\@schemes, $proxy_url)
1420
1421=item $ua->proxy($scheme, $proxy_url)
1422
1423Set/retrieve proxy URL for a scheme:
1424
1425 $ua->proxy(['http', 'ftp'], 'http://proxy.sn.no:8001/');
1426 $ua->proxy('gopher', 'http://proxy.sn.no:8001/');
1427
1428The first form specifies that the URL is to be used for proxying of
1429access methods listed in the list in the first method argument,
1430i.e. 'http' and 'ftp'.
1431
1432The second form shows a shorthand form for specifying
1433proxy URL for a single access scheme.
1434
1435=item $ua->no_proxy( $domain, ... )
1436
1437Do not proxy requests to the given domains. Calling no_proxy without
1438any domains clears the list of domains. Eg:
1439
1440 $ua->no_proxy('localhost', 'example.com');
1441
1442=item $ua->env_proxy
1443
1444Load proxy settings from *_proxy environment variables. You might
1445specify proxies like this (sh-syntax):
1446
1447 gopher_proxy=http://proxy.my.place/
1448 wais_proxy=http://proxy.my.place/
1449 no_proxy="localhost,example.com"
1450 export gopher_proxy wais_proxy no_proxy
1451
1452csh or tcsh users should use the C<setenv> command to define these
1453environment variables.
1454
1455On systems with case insensitive environment variables there exists a
1456name clash between the CGI environment variables and the C<HTTP_PROXY>
1457environment variable normally picked up by env_proxy(). Because of
1458this C<HTTP_PROXY> is not honored for CGI scripts. The
1459C<CGI_HTTP_PROXY> environment variable can be used instead.
1460
1461=back
1462
1463=head2 Handlers
1464
1465Handlers are code that injected at various phases during the
1466processing of requests. The following methods are provided to manage
1467the active handlers:
1468
1469=over
1470
1471=item $ua->add_handler( $phase => \&cb, %matchspec )
1472
1473Add handler to be invoked in the given processing phase. For how to
1474specify %matchspec see L<HTTP::Config/"Matching">.
1475
1476The possible values $phase and the corresponding callback signatures are:
1477
1478=over
1479
1480=item request_preprepare => sub { my($request, $ua, $h) = @_; ... }
1481
1482The handler is called before the C<request_prepare> and other standard
1483initialization of of the request. This can be used to set up headers
1484and attributes that the C<request_prepare> handler depends on. Proxy
1485initialization should take place here; but in general don't register
1486handlers for this phase.
1487
1488=item request_prepare => sub { my($request, $ua, $h) = @_; ... }
1489
1490The handler is called before the request is sent and can modify the
1491request any way it see fit. This can for instance be used to add
1492certain headers to specific requests.
1493
1494The method can assign a new request object to $_[0] to replace the
1495request that is sent fully.
1496
1497The return value from the callback is ignored. If an exception is
1498raised it will abort the request and make the request method return a
1499"400 Bad request" response.
1500
1501=item request_send => sub { my($request, $ua, $h) = @_; ... }
1502
1503This handler gets a chance of handling requests before they're sent to the
1504protocol handlers. It should return an HTTP::Response object if it
1505wishes to terminate the processing; otherwise it should return nothing.
1506
1507The C<response_header> and C<response_data> handlers will not be
1508invoked for this response, but the C<response_done> will be.
1509
1510=item response_header => sub { my($response, $ua, $h) = @_; ... }
1511
1512This handler is called right after the response headers have been
1513received, but before any content data. The handler might set up
1514handlers for data and might croak to abort the request.
1515
1516The handler might set the $response->{default_add_content} value to
1517control if any received data should be added to the response object
1518directly. This will initially be false if the $ua->request() method
1519was called with a $content_file or $content_cb argument; otherwise true.
1520
1521=item response_data => sub { my($response, $ua, $h, $data) = @_; ... }
1522
1523This handler is called for each chunk of data received for the
1524response. The handler might croak to abort the request.
1525
1526This handler needs to return a TRUE value to be called again for
1527subsequent chunks for the same request.
1528
1529=item response_done => sub { my($response, $ua, $h) = @_; ... }
1530
1531The handler is called after the response has been fully received, but
1532before any redirect handling is attempted. The handler can be used to
1533extract information or modify the response.
1534
1535=item response_redirect => sub { my($response, $ua, $h) = @_; ... }
1536
1537The handler is called in $ua->request after C<response_done>. If the
1538handler returns an HTTP::Request object we'll start over with processing
1539this request instead.
1540
1541=back
1542
1543=item $ua->remove_handler( undef, %matchspec )
1544
1545=item $ua->remove_handler( $phase, %matchspec )
1546
1547Remove handlers that match the given %matchspec. If $phase is not
1548provided remove handlers from all phases.
1549
1550Be careful as calling this function with %matchspec that is not not
1551specific enough can remove handlers not owned by you. It's probably
1552better to use the set_my_handler() method instead.
1553
1554The removed handlers are returned.
1555
1556=item $ua->set_my_handler( $phase, $cb, %matchspec )
1557
1558Set handlers private to the executing subroutine. Works by defaulting
1559an C<owner> field to the %matchspec that holds the name of the called
1560subroutine. You might pass an explicit C<owner> to override this.
1561
1562If $cb is passed as C<undef>, remove the handler.
1563
1564=item $ua->get_my_handler( $phase, %matchspec )
1565
1566=item $ua->get_my_handler( $phase, %matchspec, $init )
1567
1568Will retrieve the matching handler as hash ref.
1569
1570If C<$init> is passed passed as a TRUE value, create and add the
1571handler if it's not found. If $init is a subroutine reference, then
1572it's called with the created handler hash as argument. This sub might
1573populate the hash with extra fields; especially the callback. If
1574$init is a hash reference, merge the hashes.
1575
1576=item $ua->handlers( $phase, $request )
1577
1578=item $ua->handlers( $phase, $response )
1579
1580Returns the handlers that apply to the given request or response at
1581the given processing phase.
1582
1583=back
1584
1585=head1 REQUEST METHODS
1586
1587The methods described in this section are used to dispatch requests
1588via the user agent. The following request methods are provided:
1589
1590=over
1591
1592=item $ua->get( $url )
1593
1594=item $ua->get( $url , $field_name => $value, ... )
1595
1596This method will dispatch a C<GET> request on the given $url. Further
1597arguments can be given to initialize the headers of the request. These
1598are given as separate name/value pairs. The return value is a
1599response object. See L<HTTP::Response> for a description of the
1600interface it provides.
1601
1602There will still be a response object returned when LWP can't connect to the
1603server specified in the URL or when other failures in protocol handlers occur.
1604These internal responses use the standard HTTP status codes, so the responses
1605can't be differentiated by testing the response status code alone. Error
1606responses that LWP generates internally will have the "Client-Warning" header
1607set to the value "Internal response". If you need to differentiate these
1608internal responses from responses that a remote server actually generates, you
1609need to test this header value.
1610
1611Fields names that start with ":" are special. These will not
1612initialize headers of the request but will determine how the response
1613content is treated. The following special field names are recognized:
1614
1615 :content_file => $filename
1616 :content_cb => \&callback
1617 :read_size_hint => $bytes
1618
1619If a $filename is provided with the C<:content_file> option, then the
1620response content will be saved here instead of in the response
1621object. If a callback is provided with the C<:content_cb> option then
1622this function will be called for each chunk of the response content as
1623it is received from the server. If neither of these options are
1624given, then the response content will accumulate in the response
1625object itself. This might not be suitable for very large response
1626bodies. Only one of C<:content_file> or C<:content_cb> can be
1627specified. The content of unsuccessful responses will always
1628accumulate in the response object itself, regardless of the
1629C<:content_file> or C<:content_cb> options passed in.
1630
1631The C<:read_size_hint> option is passed to the protocol module which
1632will try to read data from the server in chunks of this size. A
1633smaller value for the C<:read_size_hint> will result in a higher
1634number of callback invocations.
1635
1636The callback function is called with 3 arguments: a chunk of data, a
1637reference to the response object, and a reference to the protocol
1638object. The callback can abort the request by invoking die(). The
1639exception message will show up as the "X-Died" header field in the
1640response returned by the get() function.
1641
1642=item $ua->head( $url )
1643
1644=item $ua->head( $url , $field_name => $value, ... )
1645
1646This method will dispatch a C<HEAD> request on the given $url.
1647Otherwise it works like the get() method described above.
1648
1649=item $ua->post( $url, \%form )
1650
1651=item $ua->post( $url, \@form )
1652
1653=item $ua->post( $url, \%form, $field_name => $value, ... )
1654
1655=item $ua->post( $url, $field_name => $value,... Content => \%form )
1656
1657=item $ua->post( $url, $field_name => $value,... Content => \@form )
1658
1659=item $ua->post( $url, $field_name => $value,... Content => $content )
1660
1661This method will dispatch a C<POST> request on the given $url, with
1662%form or @form providing the key/value pairs for the fill-in form
1663content. Additional headers and content options are the same as for
1664the get() method.
1665
1666This method will use the POST() function from C<HTTP::Request::Common>
1667to build the request. See L<HTTP::Request::Common> for a details on
1668how to pass form content and other advanced features.
1669
1670=item $ua->put( $url, \%form )
1671
1672=item $ua->put( $url, \@form )
1673
1674=item $ua->put( $url, \%form, $field_name => $value, ... )
1675
1676=item $ua->put( $url, $field_name => $value,... Content => \%form )
1677
1678=item $ua->put( $url, $field_name => $value,... Content => \@form )
1679
1680=item $ua->put( $url, $field_name => $value,... Content => $content )
1681
1682This method will dispatch a C<PUT> request on the given $url, with
1683%form or @form providing the key/value pairs for the fill-in form
1684content. Additional headers and content options are the same as for
1685the get() method.
1686
1687This method will use the PUT() function from C<HTTP::Request::Common>
1688to build the request. See L<HTTP::Request::Common> for a details on
1689how to pass form content and other advanced features.
1690
1691=item $ua->delete( $url )
1692
1693=item $ua->delete( $url, $field_name => $value, ... )
1694
1695This method will dispatch a C<DELETE> request on the given $url. Additional
1696headers and content options are the same as for the get() method.
1697
1698This method will use the DELETE() function from C<HTTP::Request::Common>
1699to build the request. See L<HTTP::Request::Common> for a details on
1700how to pass form content and other advanced features.
1701
1702=item $ua->mirror( $url, $filename )
1703
1704This method will get the document identified by $url and store it in
1705file called $filename. If the file already exists, then the request
1706will contain an "If-Modified-Since" header matching the modification
1707time of the file. If the document on the server has not changed since
1708this time, then nothing happens. If the document has been updated, it
1709will be downloaded again. The modification time of the file will be
1710forced to match that of the server.
1711
1712The return value is the the response object.
1713
1714=item $ua->request( $request )
1715
1716=item $ua->request( $request, $content_file )
1717
1718=item $ua->request( $request, $content_cb )
1719
1720=item $ua->request( $request, $content_cb, $read_size_hint )
1721
1722This method will dispatch the given $request object. Normally this
1723will be an instance of the C<HTTP::Request> class, but any object with
1724a similar interface will do. The return value is a response object.
1725See L<HTTP::Request> and L<HTTP::Response> for a description of the
1726interface provided by these classes.
1727
1728The request() method will process redirects and authentication
1729responses transparently. This means that it may actually send several
1730simple requests via the simple_request() method described below.
1731
1732The request methods described above; get(), head(), post() and
1733mirror(), will all dispatch the request they build via this method.
1734They are convenience methods that simply hides the creation of the
1735request object for you.
1736
1737The $content_file, $content_cb and $read_size_hint all correspond to
1738options described with the get() method above.
1739
1740You are allowed to use a CODE reference as C<content> in the request
1741object passed in. The C<content> function should return the content
1742when called. The content can be returned in chunks. The content
1743function will be invoked repeatedly until it return an empty string to
1744signal that there is no more content.
1745
1746=item $ua->simple_request( $request )
1747
1748=item $ua->simple_request( $request, $content_file )
1749
1750=item $ua->simple_request( $request, $content_cb )
1751
1752=item $ua->simple_request( $request, $content_cb, $read_size_hint )
1753
1754This method dispatches a single request and returns the response
1755received. Arguments are the same as for request() described above.
1756
1757The difference from request() is that simple_request() will not try to
1758handle redirects or authentication responses. The request() method
1759will in fact invoke this method for each simple request it sends.
1760
1761=item $ua->is_online
1762
1763Tries to determine if you have access to the Internet. Returns
1764TRUE if the built-in heuristics determine that the user agent is
1765able to access the Internet (over HTTP). See also L<LWP::Online>.
1766
1767=item $ua->is_protocol_supported( $scheme )
1768
1769You can use this method to test whether this user agent object supports the
1770specified C<scheme>. (The C<scheme> might be a string (like 'http' or
1771'ftp') or it might be an URI object reference.)
1772
1773Whether a scheme is supported, is determined by the user agent's
1774C<protocols_allowed> or C<protocols_forbidden> lists (if any), and by
1775the capabilities of LWP. I.e., this will return TRUE only if LWP
1776supports this protocol I<and> it's permitted for this particular
1777object.
1778
1779=back
1780
1781=head2 Callback methods
1782
1783The following methods will be invoked as requests are processed. These
1784methods are documented here because subclasses of C<LWP::UserAgent>
1785might want to override their behaviour.
1786
1787=over
1788
1789=item $ua->prepare_request( $request )
1790
1791This method is invoked by simple_request(). Its task is to modify the
1792given $request object by setting up various headers based on the
1793attributes of the user agent. The return value should normally be the
1794$request object passed in. If a different request object is returned
1795it will be the one actually processed.
1796
1797The headers affected by the base implementation are; "User-Agent",
1798"From", "Range" and "Cookie".
1799
1800=item $ua->redirect_ok( $prospective_request, $response )
1801
1802This method is called by request() before it tries to follow a
1803redirection to the request in $response. This should return a TRUE
1804value if this redirection is permissible. The $prospective_request
1805will be the request to be sent if this method returns TRUE.
1806
1807The base implementation will return FALSE unless the method
1808is in the object's C<requests_redirectable> list,
1809FALSE if the proposed redirection is to a "file://..."
1810URL, and TRUE otherwise.
1811
1812=item $ua->get_basic_credentials( $realm, $uri, $isproxy )
1813
1814This is called by request() to retrieve credentials for documents
1815protected by Basic or Digest Authentication. The arguments passed in
1816is the $realm provided by the server, the $uri requested and a boolean
1817flag to indicate if this is authentication against a proxy server.
1818
1819The method should return a username and password. It should return an
1820empty list to abort the authentication resolution attempt. Subclasses
1821can override this method to prompt the user for the information. An
1822example of this can be found in C<lwp-request> program distributed
1823with this library.
1824
1825The base implementation simply checks a set of pre-stored member
1826variables, set up with the credentials() method.
1827
1828=item $ua->progress( $status, $request_or_response )
1829
1830This is called frequently as the response is received regardless of
1831how the content is processed. The method is called with $status
1832"begin" at the start of processing the request and with $state "end"
1833before the request method returns. In between these $status will be
1834the fraction of the response currently received or the string "tick"
1835if the fraction can't be calculated.
1836
1837When $status is "begin" the second argument is the request object,
1838otherwise it is the response object.
1839
1840=back
1841
1842=head1 SEE ALSO
1843
1844See L<LWP> for a complete overview of libwww-perl5. See L<lwpcook>
1845and the scripts F<lwp-request> and F<lwp-download> for examples of
1846usage.
1847
1848See L<HTTP::Request> and L<HTTP::Response> for a description of the
1849message objects dispatched and received. See L<HTTP::Request::Common>
1850and L<HTML::Form> for other ways to build request objects.
1851
1852See L<WWW::Mechanize> and L<WWW::Search> for examples of more
1853specialized user agents based on C<LWP::UserAgent>.
1854
1855=head1 COPYRIGHT
1856
1857Copyright 1995-2009 Gisle Aas.
1858
1859This library is free software; you can redistribute it and/or
1860modify it under the same terms as Perl itself.
Note: See TracBrowser for help on using the repository browser.