source: main/trunk/greenstone2/perllib/cpan/HTTP/Cookies.pm@ 27181

Last change on this file since 27181 was 27181, checked in by davidb, 11 years ago

Latest libwww-perl (v6x) isn't as self-sufficeint as earlier (v5.x) in terms of supporting Perl modules. Dropping back to to this earlier version so activate.pl runs smoothly when system-installed Perl on Unix system does not have the LWP and related modules installed

File size: 19.8 KB
Line 
1package HTTP::Cookies;
2
3use strict;
4use HTTP::Date qw(str2time parse_date time2str);
5use HTTP::Headers::Util qw(_split_header_words join_header_words);
6
7use vars qw($VERSION $EPOCH_OFFSET);
8$VERSION = "5.837";
9
10# Legacy: because "use "HTTP::Cookies" used be the ONLY way
11# to load the class HTTP::Cookies::Netscape.
12require HTTP::Cookies::Netscape;
13
14$EPOCH_OFFSET = 0; # difference from Unix epoch
15if ($^O eq "MacOS") {
16 require Time::Local;
17 $EPOCH_OFFSET = Time::Local::timelocal(0,0,0,1,0,70);
18}
19
20# A HTTP::Cookies object is a hash. The main attribute is the
21# COOKIES 3 level hash: $self->{COOKIES}{$domain}{$path}{$key}.
22
23sub new
24{
25 my $class = shift;
26 my $self = bless {
27 COOKIES => {},
28 }, $class;
29 my %cnf = @_;
30 for (keys %cnf) {
31 $self->{lc($_)} = $cnf{$_};
32 }
33 $self->load;
34 $self;
35}
36
37
38sub add_cookie_header
39{
40 my $self = shift;
41 my $request = shift || return;
42 my $url = $request->uri;
43 my $scheme = $url->scheme;
44 unless ($scheme =~ /^https?\z/) {
45 return;
46 }
47
48 my $domain = _host($request, $url);
49 $domain = "$domain.local" unless $domain =~ /\./;
50 my $secure_request = ($scheme eq "https");
51 my $req_path = _url_path($url);
52 my $req_port = $url->port;
53 my $now = time();
54 _normalize_path($req_path) if $req_path =~ /%/;
55
56 my @cval; # cookie values for the "Cookie" header
57 my $set_ver;
58 my $netscape_only = 0; # An exact domain match applies to any cookie
59
60 while ($domain =~ /\./) {
61 # Checking $domain for cookies"
62 my $cookies = $self->{COOKIES}{$domain};
63 next unless $cookies;
64 if ($self->{delayload} && defined($cookies->{'//+delayload'})) {
65 my $cookie_data = $cookies->{'//+delayload'}{'cookie'};
66 delete $self->{COOKIES}{$domain};
67 $self->load_cookie($cookie_data->[1]);
68 $cookies = $self->{COOKIES}{$domain};
69 next unless $cookies; # should not really happen
70 }
71
72 # Want to add cookies corresponding to the most specific paths
73 # first (i.e. longest path first)
74 my $path;
75 for $path (sort {length($b) <=> length($a) } keys %$cookies) {
76 if (index($req_path, $path) != 0) {
77 next;
78 }
79
80 my($key,$array);
81 while (($key,$array) = each %{$cookies->{$path}}) {
82 my($version,$val,$port,$path_spec,$secure,$expires) = @$array;
83 if ($secure && !$secure_request) {
84 next;
85 }
86 if ($expires && $expires < $now) {
87 next;
88 }
89 if ($port) {
90 my $found;
91 if ($port =~ s/^_//) {
92 # The corresponding Set-Cookie attribute was empty
93 $found++ if $port eq $req_port;
94 $port = "";
95 }
96 else {
97 my $p;
98 for $p (split(/,/, $port)) {
99 $found++, last if $p eq $req_port;
100 }
101 }
102 unless ($found) {
103 next;
104 }
105 }
106 if ($version > 0 && $netscape_only) {
107 next;
108 }
109
110 # set version number of cookie header.
111 # XXX: What should it be if multiple matching
112 # Set-Cookie headers have different versions themselves
113 if (!$set_ver++) {
114 if ($version >= 1) {
115 push(@cval, "\$Version=$version");
116 }
117 elsif (!$self->{hide_cookie2}) {
118 $request->header(Cookie2 => '$Version="1"');
119 }
120 }
121
122 # do we need to quote the value
123 if ($val =~ /\W/ && $version) {
124 $val =~ s/([\\\"])/\\$1/g;
125 $val = qq("$val");
126 }
127
128 # and finally remember this cookie
129 push(@cval, "$key=$val");
130 if ($version >= 1) {
131 push(@cval, qq(\$Path="$path")) if $path_spec;
132 push(@cval, qq(\$Domain="$domain")) if $domain =~ /^\./;
133 if (defined $port) {
134 my $p = '$Port';
135 $p .= qq(="$port") if length $port;
136 push(@cval, $p);
137 }
138 }
139
140 }
141 }
142
143 } continue {
144 # Try with a more general domain, alternately stripping
145 # leading name components and leading dots. When this
146 # results in a domain with no leading dot, it is for
147 # Netscape cookie compatibility only:
148 #
149 # a.b.c.net Any cookie
150 # .b.c.net Any cookie
151 # b.c.net Netscape cookie only
152 # .c.net Any cookie
153
154 if ($domain =~ s/^\.+//) {
155 $netscape_only = 1;
156 }
157 else {
158 $domain =~ s/[^.]*//;
159 $netscape_only = 0;
160 }
161 }
162
163 if (@cval) {
164 if (my $old = $request->header("Cookie")) {
165 unshift(@cval, $old);
166 }
167 $request->header(Cookie => join("; ", @cval));
168 }
169
170 $request;
171}
172
173
174sub extract_cookies
175{
176 my $self = shift;
177 my $response = shift || return;
178
179 my @set = _split_header_words($response->_header("Set-Cookie2"));
180 my @ns_set = $response->_header("Set-Cookie");
181
182 return $response unless @set || @ns_set; # quick exit
183
184 my $request = $response->request;
185 my $url = $request->uri;
186 my $req_host = _host($request, $url);
187 $req_host = "$req_host.local" unless $req_host =~ /\./;
188 my $req_port = $url->port;
189 my $req_path = _url_path($url);
190 _normalize_path($req_path) if $req_path =~ /%/;
191
192 if (@ns_set) {
193 # The old Netscape cookie format for Set-Cookie
194 # http://curl.haxx.se/rfc/cookie_spec.html
195 # can for instance contain an unquoted "," in the expires
196 # field, so we have to use this ad-hoc parser.
197 my $now = time();
198
199 # Build a hash of cookies that was present in Set-Cookie2
200 # headers. We need to skip them if we also find them in a
201 # Set-Cookie header.
202 my %in_set2;
203 for (@set) {
204 $in_set2{$_->[0]}++;
205 }
206
207 my $set;
208 for $set (@ns_set) {
209 $set =~ s/^\s+//;
210 my @cur;
211 my $param;
212 my $expires;
213 my $first_param = 1;
214 for $param (split(/;\s*/, $set)) {
215 next unless length($param);
216 my($k,$v) = split(/\s*=\s*/, $param, 2);
217 if (defined $v) {
218 $v =~ s/\s+$//;
219 #print "$k => $v\n";
220 }
221 else {
222 $k =~ s/\s+$//;
223 #print "$k => undef";
224 }
225 if (!$first_param && lc($k) eq "expires") {
226 my $etime = str2time($v);
227 if (defined $etime) {
228 push(@cur, "Max-Age" => $etime - $now);
229 $expires++;
230 }
231 else {
232 # parse_date can deal with years outside the range of time_t,
233 my($year, $mon, $day, $hour, $min, $sec, $tz) = parse_date($v);
234 if ($year) {
235 my $thisyear = (gmtime)[5] + 1900;
236 if ($year < $thisyear) {
237 push(@cur, "Max-Age" => -1); # any negative value will do
238 $expires++;
239 }
240 elsif ($year >= $thisyear + 10) {
241 # the date is at least 10 years into the future, just replace
242 # it with something approximate
243 push(@cur, "Max-Age" => 10 * 365 * 24 * 60 * 60);
244 $expires++;
245 }
246 }
247 }
248 }
249 elsif (!$first_param && lc($k) =~ /^(?:version|discard|ns-cookie)/) {
250 # ignore
251 }
252 else {
253 push(@cur, $k => $v);
254 }
255 $first_param = 0;
256 }
257 next unless @cur;
258 next if $in_set2{$cur[0]};
259
260# push(@cur, "Port" => $req_port);
261 push(@cur, "Discard" => undef) unless $expires;
262 push(@cur, "Version" => 0);
263 push(@cur, "ns-cookie" => 1);
264 push(@set, \@cur);
265 }
266 }
267
268 SET_COOKIE:
269 for my $set (@set) {
270 next unless @$set >= 2;
271
272 my $key = shift @$set;
273 my $val = shift @$set;
274
275 my %hash;
276 while (@$set) {
277 my $k = shift @$set;
278 my $v = shift @$set;
279 my $lc = lc($k);
280 # don't loose case distinction for unknown fields
281 $k = $lc if $lc =~ /^(?:discard|domain|max-age|
282 path|port|secure|version)$/x;
283 if ($k eq "discard" || $k eq "secure") {
284 $v = 1 unless defined $v;
285 }
286 next if exists $hash{$k}; # only first value is significant
287 $hash{$k} = $v;
288 };
289
290 my %orig_hash = %hash;
291 my $version = delete $hash{version};
292 $version = 1 unless defined($version);
293 my $discard = delete $hash{discard};
294 my $secure = delete $hash{secure};
295 my $maxage = delete $hash{'max-age'};
296 my $ns_cookie = delete $hash{'ns-cookie'};
297
298 # Check domain
299 my $domain = delete $hash{domain};
300 $domain = lc($domain) if defined $domain;
301 if (defined($domain)
302 && $domain ne $req_host && $domain ne ".$req_host") {
303 if ($domain !~ /\./ && $domain ne "local") {
304 next SET_COOKIE;
305 }
306 $domain = ".$domain" unless $domain =~ /^\./;
307 if ($domain =~ /\.\d+$/) {
308 next SET_COOKIE;
309 }
310 my $len = length($domain);
311 unless (substr($req_host, -$len) eq $domain) {
312 next SET_COOKIE;
313 }
314 my $hostpre = substr($req_host, 0, length($req_host) - $len);
315 if ($hostpre =~ /\./ && !$ns_cookie) {
316 next SET_COOKIE;
317 }
318 }
319 else {
320 $domain = $req_host;
321 }
322
323 my $path = delete $hash{path};
324 my $path_spec;
325 if (defined $path && $path ne '') {
326 $path_spec++;
327 _normalize_path($path) if $path =~ /%/;
328 if (!$ns_cookie &&
329 substr($req_path, 0, length($path)) ne $path) {
330 next SET_COOKIE;
331 }
332 }
333 else {
334 $path = $req_path;
335 $path =~ s,/[^/]*$,,;
336 $path = "/" unless length($path);
337 }
338
339 my $port;
340 if (exists $hash{port}) {
341 $port = delete $hash{port};
342 if (defined $port) {
343 $port =~ s/\s+//g;
344 my $found;
345 for my $p (split(/,/, $port)) {
346 unless ($p =~ /^\d+$/) {
347 next SET_COOKIE;
348 }
349 $found++ if $p eq $req_port;
350 }
351 unless ($found) {
352 next SET_COOKIE;
353 }
354 }
355 else {
356 $port = "_$req_port";
357 }
358 }
359 $self->set_cookie($version,$key,$val,$path,$domain,$port,$path_spec,$secure,$maxage,$discard, \%hash)
360 if $self->set_cookie_ok(\%orig_hash);
361 }
362
363 $response;
364}
365
366sub set_cookie_ok
367{
368 1;
369}
370
371
372sub set_cookie
373{
374 my $self = shift;
375 my($version,
376 $key, $val, $path, $domain, $port,
377 $path_spec, $secure, $maxage, $discard, $rest) = @_;
378
379 # path and key can not be empty (key can't start with '$')
380 return $self if !defined($path) || $path !~ m,^/, ||
381 !defined($key) || $key =~ m,^\$,;
382
383 # ensure legal port
384 if (defined $port) {
385 return $self unless $port =~ /^_?\d+(?:,\d+)*$/;
386 }
387
388 my $expires;
389 if (defined $maxage) {
390 if ($maxage <= 0) {
391 delete $self->{COOKIES}{$domain}{$path}{$key};
392 return $self;
393 }
394 $expires = time() + $maxage;
395 }
396 $version = 0 unless defined $version;
397
398 my @array = ($version, $val,$port,
399 $path_spec,
400 $secure, $expires, $discard);
401 push(@array, {%$rest}) if defined($rest) && %$rest;
402 # trim off undefined values at end
403 pop(@array) while !defined $array[-1];
404
405 $self->{COOKIES}{$domain}{$path}{$key} = \@array;
406 $self;
407}
408
409
410sub save
411{
412 my $self = shift;
413 my $file = shift || $self->{'file'} || return;
414 local(*FILE);
415 open(FILE, ">$file") or die "Can't open $file: $!";
416 print FILE "#LWP-Cookies-1.0\n";
417 print FILE $self->as_string(!$self->{ignore_discard});
418 close(FILE);
419 1;
420}
421
422
423sub load
424{
425 my $self = shift;
426 my $file = shift || $self->{'file'} || return;
427 local(*FILE, $_);
428 local $/ = "\n"; # make sure we got standard record separator
429 open(FILE, $file) or return;
430 my $magic = <FILE>;
431 unless ($magic =~ /^\#LWP-Cookies-(\d+\.\d+)/) {
432 warn "$file does not seem to contain cookies";
433 return;
434 }
435 while (<FILE>) {
436 next unless s/^Set-Cookie3:\s*//;
437 chomp;
438 my $cookie;
439 for $cookie (_split_header_words($_)) {
440 my($key,$val) = splice(@$cookie, 0, 2);
441 my %hash;
442 while (@$cookie) {
443 my $k = shift @$cookie;
444 my $v = shift @$cookie;
445 $hash{$k} = $v;
446 }
447 my $version = delete $hash{version};
448 my $path = delete $hash{path};
449 my $domain = delete $hash{domain};
450 my $port = delete $hash{port};
451 my $expires = str2time(delete $hash{expires});
452
453 my $path_spec = exists $hash{path_spec}; delete $hash{path_spec};
454 my $secure = exists $hash{secure}; delete $hash{secure};
455 my $discard = exists $hash{discard}; delete $hash{discard};
456
457 my @array = ($version,$val,$port,
458 $path_spec,$secure,$expires,$discard);
459 push(@array, \%hash) if %hash;
460 $self->{COOKIES}{$domain}{$path}{$key} = \@array;
461 }
462 }
463 close(FILE);
464 1;
465}
466
467
468sub revert
469{
470 my $self = shift;
471 $self->clear->load;
472 $self;
473}
474
475
476sub clear
477{
478 my $self = shift;
479 if (@_ == 0) {
480 $self->{COOKIES} = {};
481 }
482 elsif (@_ == 1) {
483 delete $self->{COOKIES}{$_[0]};
484 }
485 elsif (@_ == 2) {
486 delete $self->{COOKIES}{$_[0]}{$_[1]};
487 }
488 elsif (@_ == 3) {
489 delete $self->{COOKIES}{$_[0]}{$_[1]}{$_[2]};
490 }
491 else {
492 require Carp;
493 Carp::carp('Usage: $c->clear([domain [,path [,key]]])');
494 }
495 $self;
496}
497
498
499sub clear_temporary_cookies
500{
501 my($self) = @_;
502
503 $self->scan(sub {
504 if($_[9] or # "Discard" flag set
505 not $_[8]) { # No expire field?
506 $_[8] = -1; # Set the expire/max_age field
507 $self->set_cookie(@_); # Clear the cookie
508 }
509 });
510}
511
512
513sub DESTROY
514{
515 my $self = shift;
516 local($., $@, $!, $^E, $?);
517 $self->save if $self->{'autosave'};
518}
519
520
521sub scan
522{
523 my($self, $cb) = @_;
524 my($domain,$path,$key);
525 for $domain (sort keys %{$self->{COOKIES}}) {
526 for $path (sort keys %{$self->{COOKIES}{$domain}}) {
527 for $key (sort keys %{$self->{COOKIES}{$domain}{$path}}) {
528 my($version,$val,$port,$path_spec,
529 $secure,$expires,$discard,$rest) =
530 @{$self->{COOKIES}{$domain}{$path}{$key}};
531 $rest = {} unless defined($rest);
532 &$cb($version,$key,$val,$path,$domain,$port,
533 $path_spec,$secure,$expires,$discard,$rest);
534 }
535 }
536 }
537}
538
539
540sub as_string
541{
542 my($self, $skip_discard) = @_;
543 my @res;
544 $self->scan(sub {
545 my($version,$key,$val,$path,$domain,$port,
546 $path_spec,$secure,$expires,$discard,$rest) = @_;
547 return if $discard && $skip_discard;
548 my @h = ($key, $val);
549 push(@h, "path", $path);
550 push(@h, "domain" => $domain);
551 push(@h, "port" => $port) if defined $port;
552 push(@h, "path_spec" => undef) if $path_spec;
553 push(@h, "secure" => undef) if $secure;
554 push(@h, "expires" => HTTP::Date::time2isoz($expires)) if $expires;
555 push(@h, "discard" => undef) if $discard;
556 my $k;
557 for $k (sort keys %$rest) {
558 push(@h, $k, $rest->{$k});
559 }
560 push(@h, "version" => $version);
561 push(@res, "Set-Cookie3: " . join_header_words(\@h));
562 });
563 join("\n", @res, "");
564}
565
566sub _host
567{
568 my($request, $url) = @_;
569 if (my $h = $request->header("Host")) {
570 $h =~ s/:\d+$//; # might have a port as well
571 return lc($h);
572 }
573 return lc($url->host);
574}
575
576sub _url_path
577{
578 my $url = shift;
579 my $path;
580 if($url->can('epath')) {
581 $path = $url->epath; # URI::URL method
582 }
583 else {
584 $path = $url->path; # URI::_generic method
585 }
586 $path = "/" unless length $path;
587 $path;
588}
589
590sub _normalize_path # so that plain string compare can be used
591{
592 my $x;
593 $_[0] =~ s/%([0-9a-fA-F][0-9a-fA-F])/
594 $x = uc($1);
595 $x eq "2F" || $x eq "25" ? "%$x" :
596 pack("C", hex($x));
597 /eg;
598 $_[0] =~ s/([\0-\x20\x7f-\xff])/sprintf("%%%02X",ord($1))/eg;
599}
600
6011;
602
603__END__
604
605=head1 NAME
606
607HTTP::Cookies - HTTP cookie jars
608
609=head1 SYNOPSIS
610
611 use HTTP::Cookies;
612 $cookie_jar = HTTP::Cookies->new(
613 file => "$ENV{'HOME'}/lwp_cookies.dat",
614 autosave => 1,
615 );
616
617 use LWP;
618 my $browser = LWP::UserAgent->new;
619 $browser->cookie_jar($cookie_jar);
620
621Or for an empty and temporary cookie jar:
622
623 use LWP;
624 my $browser = LWP::UserAgent->new;
625 $browser->cookie_jar( {} );
626
627=head1 DESCRIPTION
628
629This class is for objects that represent a "cookie jar" -- that is, a
630database of all the HTTP cookies that a given LWP::UserAgent object
631knows about.
632
633Cookies are a general mechanism which server side connections can use
634to both store and retrieve information on the client side of the
635connection. For more information about cookies refer to
636<URL:http://curl.haxx.se/rfc/cookie_spec.html> and
637<URL:http://www.cookiecentral.com/>. This module also implements the
638new style cookies described in I<RFC 2965>.
639The two variants of cookies are supposed to be able to coexist happily.
640
641Instances of the class I<HTTP::Cookies> are able to store a collection
642of Set-Cookie2: and Set-Cookie: headers and are able to use this
643information to initialize Cookie-headers in I<HTTP::Request> objects.
644The state of a I<HTTP::Cookies> object can be saved in and restored from
645files.
646
647=head1 METHODS
648
649The following methods are provided:
650
651=over 4
652
653=item $cookie_jar = HTTP::Cookies->new
654
655The constructor takes hash style parameters. The following
656parameters are recognized:
657
658 file: name of the file to restore cookies from and save cookies to
659 autosave: save during destruction (bool)
660 ignore_discard: save even cookies that are requested to be discarded (bool)
661 hide_cookie2: do not add Cookie2 header to requests
662
663Future parameters might include (not yet implemented):
664
665 max_cookies 300
666 max_cookies_per_domain 20
667 max_cookie_size 4096
668
669 no_cookies list of domain names that we never return cookies to
670
671=item $cookie_jar->add_cookie_header( $request )
672
673The add_cookie_header() method will set the appropriate Cookie:-header
674for the I<HTTP::Request> object given as argument. The $request must
675have a valid url attribute before this method is called.
676
677=item $cookie_jar->extract_cookies( $response )
678
679The extract_cookies() method will look for Set-Cookie: and
680Set-Cookie2: headers in the I<HTTP::Response> object passed as
681argument. Any of these headers that are found are used to update
682the state of the $cookie_jar.
683
684=item $cookie_jar->set_cookie( $version, $key, $val, $path, $domain, $port, $path_spec, $secure, $maxage, $discard, \%rest )
685
686The set_cookie() method updates the state of the $cookie_jar. The
687$key, $val, $domain, $port and $path arguments are strings. The
688$path_spec, $secure, $discard arguments are boolean values. The $maxage
689value is a number indicating number of seconds that this cookie will
690live. A value <= 0 will delete this cookie. %rest defines
691various other attributes like "Comment" and "CommentURL".
692
693=item $cookie_jar->save
694
695=item $cookie_jar->save( $file )
696
697This method file saves the state of the $cookie_jar to a file.
698The state can then be restored later using the load() method. If a
699filename is not specified we will use the name specified during
700construction. If the attribute I<ignore_discard> is set, then we
701will even save cookies that are marked to be discarded.
702
703The default is to save a sequence of "Set-Cookie3" lines.
704"Set-Cookie3" is a proprietary LWP format, not known to be compatible
705with any browser. The I<HTTP::Cookies::Netscape> sub-class can
706be used to save in a format compatible with Netscape.
707
708=item $cookie_jar->load
709
710=item $cookie_jar->load( $file )
711
712This method reads the cookies from the file and adds them to the
713$cookie_jar. The file must be in the format written by the save()
714method.
715
716=item $cookie_jar->revert
717
718This method empties the $cookie_jar and re-loads the $cookie_jar
719from the last save file.
720
721=item $cookie_jar->clear
722
723=item $cookie_jar->clear( $domain )
724
725=item $cookie_jar->clear( $domain, $path )
726
727=item $cookie_jar->clear( $domain, $path, $key )
728
729Invoking this method without arguments will empty the whole
730$cookie_jar. If given a single argument only cookies belonging to
731that domain will be removed. If given two arguments, cookies
732belonging to the specified path within that domain are removed. If
733given three arguments, then the cookie with the specified key, path
734and domain is removed.
735
736=item $cookie_jar->clear_temporary_cookies
737
738Discard all temporary cookies. Scans for all cookies in the jar
739with either no expire field or a true C<discard> flag. To be
740called when the user agent shuts down according to RFC 2965.
741
742=item $cookie_jar->scan( \&callback )
743
744The argument is a subroutine that will be invoked for each cookie
745stored in the $cookie_jar. The subroutine will be invoked with
746the following arguments:
747
748 0 version
749 1 key
750 2 val
751 3 path
752 4 domain
753 5 port
754 6 path_spec
755 7 secure
756 8 expires
757 9 discard
758 10 hash
759
760=item $cookie_jar->as_string
761
762=item $cookie_jar->as_string( $skip_discardables )
763
764The as_string() method will return the state of the $cookie_jar
765represented as a sequence of "Set-Cookie3" header lines separated by
766"\n". If $skip_discardables is TRUE, it will not return lines for
767cookies with the I<Discard> attribute.
768
769=back
770
771=head1 SEE ALSO
772
773L<HTTP::Cookies::Netscape>, L<HTTP::Cookies::Microsoft>
774
775=head1 COPYRIGHT
776
777Copyright 1997-2002 Gisle Aas
778
779This library is free software; you can redistribute it and/or
780modify it under the same terms as Perl itself.
781
Note: See TracBrowser for help on using the repository browser.