[27183] | 1 | package LWP::Protocol::http10;
|
---|
| 2 |
|
---|
| 3 | use strict;
|
---|
| 4 |
|
---|
| 5 | require HTTP::Response;
|
---|
| 6 | require HTTP::Status;
|
---|
| 7 | require IO::Socket;
|
---|
| 8 | require IO::Select;
|
---|
| 9 |
|
---|
| 10 | use vars qw(@ISA @EXTRA_SOCK_OPTS);
|
---|
| 11 |
|
---|
| 12 | require LWP::Protocol;
|
---|
| 13 | @ISA = qw(LWP::Protocol);
|
---|
| 14 |
|
---|
| 15 | my $CRLF = "\015\012"; # how lines should be terminated;
|
---|
| 16 | # "\r\n" is not correct on all systems, for
|
---|
| 17 | # instance MacPerl defines it to "\012\015"
|
---|
| 18 |
|
---|
| 19 | sub _new_socket
|
---|
| 20 | {
|
---|
| 21 | my($self, $host, $port, $timeout) = @_;
|
---|
| 22 |
|
---|
| 23 | local($^W) = 0; # IO::Socket::INET can be noisy
|
---|
| 24 | my $sock = IO::Socket::INET->new(PeerAddr => $host,
|
---|
| 25 | PeerPort => $port,
|
---|
| 26 | Proto => 'tcp',
|
---|
| 27 | Timeout => $timeout,
|
---|
| 28 | $self->_extra_sock_opts($host, $port),
|
---|
| 29 | );
|
---|
| 30 | unless ($sock) {
|
---|
| 31 | # IO::Socket::INET leaves additional error messages in $@
|
---|
| 32 | $@ =~ s/^.*?: //;
|
---|
| 33 | die "Can't connect to $host:$port ($@)";
|
---|
| 34 | }
|
---|
| 35 | $sock;
|
---|
| 36 | }
|
---|
| 37 |
|
---|
| 38 | sub _extra_sock_opts # to be overridden by subclass
|
---|
| 39 | {
|
---|
| 40 | return @EXTRA_SOCK_OPTS;
|
---|
| 41 | }
|
---|
| 42 |
|
---|
| 43 |
|
---|
| 44 | sub _check_sock
|
---|
| 45 | {
|
---|
| 46 | #my($self, $req, $sock) = @_;
|
---|
| 47 | }
|
---|
| 48 |
|
---|
| 49 | sub _get_sock_info
|
---|
| 50 | {
|
---|
| 51 | my($self, $res, $sock) = @_;
|
---|
| 52 | if (defined(my $peerhost = $sock->peerhost)) {
|
---|
| 53 | $res->header("Client-Peer" => "$peerhost:" . $sock->peerport);
|
---|
| 54 | }
|
---|
| 55 | }
|
---|
| 56 |
|
---|
| 57 | sub _fixup_header
|
---|
| 58 | {
|
---|
| 59 | my($self, $h, $url, $proxy) = @_;
|
---|
| 60 |
|
---|
| 61 | $h->remove_header('Connection'); # need support here to be useful
|
---|
| 62 |
|
---|
| 63 | # HTTP/1.1 will require us to send the 'Host' header, so we might
|
---|
| 64 | # as well start now.
|
---|
| 65 | my $hhost = $url->authority;
|
---|
| 66 | if ($hhost =~ s/^([^\@]*)\@//) { # get rid of potential "user:pass@"
|
---|
| 67 | # add authorization header if we need them. HTTP URLs do
|
---|
| 68 | # not really support specification of user and password, but
|
---|
| 69 | # we allow it.
|
---|
| 70 | if (defined($1) && not $h->header('Authorization')) {
|
---|
| 71 | require URI::Escape;
|
---|
| 72 | $h->authorization_basic(map URI::Escape::uri_unescape($_),
|
---|
| 73 | split(":", $1, 2));
|
---|
| 74 | }
|
---|
| 75 | }
|
---|
| 76 | $h->init_header('Host' => $hhost);
|
---|
| 77 |
|
---|
| 78 | if ($proxy) {
|
---|
| 79 | # Check the proxy URI's userinfo() for proxy credentials
|
---|
| 80 | # export http_proxy="http://proxyuser:proxypass@proxyhost:port"
|
---|
| 81 | my $p_auth = $proxy->userinfo();
|
---|
| 82 | if(defined $p_auth) {
|
---|
| 83 | require URI::Escape;
|
---|
| 84 | $h->proxy_authorization_basic(map URI::Escape::uri_unescape($_),
|
---|
| 85 | split(":", $p_auth, 2))
|
---|
| 86 | }
|
---|
| 87 | }
|
---|
| 88 | }
|
---|
| 89 |
|
---|
| 90 |
|
---|
| 91 | sub request
|
---|
| 92 | {
|
---|
| 93 | my($self, $request, $proxy, $arg, $size, $timeout) = @_;
|
---|
| 94 |
|
---|
| 95 | $size ||= 4096;
|
---|
| 96 |
|
---|
| 97 | # check method
|
---|
| 98 | my $method = $request->method;
|
---|
| 99 | unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token
|
---|
| 100 | return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
|
---|
| 101 | 'Library does not allow method ' .
|
---|
| 102 | "$method for 'http:' URLs");
|
---|
| 103 | }
|
---|
| 104 |
|
---|
| 105 | my $url = $request->uri;
|
---|
| 106 | my($host, $port, $fullpath);
|
---|
| 107 |
|
---|
| 108 | # Check if we're proxy'ing
|
---|
| 109 | if (defined $proxy) {
|
---|
| 110 | # $proxy is an URL to an HTTP server which will proxy this request
|
---|
| 111 | $host = $proxy->host;
|
---|
| 112 | $port = $proxy->port;
|
---|
| 113 | $fullpath = $method eq "CONNECT" ?
|
---|
| 114 | ($url->host . ":" . $url->port) :
|
---|
| 115 | $url->as_string;
|
---|
| 116 | }
|
---|
| 117 | else {
|
---|
| 118 | $host = $url->host;
|
---|
| 119 | $port = $url->port;
|
---|
| 120 | $fullpath = $url->path_query;
|
---|
| 121 | $fullpath = "/" unless length $fullpath;
|
---|
| 122 | }
|
---|
| 123 |
|
---|
| 124 | # connect to remote site
|
---|
| 125 | my $socket = $self->_new_socket($host, $port, $timeout);
|
---|
| 126 | $self->_check_sock($request, $socket);
|
---|
| 127 |
|
---|
| 128 | my $sel = IO::Select->new($socket) if $timeout;
|
---|
| 129 |
|
---|
| 130 | my $request_line = "$method $fullpath HTTP/1.0$CRLF";
|
---|
| 131 |
|
---|
| 132 | my $h = $request->headers->clone;
|
---|
| 133 | my $cont_ref = $request->content_ref;
|
---|
| 134 | $cont_ref = $$cont_ref if ref($$cont_ref);
|
---|
| 135 | my $ctype = ref($cont_ref);
|
---|
| 136 |
|
---|
| 137 | # If we're sending content we *have* to specify a content length
|
---|
| 138 | # otherwise the server won't know a messagebody is coming.
|
---|
| 139 | if ($ctype eq 'CODE') {
|
---|
| 140 | die 'No Content-Length header for request with dynamic content'
|
---|
| 141 | unless defined($h->header('Content-Length')) ||
|
---|
| 142 | $h->content_type =~ /^multipart\//;
|
---|
| 143 | # For HTTP/1.1 we could have used chunked transfer encoding...
|
---|
| 144 | }
|
---|
| 145 | else {
|
---|
| 146 | $h->header('Content-Length' => length $$cont_ref)
|
---|
| 147 | if defined($$cont_ref) && length($$cont_ref);
|
---|
| 148 | }
|
---|
| 149 |
|
---|
| 150 | $self->_fixup_header($h, $url, $proxy);
|
---|
| 151 |
|
---|
| 152 | my $buf = $request_line . $h->as_string($CRLF) . $CRLF;
|
---|
| 153 | my $n; # used for return value from syswrite/sysread
|
---|
| 154 | my $length;
|
---|
| 155 | my $offset;
|
---|
| 156 |
|
---|
| 157 | # syswrite $buf
|
---|
| 158 | $length = length($buf);
|
---|
| 159 | $offset = 0;
|
---|
| 160 | while ( $offset < $length ) {
|
---|
| 161 | die "write timeout" if $timeout && !$sel->can_write($timeout);
|
---|
| 162 | $n = $socket->syswrite($buf, $length-$offset, $offset );
|
---|
| 163 | die $! unless defined($n);
|
---|
| 164 | $offset += $n;
|
---|
| 165 | }
|
---|
| 166 |
|
---|
| 167 | if ($ctype eq 'CODE') {
|
---|
| 168 | while ( ($buf = &$cont_ref()), defined($buf) && length($buf)) {
|
---|
| 169 | # syswrite $buf
|
---|
| 170 | $length = length($buf);
|
---|
| 171 | $offset = 0;
|
---|
| 172 | while ( $offset < $length ) {
|
---|
| 173 | die "write timeout" if $timeout && !$sel->can_write($timeout);
|
---|
| 174 | $n = $socket->syswrite($buf, $length-$offset, $offset );
|
---|
| 175 | die $! unless defined($n);
|
---|
| 176 | $offset += $n;
|
---|
| 177 | }
|
---|
| 178 | }
|
---|
| 179 | }
|
---|
| 180 | elsif (defined($$cont_ref) && length($$cont_ref)) {
|
---|
| 181 | # syswrite $$cont_ref
|
---|
| 182 | $length = length($$cont_ref);
|
---|
| 183 | $offset = 0;
|
---|
| 184 | while ( $offset < $length ) {
|
---|
| 185 | die "write timeout" if $timeout && !$sel->can_write($timeout);
|
---|
| 186 | $n = $socket->syswrite($$cont_ref, $length-$offset, $offset );
|
---|
| 187 | die $! unless defined($n);
|
---|
| 188 | $offset += $n;
|
---|
| 189 | }
|
---|
| 190 | }
|
---|
| 191 |
|
---|
| 192 | # read response line from server
|
---|
| 193 | my $response;
|
---|
| 194 | $buf = '';
|
---|
| 195 |
|
---|
| 196 | # Inside this loop we will read the response line and all headers
|
---|
| 197 | # found in the response.
|
---|
| 198 | while (1) {
|
---|
| 199 | die "read timeout" if $timeout && !$sel->can_read($timeout);
|
---|
| 200 | $n = $socket->sysread($buf, $size, length($buf));
|
---|
| 201 | die $! unless defined($n);
|
---|
| 202 | die "unexpected EOF before status line seen" unless $n;
|
---|
| 203 |
|
---|
| 204 | if ($buf =~ s/^(HTTP\/\d+\.\d+)[ \t]+(\d+)[ \t]*([^\012]*)\012//) {
|
---|
| 205 | # HTTP/1.0 response or better
|
---|
| 206 | my($ver,$code,$msg) = ($1, $2, $3);
|
---|
| 207 | $msg =~ s/\015$//;
|
---|
| 208 | $response = HTTP::Response->new($code, $msg);
|
---|
| 209 | $response->protocol($ver);
|
---|
| 210 |
|
---|
| 211 | # ensure that we have read all headers. The headers will be
|
---|
| 212 | # terminated by two blank lines
|
---|
| 213 | until ($buf =~ /^\015?\012/ || $buf =~ /\015?\012\015?\012/) {
|
---|
| 214 | # must read more if we can...
|
---|
| 215 | die "read timeout" if $timeout && !$sel->can_read($timeout);
|
---|
| 216 | my $old_len = length($buf);
|
---|
| 217 | $n = $socket->sysread($buf, $size, $old_len);
|
---|
| 218 | die $! unless defined($n);
|
---|
| 219 | die "unexpected EOF before all headers seen" unless $n;
|
---|
| 220 | }
|
---|
| 221 |
|
---|
| 222 | # now we start parsing the headers. The strategy is to
|
---|
| 223 | # remove one line at a time from the beginning of the header
|
---|
| 224 | # buffer ($res).
|
---|
| 225 | my($key, $val);
|
---|
| 226 | while ($buf =~ s/([^\012]*)\012//) {
|
---|
| 227 | my $line = $1;
|
---|
| 228 |
|
---|
| 229 | # if we need to restore as content when illegal headers
|
---|
| 230 | # are found.
|
---|
| 231 | my $save = "$line\012";
|
---|
| 232 |
|
---|
| 233 | $line =~ s/\015$//;
|
---|
| 234 | last unless length $line;
|
---|
| 235 |
|
---|
| 236 | if ($line =~ /^([a-zA-Z0-9_\-.]+)\s*:\s*(.*)/) {
|
---|
| 237 | $response->push_header($key, $val) if $key;
|
---|
| 238 | ($key, $val) = ($1, $2);
|
---|
| 239 | }
|
---|
| 240 | elsif ($line =~ /^\s+(.*)/ && $key) {
|
---|
| 241 | $val .= " $1";
|
---|
| 242 | }
|
---|
| 243 | else {
|
---|
| 244 | $response->push_header("Client-Bad-Header-Line" => $line);
|
---|
| 245 | }
|
---|
| 246 | }
|
---|
| 247 | $response->push_header($key, $val) if $key;
|
---|
| 248 | last;
|
---|
| 249 |
|
---|
| 250 | }
|
---|
| 251 | elsif ((length($buf) >= 5 and $buf !~ /^HTTP\//) or
|
---|
| 252 | $buf =~ /\012/ ) {
|
---|
| 253 | # HTTP/0.9 or worse
|
---|
| 254 | $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
|
---|
| 255 | $response->protocol('HTTP/0.9');
|
---|
| 256 | last;
|
---|
| 257 |
|
---|
| 258 | }
|
---|
| 259 | else {
|
---|
| 260 | # need more data
|
---|
| 261 | }
|
---|
| 262 | };
|
---|
| 263 | $response->request($request);
|
---|
| 264 | $self->_get_sock_info($response, $socket);
|
---|
| 265 |
|
---|
| 266 | if ($method eq "CONNECT") {
|
---|
| 267 | $response->{client_socket} = $socket; # so it can be picked up
|
---|
| 268 | $response->content($buf); # in case we read more than the headers
|
---|
| 269 | return $response;
|
---|
| 270 | }
|
---|
| 271 |
|
---|
| 272 | my $usebuf = length($buf) > 0;
|
---|
| 273 | $response = $self->collect($arg, $response, sub {
|
---|
| 274 | if ($usebuf) {
|
---|
| 275 | $usebuf = 0;
|
---|
| 276 | return \$buf;
|
---|
| 277 | }
|
---|
| 278 | die "read timeout" if $timeout && !$sel->can_read($timeout);
|
---|
| 279 | my $n = $socket->sysread($buf, $size);
|
---|
| 280 | die $! unless defined($n);
|
---|
| 281 | return \$buf;
|
---|
| 282 | } );
|
---|
| 283 |
|
---|
| 284 | #$socket->close;
|
---|
| 285 |
|
---|
| 286 | $response;
|
---|
| 287 | }
|
---|
| 288 |
|
---|
| 289 | 1;
|
---|