1 | package LWP::Protocol::http10;
|
---|
2 |
|
---|
3 | use strict;
|
---|
4 |
|
---|
5 | require HTTP::Response;
|
---|
6 | require HTTP::Status;
|
---|
7 | require IO::Socket;
|
---|
8 | require IO::Select;
|
---|
9 |
|
---|
10 | use vars qw(@ISA @EXTRA_SOCK_OPTS);
|
---|
11 |
|
---|
12 | require LWP::Protocol;
|
---|
13 | @ISA = qw(LWP::Protocol);
|
---|
14 |
|
---|
15 | my $CRLF = "\015\012"; # how lines should be terminated;
|
---|
16 | # "\r\n" is not correct on all systems, for
|
---|
17 | # instance MacPerl defines it to "\012\015"
|
---|
18 |
|
---|
19 | sub _new_socket
|
---|
20 | {
|
---|
21 | my($self, $host, $port, $timeout) = @_;
|
---|
22 |
|
---|
23 | local($^W) = 0; # IO::Socket::INET can be noisy
|
---|
24 | my $sock = IO::Socket::INET->new(PeerAddr => $host,
|
---|
25 | PeerPort => $port,
|
---|
26 | Proto => 'tcp',
|
---|
27 | Timeout => $timeout,
|
---|
28 | $self->_extra_sock_opts($host, $port),
|
---|
29 | );
|
---|
30 | unless ($sock) {
|
---|
31 | # IO::Socket::INET leaves additional error messages in $@
|
---|
32 | $@ =~ s/^.*?: //;
|
---|
33 | die "Can't connect to $host:$port ($@)";
|
---|
34 | }
|
---|
35 | $sock;
|
---|
36 | }
|
---|
37 |
|
---|
38 | sub _extra_sock_opts # to be overridden by subclass
|
---|
39 | {
|
---|
40 | return @EXTRA_SOCK_OPTS;
|
---|
41 | }
|
---|
42 |
|
---|
43 |
|
---|
44 | sub _check_sock
|
---|
45 | {
|
---|
46 | #my($self, $req, $sock) = @_;
|
---|
47 | }
|
---|
48 |
|
---|
49 | sub _get_sock_info
|
---|
50 | {
|
---|
51 | my($self, $res, $sock) = @_;
|
---|
52 | if (defined(my $peerhost = $sock->peerhost)) {
|
---|
53 | $res->header("Client-Peer" => "$peerhost:" . $sock->peerport);
|
---|
54 | }
|
---|
55 | }
|
---|
56 |
|
---|
57 | sub _fixup_header
|
---|
58 | {
|
---|
59 | my($self, $h, $url, $proxy) = @_;
|
---|
60 |
|
---|
61 | $h->remove_header('Connection'); # need support here to be useful
|
---|
62 |
|
---|
63 | # HTTP/1.1 will require us to send the 'Host' header, so we might
|
---|
64 | # as well start now.
|
---|
65 | my $hhost = $url->authority;
|
---|
66 | if ($hhost =~ s/^([^\@]*)\@//) { # get rid of potential "user:pass@"
|
---|
67 | # add authorization header if we need them. HTTP URLs do
|
---|
68 | # not really support specification of user and password, but
|
---|
69 | # we allow it.
|
---|
70 | if (defined($1) && not $h->header('Authorization')) {
|
---|
71 | require URI::Escape;
|
---|
72 | $h->authorization_basic(map URI::Escape::uri_unescape($_),
|
---|
73 | split(":", $1, 2));
|
---|
74 | }
|
---|
75 | }
|
---|
76 | $h->init_header('Host' => $hhost);
|
---|
77 |
|
---|
78 | if ($proxy) {
|
---|
79 | # Check the proxy URI's userinfo() for proxy credentials
|
---|
80 | # export http_proxy="http://proxyuser:proxypass@proxyhost:port"
|
---|
81 | my $p_auth = $proxy->userinfo();
|
---|
82 | if(defined $p_auth) {
|
---|
83 | require URI::Escape;
|
---|
84 | $h->proxy_authorization_basic(map URI::Escape::uri_unescape($_),
|
---|
85 | split(":", $p_auth, 2))
|
---|
86 | }
|
---|
87 | }
|
---|
88 | }
|
---|
89 |
|
---|
90 |
|
---|
91 | sub request
|
---|
92 | {
|
---|
93 | my($self, $request, $proxy, $arg, $size, $timeout) = @_;
|
---|
94 |
|
---|
95 | $size ||= 4096;
|
---|
96 |
|
---|
97 | # check method
|
---|
98 | my $method = $request->method;
|
---|
99 | unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token
|
---|
100 | return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
|
---|
101 | 'Library does not allow method ' .
|
---|
102 | "$method for 'http:' URLs");
|
---|
103 | }
|
---|
104 |
|
---|
105 | my $url = $request->uri;
|
---|
106 | my($host, $port, $fullpath);
|
---|
107 |
|
---|
108 | # Check if we're proxy'ing
|
---|
109 | if (defined $proxy) {
|
---|
110 | # $proxy is an URL to an HTTP server which will proxy this request
|
---|
111 | $host = $proxy->host;
|
---|
112 | $port = $proxy->port;
|
---|
113 | $fullpath = $method eq "CONNECT" ?
|
---|
114 | ($url->host . ":" . $url->port) :
|
---|
115 | $url->as_string;
|
---|
116 | }
|
---|
117 | else {
|
---|
118 | $host = $url->host;
|
---|
119 | $port = $url->port;
|
---|
120 | $fullpath = $url->path_query;
|
---|
121 | $fullpath = "/" unless length $fullpath;
|
---|
122 | }
|
---|
123 |
|
---|
124 | # connect to remote site
|
---|
125 | my $socket = $self->_new_socket($host, $port, $timeout);
|
---|
126 | $self->_check_sock($request, $socket);
|
---|
127 |
|
---|
128 | my $sel = IO::Select->new($socket) if $timeout;
|
---|
129 |
|
---|
130 | my $request_line = "$method $fullpath HTTP/1.0$CRLF";
|
---|
131 |
|
---|
132 | my $h = $request->headers->clone;
|
---|
133 | my $cont_ref = $request->content_ref;
|
---|
134 | $cont_ref = $$cont_ref if ref($$cont_ref);
|
---|
135 | my $ctype = ref($cont_ref);
|
---|
136 |
|
---|
137 | # If we're sending content we *have* to specify a content length
|
---|
138 | # otherwise the server won't know a messagebody is coming.
|
---|
139 | if ($ctype eq 'CODE') {
|
---|
140 | die 'No Content-Length header for request with dynamic content'
|
---|
141 | unless defined($h->header('Content-Length')) ||
|
---|
142 | $h->content_type =~ /^multipart\//;
|
---|
143 | # For HTTP/1.1 we could have used chunked transfer encoding...
|
---|
144 | }
|
---|
145 | else {
|
---|
146 | $h->header('Content-Length' => length $$cont_ref)
|
---|
147 | if defined($$cont_ref) && length($$cont_ref);
|
---|
148 | }
|
---|
149 |
|
---|
150 | $self->_fixup_header($h, $url, $proxy);
|
---|
151 |
|
---|
152 | my $buf = $request_line . $h->as_string($CRLF) . $CRLF;
|
---|
153 | my $n; # used for return value from syswrite/sysread
|
---|
154 | my $length;
|
---|
155 | my $offset;
|
---|
156 |
|
---|
157 | # syswrite $buf
|
---|
158 | $length = length($buf);
|
---|
159 | $offset = 0;
|
---|
160 | while ( $offset < $length ) {
|
---|
161 | die "write timeout" if $timeout && !$sel->can_write($timeout);
|
---|
162 | $n = $socket->syswrite($buf, $length-$offset, $offset );
|
---|
163 | die $! unless defined($n);
|
---|
164 | $offset += $n;
|
---|
165 | }
|
---|
166 |
|
---|
167 | if ($ctype eq 'CODE') {
|
---|
168 | while ( ($buf = &$cont_ref()), defined($buf) && length($buf)) {
|
---|
169 | # syswrite $buf
|
---|
170 | $length = length($buf);
|
---|
171 | $offset = 0;
|
---|
172 | while ( $offset < $length ) {
|
---|
173 | die "write timeout" if $timeout && !$sel->can_write($timeout);
|
---|
174 | $n = $socket->syswrite($buf, $length-$offset, $offset );
|
---|
175 | die $! unless defined($n);
|
---|
176 | $offset += $n;
|
---|
177 | }
|
---|
178 | }
|
---|
179 | }
|
---|
180 | elsif (defined($$cont_ref) && length($$cont_ref)) {
|
---|
181 | # syswrite $$cont_ref
|
---|
182 | $length = length($$cont_ref);
|
---|
183 | $offset = 0;
|
---|
184 | while ( $offset < $length ) {
|
---|
185 | die "write timeout" if $timeout && !$sel->can_write($timeout);
|
---|
186 | $n = $socket->syswrite($$cont_ref, $length-$offset, $offset );
|
---|
187 | die $! unless defined($n);
|
---|
188 | $offset += $n;
|
---|
189 | }
|
---|
190 | }
|
---|
191 |
|
---|
192 | # read response line from server
|
---|
193 | my $response;
|
---|
194 | $buf = '';
|
---|
195 |
|
---|
196 | # Inside this loop we will read the response line and all headers
|
---|
197 | # found in the response.
|
---|
198 | while (1) {
|
---|
199 | die "read timeout" if $timeout && !$sel->can_read($timeout);
|
---|
200 | $n = $socket->sysread($buf, $size, length($buf));
|
---|
201 | die $! unless defined($n);
|
---|
202 | die "unexpected EOF before status line seen" unless $n;
|
---|
203 |
|
---|
204 | if ($buf =~ s/^(HTTP\/\d+\.\d+)[ \t]+(\d+)[ \t]*([^\012]*)\012//) {
|
---|
205 | # HTTP/1.0 response or better
|
---|
206 | my($ver,$code,$msg) = ($1, $2, $3);
|
---|
207 | $msg =~ s/\015$//;
|
---|
208 | $response = HTTP::Response->new($code, $msg);
|
---|
209 | $response->protocol($ver);
|
---|
210 |
|
---|
211 | # ensure that we have read all headers. The headers will be
|
---|
212 | # terminated by two blank lines
|
---|
213 | until ($buf =~ /^\015?\012/ || $buf =~ /\015?\012\015?\012/) {
|
---|
214 | # must read more if we can...
|
---|
215 | die "read timeout" if $timeout && !$sel->can_read($timeout);
|
---|
216 | my $old_len = length($buf);
|
---|
217 | $n = $socket->sysread($buf, $size, $old_len);
|
---|
218 | die $! unless defined($n);
|
---|
219 | die "unexpected EOF before all headers seen" unless $n;
|
---|
220 | }
|
---|
221 |
|
---|
222 | # now we start parsing the headers. The strategy is to
|
---|
223 | # remove one line at a time from the beginning of the header
|
---|
224 | # buffer ($res).
|
---|
225 | my($key, $val);
|
---|
226 | while ($buf =~ s/([^\012]*)\012//) {
|
---|
227 | my $line = $1;
|
---|
228 |
|
---|
229 | # if we need to restore as content when illegal headers
|
---|
230 | # are found.
|
---|
231 | my $save = "$line\012";
|
---|
232 |
|
---|
233 | $line =~ s/\015$//;
|
---|
234 | last unless length $line;
|
---|
235 |
|
---|
236 | if ($line =~ /^([a-zA-Z0-9_\-.]+)\s*:\s*(.*)/) {
|
---|
237 | $response->push_header($key, $val) if $key;
|
---|
238 | ($key, $val) = ($1, $2);
|
---|
239 | }
|
---|
240 | elsif ($line =~ /^\s+(.*)/ && $key) {
|
---|
241 | $val .= " $1";
|
---|
242 | }
|
---|
243 | else {
|
---|
244 | $response->push_header("Client-Bad-Header-Line" => $line);
|
---|
245 | }
|
---|
246 | }
|
---|
247 | $response->push_header($key, $val) if $key;
|
---|
248 | last;
|
---|
249 |
|
---|
250 | }
|
---|
251 | elsif ((length($buf) >= 5 and $buf !~ /^HTTP\//) or
|
---|
252 | $buf =~ /\012/ ) {
|
---|
253 | # HTTP/0.9 or worse
|
---|
254 | $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
|
---|
255 | $response->protocol('HTTP/0.9');
|
---|
256 | last;
|
---|
257 |
|
---|
258 | }
|
---|
259 | else {
|
---|
260 | # need more data
|
---|
261 | }
|
---|
262 | };
|
---|
263 | $response->request($request);
|
---|
264 | $self->_get_sock_info($response, $socket);
|
---|
265 |
|
---|
266 | if ($method eq "CONNECT") {
|
---|
267 | $response->{client_socket} = $socket; # so it can be picked up
|
---|
268 | $response->content($buf); # in case we read more than the headers
|
---|
269 | return $response;
|
---|
270 | }
|
---|
271 |
|
---|
272 | my $usebuf = length($buf) > 0;
|
---|
273 | $response = $self->collect($arg, $response, sub {
|
---|
274 | if ($usebuf) {
|
---|
275 | $usebuf = 0;
|
---|
276 | return \$buf;
|
---|
277 | }
|
---|
278 | die "read timeout" if $timeout && !$sel->can_read($timeout);
|
---|
279 | my $n = $socket->sysread($buf, $size);
|
---|
280 | die $! unless defined($n);
|
---|
281 | return \$buf;
|
---|
282 | } );
|
---|
283 |
|
---|
284 | #$socket->close;
|
---|
285 |
|
---|
286 | $response;
|
---|
287 | }
|
---|
288 |
|
---|
289 | 1;
|
---|