source: main/trunk/greenstone2/perllib/cpan/Mojo/Message/Request.pm@ 32205

Last change on this file since 32205 was 32205, checked in by ak19, 6 years ago

First set of commits to do with implementing the new 'paged_html' output option of PDFPlugin that uses using xpdftools' new pdftohtml. So far tested only on Linux (64 bit), but things work there so I'm optimistically committing the changes since they work. 2. Committing the pre-built Linux binaries of XPDFtools for both 32 and 64 bit built by the XPDF group. 2. To use the correct bitness variant of xpdftools, setup.bash now exports the BITNESS env var, consulted by gsConvert.pl. 3. All the perl code changes to do with using xpdf tools' pdftohtml to generate paged_html and feed it in the desired form into GS(3): gsConvert.pl, PDFPlugin.pm and its parent ConvertBinaryPFile.pm have been modified to make it all work. xpdftools' pdftohtml generates a folder containing an html file and a screenshot for each page in a PDF (as well as an index.html linking to each page's html). However, we want a single html file that contains each individual 'page' html's content in a div, and need to do some further HTML style, attribute and structure modifications to massage the xpdftool output to what we want for GS. In order to parse and manipulate the HTML 'DOM' to do this, we're using the Mojo::DOM package that Dr Bainbridge found and which he's compiled up. Mojo::DOM is therefore also committed in this revision. Some further changes and some display fixes are required, but need to check with the others about that.

File size: 11.9 KB
Line 
1package Mojo::Message::Request;
2use Mojo::Base 'Mojo::Message';
3
4use Mojo::Cookie::Request;
5use Mojo::Util qw(b64_encode b64_decode sha1_sum);
6use Mojo::URL;
7
8my ($SEED, $COUNTER) = (rand, int rand 0xffffff);
9
10has env => sub { {} };
11has method => 'GET';
12has [qw(proxy reverse_proxy)];
13has request_id => sub {
14 substr sha1_sum($SEED . $$ . ($COUNTER = ($COUNTER + 1) % 0xffffff)), 0, 8;
15};
16has url => sub { Mojo::URL->new };
17has via_proxy => 1;
18
19sub clone {
20 my $self = shift;
21
22 # Dynamic requests cannot be cloned
23 return undef unless my $content = $self->content->clone;
24 my $clone = $self->new(
25 content => $content,
26 method => $self->method,
27 url => $self->url->clone,
28 version => $self->version
29 );
30 $clone->{proxy} = $self->{proxy}->clone if $self->{proxy};
31
32 return $clone;
33}
34
35sub cookies {
36 my $self = shift;
37
38 # Parse cookies
39 my $headers = $self->headers;
40 return [map { @{Mojo::Cookie::Request->parse($_)} } $headers->cookie]
41 unless @_;
42
43 # Add cookies
44 my @cookies = map { ref $_ eq 'HASH' ? Mojo::Cookie::Request->new($_) : $_ }
45 $headers->cookie || (), @_;
46 $headers->cookie(join '; ', @cookies);
47
48 return $self;
49}
50
51sub every_param { shift->params->every_param(@_) }
52
53sub extract_start_line {
54 my ($self, $bufref) = @_;
55
56 # Ignore any leading empty lines
57 return undef unless $$bufref =~ s/^\s*(.*?)\x0d?\x0a//;
58
59 # We have a (hopefully) full request-line
60 return !$self->error({message => 'Bad request start-line'})
61 unless $1 =~ /^(\S+)\s+(\S+)\s+HTTP\/(\d\.\d)$/;
62 my $url = $self->method($1)->version($3)->url;
63 my $target = $2;
64 return !!$url->host_port($target) if $1 eq 'CONNECT';
65 return !!$url->parse($target)->fragment(undef) if $target =~ /^[^:\/?#]+:/;
66 return !!$url->path_query($target);
67}
68
69sub fix_headers {
70 my $self = shift;
71 $self->{fix} ? return $self : $self->SUPER::fix_headers(@_);
72
73 # Host
74 my $url = $self->url;
75 my $headers = $self->headers;
76 $headers->host($url->host_port) unless $headers->host;
77
78 # Basic authentication
79 if ((my $info = $url->userinfo) && !$headers->authorization) {
80 $headers->authorization('Basic ' . b64_encode($info, ''));
81 }
82
83 # Basic proxy authentication
84 return $self unless (my $proxy = $self->proxy) && $self->via_proxy;
85 return $self unless my $info = $proxy->userinfo;
86 $headers->proxy_authorization('Basic ' . b64_encode($info, ''))
87 unless $headers->proxy_authorization;
88 return $self;
89}
90
91sub get_start_line_chunk {
92 my ($self, $offset) = @_;
93 $self->_start_line->emit(progress => 'start_line', $offset);
94 return substr $self->{start_buffer}, $offset, 131072;
95}
96
97sub is_handshake { lc($_[0]->headers->upgrade // '') eq 'websocket' }
98
99sub is_secure {
100 my $url = shift->url;
101 return ($url->protocol || $url->base->protocol) eq 'https';
102}
103
104sub is_xhr {
105 (shift->headers->header('X-Requested-With') // '') =~ /XMLHttpRequest/i;
106}
107
108sub param { shift->params->param(@_) }
109
110sub params {
111 my $self = shift;
112 return $self->{params}
113 ||= $self->body_params->clone->append($self->query_params);
114}
115
116sub parse {
117 my $self = shift;
118 my ($env, $chunk) = ref $_[0] ? (shift, '') : (undef, shift);
119
120 # Parse CGI environment
121 $self->env($env)->_parse_env($env) if $env;
122
123 # Parse normal message
124 if (($self->{state} // '') ne 'cgi') { $self->SUPER::parse($chunk) }
125
126 # Parse CGI content
127 else { $self->content($self->content->parse_body($chunk))->SUPER::parse('') }
128
129 # Check if we can fix things that require all headers
130 return $self unless $self->is_finished;
131
132 # Base URL
133 my $base = $self->url->base;
134 $base->scheme('http') unless $base->scheme;
135 my $headers = $self->headers;
136 if (!$base->host && (my $host = $headers->host)) { $base->host_port($host) }
137
138 # Basic authentication
139 if (my $basic = _basic($headers->authorization)) { $base->userinfo($basic) }
140
141 # Basic proxy authentication
142 my $basic = _basic($headers->proxy_authorization);
143 $self->proxy(Mojo::URL->new->userinfo($basic)) if $basic;
144
145 # "X-Forwarded-Proto"
146 $base->scheme('https')
147 if $self->reverse_proxy
148 && ($headers->header('X-Forwarded-Proto') // '') eq 'https';
149
150 return $self;
151}
152
153sub query_params { shift->url->query }
154
155sub start_line_size { length shift->_start_line->{start_buffer} }
156
157sub _basic { $_[0] && $_[0] =~ /Basic (.+)$/ ? b64_decode $1 : undef }
158
159sub _parse_env {
160 my ($self, $env) = @_;
161
162 # Bypass normal message parser
163 $self->{state} = 'cgi';
164
165 # Extract headers
166 my $headers = $self->headers;
167 my $url = $self->url;
168 my $base = $url->base;
169 for my $name (keys %$env) {
170 my $value = $env->{$name};
171 next unless $name =~ s/^HTTP_//i;
172 $name =~ y/_/-/;
173 $headers->header($name => $value);
174
175 # Host/Port
176 $value =~ s/:(\d+)$// ? $base->host($value)->port($1) : $base->host($value)
177 if $name eq 'HOST';
178 }
179
180 # Content-Type is a special case on some servers
181 $headers->content_type($env->{CONTENT_TYPE}) if $env->{CONTENT_TYPE};
182
183 # Content-Length is a special case on some servers
184 $headers->content_length($env->{CONTENT_LENGTH}) if $env->{CONTENT_LENGTH};
185
186 # Query
187 $url->query->parse($env->{QUERY_STRING}) if $env->{QUERY_STRING};
188
189 # Method
190 $self->method($env->{REQUEST_METHOD}) if $env->{REQUEST_METHOD};
191
192 # Scheme/Version
193 $base->scheme($1) and $self->version($2)
194 if ($env->{SERVER_PROTOCOL} // '') =~ m!^([^/]+)/([^/]+)$!;
195
196 # HTTPS
197 $base->scheme('https') if uc($env->{HTTPS} // '') eq 'ON';
198
199 # Path
200 my $path = $url->path->parse($env->{PATH_INFO} ? $env->{PATH_INFO} : '');
201
202 # Base path
203 if (my $value = $env->{SCRIPT_NAME}) {
204
205 # Make sure there is a trailing slash (important for merging)
206 $base->path->parse($value =~ m!/$! ? $value : "$value/");
207
208 # Remove SCRIPT_NAME prefix if necessary
209 my $buffer = $path->to_string;
210 $value =~ s!^/|/$!!g;
211 $buffer =~ s!^/?\Q$value\E/?!!;
212 $buffer =~ s!^/!!;
213 $path->parse($buffer);
214 }
215}
216
217sub _start_line {
218 my $self = shift;
219
220 return $self if defined $self->{start_buffer};
221
222 # Path
223 my $url = $self->url;
224 my $path = $url->path_query;
225 $path = "/$path" unless $path =~ m!^/!;
226
227 # CONNECT
228 my $method = uc $self->method;
229 if ($method eq 'CONNECT') {
230 my $port = $url->port // ($url->protocol eq 'https' ? '443' : '80');
231 $path = $url->ihost . ":$port";
232 }
233
234 # Proxy
235 elsif ($self->proxy && $self->via_proxy && $url->protocol ne 'https') {
236 $path = $url->clone->userinfo(undef) unless $self->is_handshake;
237 }
238
239 $self->{start_buffer} = "$method $path HTTP/@{[$self->version]}\x0d\x0a";
240
241 return $self;
242}
243
2441;
245
246=encoding utf8
247
248=head1 NAME
249
250Mojo::Message::Request - HTTP request
251
252=head1 SYNOPSIS
253
254 use Mojo::Message::Request;
255
256 # Parse
257 my $req = Mojo::Message::Request->new;
258 $req->parse("GET /foo HTTP/1.0\x0d\x0a");
259 $req->parse("Content-Length: 12\x0d\x0a");
260 $req->parse("Content-Type: text/plain\x0d\x0a\x0d\x0a");
261 $req->parse('Hello World!');
262 say $req->method;
263 say $req->headers->content_type;
264 say $req->body;
265
266 # Build
267 my $req = Mojo::Message::Request->new;
268 $req->url->parse('http://127.0.0.1/foo/bar');
269 $req->method('GET');
270 say $req->to_string;
271
272=head1 DESCRIPTION
273
274L<Mojo::Message::Request> is a container for HTTP requests, based on
275L<RFC 7230|http://tools.ietf.org/html/rfc7230>,
276L<RFC 7231|http://tools.ietf.org/html/rfc7231>,
277L<RFC 7235|http://tools.ietf.org/html/rfc7235> and
278L<RFC 2817|http://tools.ietf.org/html/rfc2817>.
279
280=head1 EVENTS
281
282L<Mojo::Message::Request> inherits all events from L<Mojo::Message>.
283
284=head1 ATTRIBUTES
285
286L<Mojo::Message::Request> inherits all attributes from L<Mojo::Message> and
287implements the following new ones.
288
289=head2 env
290
291 my $env = $req->env;
292 $req = $req->env({PATH_INFO => '/'});
293
294Direct access to the C<CGI> or C<PSGI> environment hash if available.
295
296 # Check CGI version
297 my $version = $req->env->{GATEWAY_INTERFACE};
298
299 # Check PSGI version
300 my $version = $req->env->{'psgi.version'};
301
302=head2 method
303
304 my $method = $req->method;
305 $req = $req->method('POST');
306
307HTTP request method, defaults to C<GET>.
308
309=head2 proxy
310
311 my $url = $req->proxy;
312 $req = $req->proxy(Mojo::URL->new('http://127.0.0.1:3000'));
313
314Proxy URL for request.
315
316=head2 reverse_proxy
317
318 my $bool = $req->reverse_proxy;
319 $req = $req->reverse_proxy($bool);
320
321Request has been performed through a reverse proxy.
322
323=head2 request_id
324
325 my $id = $req->request_id;
326 $req = $req->request_id('aee7d5d8');
327
328Request ID, defaults to a reasonably unique value.
329
330=head2 url
331
332 my $url = $req->url;
333 $req = $req->url(Mojo::URL->new);
334
335HTTP request URL, defaults to a L<Mojo::URL> object.
336
337 # Get request information
338 my $info = $req->url->to_abs->userinfo;
339 my $host = $req->url->to_abs->host;
340 my $path = $req->url->to_abs->path;
341
342=head2 via_proxy
343
344 my $bool = $req->via_proxy;
345 $req = $req->via_proxy($bool);
346
347Request can be performed through a proxy server.
348
349=head1 METHODS
350
351L<Mojo::Message::Request> inherits all methods from L<Mojo::Message> and
352implements the following new ones.
353
354=head2 clone
355
356 my $clone = $req->clone;
357
358Return a new L<Mojo::Message::Request> object cloned from this request if
359possible, otherwise return C<undef>.
360
361=head2 cookies
362
363 my $cookies = $req->cookies;
364 $req = $req->cookies(Mojo::Cookie::Request->new);
365 $req = $req->cookies({name => 'foo', value => 'bar'});
366
367Access request cookies, usually L<Mojo::Cookie::Request> objects.
368
369 # Names of all cookies
370 say $_->name for @{$req->cookies};
371
372=head2 every_param
373
374 my $values = $req->every_param('foo');
375
376Similar to L</"param">, but returns all values sharing the same name as an
377array reference.
378
379 # Get first value
380 say $req->every_param('foo')->[0];
381
382=head2 extract_start_line
383
384 my $bool = $req->extract_start_line(\$str);
385
386Extract request-line from string.
387
388=head2 fix_headers
389
390 $req = $req->fix_headers;
391
392Make sure request has all required headers.
393
394=head2 get_start_line_chunk
395
396 my $bytes = $req->get_start_line_chunk($offset);
397
398Get a chunk of request-line data starting from a specific position. Note that
399this method finalizes the request.
400
401=head2 is_handshake
402
403 my $bool = $req->is_handshake;
404
405Check C<Upgrade> header for C<websocket> value.
406
407=head2 is_secure
408
409 my $bool = $req->is_secure;
410
411Check if connection is secure.
412
413=head2 is_xhr
414
415 my $bool = $req->is_xhr;
416
417Check C<X-Requested-With> header for C<XMLHttpRequest> value.
418
419=head2 param
420
421 my $value = $req->param('foo');
422
423Access C<GET> and C<POST> parameters extracted from the query string and
424C<application/x-www-form-urlencoded> or C<multipart/form-data> message body. If
425there are multiple values sharing the same name, and you want to access more
426than just the last one, you can use L</"every_param">. Note that this method
427caches all data, so it should not be called before the entire request body has
428been received. Parts of the request body need to be loaded into memory to parse
429C<POST> parameters, so you have to make sure it is not excessively large.
430There's a 16MiB limit for requests by default.
431
432=head2 params
433
434 my $params = $req->params;
435
436All C<GET> and C<POST> parameters extracted from the query string and
437C<application/x-www-form-urlencoded> or C<multipart/form-data> message body,
438usually a L<Mojo::Parameters> object. Note that this method caches all data, so
439it should not be called before the entire request body has been received. Parts
440of the request body need to be loaded into memory to parse C<POST> parameters,
441so you have to make sure it is not excessively large. There's a 16MiB limit for
442requests by default.
443
444 # Get parameter names and values
445 my $hash = $req->params->to_hash;
446
447=head2 parse
448
449 $req = $req->parse('GET /foo/bar HTTP/1.1');
450 $req = $req->parse({PATH_INFO => '/'});
451
452Parse HTTP request chunks or environment hash.
453
454=head2 query_params
455
456 my $params = $req->query_params;
457
458All C<GET> parameters, usually a L<Mojo::Parameters> object.
459
460 # Turn GET parameters to hash and extract value
461 say $req->query_params->to_hash->{foo};
462
463=head2 start_line_size
464
465 my $size = $req->start_line_size;
466
467Size of the request-line in bytes. Note that this method finalizes the request.
468
469=head1 SEE ALSO
470
471L<Mojolicious>, L<Mojolicious::Guides>, L<https://mojolicious.org>.
472
473=cut
Note: See TracBrowser for help on using the repository browser.