source: main/trunk/greenstone2/perllib/cpan/Mojo/UserAgent/Transactor.pm@ 32205

Last change on this file since 32205 was 32205, checked in by ak19, 6 years ago

First set of commits to do with implementing the new 'paged_html' output option of PDFPlugin that uses using xpdftools' new pdftohtml. So far tested only on Linux (64 bit), but things work there so I'm optimistically committing the changes since they work. 2. Committing the pre-built Linux binaries of XPDFtools for both 32 and 64 bit built by the XPDF group. 2. To use the correct bitness variant of xpdftools, setup.bash now exports the BITNESS env var, consulted by gsConvert.pl. 3. All the perl code changes to do with using xpdf tools' pdftohtml to generate paged_html and feed it in the desired form into GS(3): gsConvert.pl, PDFPlugin.pm and its parent ConvertBinaryPFile.pm have been modified to make it all work. xpdftools' pdftohtml generates a folder containing an html file and a screenshot for each page in a PDF (as well as an index.html linking to each page's html). However, we want a single html file that contains each individual 'page' html's content in a div, and need to do some further HTML style, attribute and structure modifications to massage the xpdftool output to what we want for GS. In order to parse and manipulate the HTML 'DOM' to do this, we're using the Mojo::DOM package that Dr Bainbridge found and which he's compiled up. Mojo::DOM is therefore also committed in this revision. Some further changes and some display fixes are required, but need to check with the others about that.

File size: 16.2 KB
Line 
1package Mojo::UserAgent::Transactor;
2use Mojo::Base -base;
3
4use Mojo::Asset::File;
5use Mojo::Asset::Memory;
6use Mojo::Content::MultiPart;
7use Mojo::Content::Single;
8use Mojo::File 'path';
9use Mojo::JSON 'encode_json';
10use Mojo::Parameters;
11use Mojo::Transaction::HTTP;
12use Mojo::Transaction::WebSocket;
13use Mojo::URL;
14use Mojo::Util qw(encode url_escape);
15use Mojo::WebSocket qw(challenge client_handshake);
16
17has generators =>
18 sub { {form => \&_form, json => \&_json, multipart => \&_multipart} };
19has name => 'Mojolicious (Perl)';
20
21sub add_generator { $_[0]->generators->{$_[1]} = $_[2] and return $_[0] }
22
23sub endpoint {
24 my ($self, $tx) = @_;
25
26 # Basic endpoint
27 my $req = $tx->req;
28 my $url = $req->url;
29 my $proto = $url->protocol || 'http';
30 my $host = $url->ihost;
31 my $port = $url->port // ($proto eq 'https' ? 443 : 80);
32
33 # Proxy for normal HTTP requests
34 my $socks;
35 if (my $proxy = $req->proxy) { $socks = $proxy->protocol eq 'socks' }
36 return _proxy($tx, $proto, $host, $port)
37 if $proto eq 'http' && !$req->is_handshake && !$socks;
38
39 return $proto, $host, $port;
40}
41
42sub peer { _proxy($_[1], $_[0]->endpoint($_[1])) }
43
44sub proxy_connect {
45 my ($self, $old) = @_;
46
47 # Already a CONNECT request
48 my $req = $old->req;
49 return undef if uc $req->method eq 'CONNECT';
50
51 # No proxy
52 return undef unless (my $proxy = $req->proxy) && $req->via_proxy;
53 return undef if $proxy->protocol eq 'socks';
54
55 # WebSocket and/or HTTPS
56 my $url = $req->url;
57 return undef unless $req->is_handshake || $url->protocol eq 'https';
58
59 # CONNECT request (expect a bad response)
60 my $new = $self->tx(CONNECT => $url->clone->userinfo(undef));
61 $new->req->proxy($proxy);
62 $new->res->content->auto_relax(0)->headers->connection('keep-alive');
63
64 return $new;
65}
66
67sub redirect {
68 my ($self, $old) = @_;
69
70 # Commonly used codes
71 my $res = $old->res;
72 my $code = $res->code // 0;
73 return undef unless grep { $_ == $code } 301, 302, 303, 307, 308;
74
75 # CONNECT requests cannot be redirected
76 my $req = $old->req;
77 return undef if uc $req->method eq 'CONNECT';
78
79 # Fix location without authority and/or scheme
80 return undef
81 unless my $location = $res->headers->every_header('Location')->[0];
82 $location = Mojo::URL->new($location);
83 $location = $location->base($req->url)->to_abs unless $location->is_abs;
84 my $proto = $location->protocol;
85 return undef if ($proto ne 'http' && $proto ne 'https') || !$location->host;
86
87 # Clone request if necessary
88 my $new = Mojo::Transaction::HTTP->new;
89 if ($code == 307 || $code == 308) {
90 return undef unless my $clone = $req->clone;
91 $new->req($clone);
92 }
93 else {
94 my $m = uc $req->method;
95 my $headers = $new->req->method($code == 303 || $m eq 'POST' ? 'GET' : $m)
96 ->content->headers($req->headers->clone)->headers;
97 $headers->remove($_) for grep {/^content-/i} @{$headers->names};
98 }
99 my $headers = $new->req->url($location)->headers;
100 $headers->remove($_) for qw(Authorization Cookie Host Referer);
101 return $new->previous($old);
102}
103
104sub tx {
105 my ($self, $method, $url) = (shift, shift, shift);
106
107 # Method and URL
108 my $tx = Mojo::Transaction::HTTP->new;
109 my $req = $tx->req->method($method);
110 if (ref $url) { $req->url($url) }
111 else { $req->url->parse($url =~ m!^/|://! ? $url : "http://$url") }
112
113 # Headers (we identify ourselves and accept gzip compression)
114 my $headers = $req->headers;
115 $headers->from_hash(shift) if ref $_[0] eq 'HASH';
116 $headers->user_agent($self->name) unless $headers->user_agent;
117 $headers->accept_encoding('gzip') unless $headers->accept_encoding;
118
119 # Generator
120 if (@_ > 1) {
121 my $cb = $self->generators->{shift()};
122 $self->$cb($tx, @_);
123 }
124
125 # Body
126 elsif (@_) { $req->body(shift) }
127
128 return $tx;
129}
130
131sub upgrade {
132 my ($self, $tx) = @_;
133 my $code = $tx->res->code // 0;
134 return undef unless $tx->req->is_handshake && $code == 101;
135 my $ws = Mojo::Transaction::WebSocket->new(handshake => $tx, masked => 1);
136 return challenge($ws) ? $ws->established(1) : undef;
137}
138
139sub websocket {
140 my $self = shift;
141
142 # New WebSocket transaction
143 my $sub = ref $_[-1] eq 'ARRAY' ? pop : [];
144 my $tx = $self->tx(GET => @_);
145 my $req = $tx->req;
146 $req->headers->sec_websocket_protocol(join ', ', @$sub) if @$sub;
147
148 # Handshake protocol
149 my $url = $req->url;
150 my $proto = $url->protocol // '';
151 if ($proto eq 'ws') { $url->scheme('http') }
152 elsif ($proto eq 'wss') { $url->scheme('https') }
153 elsif ($proto eq 'ws+unix') { $url->scheme('http+unix') }
154
155 return client_handshake $tx;
156}
157
158sub _content { Mojo::Content::MultiPart->new(headers => $_[0], parts => $_[1]) }
159
160sub _form {
161 my ($self, $tx, $form, %options) = @_;
162 $options{charset} = 'UTF-8' unless exists $options{charset};
163
164 # Check for uploads and force multipart if necessary
165 my $req = $tx->req;
166 my $headers = $req->headers;
167 my $multipart = ($headers->content_type // '') =~ m!multipart/form-data!i;
168 for my $value (map { ref $_ eq 'ARRAY' ? @$_ : $_ } values %$form) {
169 ++$multipart and last if ref $value eq 'HASH';
170 }
171
172 # Multipart
173 if ($multipart) {
174 $req->content(_content($headers, _form_parts($options{charset}, $form)));
175 _type($headers, 'multipart/form-data');
176 return $tx;
177 }
178
179 # Query parameters or urlencoded
180 my $method = uc $req->method;
181 my @form = map { $_ => $form->{$_} } sort keys %$form;
182 if ($method eq 'GET' || $method eq 'HEAD') { $req->url->query->merge(@form) }
183 else {
184 $req->body(
185 Mojo::Parameters->new(@form)->charset($options{charset})->to_string);
186 _type($headers, 'application/x-www-form-urlencoded');
187 }
188 return $tx;
189}
190
191sub _form_parts {
192 my ($charset, $form) = @_;
193
194 my @parts;
195 for my $name (sort keys %$form) {
196 next unless defined(my $values = $form->{$name});
197 $values = [$values] unless ref $values eq 'ARRAY';
198 push @parts, @{_parts($charset, $name, $values)};
199 }
200
201 return \@parts;
202}
203
204sub _json {
205 my ($self, $tx, $data) = @_;
206 _type($tx->req->body(encode_json $data)->headers, 'application/json');
207 return $tx;
208}
209
210sub _multipart {
211 my ($self, $tx, $parts) = @_;
212 my $req = $tx->req;
213 $req->content(_content($req->headers, _parts(undef, undef, $parts)));
214 return $tx;
215}
216
217sub _parts {
218 my ($charset, $name, $values) = @_;
219
220 my @parts;
221 for my $value (@$values) {
222 push @parts, my $part = Mojo::Content::Single->new;
223
224 my $filename;
225 my $headers = $part->headers;
226 if (ref $value eq 'HASH') {
227
228 # File
229 if (my $file = delete $value->{file}) {
230 $file = Mojo::Asset::File->new(path => $file) unless ref $file;
231 $part->asset($file);
232 $value->{filename} //= path($file->path)->basename
233 if $file->isa('Mojo::Asset::File');
234 }
235
236 # Memory
237 elsif (defined(my $content = delete $value->{content})) {
238 $part->asset(Mojo::Asset::Memory->new->add_chunk($content));
239 }
240
241 # Filename and headers
242 $filename = delete $value->{filename};
243 $headers->from_hash($value);
244 next unless defined $name;
245 $filename = url_escape $filename // $name, '"';
246 $filename = encode $charset, $filename if $charset;
247 }
248
249 # Field
250 else {
251 $value = encode $charset, $value if $charset;
252 $part->asset(Mojo::Asset::Memory->new->add_chunk($value));
253 }
254
255 # Content-Disposition
256 next unless defined $name;
257 $name = url_escape $name, '"';
258 $name = encode $charset, $name if $charset;
259 my $disposition = qq{form-data; name="$name"};
260 $disposition .= qq{; filename="$filename"} if defined $filename;
261 $headers->content_disposition($disposition);
262 }
263
264 return \@parts;
265}
266
267sub _proxy {
268 my ($tx, $proto, $host, $port) = @_;
269
270 my $req = $tx->req;
271 if ($req->via_proxy && (my $proxy = $req->proxy)) {
272 return $proxy->protocol, $proxy->ihost,
273 $proxy->port // ($proto eq 'https' ? 443 : 80);
274 }
275
276 return $proto, $host, $port;
277}
278
279sub _type { $_[0]->content_type($_[1]) unless $_[0]->content_type }
280
2811;
282
283=encoding utf8
284
285=head1 NAME
286
287Mojo::UserAgent::Transactor - User agent transactor
288
289=head1 SYNOPSIS
290
291 use Mojo::UserAgent::Transactor;
292
293 # GET request with Accept header
294 my $t = Mojo::UserAgent::Transactor->new;
295 say $t->tx(GET => 'http://example.com' => {Accept => '*/*'})->req->to_string;
296
297 # POST request with form-data
298 say $t->tx(POST => 'example.com' => form => {a => 'b'})->req->to_string;
299
300 # PUT request with JSON data
301 say $t->tx(PUT => 'example.com' => json => {a => 'b'})->req->to_string;
302
303=head1 DESCRIPTION
304
305L<Mojo::UserAgent::Transactor> is the transaction building and manipulation
306framework used by L<Mojo::UserAgent>.
307
308=head1 GENERATORS
309
310These content generators are available by default.
311
312=head2 form
313
314 $t->tx(POST => 'http://example.com' => form => {a => 'b'});
315
316Generate query string, C<application/x-www-form-urlencoded> or
317C<multipart/form-data> content. See L</"tx"> for more.
318
319=head2 json
320
321 $t->tx(PATCH => 'http://example.com' => json => {a => 'b'});
322
323Generate JSON content with L<Mojo::JSON>. See L</"tx"> for more.
324
325=head2 multipart
326
327 $t->tx(PUT => 'http://example.com' => multipart => ['Hello', 'World!']);
328
329Generate multipart content. See L</"tx"> for more.
330
331=head1 ATTRIBUTES
332
333L<Mojo::UserAgent::Transactor> implements the following attributes.
334
335=head2 generators
336
337 my $generators = $t->generators;
338 $t = $t->generators({foo => sub {...}});
339
340Registered content generators, by default only C<form>, C<json> and C<multipart>
341are already defined.
342
343=head2 name
344
345 my $name = $t->name;
346 $t = $t->name('Mojolicious');
347
348Value for C<User-Agent> request header of generated transactions, defaults to
349C<Mojolicious (Perl)>.
350
351=head1 METHODS
352
353L<Mojo::UserAgent::Transactor> inherits all methods from L<Mojo::Base> and
354implements the following new ones.
355
356=head2 add_generator
357
358 $t = $t->add_generator(foo => sub {...});
359
360Register a content generator.
361
362 $t->add_generator(foo => sub {
363 my ($t, $tx, @args) = @_;
364 ...
365 });
366
367=head2 endpoint
368
369 my ($proto, $host, $port) = $t->endpoint(Mojo::Transaction::HTTP->new);
370
371Actual endpoint for transaction.
372
373=head2 peer
374
375 my ($proto, $host, $port) = $t->peer(Mojo::Transaction::HTTP->new);
376
377Actual peer for transaction.
378
379=head2 proxy_connect
380
381 my $tx = $t->proxy_connect(Mojo::Transaction::HTTP->new);
382
383Build L<Mojo::Transaction::HTTP> proxy C<CONNECT> request for transaction if
384possible.
385
386=head2 redirect
387
388 my $tx = $t->redirect(Mojo::Transaction::HTTP->new);
389
390Build L<Mojo::Transaction::HTTP> follow-up request for C<301>, C<302>, C<303>,
391C<307> or C<308> redirect response if possible.
392
393=head2 tx
394
395 my $tx = $t->tx(GET => 'example.com');
396 my $tx = $t->tx(POST => 'http://example.com');
397 my $tx = $t->tx(GET => 'http://example.com' => {Accept => '*/*'});
398 my $tx = $t->tx(PUT => 'http://example.com' => 'Content!');
399 my $tx = $t->tx(PUT => 'http://example.com' => form => {a => 'b'});
400 my $tx = $t->tx(PUT => 'http://example.com' => json => {a => 'b'});
401 my $tx = $t->tx(PUT => 'https://example.com' => multipart => ['a', 'b']);
402 my $tx = $t->tx(POST => 'example.com' => {Accept => '*/*'} => 'Content!');
403 my $tx = $t->tx(
404 PUT => 'example.com' => {Accept => '*/*'} => form => {a => 'b'});
405 my $tx = $t->tx(
406 PUT => 'example.com' => {Accept => '*/*'} => json => {a => 'b'});
407 my $tx = $t->tx(
408 PUT => 'example.com' => {Accept => '*/*'} => multipart => ['a', 'b']);
409
410Versatile general purpose L<Mojo::Transaction::HTTP> transaction builder for
411requests, with support for L</"GENERATORS">.
412
413 # Generate and inspect custom GET request with DNT header and content
414 say $t->tx(GET => 'example.com' => {DNT => 1} => 'Bye!')->req->to_string;
415
416 # Stream response content to STDOUT
417 my $tx = $t->tx(GET => 'http://example.com');
418 $tx->res->content->unsubscribe('read')->on(read => sub { say $_[1] });
419
420 # PUT request with content streamed from file
421 my $tx = $t->tx(PUT => 'http://example.com');
422 $tx->req->content->asset(Mojo::Asset::File->new(path => '/foo.txt'));
423
424The C<json> content generator uses L<Mojo::JSON> for encoding and sets the
425content type to C<application/json>.
426
427 # POST request with "application/json" content
428 my $tx = $t->tx(
429 POST => 'http://example.com' => json => {a => 'b', c => [1, 2, 3]});
430
431The C<form> content generator will automatically use query parameters for
432C<GET> and C<HEAD> requests.
433
434 # GET request with query parameters
435 my $tx = $t->tx(GET => 'http://example.com' => form => {a => 'b'});
436
437For all other request methods the C<application/x-www-form-urlencoded> content
438type is used.
439
440 # POST request with "application/x-www-form-urlencoded" content
441 my $tx = $t->tx(
442 POST => 'http://example.com' => form => {a => 'b', c => 'd'});
443
444Parameters may be encoded with the C<charset> option.
445
446 # PUT request with Shift_JIS encoded form values
447 my $tx = $t->tx(
448 PUT => 'example.com' => form => {a => 'b'} => charset => 'Shift_JIS');
449
450An array reference can be used for multiple form values sharing the same name.
451
452 # POST request with form values sharing the same name
453 my $tx = $t->tx(
454 POST => 'http://example.com' => form => {a => ['b', 'c', 'd']});
455
456A hash reference with a C<content> or C<file> value can be used to switch to
457the C<multipart/form-data> content type for file uploads.
458
459 # POST request with "multipart/form-data" content
460 my $tx = $t->tx(
461 POST => 'http://example.com' => form => {mytext => {content => 'lala'}});
462
463 # POST request with multiple files sharing the same name
464 my $tx = $t->tx(POST => 'http://example.com' =>
465 form => {mytext => [{content => 'first'}, {content => 'second'}]});
466
467The C<file> value should contain the path to the file you want to upload or an
468asset object, like L<Mojo::Asset::File> or L<Mojo::Asset::Memory>.
469
470 # POST request with upload streamed from file
471 my $tx = $t->tx(
472 POST => 'http://example.com' => form => {mytext => {file => '/foo.txt'}});
473
474 # POST request with upload streamed from asset
475 my $asset = Mojo::Asset::Memory->new->add_chunk('lalala');
476 my $tx = $t->tx(
477 POST => 'http://example.com' => form => {mytext => {file => $asset}});
478
479A C<filename> value will be generated automatically, but can also be set
480manually if necessary. All remaining values in the hash reference get merged
481into the C<multipart/form-data> content as headers.
482
483 # POST request with form values and customized upload (filename and header)
484 my $tx = $t->tx(POST => 'http://example.com' => form => {
485 a => 'b',
486 c => 'd',
487 mytext => {
488 content => 'lalala',
489 filename => 'foo.txt',
490 'Content-Type' => 'text/plain'
491 }
492 });
493
494The C<multipart/form-data> content type can also be enforced by setting the
495C<Content-Type> header manually.
496
497 # Force "multipart/form-data"
498 my $headers = {'Content-Type' => 'multipart/form-data'};
499 my $tx = $t->tx(POST => 'example.com' => $headers => form => {a => 'b'});
500
501The C<multipart> content generator can be used to build custom multipart
502requests and does not set a content type.
503
504 # POST request with multipart content ("foo" and "bar")
505 my $tx = $t->tx(POST => 'http://example.com' => multipart => ['foo', 'bar']);
506
507Similar to the C<form> content generator you can also pass hash references with
508C<content> or C<file> values, as well as headers.
509
510 # POST request with multipart content streamed from file
511 my $tx = $t->tx(
512 POST => 'http://example.com' => multipart => [{file => '/foo.txt'}]);
513
514 # PUT request with multipart content streamed from asset
515 my $headers = {'Content-Type' => 'multipart/custom'};
516 my $asset = Mojo::Asset::Memory->new->add_chunk('lalala');
517 my $tx = $t->tx(
518 PUT => 'http://example.com' => $headers => multipart => [{file => $asset}]);
519
520 # POST request with multipart content and custom headers
521 my $tx = $t->tx(POST => 'http://example.com' => multipart => [
522 {
523 content => 'Hello',
524 'Content-Type' => 'text/plain',
525 'Content-Language' => 'en-US'
526 },
527 {
528 content => 'World!',
529 'Content-Type' => 'text/plain',
530 'Content-Language' => 'en-US'
531 }
532 ]);
533
534=head2 upgrade
535
536 my $tx = $t->upgrade(Mojo::Transaction::HTTP->new);
537
538Build L<Mojo::Transaction::WebSocket> follow-up transaction for WebSocket
539handshake if possible.
540
541=head2 websocket
542
543 my $tx = $t->websocket('ws://example.com');
544 my $tx = $t->websocket('ws://example.com' => {DNT => 1} => ['v1.proto']);
545
546Versatile L<Mojo::Transaction::HTTP> transaction builder for WebSocket
547handshake requests.
548
549=head1 SEE ALSO
550
551L<Mojolicious>, L<Mojolicious::Guides>, L<https://mojolicious.org>.
552
553=cut
Note: See TracBrowser for help on using the repository browser.