source: main/trunk/greenstone2/perllib/cpan/Mojo/Headers.pm@ 32205

Last change on this file since 32205 was 32205, checked in by ak19, 6 years ago

First set of commits to do with implementing the new 'paged_html' output option of PDFPlugin that uses using xpdftools' new pdftohtml. So far tested only on Linux (64 bit), but things work there so I'm optimistically committing the changes since they work. 2. Committing the pre-built Linux binaries of XPDFtools for both 32 and 64 bit built by the XPDF group. 2. To use the correct bitness variant of xpdftools, setup.bash now exports the BITNESS env var, consulted by gsConvert.pl. 3. All the perl code changes to do with using xpdf tools' pdftohtml to generate paged_html and feed it in the desired form into GS(3): gsConvert.pl, PDFPlugin.pm and its parent ConvertBinaryPFile.pm have been modified to make it all work. xpdftools' pdftohtml generates a folder containing an html file and a screenshot for each page in a PDF (as well as an index.html linking to each page's html). However, we want a single html file that contains each individual 'page' html's content in a div, and need to do some further HTML style, attribute and structure modifications to massage the xpdftool output to what we want for GS. In order to parse and manipulate the HTML 'DOM' to do this, we're using the Mojo::DOM package that Dr Bainbridge found and which he's compiled up. Mojo::DOM is therefore also committed in this revision. Some further changes and some display fixes are required, but need to check with the others about that.

File size: 18.7 KB
Line 
1package Mojo::Headers;
2use Mojo::Base -base;
3
4use Mojo::Util 'monkey_patch';
5
6has max_line_size => sub { $ENV{MOJO_MAX_LINE_SIZE} || 8192 };
7has max_lines => sub { $ENV{MOJO_MAX_LINES} || 100 };
8
9# Common headers
10my %NAMES = map { lc() => $_ } (
11 qw(Accept Accept-Charset Accept-Encoding Accept-Language Accept-Ranges),
12 qw(Access-Control-Allow-Origin Allow Authorization Cache-Control Connection),
13 qw(Content-Disposition Content-Encoding Content-Language Content-Length),
14 qw(Content-Location Content-Range Content-Security-Policy Content-Type),
15 qw(Cookie DNT Date ETag Expect Expires Host If-Modified-Since If-None-Match),
16 qw(Last-Modified Link Location Origin Proxy-Authenticate),
17 qw(Proxy-Authorization Range Sec-WebSocket-Accept Sec-WebSocket-Extensions),
18 qw(Sec-WebSocket-Key Sec-WebSocket-Protocol Sec-WebSocket-Version Server),
19 qw(Server-Timing Set-Cookie Status Strict-Transport-Security TE Trailer),
20 qw(Transfer-Encoding Upgrade User-Agent Vary WWW-Authenticate)
21);
22for my $header (keys %NAMES) {
23 my $name = $header;
24 $name =~ y/-/_/;
25 monkey_patch __PACKAGE__, $name, sub {
26 my $self = shift;
27 $self->{headers}{$header} = [@_] and return $self if @_;
28 return undef unless my $headers = $self->{headers}{$header};
29 return join ', ', @$headers;
30 };
31}
32
33sub add {
34 my ($self, $name) = (shift, shift);
35
36 # Make sure we have a normal case entry for name
37 my $key = lc $name;
38 $self->{names}{$key} //= $name unless $NAMES{$key};
39 push @{$self->{headers}{$key}}, @_;
40
41 return $self;
42}
43
44sub append {
45 my ($self, $name, $value) = @_;
46 my $old = $self->header($name);
47 return $self->header($name => defined $old ? "$old, $value" : $value);
48}
49
50sub clone { $_[0]->new->from_hash($_[0]->to_hash(1)) }
51
52sub every_header { shift->{headers}{lc shift} || [] }
53
54sub from_hash {
55 my ($self, $hash) = @_;
56
57 # Empty hash deletes all headers
58 delete $self->{headers} if keys %{$hash} == 0;
59
60 # Merge
61 for my $header (keys %$hash) {
62 my $value = $hash->{$header};
63 $self->add($header => ref $value eq 'ARRAY' ? @$value : $value);
64 }
65
66 return $self;
67}
68
69sub header {
70 my ($self, $name) = (shift, shift);
71
72 # Replace
73 return $self->remove($name)->add($name, @_) if @_;
74
75 return undef unless my $headers = $self->{headers}{lc $name};
76 return join ', ', @$headers;
77}
78
79sub is_finished { (shift->{state} // '') eq 'finished' }
80
81sub is_limit_exceeded { !!shift->{limit} }
82
83sub leftovers { delete shift->{buffer} }
84
85sub names {
86 my $self = shift;
87 return [map { $NAMES{$_} || $self->{names}{$_} } keys %{$self->{headers}}];
88}
89
90sub parse {
91 my ($self, $chunk) = @_;
92
93 $self->{state} = 'headers';
94 $self->{buffer} .= $chunk;
95 my $headers = $self->{cache} ||= [];
96 my $size = $self->max_line_size;
97 my $lines = $self->max_lines;
98 while ($self->{buffer} =~ s/^(.*?)\x0d?\x0a//) {
99 my $line = $1;
100
101 # Check line size limit
102 if ($+[0] > $size || @$headers >= $lines) {
103 @$self{qw(state limit)} = ('finished', 1);
104 return $self;
105 }
106
107 # New header
108 if ($line =~ /^(\S[^:]*)\s*:\s*(.*)$/) { push @$headers, [$1, $2] }
109
110 # Multi-line
111 elsif ($line =~ s/^\s+// && @$headers) { $headers->[-1][1] .= " $line" }
112
113 # Empty line
114 else {
115 $self->add(@$_) for @$headers;
116 @$self{qw(state cache)} = ('finished', []);
117 return $self;
118 }
119 }
120
121 # Check line size limit
122 @$self{qw(state limit)} = ('finished', 1) if length $self->{buffer} > $size;
123
124 return $self;
125}
126
127sub referrer { shift->header(Referer => @_) }
128
129sub remove {
130 my ($self, $name) = @_;
131 delete $self->{headers}{lc $name};
132 return $self;
133}
134
135sub to_hash {
136 my ($self, $multi) = @_;
137 return {map { $_ => $self->{headers}{lc $_} } @{$self->names}} if $multi;
138 return {map { $_ => $self->header($_) } @{$self->names}};
139}
140
141sub to_string {
142 my $self = shift;
143
144 # Make sure multi-line values are formatted correctly
145 my @headers;
146 for my $name (@{$self->names}) {
147 push @headers, "$name: $_" for @{$self->{headers}{lc $name}};
148 }
149
150 return join "\x0d\x0a", @headers;
151}
152
1531;
154
155=encoding utf8
156
157=head1 NAME
158
159Mojo::Headers - HTTP headers
160
161=head1 SYNOPSIS
162
163 use Mojo::Headers;
164
165 # Parse
166 my $headers = Mojo::Headers->new;
167 $headers->parse("Content-Length: 42\x0d\x0a");
168 $headers->parse("Content-Type: text/html\x0d\x0a\x0d\x0a");
169 say $headers->content_length;
170 say $headers->content_type;
171
172 # Build
173 my $headers = Mojo::Headers->new;
174 $headers->content_length(42);
175 $headers->content_type('text/plain');
176 say $headers->to_string;
177
178=head1 DESCRIPTION
179
180L<Mojo::Headers> is a container for HTTP headers, based on
181L<RFC 7230|http://tools.ietf.org/html/rfc7230> and
182L<RFC 7231|http://tools.ietf.org/html/rfc7231>.
183
184=head1 ATTRIBUTES
185
186L<Mojo::Headers> implements the following attributes.
187
188=head2 max_line_size
189
190 my $size = $headers->max_line_size;
191 $headers = $headers->max_line_size(1024);
192
193Maximum header line size in bytes, defaults to the value of the
194C<MOJO_MAX_LINE_SIZE> environment variable or C<8192> (8KiB).
195
196=head2 max_lines
197
198 my $num = $headers->max_lines;
199 $headers = $headers->max_lines(200);
200
201Maximum number of header lines, defaults to the value of the C<MOJO_MAX_LINES>
202environment variable or C<100>.
203
204=head1 METHODS
205
206L<Mojo::Headers> inherits all methods from L<Mojo::Base> and implements the
207following new ones.
208
209=head2 accept
210
211 my $accept = $headers->accept;
212 $headers = $headers->accept('application/json');
213
214Get or replace current header value, shortcut for the C<Accept> header.
215
216=head2 accept_charset
217
218 my $charset = $headers->accept_charset;
219 $headers = $headers->accept_charset('UTF-8');
220
221Get or replace current header value, shortcut for the C<Accept-Charset> header.
222
223=head2 accept_encoding
224
225 my $encoding = $headers->accept_encoding;
226 $headers = $headers->accept_encoding('gzip');
227
228Get or replace current header value, shortcut for the C<Accept-Encoding> header.
229
230=head2 accept_language
231
232 my $language = $headers->accept_language;
233 $headers = $headers->accept_language('de, en');
234
235Get or replace current header value, shortcut for the C<Accept-Language> header.
236
237=head2 accept_ranges
238
239 my $ranges = $headers->accept_ranges;
240 $headers = $headers->accept_ranges('bytes');
241
242Get or replace current header value, shortcut for the C<Accept-Ranges> header.
243
244=head2 access_control_allow_origin
245
246 my $origin = $headers->access_control_allow_origin;
247 $headers = $headers->access_control_allow_origin('*');
248
249Get or replace current header value, shortcut for the
250C<Access-Control-Allow-Origin> header from
251L<Cross-Origin Resource Sharing|http://www.w3.org/TR/cors/>.
252
253=head2 add
254
255 $headers = $headers->add(Foo => 'one value');
256 $headers = $headers->add(Foo => 'first value', 'second value');
257
258Add header with one or more lines.
259
260 # "Vary: Accept
261 # Vary: Accept-Encoding"
262 $headers->add(Vary => 'Accept')->add(Vary => 'Accept-Encoding')->to_string;
263
264=head2 allow
265
266 my $allow = $headers->allow;
267 $headers = $headers->allow('GET, POST');
268
269Get or replace current header value, shortcut for the C<Allow> header.
270
271=head2 append
272
273 $headers = $headers->append(Vary => 'Accept-Encoding');
274
275Append value to header and flatten it if necessary.
276
277 # "Vary: Accept"
278 $headers->append(Vary => 'Accept')->to_string;
279
280 # "Vary: Accept, Accept-Encoding"
281 $headers->vary('Accept')->append(Vary => 'Accept-Encoding')->to_string;
282
283=head2 authorization
284
285 my $authorization = $headers->authorization;
286 $headers = $headers->authorization('Basic Zm9vOmJhcg==');
287
288Get or replace current header value, shortcut for the C<Authorization> header.
289
290=head2 cache_control
291
292 my $cache_control = $headers->cache_control;
293 $headers = $headers->cache_control('max-age=1, no-cache');
294
295Get or replace current header value, shortcut for the C<Cache-Control> header.
296
297=head2 clone
298
299 my $clone = $headers->clone;
300
301Return a new L<Mojo::Headers> object cloned from these headers.
302
303=head2 connection
304
305 my $connection = $headers->connection;
306 $headers = $headers->connection('close');
307
308Get or replace current header value, shortcut for the C<Connection> header.
309
310=head2 content_disposition
311
312 my $disposition = $headers->content_disposition;
313 $headers = $headers->content_disposition('foo');
314
315Get or replace current header value, shortcut for the C<Content-Disposition>
316header.
317
318=head2 content_encoding
319
320 my $encoding = $headers->content_encoding;
321 $headers = $headers->content_encoding('gzip');
322
323Get or replace current header value, shortcut for the C<Content-Encoding>
324header.
325
326=head2 content_language
327
328 my $language = $headers->content_language;
329 $headers = $headers->content_language('en');
330
331Get or replace current header value, shortcut for the C<Content-Language>
332header.
333
334=head2 content_length
335
336 my $len = $headers->content_length;
337 $headers = $headers->content_length(4000);
338
339Get or replace current header value, shortcut for the C<Content-Length> header.
340
341=head2 content_location
342
343 my $location = $headers->content_location;
344 $headers = $headers->content_location('http://127.0.0.1/foo');
345
346Get or replace current header value, shortcut for the C<Content-Location>
347header.
348
349=head2 content_range
350
351 my $range = $headers->content_range;
352 $headers = $headers->content_range('bytes 2-8/100');
353
354Get or replace current header value, shortcut for the C<Content-Range> header.
355
356=head2 content_security_policy
357
358 my $policy = $headers->content_security_policy;
359 $headers = $headers->content_security_policy('default-src https:');
360
361Get or replace current header value, shortcut for the C<Content-Security-Policy>
362header from L<Content Security Policy 1.0|http://www.w3.org/TR/CSP/>.
363
364=head2 content_type
365
366 my $type = $headers->content_type;
367 $headers = $headers->content_type('text/plain');
368
369Get or replace current header value, shortcut for the C<Content-Type> header.
370
371=head2 cookie
372
373 my $cookie = $headers->cookie;
374 $headers = $headers->cookie('f=b');
375
376Get or replace current header value, shortcut for the C<Cookie> header from
377L<RFC 6265|http://tools.ietf.org/html/rfc6265>.
378
379=head2 date
380
381 my $date = $headers->date;
382 $headers = $headers->date('Sun, 17 Aug 2008 16:27:35 GMT');
383
384Get or replace current header value, shortcut for the C<Date> header.
385
386=head2 dnt
387
388 my $dnt = $headers->dnt;
389 $headers = $headers->dnt(1);
390
391Get or replace current header value, shortcut for the C<DNT> (Do Not Track)
392header, which has no specification yet, but is very commonly used.
393
394=head2 etag
395
396 my $etag = $headers->etag;
397 $headers = $headers->etag('"abc321"');
398
399Get or replace current header value, shortcut for the C<ETag> header.
400
401=head2 every_header
402
403 my $all = $headers->every_header('Location');
404
405Similar to L</"header">, but returns all headers sharing the same name as an
406array reference.
407
408 # Get first header value
409 say $headers->every_header('Location')->[0];
410
411=head2 expect
412
413 my $expect = $headers->expect;
414 $headers = $headers->expect('100-continue');
415
416Get or replace current header value, shortcut for the C<Expect> header.
417
418=head2 expires
419
420 my $expires = $headers->expires;
421 $headers = $headers->expires('Thu, 01 Dec 1994 16:00:00 GMT');
422
423Get or replace current header value, shortcut for the C<Expires> header.
424
425=head2 from_hash
426
427 $headers = $headers->from_hash({'Cookie' => 'a=b'});
428 $headers = $headers->from_hash({'Cookie' => ['a=b', 'c=d']});
429 $headers = $headers->from_hash({});
430
431Parse headers from a hash reference, an empty hash removes all headers.
432
433=head2 header
434
435 my $value = $headers->header('Foo');
436 $headers = $headers->header(Foo => 'one value');
437 $headers = $headers->header(Foo => 'first value', 'second value');
438
439Get or replace the current header values.
440
441=head2 host
442
443 my $host = $headers->host;
444 $headers = $headers->host('127.0.0.1');
445
446Get or replace current header value, shortcut for the C<Host> header.
447
448=head2 if_modified_since
449
450 my $date = $headers->if_modified_since;
451 $headers = $headers->if_modified_since('Sun, 17 Aug 2008 16:27:35 GMT');
452
453Get or replace current header value, shortcut for the C<If-Modified-Since>
454header.
455
456=head2 if_none_match
457
458 my $etag = $headers->if_none_match;
459 $headers = $headers->if_none_match('"abc321"');
460
461Get or replace current header value, shortcut for the C<If-None-Match> header.
462
463=head2 is_finished
464
465 my $bool = $headers->is_finished;
466
467Check if header parser is finished.
468
469=head2 is_limit_exceeded
470
471 my $bool = $headers->is_limit_exceeded;
472
473Check if headers have exceeded L</"max_line_size"> or L</"max_lines">.
474
475=head2 last_modified
476
477 my $date = $headers->last_modified;
478 $headers = $headers->last_modified('Sun, 17 Aug 2008 16:27:35 GMT');
479
480Get or replace current header value, shortcut for the C<Last-Modified> header.
481
482=head2 leftovers
483
484 my $bytes = $headers->leftovers;
485
486Get and remove leftover data from header parser.
487
488=head2 link
489
490 my $link = $headers->link;
491 $headers = $headers->link('<http://127.0.0.1/foo/3>; rel="next"');
492
493Get or replace current header value, shortcut for the C<Link> header from
494L<RFC 5988|http://tools.ietf.org/html/rfc5988>.
495
496=head2 location
497
498 my $location = $headers->location;
499 $headers = $headers->location('http://127.0.0.1/foo');
500
501Get or replace current header value, shortcut for the C<Location> header.
502
503=head2 names
504
505 my $names = $headers->names;
506
507Return an array reference with all currently defined headers.
508
509 # Names of all headers
510 say for @{$headers->names};
511
512=head2 origin
513
514 my $origin = $headers->origin;
515 $headers = $headers->origin('http://example.com');
516
517Get or replace current header value, shortcut for the C<Origin> header from
518L<RFC 6454|http://tools.ietf.org/html/rfc6454>.
519
520=head2 parse
521
522 $headers = $headers->parse("Content-Type: text/plain\x0d\x0a\x0d\x0a");
523
524Parse formatted headers.
525
526=head2 proxy_authenticate
527
528 my $authenticate = $headers->proxy_authenticate;
529 $headers = $headers->proxy_authenticate('Basic "realm"');
530
531Get or replace current header value, shortcut for the C<Proxy-Authenticate>
532header.
533
534=head2 proxy_authorization
535
536 my $authorization = $headers->proxy_authorization;
537 $headers = $headers->proxy_authorization('Basic Zm9vOmJhcg==');
538
539Get or replace current header value, shortcut for the C<Proxy-Authorization>
540header.
541
542=head2 range
543
544 my $range = $headers->range;
545 $headers = $headers->range('bytes=2-8');
546
547Get or replace current header value, shortcut for the C<Range> header.
548
549=head2 referrer
550
551 my $referrer = $headers->referrer;
552 $headers = $headers->referrer('http://example.com');
553
554Get or replace current header value, shortcut for the C<Referer> header, there
555was a typo in L<RFC 2068|http://tools.ietf.org/html/rfc2068> which resulted in
556C<Referer> becoming an official header.
557
558=head2 remove
559
560 $headers = $headers->remove('Foo');
561
562Remove a header.
563
564=head2 sec_websocket_accept
565
566 my $accept = $headers->sec_websocket_accept;
567 $headers = $headers->sec_websocket_accept('s3pPLMBiTxaQ9kYGzzhZRbK+xOo=');
568
569Get or replace current header value, shortcut for the C<Sec-WebSocket-Accept>
570header from L<RFC 6455|http://tools.ietf.org/html/rfc6455>.
571
572=head2 sec_websocket_extensions
573
574 my $extensions = $headers->sec_websocket_extensions;
575 $headers = $headers->sec_websocket_extensions('foo');
576
577Get or replace current header value, shortcut for the
578C<Sec-WebSocket-Extensions> header from
579L<RFC 6455|http://tools.ietf.org/html/rfc6455>.
580
581=head2 sec_websocket_key
582
583 my $key = $headers->sec_websocket_key;
584 $headers = $headers->sec_websocket_key('dGhlIHNhbXBsZSBub25jZQ==');
585
586Get or replace current header value, shortcut for the C<Sec-WebSocket-Key>
587header from L<RFC 6455|http://tools.ietf.org/html/rfc6455>.
588
589=head2 sec_websocket_protocol
590
591 my $proto = $headers->sec_websocket_protocol;
592 $headers = $headers->sec_websocket_protocol('sample');
593
594Get or replace current header value, shortcut for the C<Sec-WebSocket-Protocol>
595header from L<RFC 6455|http://tools.ietf.org/html/rfc6455>.
596
597=head2 sec_websocket_version
598
599 my $version = $headers->sec_websocket_version;
600 $headers = $headers->sec_websocket_version(13);
601
602Get or replace current header value, shortcut for the C<Sec-WebSocket-Version>
603header from L<RFC 6455|http://tools.ietf.org/html/rfc6455>.
604
605=head2 server
606
607 my $server = $headers->server;
608 $headers = $headers->server('Mojo');
609
610Get or replace current header value, shortcut for the C<Server> header.
611
612=head2 server_timing
613
614 my $timing = $headers->server_timing;
615 $headers = $headers->server_timing('app;desc=Mojolicious;dur=0.0001');
616
617Get or replace current header value, shortcut for the C<Server-Timing> header
618from L<Server Timing|https://www.w3.org/TR/server-timing/>.
619Note that this method is EXPERIMENTAL and might change without warning!
620
621=head2 set_cookie
622
623 my $cookie = $headers->set_cookie;
624 $headers = $headers->set_cookie('f=b; path=/');
625
626Get or replace current header value, shortcut for the C<Set-Cookie> header from
627L<RFC 6265|http://tools.ietf.org/html/rfc6265>.
628
629=head2 status
630
631 my $status = $headers->status;
632 $headers = $headers->status('200 OK');
633
634Get or replace current header value, shortcut for the C<Status> header from
635L<RFC 3875|http://tools.ietf.org/html/rfc3875>.
636
637=head2 strict_transport_security
638
639 my $policy = $headers->strict_transport_security;
640 $headers = $headers->strict_transport_security('max-age=31536000');
641
642Get or replace current header value, shortcut for the
643C<Strict-Transport-Security> header from
644L<RFC 6797|http://tools.ietf.org/html/rfc6797>.
645
646=head2 te
647
648 my $te = $headers->te;
649 $headers = $headers->te('chunked');
650
651Get or replace current header value, shortcut for the C<TE> header.
652
653=head2 to_hash
654
655 my $single = $headers->to_hash;
656 my $multi = $headers->to_hash(1);
657
658Turn headers into hash reference, array references to represent multiple
659headers with the same name are disabled by default.
660
661 say $headers->to_hash->{DNT};
662
663=head2 to_string
664
665 my $str = $headers->to_string;
666
667Turn headers into a string, suitable for HTTP messages.
668
669=head2 trailer
670
671 my $trailer = $headers->trailer;
672 $headers = $headers->trailer('X-Foo');
673
674Get or replace current header value, shortcut for the C<Trailer> header.
675
676=head2 transfer_encoding
677
678 my $encoding = $headers->transfer_encoding;
679 $headers = $headers->transfer_encoding('chunked');
680
681Get or replace current header value, shortcut for the C<Transfer-Encoding>
682header.
683
684=head2 upgrade
685
686 my $upgrade = $headers->upgrade;
687 $headers = $headers->upgrade('websocket');
688
689Get or replace current header value, shortcut for the C<Upgrade> header.
690
691=head2 user_agent
692
693 my $agent = $headers->user_agent;
694 $headers = $headers->user_agent('Mojo/1.0');
695
696Get or replace current header value, shortcut for the C<User-Agent> header.
697
698=head2 vary
699
700 my $vary = $headers->vary;
701 $headers = $headers->vary('*');
702
703Get or replace current header value, shortcut for the C<Vary> header.
704
705=head2 www_authenticate
706
707 my $authenticate = $headers->www_authenticate;
708 $headers = $headers->www_authenticate('Basic realm="realm"');
709
710Get or replace current header value, shortcut for the C<WWW-Authenticate>
711header.
712
713=head1 SEE ALSO
714
715L<Mojolicious>, L<Mojolicious::Guides>, L<https://mojolicious.org>.
716
717=cut
Note: See TracBrowser for help on using the repository browser.