source: main/trunk/greenstone2/perllib/cpan/Mojo/ByteStream.pm@ 32205

Last change on this file since 32205 was 32205, checked in by ak19, 6 years ago

1. First set of commits to do with implementing the new 'paged_html' output option of PDFPlugin that uses xpdftools' new pdftohtml. So far tested only on Linux (64 bit), but things work there so I'm optimistically committing the changes since they work. 2. Committing the pre-built Linux binaries of XPDFtools for both 32 and 64 bit built by the XPDF group. 3. To use the correct bitness variant of xpdftools, setup.bash now exports the BITNESS env var, consulted by gsConvert.pl. 4. All the perl code changes to do with using xpdftools' pdftohtml to generate paged_html and feed it in the desired form into GS(3): gsConvert.pl, PDFPlugin.pm and its parent ConvertBinaryFile.pm have been modified to make it all work. xpdftools' pdftohtml generates a folder containing an html file and a screenshot for each page in a PDF (as well as an index.html linking to each page's html). However, we want a single html file that contains each individual 'page' html's content in a div, and need to do some further HTML style, attribute and structure modifications to massage the xpdftools output to what we want for GS. In order to parse and manipulate the HTML 'DOM' to do this, we're using the Mojo::DOM package that Dr Bainbridge found and which he's compiled up. Mojo::DOM is therefore also committed in this revision. Some further changes and some display fixes are required, but need to check with the others about that.

File size: 7.7 KB
Line 
1package Mojo::ByteStream;
2use Mojo::Base -strict;
3use overload bool => sub {1}, '""' => sub { ${$_[0]} }, fallback => 1;
4
5use Exporter 'import';
6use Mojo::Collection;
7use Mojo::Util;
8
9our @EXPORT_OK = ('b');
10
# Names of the Mojo::Util functions that are re-exposed as chainable methods
my @UTILS = qw(
  b64_decode b64_encode camelize decamelize hmac_sha1_sum html_unescape
  md5_bytes md5_sum punycode_decode punycode_encode quote sha1_bytes
  sha1_sum slugify term_escape trim unindent unquote url_escape
  url_unescape xml_escape xor_encode
);

# Install one method per listed function. Each method passes the wrapped
# string (followed by any extra arguments) to the Mojo::Util function, stores
# the result back into the object and returns the object for chaining.
foreach my $method (@UTILS) {
  my $util    = Mojo::Util->can($method);
  my $wrapper = sub {
    my $self = shift;
    ${$self} = $util->(${$self}, @_);
    return $self;
  };
  Mojo::Util::monkey_patch(__PACKAGE__, $method, $wrapper);
}
26
# Shortcut constructor (exportable as "b"): forwards all arguments to "new"
# on this package and returns the resulting object.
sub b {
  return __PACKAGE__->new(@_);
}
28
# Build a new object, via the invocant's "new", from the current content of
# this bytestream.
sub clone {
  my ($self) = @_;
  my $content = ${$self};
  return $self->new($content);
}
30
# Decode the bytestream in place by handing Mojo::Util::decode and the
# caller's arguments to "_delegate".
sub decode {
  my $self = shift;
  return $self->_delegate(\&Mojo::Util::decode, @_);
}
# Encode the bytestream in place by handing Mojo::Util::encode and the
# caller's arguments to "_delegate".
sub encode {
  my $self = shift;
  return $self->_delegate(\&Mojo::Util::encode, @_);
}
33
# Constructor. Concatenates all arguments into one string and returns a
# reference to that string blessed into the invocant's class. The invocant may
# be a class name or an existing object.
sub new {
  my ($class, @chunks) = @_;
  my $content = join '', @chunks;
  my $package = ref($class) || $class;
  return bless \$content, $package;
}
38
# Print the bytestream followed by a newline to the given handle, falling
# back to STDOUT when no handle is supplied. Returns the invocant.
sub say {
  my $self   = shift;
  my $handle = shift || \*STDOUT;
  say {$handle} ${$self};
  return $self;
}
45
# Compare the bytestream's content with the given string by passing both to
# Mojo::Util::secure_compare, and return its result.
sub secure_compare {
  my ($self, $other) = @_;
  return Mojo::Util::secure_compare(${$self}, $other);
}
47
# Return the length of the wrapped string, as reported by "length".
sub size {
  my ($self) = @_;
  return length ${$self};
}
49
# Split the bytestream on the given pattern with the built-in split and
# return a Mojo::Collection holding one new bytestream object per piece.
sub split {
  my ($self, $pattern) = @_;
  my @pieces = CORE::split $pattern, ${$self};
  return Mojo::Collection->new(map { $self->new($_) } @pieces);
}
54
# Forward to Mojo::Base::tap, called as a method on this object with the
# caller's arguments.
sub tap {
  my $self = shift;
  return $self->Mojo::Base::tap(@_);
}
56
# Return the wrapped string itself.
sub to_string {
  my ($self) = @_;
  return ${$self};
}
58
# Forward to Mojo::Base::with_roles, called as a method on the invocant with
# the caller's arguments.
sub with_roles {
  my $self = shift;
  return $self->Mojo::Base::with_roles(@_);
}
60
# Shared helper for "decode" and "encode". Calls the given code reference
# with an encoding name (the third argument, or 'UTF-8' when that is missing
# or false) and the current content, stores the result back into the object
# and returns the object.
sub _delegate {
  my $self = shift;
  my $sub  = shift;
  my $encoding = shift || 'UTF-8';
  ${$self} = $sub->($encoding, ${$self});
  return $self;
}
66
671;
68
69=encoding utf8
70
71=head1 NAME
72
73Mojo::ByteStream - ByteStream
74
75=head1 SYNOPSIS
76
77 use Mojo::ByteStream;
78
79 # Manipulate bytestream
80 my $stream = Mojo::ByteStream->new('foo_bar_baz');
81 say $stream->camelize;
82
83 # Chain methods
84 my $stream = Mojo::ByteStream->new('foo bar baz')->quote;
85 $stream = $stream->unquote->encode('UTF-8')->b64_encode('');
86 say "$stream";
87
88 # Use the alternative constructor
89 use Mojo::ByteStream 'b';
90 my $stream = b('foobarbaz')->b64_encode('')->say;
91
92=head1 DESCRIPTION
93
94L<Mojo::ByteStream> is a scalar-based container for bytestreams that provides a
95more friendly API for many of the functions in L<Mojo::Util>.
96
97 # Access scalar directly to manipulate bytestream
98 my $stream = Mojo::ByteStream->new('foo');
99 $$stream .= 'bar';
100
101=head1 FUNCTIONS
102
103L<Mojo::ByteStream> implements the following functions, which can be imported
104individually.
105
106=head2 b
107
108 my $stream = b('test123');
109
110Construct a new scalar-based L<Mojo::ByteStream> object.
111
112=head1 METHODS
113
114L<Mojo::ByteStream> implements the following methods.
115
116=head2 b64_decode
117
118 $stream = $stream->b64_decode;
119
120Base64 decode bytestream with L<Mojo::Util/"b64_decode">.
121
122=head2 b64_encode
123
124 $stream = $stream->b64_encode;
125 $stream = $stream->b64_encode("\n");
126
127Base64 encode bytestream with L<Mojo::Util/"b64_encode">.
128
129 # "Zm9vIGJhciBiYXo="
130 b('foo bar baz')->b64_encode('');
131
132=head2 camelize
133
134 $stream = $stream->camelize;
135
136Camelize bytestream with L<Mojo::Util/"camelize">.
137
138=head2 clone
139
140 my $stream2 = $stream->clone;
141
142Return a new L<Mojo::ByteStream> object cloned from this bytestream.
143
144=head2 decamelize
145
146 $stream = $stream->decamelize;
147
148Decamelize bytestream with L<Mojo::Util/"decamelize">.
149
150=head2 decode
151
152 $stream = $stream->decode;
153 $stream = $stream->decode('iso-8859-1');
154
155Decode bytestream with L<Mojo::Util/"decode">, defaults to using C<UTF-8>.
156
157 # "♥"
158 b('%E2%99%A5')->url_unescape->decode;
159
160=head2 encode
161
162 $stream = $stream->encode;
163 $stream = $stream->encode('iso-8859-1');
164
165Encode bytestream with L<Mojo::Util/"encode">, defaults to using C<UTF-8>.
166
167 # "%E2%99%A5"
168 b('♥')->encode->url_escape;
169
170=head2 hmac_sha1_sum
171
172 $stream = $stream->hmac_sha1_sum('passw0rd');
173
174Generate HMAC-SHA1 checksum for bytestream with L<Mojo::Util/"hmac_sha1_sum">.
175
176 # "7fbdc89263974a89210ea71f171c77d3f8c21471"
177 b('foo bar baz')->hmac_sha1_sum('secr3t');
178
179=head2 html_unescape
180
181 $stream = $stream->html_unescape;
182
183Unescape all HTML entities in bytestream with L<Mojo::Util/"html_unescape">.
184
185 # "%3Chtml%3E"
186 b('&lt;html&gt;')->html_unescape->url_escape;
187
188=head2 md5_bytes
189
190 $stream = $stream->md5_bytes;
191
192Generate binary MD5 checksum for bytestream with L<Mojo::Util/"md5_bytes">.
193
194=head2 md5_sum
195
196 $stream = $stream->md5_sum;
197
198Generate MD5 checksum for bytestream with L<Mojo::Util/"md5_sum">.
199
200=head2 new
201
202 my $stream = Mojo::ByteStream->new('test123');
203
204Construct a new scalar-based L<Mojo::ByteStream> object.
205
206=head2 punycode_decode
207
208 $stream = $stream->punycode_decode;
209
210Punycode decode bytestream with L<Mojo::Util/"punycode_decode">.
211
212=head2 punycode_encode
213
214 $stream = $stream->punycode_encode;
215
216Punycode encode bytestream with L<Mojo::Util/"punycode_encode">.
217
218=head2 quote
219
220 $stream = $stream->quote;
221
222Quote bytestream with L<Mojo::Util/"quote">.
223
224=head2 say
225
226 $stream = $stream->say;
227 $stream = $stream->say(*STDERR);
228
229Print bytestream to handle and append a newline, defaults to using C<STDOUT>.
230
231=head2 secure_compare
232
233 my $bool = $stream->secure_compare($str);
234
235Compare bytestream with L<Mojo::Util/"secure_compare">.
236
237=head2 sha1_bytes
238
239 $stream = $stream->sha1_bytes;
240
241Generate binary SHA1 checksum for bytestream with L<Mojo::Util/"sha1_bytes">.
242
243=head2 sha1_sum
244
245 $stream = $stream->sha1_sum;
246
247Generate SHA1 checksum for bytestream with L<Mojo::Util/"sha1_sum">.
248
249=head2 size
250
251 my $size = $stream->size;
252
253Size of bytestream.
254
255=head2 slugify
256
257 $stream = $stream->slugify;
258 $stream = $stream->slugify($bool);
259
260Generate URL slug for bytestream with L<Mojo::Util/"slugify">.
261
262=head2 split
263
264 my $collection = $stream->split(',');
265
266Turn bytestream into L<Mojo::Collection> object containing L<Mojo::ByteStream>
267objects.
268
269 # "One,Two,Three"
270 b("one,two,three")->split(',')->map('camelize')->join(',');
271
272=head2 tap
273
274 $stream = $stream->tap(sub {...});
275
276Alias for L<Mojo::Base/"tap">.
277
278=head2 term_escape
279
280 $stream = $stream->term_escape;
281
282Escape POSIX control characters in bytestream with L<Mojo::Util/"term_escape">.
283
284 # Print binary checksum to terminal
285 b('foo')->sha1_bytes->term_escape->say;
286
287=head2 to_string
288
289 my $str = $stream->to_string;
290
291Stringify bytestream.
292
293=head2 trim
294
295 $stream = $stream->trim;
296
297Trim whitespace characters from both ends of bytestream with
298L<Mojo::Util/"trim">.
299
300=head2 unindent
301
302 $stream = $stream->unindent;
303
304Unindent bytestream with L<Mojo::Util/"unindent">.
305
306=head2 unquote
307
308 $stream = $stream->unquote;
309
310Unquote bytestream with L<Mojo::Util/"unquote">.
311
312=head2 url_escape
313
314 $stream = $stream->url_escape;
315 $stream = $stream->url_escape('^A-Za-z0-9\-._~');
316
317Percent encode all unsafe characters in bytestream with
318L<Mojo::Util/"url_escape">.
319
320 # "%E2%98%83"
321 b('☃')->encode->url_escape;
322
323=head2 url_unescape
324
325 $stream = $stream->url_unescape;
326
327Decode percent encoded characters in bytestream with
328L<Mojo::Util/"url_unescape">.
329
330 # "&lt;html&gt;"
331 b('%3Chtml%3E')->url_unescape->xml_escape;
332
333=head2 with_roles
334
335 my $new_class = Mojo::ByteStream->with_roles('Mojo::ByteStream::Role::One');
336 my $new_class = Mojo::ByteStream->with_roles('+One', '+Two');
337 $stream = $stream->with_roles('+One', '+Two');
338
339Alias for L<Mojo::Base/"with_roles">.
340
341=head2 xml_escape
342
343 $stream = $stream->xml_escape;
344
345Escape only the characters C<&>, C<E<lt>>, C<E<gt>>, C<"> and C<'> in
346bytestream with L<Mojo::Util/"xml_escape">.
347
348=head2 xor_encode
349
350 $stream = $stream->xor_encode($key);
351
352XOR encode bytestream with L<Mojo::Util/"xor_encode">.
353
354 # "%04%0E%15B%03%1B%10"
355 b('foo bar')->xor_encode('baz')->url_escape;
356
357=head1 OPERATORS
358
359L<Mojo::ByteStream> overloads the following operators.
360
361=head2 bool
362
363 my $bool = !!$bytestream;
364
365Always true.
366
367=head2 stringify
368
369 my $str = "$bytestream";
370
371Alias for L</"to_string">.
372
373=head1 SEE ALSO
374
375L<Mojolicious>, L<Mojolicious::Guides>, L<https://mojolicious.org>.
376
377=cut
Note: See TracBrowser for help on using the repository browser.