source: main/trunk/greenstone2/perllib/cpan/Mojo/JSON.pm@ 32205

Last change on this file since 32205 was 32205, checked in by ak19, 6 years ago

First set of commits to do with implementing the new 'paged_html' output option of PDFPlugin that uses using xpdftools' new pdftohtml. So far tested only on Linux (64 bit), but things work there so I'm optimistically committing the changes since they work. 2. Committing the pre-built Linux binaries of XPDFtools for both 32 and 64 bit built by the XPDF group. 2. To use the correct bitness variant of xpdftools, setup.bash now exports the BITNESS env var, consulted by gsConvert.pl. 3. All the perl code changes to do with using xpdf tools' pdftohtml to generate paged_html and feed it in the desired form into GS(3): gsConvert.pl, PDFPlugin.pm and its parent ConvertBinaryPFile.pm have been modified to make it all work. xpdftools' pdftohtml generates a folder containing an html file and a screenshot for each page in a PDF (as well as an index.html linking to each page's html). However, we want a single html file that contains each individual 'page' html's content in a div, and need to do some further HTML style, attribute and structure modifications to massage the xpdftool output to what we want for GS. In order to parse and manipulate the HTML 'DOM' to do this, we're using the Mojo::DOM package that Dr Bainbridge found and which he's compiled up. Mojo::DOM is therefore also committed in this revision. Some further changes and some display fixes are required, but need to check with the others about that.

File size: 8.2 KB
Line 
1package Mojo::JSON;
2use Mojo::Base -strict;
3
4use Carp 'croak';
5use Exporter 'import';
6use JSON::PP ();
7use Mojo::Util;
8use Scalar::Util 'blessed';
9
10our @EXPORT_OK = qw(decode_json encode_json false from_json j to_json true);
11
12# Escaped special character map (with u2028 and u2029)
13my %ESCAPE = (
14 '"' => '"',
15 '\\' => '\\',
16 '/' => '/',
17 'b' => "\x08",
18 'f' => "\x0c",
19 'n' => "\x0a",
20 'r' => "\x0d",
21 't' => "\x09",
22 'u2028' => "\x{2028}",
23 'u2029' => "\x{2029}"
24);
25my %REVERSE = map { $ESCAPE{$_} => "\\$_" } keys %ESCAPE;
26for (0x00 .. 0x1f) { $REVERSE{pack 'C', $_} //= sprintf '\u%.4X', $_ }
27
28sub decode_json {
29 my $err = _decode(\my $value, shift);
30 return defined $err ? croak $err : $value;
31}
32
33sub encode_json { Mojo::Util::encode 'UTF-8', _encode_value(shift) }
34
35sub false () {JSON::PP::false}
36
37sub from_json {
38 my $err = _decode(\my $value, shift, 1);
39 return defined $err ? croak $err : $value;
40}
41
42sub j {
43 return encode_json($_[0]) if ref $_[0] eq 'ARRAY' || ref $_[0] eq 'HASH';
44 return eval { decode_json($_[0]) };
45}
46
47sub to_json { _encode_value(shift) }
48
49sub true () {JSON::PP::true}
50
51sub _decode {
52 my $valueref = shift;
53
54 eval {
55
56 # Missing input
57 die "Missing or empty input\n" unless length(local $_ = shift);
58
59 # UTF-8
60 $_ = Mojo::Util::decode 'UTF-8', $_ unless shift;
61 die "Input is not UTF-8 encoded\n" unless defined;
62
63 # Value
64 $$valueref = _decode_value();
65
66 # Leftover data
67 /\G[\x20\x09\x0a\x0d]*\z/gc or _throw('Unexpected data');
68 } ? return undef : chomp $@;
69
70 return $@;
71}
72
73sub _decode_array {
74 my @array;
75 until (m/\G[\x20\x09\x0a\x0d]*\]/gc) {
76
77 # Value
78 push @array, _decode_value();
79
80 # Separator
81 redo if /\G[\x20\x09\x0a\x0d]*,/gc;
82
83 # End
84 last if /\G[\x20\x09\x0a\x0d]*\]/gc;
85
86 # Invalid character
87 _throw('Expected comma or right square bracket while parsing array');
88 }
89
90 return \@array;
91}
92
93sub _decode_object {
94 my %hash;
95 until (m/\G[\x20\x09\x0a\x0d]*\}/gc) {
96
97 # Quote
98 /\G[\x20\x09\x0a\x0d]*"/gc
99 or _throw('Expected string while parsing object');
100
101 # Key
102 my $key = _decode_string();
103
104 # Colon
105 /\G[\x20\x09\x0a\x0d]*:/gc or _throw('Expected colon while parsing object');
106
107 # Value
108 $hash{$key} = _decode_value();
109
110 # Separator
111 redo if /\G[\x20\x09\x0a\x0d]*,/gc;
112
113 # End
114 last if /\G[\x20\x09\x0a\x0d]*\}/gc;
115
116 # Invalid character
117 _throw('Expected comma or right curly bracket while parsing object');
118 }
119
120 return \%hash;
121}
122
123sub _decode_string {
124 my $pos = pos;
125
126 # Extract string with escaped characters
127 m!\G((?:(?:[^\x00-\x1f\\"]|\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4})){0,32766})*)!gc;
128 my $str = $1;
129
130 # Invalid character
131 unless (m/\G"/gc) {
132 _throw('Unexpected character or invalid escape while parsing string')
133 if /\G[\x00-\x1f\\]/;
134 _throw('Unterminated string');
135 }
136
137 # Unescape popular characters
138 if (index($str, '\\u') < 0) {
139 $str =~ s!\\(["\\/bfnrt])!$ESCAPE{$1}!gs;
140 return $str;
141 }
142
143 # Unescape everything else
144 my $buffer = '';
145 while ($str =~ /\G([^\\]*)\\(?:([^u])|u(.{4}))/gc) {
146 $buffer .= $1;
147
148 # Popular character
149 if ($2) { $buffer .= $ESCAPE{$2} }
150
151 # Escaped
152 else {
153 my $ord = hex $3;
154
155 # Surrogate pair
156 if (($ord & 0xf800) == 0xd800) {
157
158 # High surrogate
159 ($ord & 0xfc00) == 0xd800
160 or pos = $pos + pos($str), _throw('Missing high-surrogate');
161
162 # Low surrogate
163 $str =~ /\G\\u([Dd][C-Fc-f]..)/gc
164 or pos = $pos + pos($str), _throw('Missing low-surrogate');
165
166 $ord = 0x10000 + ($ord - 0xd800) * 0x400 + (hex($1) - 0xdc00);
167 }
168
169 # Character
170 $buffer .= pack 'U', $ord;
171 }
172 }
173
174 # The rest
175 return $buffer . substr $str, pos($str), length($str);
176}
177
178sub _decode_value {
179
180 # Leading whitespace
181 /\G[\x20\x09\x0a\x0d]*/gc;
182
183 # String
184 return _decode_string() if /\G"/gc;
185
186 # Object
187 return _decode_object() if /\G\{/gc;
188
189 # Array
190 return _decode_array() if /\G\[/gc;
191
192 # Number
193 return 0 + $1
194 if /\G([-]?(?:0|[1-9][0-9]*)(?:\.[0-9]*)?(?:[eE][+-]?[0-9]+)?)/gc;
195
196 # True
197 return true() if /\Gtrue/gc;
198
199 # False
200 return false() if /\Gfalse/gc;
201
202 # Null
203 return undef if /\Gnull/gc;
204
205 # Invalid character
206 _throw('Expected string, array, object, number, boolean or null');
207}
208
209sub _encode_array {
210 '[' . join(',', map { _encode_value($_) } @{$_[0]}) . ']';
211}
212
213sub _encode_object {
214 my $object = shift;
215 my @pairs = map { _encode_string($_) . ':' . _encode_value($object->{$_}) }
216 sort keys %$object;
217 return '{' . join(',', @pairs) . '}';
218}
219
220sub _encode_string {
221 my $str = shift;
222 $str =~ s!([\x00-\x1f\x{2028}\x{2029}\\"/])!$REVERSE{$1}!gs;
223 return "\"$str\"";
224}
225
226sub _encode_value {
227 my $value = shift;
228
229 # Reference
230 if (my $ref = ref $value) {
231
232 # Object
233 return _encode_object($value) if $ref eq 'HASH';
234
235 # Array
236 return _encode_array($value) if $ref eq 'ARRAY';
237
238 # True or false
239 return $$value ? 'true' : 'false' if $ref eq 'SCALAR';
240 return $value ? 'true' : 'false' if $ref eq 'JSON::PP::Boolean';
241
242 # Everything else
243 return _encode_string($value)
244 unless blessed $value && (my $sub = $value->can('TO_JSON'));
245 return _encode_value($value->$sub);
246 }
247
248 # Null
249 return 'null' unless defined $value;
250
251 # Number
252 no warnings 'numeric';
253 return $value
254 if !utf8::is_utf8($value)
255 && length((my $dummy = '') & $value)
256 && 0 + $value eq $value
257 && $value * 0 == 0;
258
259 # String
260 return _encode_string($value);
261}
262
263sub _throw {
264
265 # Leading whitespace
266 /\G[\x20\x09\x0a\x0d]*/gc;
267
268 # Context
269 my $context = 'Malformed JSON: ' . shift;
270 if (m/\G\z/gc) { $context .= ' before end of data' }
271 else {
272 my @lines = split "\n", substr($_, 0, pos);
273 $context .= ' at line ' . @lines . ', offset ' . length(pop @lines || '');
274 }
275
276 die "$context\n";
277}
278
2791;
280
281=encoding utf8
282
283=head1 NAME
284
285Mojo::JSON - Minimalistic JSON
286
287=head1 SYNOPSIS
288
289 use Mojo::JSON qw(decode_json encode_json);
290
291 my $bytes = encode_json {foo => [1, 2], bar => 'hello!', baz => \1};
292 my $hash = decode_json $bytes;
293
294=head1 DESCRIPTION
295
296L<Mojo::JSON> is a minimalistic and possibly the fastest pure-Perl
297implementation of L<RFC 8259|http://tools.ietf.org/html/rfc8259>.
298
299It supports normal Perl data types like scalar, array reference, hash reference
300and will try to call the C<TO_JSON> method on blessed references, or stringify
301them if it doesn't exist. Differentiating between strings and numbers in Perl
302is hard, depending on how it has been used, a scalar can be both at the same
303time. The string value has a higher precedence unless both representations are
304equivalent.
305
306 [1, -2, 3] -> [1, -2, 3]
307 {"foo": "bar"} -> {foo => 'bar'}
308
309Literal names will be translated to and from L<Mojo::JSON> constants or a
310similar native Perl value.
311
312 true -> Mojo::JSON->true
313 false -> Mojo::JSON->false
314 null -> undef
315
316In addition scalar references will be used to generate booleans, based on if
317their values are true or false.
318
319 \1 -> true
320 \0 -> false
321
322The two Unicode whitespace characters C<u2028> and C<u2029> will always be
323escaped to make JSONP easier, and the character C</> to prevent XSS attacks.
324
325 "\x{2028}\x{2029}</script>" -> "\u2028\u2029<\/script>"
326
327=head1 FUNCTIONS
328
329L<Mojo::JSON> implements the following functions, which can be imported
330individually.
331
332=head2 decode_json
333
334 my $value = decode_json $bytes;
335
336Decode JSON to Perl value and die if decoding fails.
337
338=head2 encode_json
339
340 my $bytes = encode_json {i => '♥ mojolicious'};
341
342Encode Perl value to JSON.
343
344=head2 false
345
346 my $false = false;
347
348False value, used because Perl has no native equivalent.
349
350=head2 from_json
351
352 my $value = from_json $chars;
353
354Decode JSON text that is not C<UTF-8> encoded to Perl value and die if decoding
355fails.
356
357=head2 j
358
359 my $bytes = j [1, 2, 3];
360 my $bytes = j {i => '♥ mojolicious'};
361 my $value = j $bytes;
362
363Encode Perl data structure (which may only be an array reference or hash
364reference) or decode JSON, an C<undef> return value indicates a bare C<null> or
365that decoding failed.
366
367=head2 to_json
368
369 my $chars = to_json {i => '♥ mojolicious'};
370
371Encode Perl value to JSON text without C<UTF-8> encoding it.
372
373=head2 true
374
375 my $true = true;
376
377True value, used because Perl has no native equivalent.
378
379=head1 SEE ALSO
380
381L<Mojolicious>, L<Mojolicious::Guides>, L<https://mojolicious.org>.
382
383=cut
Note: See TracBrowser for help on using the repository browser.