1 | package HTML::Entities;
|
---|
2 |
|
---|
3 | # $Id: Entities.pm 14078 2007-05-17 03:15:41Z lh92 $
|
---|
4 |
|
---|
5 | =head1 NAME
|
---|
6 |
|
---|
7 | HTML::Entities - Encode or decode strings with HTML entities
|
---|
8 |
|
---|
9 | =head1 SYNOPSIS
|
---|
10 |
|
---|
11 | use HTML::Entities;
|
---|
12 |
|
---|
13 | $a = "Våre norske tegn bør æres";
|
---|
14 | decode_entities($a);
|
---|
15 | encode_entities($a, "\200-\377");
|
---|
16 |
|
---|
17 | For example, this:
|
---|
18 |
|
---|
19 | $input = "vis-à-vis Beyoncé's naïve\npapier-mâché résumé";
|
---|
20 | print encode_entities($input), "\n"
|
---|
21 |
|
---|
22 | Prints this out:
|
---|
23 |
|
---|
24 | vis-à-vis Beyoncé's naïve
|
---|
25 | papier-mâché résumé
|
---|
26 |
|
---|
27 | =head1 DESCRIPTION
|
---|
28 |
|
---|
29 | This module deals with encoding and decoding of strings with HTML
|
---|
30 | character entities. The module provides the following functions:
|
---|
31 |
|
---|
32 | =over 4
|
---|
33 |
|
---|
34 | =item decode_entities( $string, ... )
|
---|
35 |
|
---|
36 | This routine replaces HTML entities found in the $string with the
|
---|
37 | corresponding Unicode character. Under perl 5.6 and earlier only
|
---|
38 | characters in the Latin-1 range are replaced. Unrecognized
|
---|
39 | entities are left alone.
|
---|
40 |
|
---|
41 | If multiple strings are provided as argument they are each decoded
|
---|
42 | separately and the same number of strings are returned.
|
---|
43 |
|
---|
44 | If called in void context the arguments are decoded in-place.
|
---|
45 |
|
---|
46 | This routine is exported by default.
|
---|
47 |
|
---|
48 | =item _decode_entities( $string, \%entity2char )
|
---|
49 |
|
---|
50 | =item _decode_entities( $string, \%entity2char, $expand_prefix )
|
---|
51 |
|
---|
52 | This will in-place replace HTML entities in $string. The %entity2char
|
---|
53 | hash must be provided. Named entities not found in the %entity2char
|
---|
54 | hash are left alone. Numeric entities are expanded unless their value
|
---|
55 | overflow.
|
---|
56 |
|
---|
57 | The keys in %entity2char are the entity names to be expanded and their
|
---|
58 | values are what they should expand into. The values do not have to be
|
---|
59 | single character strings. If a key has ";" as suffix,
|
---|
60 | then occurrences in $string are only expanded if properly terminated
|
---|
61 | with ";". Entities without ";" will be expanded regardless of how
|
---|
62 | they are terminated for compatiblity with how common browsers treat
|
---|
63 | entities in the Latin-1 range.
|
---|
64 |
|
---|
65 | If $expand_prefix is TRUE then entities without trailing ";" in
|
---|
66 | %entity2char will even be expanded as a prefix of a longer
|
---|
67 | unrecognized name. The longest matching name in %entity2char will be
|
---|
68 | used. This is mainly present for compatibility with an MSIE
|
---|
69 | misfeature.
|
---|
70 |
|
---|
71 | $string = "foo bar";
|
---|
72 | _decode_entities($string, { nb => "@", nbsp => "\xA0" }, 1);
|
---|
73 | print $string; # will print "foo bar"
|
---|
74 |
|
---|
75 | This routine is exported by default.
|
---|
76 |
|
---|
77 | =item encode_entities( $string )
|
---|
78 |
|
---|
79 | =item encode_entities( $string, $unsafe_chars )
|
---|
80 |
|
---|
81 | This routine replaces unsafe characters in $string with their entity
|
---|
82 | representation. A second argument can be given to specify which
|
---|
83 | characters to consider unsafe (i.e., which to escape). The default set
|
---|
84 | of characters to encode are control chars, high-bit chars, and the
|
---|
85 | C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >>
|
---|
86 | characters. But this, for example, would encode I<just> the
|
---|
87 | C<< < >>, C<< & >>, C<< > >>, and C<< " >> characters:
|
---|
88 |
|
---|
89 | $encoded = encode_entities($input, '<>&"');
|
---|
90 |
|
---|
91 | This routine is exported by default.
|
---|
92 |
|
---|
93 | =item encode_entities_numeric( $string )
|
---|
94 |
|
---|
95 | =item encode_entities_numeric( $string, $unsafe_chars )
|
---|
96 |
|
---|
97 | This routine works just like encode_entities, except that the replacement
|
---|
98 | entities are always C<&#xI<hexnum>;> and never C<&I<entname>;>. For
|
---|
99 | example, C<encode_entities("r\xF4le")> returns "rôle", but
|
---|
100 | C<encode_entities_numeric("r\xF4le")> returns "rôle".
|
---|
101 |
|
---|
102 | This routine is I<not> exported by default. But you can always
|
---|
103 | export it with C<use HTML::Entities qw(encode_entities_numeric);>
|
---|
104 | or even C<use HTML::Entities qw(:DEFAULT encode_entities_numeric);>
|
---|
105 |
|
---|
106 | =back
|
---|
107 |
|
---|
108 | All these routines modify the string passed as the first argument, if
|
---|
109 | called in a void context. In scalar and array contexts, the encoded or
|
---|
110 | decoded string is returned (without changing the input string).
|
---|
111 |
|
---|
112 | If you prefer not to import these routines into your namespace, you can
|
---|
113 | call them as:
|
---|
114 |
|
---|
115 | use HTML::Entities ();
|
---|
116 | $decoded = HTML::Entities::decode($a);
|
---|
117 | $encoded = HTML::Entities::encode($a);
|
---|
118 | $encoded = HTML::Entities::encode_numeric($a);
|
---|
119 |
|
---|
120 | The module can also export the %char2entity and the %entity2char
|
---|
121 | hashes, which contain the mapping from all characters to the
|
---|
122 | corresponding entities (and vice versa, respectively).
|
---|
123 |
|
---|
124 | =head1 COPYRIGHT
|
---|
125 |
|
---|
126 | Copyright 1995-2006 Gisle Aas. All rights reserved.
|
---|
127 |
|
---|
128 | This library is free software; you can redistribute it and/or
|
---|
129 | modify it under the same terms as Perl itself.
|
---|
130 |
|
---|
131 | =cut
|
---|
132 |
|
---|
133 | use strict;
|
---|
134 | use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION);
|
---|
135 | use vars qw(%entity2char %char2entity);
|
---|
136 |
|
---|
137 | require 5.004;
|
---|
138 | require Exporter;
|
---|
139 | @ISA = qw(Exporter);
|
---|
140 |
|
---|
141 | @EXPORT = qw(encode_entities decode_entities _decode_entities);
|
---|
142 | @EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric);
|
---|
143 |
|
---|
144 | $VERSION = sprintf("%d.%02d", q$Revision: 14078 $ =~ /(\d+)\.(\d+)/);
|
---|
145 | sub Version { $VERSION; }
|
---|
146 |
|
---|
147 | require HTML::Parser; # for fast XS implemented decode_entities
|
---|
148 |
|
---|
149 |
|
---|
150 | %entity2char = (
|
---|
151 | # Some normal chars that have special meaning in SGML context
|
---|
152 | amp => '&', # ampersand
|
---|
153 | 'gt' => '>', # greater than
|
---|
154 | 'lt' => '<', # less than
|
---|
155 | quot => '"', # double quote
|
---|
156 | apos => "'", # single quote
|
---|
157 |
|
---|
158 | # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
|
---|
159 | AElig => chr(198), # capital AE diphthong (ligature)
|
---|
160 | Aacute => chr(193), # capital A, acute accent
|
---|
161 | Acirc => chr(194), # capital A, circumflex accent
|
---|
162 | Agrave => chr(192), # capital A, grave accent
|
---|
163 | Aring => chr(197), # capital A, ring
|
---|
164 | Atilde => chr(195), # capital A, tilde
|
---|
165 | Auml => chr(196), # capital A, dieresis or umlaut mark
|
---|
166 | Ccedil => chr(199), # capital C, cedilla
|
---|
167 | ETH => chr(208), # capital Eth, Icelandic
|
---|
168 | Eacute => chr(201), # capital E, acute accent
|
---|
169 | Ecirc => chr(202), # capital E, circumflex accent
|
---|
170 | Egrave => chr(200), # capital E, grave accent
|
---|
171 | Euml => chr(203), # capital E, dieresis or umlaut mark
|
---|
172 | Iacute => chr(205), # capital I, acute accent
|
---|
173 | Icirc => chr(206), # capital I, circumflex accent
|
---|
174 | Igrave => chr(204), # capital I, grave accent
|
---|
175 | Iuml => chr(207), # capital I, dieresis or umlaut mark
|
---|
176 | Ntilde => chr(209), # capital N, tilde
|
---|
177 | Oacute => chr(211), # capital O, acute accent
|
---|
178 | Ocirc => chr(212), # capital O, circumflex accent
|
---|
179 | Ograve => chr(210), # capital O, grave accent
|
---|
180 | Oslash => chr(216), # capital O, slash
|
---|
181 | Otilde => chr(213), # capital O, tilde
|
---|
182 | Ouml => chr(214), # capital O, dieresis or umlaut mark
|
---|
183 | THORN => chr(222), # capital THORN, Icelandic
|
---|
184 | Uacute => chr(218), # capital U, acute accent
|
---|
185 | Ucirc => chr(219), # capital U, circumflex accent
|
---|
186 | Ugrave => chr(217), # capital U, grave accent
|
---|
187 | Uuml => chr(220), # capital U, dieresis or umlaut mark
|
---|
188 | Yacute => chr(221), # capital Y, acute accent
|
---|
189 | aacute => chr(225), # small a, acute accent
|
---|
190 | acirc => chr(226), # small a, circumflex accent
|
---|
191 | aelig => chr(230), # small ae diphthong (ligature)
|
---|
192 | agrave => chr(224), # small a, grave accent
|
---|
193 | aring => chr(229), # small a, ring
|
---|
194 | atilde => chr(227), # small a, tilde
|
---|
195 | auml => chr(228), # small a, dieresis or umlaut mark
|
---|
196 | ccedil => chr(231), # small c, cedilla
|
---|
197 | eacute => chr(233), # small e, acute accent
|
---|
198 | ecirc => chr(234), # small e, circumflex accent
|
---|
199 | egrave => chr(232), # small e, grave accent
|
---|
200 | eth => chr(240), # small eth, Icelandic
|
---|
201 | euml => chr(235), # small e, dieresis or umlaut mark
|
---|
202 | iacute => chr(237), # small i, acute accent
|
---|
203 | icirc => chr(238), # small i, circumflex accent
|
---|
204 | igrave => chr(236), # small i, grave accent
|
---|
205 | iuml => chr(239), # small i, dieresis or umlaut mark
|
---|
206 | ntilde => chr(241), # small n, tilde
|
---|
207 | oacute => chr(243), # small o, acute accent
|
---|
208 | ocirc => chr(244), # small o, circumflex accent
|
---|
209 | ograve => chr(242), # small o, grave accent
|
---|
210 | oslash => chr(248), # small o, slash
|
---|
211 | otilde => chr(245), # small o, tilde
|
---|
212 | ouml => chr(246), # small o, dieresis or umlaut mark
|
---|
213 | szlig => chr(223), # small sharp s, German (sz ligature)
|
---|
214 | thorn => chr(254), # small thorn, Icelandic
|
---|
215 | uacute => chr(250), # small u, acute accent
|
---|
216 | ucirc => chr(251), # small u, circumflex accent
|
---|
217 | ugrave => chr(249), # small u, grave accent
|
---|
218 | uuml => chr(252), # small u, dieresis or umlaut mark
|
---|
219 | yacute => chr(253), # small y, acute accent
|
---|
220 | yuml => chr(255), # small y, dieresis or umlaut mark
|
---|
221 |
|
---|
222 | # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
|
---|
223 | copy => chr(169), # copyright sign
|
---|
224 | reg => chr(174), # registered sign
|
---|
225 | nbsp => chr(160), # non breaking space
|
---|
226 |
|
---|
227 | # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
|
---|
228 | iexcl => chr(161),
|
---|
229 | cent => chr(162),
|
---|
230 | pound => chr(163),
|
---|
231 | curren => chr(164),
|
---|
232 | yen => chr(165),
|
---|
233 | brvbar => chr(166),
|
---|
234 | sect => chr(167),
|
---|
235 | uml => chr(168),
|
---|
236 | ordf => chr(170),
|
---|
237 | laquo => chr(171),
|
---|
238 | 'not' => chr(172), # not is a keyword in perl
|
---|
239 | shy => chr(173),
|
---|
240 | macr => chr(175),
|
---|
241 | deg => chr(176),
|
---|
242 | plusmn => chr(177),
|
---|
243 | sup1 => chr(185),
|
---|
244 | sup2 => chr(178),
|
---|
245 | sup3 => chr(179),
|
---|
246 | acute => chr(180),
|
---|
247 | micro => chr(181),
|
---|
248 | para => chr(182),
|
---|
249 | middot => chr(183),
|
---|
250 | cedil => chr(184),
|
---|
251 | ordm => chr(186),
|
---|
252 | raquo => chr(187),
|
---|
253 | frac14 => chr(188),
|
---|
254 | frac12 => chr(189),
|
---|
255 | frac34 => chr(190),
|
---|
256 | iquest => chr(191),
|
---|
257 | 'times' => chr(215), # times is a keyword in perl
|
---|
258 | divide => chr(247),
|
---|
259 |
|
---|
260 | ( $] > 5.007 ? (
|
---|
261 | 'OElig;' => chr(338),
|
---|
262 | 'oelig;' => chr(339),
|
---|
263 | 'Scaron;' => chr(352),
|
---|
264 | 'scaron;' => chr(353),
|
---|
265 | 'Yuml;' => chr(376),
|
---|
266 | 'fnof;' => chr(402),
|
---|
267 | 'circ;' => chr(710),
|
---|
268 | 'tilde;' => chr(732),
|
---|
269 | 'Alpha;' => chr(913),
|
---|
270 | 'Beta;' => chr(914),
|
---|
271 | 'Gamma;' => chr(915),
|
---|
272 | 'Delta;' => chr(916),
|
---|
273 | 'Epsilon;' => chr(917),
|
---|
274 | 'Zeta;' => chr(918),
|
---|
275 | 'Eta;' => chr(919),
|
---|
276 | 'Theta;' => chr(920),
|
---|
277 | 'Iota;' => chr(921),
|
---|
278 | 'Kappa;' => chr(922),
|
---|
279 | 'Lambda;' => chr(923),
|
---|
280 | 'Mu;' => chr(924),
|
---|
281 | 'Nu;' => chr(925),
|
---|
282 | 'Xi;' => chr(926),
|
---|
283 | 'Omicron;' => chr(927),
|
---|
284 | 'Pi;' => chr(928),
|
---|
285 | 'Rho;' => chr(929),
|
---|
286 | 'Sigma;' => chr(931),
|
---|
287 | 'Tau;' => chr(932),
|
---|
288 | 'Upsilon;' => chr(933),
|
---|
289 | 'Phi;' => chr(934),
|
---|
290 | 'Chi;' => chr(935),
|
---|
291 | 'Psi;' => chr(936),
|
---|
292 | 'Omega;' => chr(937),
|
---|
293 | 'alpha;' => chr(945),
|
---|
294 | 'beta;' => chr(946),
|
---|
295 | 'gamma;' => chr(947),
|
---|
296 | 'delta;' => chr(948),
|
---|
297 | 'epsilon;' => chr(949),
|
---|
298 | 'zeta;' => chr(950),
|
---|
299 | 'eta;' => chr(951),
|
---|
300 | 'theta;' => chr(952),
|
---|
301 | 'iota;' => chr(953),
|
---|
302 | 'kappa;' => chr(954),
|
---|
303 | 'lambda;' => chr(955),
|
---|
304 | 'mu;' => chr(956),
|
---|
305 | 'nu;' => chr(957),
|
---|
306 | 'xi;' => chr(958),
|
---|
307 | 'omicron;' => chr(959),
|
---|
308 | 'pi;' => chr(960),
|
---|
309 | 'rho;' => chr(961),
|
---|
310 | 'sigmaf;' => chr(962),
|
---|
311 | 'sigma;' => chr(963),
|
---|
312 | 'tau;' => chr(964),
|
---|
313 | 'upsilon;' => chr(965),
|
---|
314 | 'phi;' => chr(966),
|
---|
315 | 'chi;' => chr(967),
|
---|
316 | 'psi;' => chr(968),
|
---|
317 | 'omega;' => chr(969),
|
---|
318 | 'thetasym;' => chr(977),
|
---|
319 | 'upsih;' => chr(978),
|
---|
320 | 'piv;' => chr(982),
|
---|
321 | 'ensp;' => chr(8194),
|
---|
322 | 'emsp;' => chr(8195),
|
---|
323 | 'thinsp;' => chr(8201),
|
---|
324 | 'zwnj;' => chr(8204),
|
---|
325 | 'zwj;' => chr(8205),
|
---|
326 | 'lrm;' => chr(8206),
|
---|
327 | 'rlm;' => chr(8207),
|
---|
328 | 'ndash;' => chr(8211),
|
---|
329 | 'mdash;' => chr(8212),
|
---|
330 | 'lsquo;' => chr(8216),
|
---|
331 | 'rsquo;' => chr(8217),
|
---|
332 | 'sbquo;' => chr(8218),
|
---|
333 | 'ldquo;' => chr(8220),
|
---|
334 | 'rdquo;' => chr(8221),
|
---|
335 | 'bdquo;' => chr(8222),
|
---|
336 | 'dagger;' => chr(8224),
|
---|
337 | 'Dagger;' => chr(8225),
|
---|
338 | 'bull;' => chr(8226),
|
---|
339 | 'hellip;' => chr(8230),
|
---|
340 | 'permil;' => chr(8240),
|
---|
341 | 'prime;' => chr(8242),
|
---|
342 | 'Prime;' => chr(8243),
|
---|
343 | 'lsaquo;' => chr(8249),
|
---|
344 | 'rsaquo;' => chr(8250),
|
---|
345 | 'oline;' => chr(8254),
|
---|
346 | 'frasl;' => chr(8260),
|
---|
347 | 'euro;' => chr(8364),
|
---|
348 | 'image;' => chr(8465),
|
---|
349 | 'weierp;' => chr(8472),
|
---|
350 | 'real;' => chr(8476),
|
---|
351 | 'trade;' => chr(8482),
|
---|
352 | 'alefsym;' => chr(8501),
|
---|
353 | 'larr;' => chr(8592),
|
---|
354 | 'uarr;' => chr(8593),
|
---|
355 | 'rarr;' => chr(8594),
|
---|
356 | 'darr;' => chr(8595),
|
---|
357 | 'harr;' => chr(8596),
|
---|
358 | 'crarr;' => chr(8629),
|
---|
359 | 'lArr;' => chr(8656),
|
---|
360 | 'uArr;' => chr(8657),
|
---|
361 | 'rArr;' => chr(8658),
|
---|
362 | 'dArr;' => chr(8659),
|
---|
363 | 'hArr;' => chr(8660),
|
---|
364 | 'forall;' => chr(8704),
|
---|
365 | 'part;' => chr(8706),
|
---|
366 | 'exist;' => chr(8707),
|
---|
367 | 'empty;' => chr(8709),
|
---|
368 | 'nabla;' => chr(8711),
|
---|
369 | 'isin;' => chr(8712),
|
---|
370 | 'notin;' => chr(8713),
|
---|
371 | 'ni;' => chr(8715),
|
---|
372 | 'prod;' => chr(8719),
|
---|
373 | 'sum;' => chr(8721),
|
---|
374 | 'minus;' => chr(8722),
|
---|
375 | 'lowast;' => chr(8727),
|
---|
376 | 'radic;' => chr(8730),
|
---|
377 | 'prop;' => chr(8733),
|
---|
378 | 'infin;' => chr(8734),
|
---|
379 | 'ang;' => chr(8736),
|
---|
380 | 'and;' => chr(8743),
|
---|
381 | 'or;' => chr(8744),
|
---|
382 | 'cap;' => chr(8745),
|
---|
383 | 'cup;' => chr(8746),
|
---|
384 | 'int;' => chr(8747),
|
---|
385 | 'there4;' => chr(8756),
|
---|
386 | 'sim;' => chr(8764),
|
---|
387 | 'cong;' => chr(8773),
|
---|
388 | 'asymp;' => chr(8776),
|
---|
389 | 'ne;' => chr(8800),
|
---|
390 | 'equiv;' => chr(8801),
|
---|
391 | 'le;' => chr(8804),
|
---|
392 | 'ge;' => chr(8805),
|
---|
393 | 'sub;' => chr(8834),
|
---|
394 | 'sup;' => chr(8835),
|
---|
395 | 'nsub;' => chr(8836),
|
---|
396 | 'sube;' => chr(8838),
|
---|
397 | 'supe;' => chr(8839),
|
---|
398 | 'oplus;' => chr(8853),
|
---|
399 | 'otimes;' => chr(8855),
|
---|
400 | 'perp;' => chr(8869),
|
---|
401 | 'sdot;' => chr(8901),
|
---|
402 | 'lceil;' => chr(8968),
|
---|
403 | 'rceil;' => chr(8969),
|
---|
404 | 'lfloor;' => chr(8970),
|
---|
405 | 'rfloor;' => chr(8971),
|
---|
406 | 'lang;' => chr(9001),
|
---|
407 | 'rang;' => chr(9002),
|
---|
408 | 'loz;' => chr(9674),
|
---|
409 | 'spades;' => chr(9824),
|
---|
410 | 'clubs;' => chr(9827),
|
---|
411 | 'hearts;' => chr(9829),
|
---|
412 | 'diams;' => chr(9830),
|
---|
413 | ) : ())
|
---|
414 | );
|
---|
415 |
|
---|
416 |
|
---|
417 | # Make the opposite mapping
|
---|
418 | while (my($entity, $char) = each(%entity2char)) {
|
---|
419 | $entity =~ s/;\z//;
|
---|
420 | $char2entity{$char} = "&$entity;";
|
---|
421 | }
|
---|
422 | delete $char2entity{"'"}; # only one-way decoding
|
---|
423 |
|
---|
424 | # Fill in missing entities
|
---|
425 | for (0 .. 255) {
|
---|
426 | next if exists $char2entity{chr($_)};
|
---|
427 | $char2entity{chr($_)} = "&#$_;";
|
---|
428 | }
|
---|
429 |
|
---|
430 | my %subst; # compiled encoding regexps
|
---|
431 |
|
---|
432 | sub decode_entities_old
|
---|
433 | {
|
---|
434 | my $array;
|
---|
435 | if (defined wantarray) {
|
---|
436 | $array = [@_]; # copy
|
---|
437 | } else {
|
---|
438 | $array = \@_; # modify in-place
|
---|
439 | }
|
---|
440 | my $c;
|
---|
441 | for (@$array) {
|
---|
442 | s/(&\#(\d+);?)/$2 < 256 ? chr($2) : $1/eg;
|
---|
443 | s/(&\#[xX]([0-9a-fA-F]+);?)/$c = hex($2); $c < 256 ? chr($c) : $1/eg;
|
---|
444 | s/(&(\w+);?)/$entity2char{$2} || $1/eg;
|
---|
445 | }
|
---|
446 | wantarray ? @$array : $array->[0];
|
---|
447 | }
|
---|
448 |
|
---|
449 | sub encode_entities
|
---|
450 | {
|
---|
451 | my $ref;
|
---|
452 | if (defined wantarray) {
|
---|
453 | my $x = $_[0];
|
---|
454 | $ref = \$x; # copy
|
---|
455 | } else {
|
---|
456 | $ref = \$_[0]; # modify in-place
|
---|
457 | }
|
---|
458 | if (defined $_[1] and length $_[1]) {
|
---|
459 | unless (exists $subst{$_[1]}) {
|
---|
460 | # Because we can't compile regex we fake it with a cached sub
|
---|
461 | my $code = "sub {\$_[0] =~ s/([$_[1]])/\$char2entity{\$1} || num_entity(\$1)/ge; }";
|
---|
462 | $subst{$_[1]} = eval $code;
|
---|
463 | die( $@ . " while trying to turn range: \"$_[1]\"\n "
|
---|
464 | . "into code: $code\n "
|
---|
465 | ) if $@;
|
---|
466 | }
|
---|
467 | &{$subst{$_[1]}}($$ref);
|
---|
468 | } else {
|
---|
469 | # Encode control chars, high bit chars and '<', '&', '>', ''' and '"'
|
---|
470 | $$ref =~ s/([^\n\r\t !\#\$%\(-;=?-~])/$char2entity{$1} || num_entity($1)/ge;
|
---|
471 | }
|
---|
472 | $$ref;
|
---|
473 | }
|
---|
474 |
|
---|
475 | sub encode_entities_numeric {
|
---|
476 | local %char2entity;
|
---|
477 | return &encode_entities; # a goto &encode_entities wouldn't work
|
---|
478 | }
|
---|
479 |
|
---|
480 |
|
---|
481 | sub num_entity {
|
---|
482 | sprintf "&#x%X;", ord($_[0]);
|
---|
483 | }
|
---|
484 |
|
---|
485 | # Set up aliases
|
---|
486 | *encode = \&encode_entities;
|
---|
487 | *encode_numeric = \&encode_entities_numeric;
|
---|
488 | *encode_numerically = \&encode_entities_numeric;
|
---|
489 | *decode = \&decode_entities;
|
---|
490 |
|
---|
491 | 1;
|
---|