1 | package charnames;
|
---|
2 | use strict;
|
---|
3 | use warnings;
|
---|
4 | use Carp;
|
---|
5 | use File::Spec;
|
---|
6 | our $VERSION = '1.05';
|
---|
7 |
|
---|
8 | use bytes (); # for $bytes::hint_bits
|
---|
9 | $charnames::hint_bits = 0x20000; # HINT_LOCALIZE_HH
|
---|
10 |
|
---|
11 | my %alias1 = (
|
---|
12 | # Icky 3.2 names with parentheses.
|
---|
13 | 'LINE FEED' => 'LINE FEED (LF)',
|
---|
14 | 'FORM FEED' => 'FORM FEED (FF)',
|
---|
15 | 'CARRIAGE RETURN' => 'CARRIAGE RETURN (CR)',
|
---|
16 | 'NEXT LINE' => 'NEXT LINE (NEL)',
|
---|
17 | # Convenience.
|
---|
18 | 'LF' => 'LINE FEED (LF)',
|
---|
19 | 'FF' => 'FORM FEED (FF)',
|
---|
20 | 'CR' => 'CARRIAGE RETURN (CR)',
|
---|
21 | 'NEL' => 'NEXT LINE (NEL)',
|
---|
22 | # More convenience. For futher convencience,
|
---|
23 | # it is suggested some way using using the NamesList
|
---|
24 | # aliases is implemented.
|
---|
25 | 'ZWNJ' => 'ZERO WIDTH NON-JOINER',
|
---|
26 | 'ZWJ' => 'ZERO WIDTH JOINER',
|
---|
27 | 'BOM' => 'BYTE ORDER MARK',
|
---|
28 | );
|
---|
29 |
|
---|
30 | my %alias2 = (
|
---|
31 | # Pre-3.2 compatibility (only for the first 256 characters).
|
---|
32 | 'HORIZONTAL TABULATION' => 'CHARACTER TABULATION',
|
---|
33 | 'VERTICAL TABULATION' => 'LINE TABULATION',
|
---|
34 | 'FILE SEPARATOR' => 'INFORMATION SEPARATOR FOUR',
|
---|
35 | 'GROUP SEPARATOR' => 'INFORMATION SEPARATOR THREE',
|
---|
36 | 'RECORD SEPARATOR' => 'INFORMATION SEPARATOR TWO',
|
---|
37 | 'UNIT SEPARATOR' => 'INFORMATION SEPARATOR ONE',
|
---|
38 | 'PARTIAL LINE DOWN' => 'PARTIAL LINE FORWARD',
|
---|
39 | 'PARTIAL LINE UP' => 'PARTIAL LINE BACKWARD',
|
---|
40 | );
|
---|
41 |
|
---|
42 | my %alias3 = (
|
---|
43 | # User defined aliasses. Even more convenient :)
|
---|
44 | );
|
---|
45 | my $txt;
|
---|
46 |
|
---|
47 | sub alias (@)
|
---|
48 | {
|
---|
49 | @_ or return %alias3;
|
---|
50 | my $alias = ref $_[0] ? $_[0] : { @_ };
|
---|
51 | @alias3{keys %$alias} = values %$alias;
|
---|
52 | } # alias
|
---|
53 |
|
---|
54 | sub alias_file ($)
|
---|
55 | {
|
---|
56 | my ($arg, $file) = @_;
|
---|
57 | if (-f $arg && File::Spec->file_name_is_absolute ($arg)) {
|
---|
58 | $file = $arg;
|
---|
59 | }
|
---|
60 | elsif ($arg =~ m/^\w+$/) {
|
---|
61 | $file = "unicore/${arg}_alias.pl";
|
---|
62 | }
|
---|
63 | else {
|
---|
64 | croak "Charnames alias files can only have identifier characters";
|
---|
65 | }
|
---|
66 | if (my @alias = do $file) {
|
---|
67 | @alias == 1 && !defined $alias[0] and
|
---|
68 | croak "$file cannot be used as alias file for charnames";
|
---|
69 | @alias % 2 and
|
---|
70 | croak "$file did not return a (valid) list of alias pairs";
|
---|
71 | alias (@alias);
|
---|
72 | return (1);
|
---|
73 | }
|
---|
74 | 0;
|
---|
75 | } # alias_file
|
---|
76 |
|
---|
77 | # This is not optimized in any way yet
|
---|
78 | sub charnames
|
---|
79 | {
|
---|
80 | my $name = shift;
|
---|
81 |
|
---|
82 | if (exists $alias1{$name}) {
|
---|
83 | $name = $alias1{$name};
|
---|
84 | }
|
---|
85 | elsif (exists $alias2{$name}) {
|
---|
86 | require warnings;
|
---|
87 | warnings::warnif('deprecated', qq{Unicode character name "$name" is deprecated, use "$alias2{$name}" instead});
|
---|
88 | $name = $alias2{$name};
|
---|
89 | }
|
---|
90 | elsif (exists $alias3{$name}) {
|
---|
91 | $name = $alias3{$name};
|
---|
92 | }
|
---|
93 |
|
---|
94 | my $ord;
|
---|
95 | my @off;
|
---|
96 | my $fname;
|
---|
97 |
|
---|
98 | if ($name eq "BYTE ORDER MARK") {
|
---|
99 | $fname = $name;
|
---|
100 | $ord = 0xFEFF;
|
---|
101 | } else {
|
---|
102 | ## Suck in the code/name list as a big string.
|
---|
103 | ## Lines look like:
|
---|
104 | ## "0052\t\tLATIN CAPITAL LETTER R\n"
|
---|
105 | $txt = do "unicore/Name.pl" unless $txt;
|
---|
106 |
|
---|
107 | ## @off will hold the index into the code/name string of the start and
|
---|
108 | ## end of the name as we find it.
|
---|
109 |
|
---|
110 | ## If :full, look for the name exactly
|
---|
111 | if ($^H{charnames_full} and $txt =~ /\t\t\Q$name\E$/m) {
|
---|
112 | @off = ($-[0], $+[0]);
|
---|
113 | }
|
---|
114 |
|
---|
115 | ## If we didn't get above, and :short allowed, look for the short name.
|
---|
116 | ## The short name is like "greek:Sigma"
|
---|
117 | unless (@off) {
|
---|
118 | if ($^H{charnames_short} and $name =~ /^(.+?):(.+)/s) {
|
---|
119 | my ($script, $cname) = ($1, $2);
|
---|
120 | my $case = $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";
|
---|
121 | if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U\Q$cname\E$/m) {
|
---|
122 | @off = ($-[0], $+[0]);
|
---|
123 | }
|
---|
124 | }
|
---|
125 | }
|
---|
126 |
|
---|
127 | ## If we still don't have it, check for the name among the loaded
|
---|
128 | ## scripts.
|
---|
129 | if (not @off) {
|
---|
130 | my $case = $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";
|
---|
131 | for my $script (@{$^H{charnames_scripts}}) {
|
---|
132 | if ($txt =~ m/\t\t$script (?:$case )?LETTER \U\Q$name\E$/m) {
|
---|
133 | @off = ($-[0], $+[0]);
|
---|
134 | last;
|
---|
135 | }
|
---|
136 | }
|
---|
137 | }
|
---|
138 |
|
---|
139 | ## If we don't have it by now, give up.
|
---|
140 | unless (@off) {
|
---|
141 | carp "Unknown charname '$name'";
|
---|
142 | return "\x{FFFD}";
|
---|
143 | }
|
---|
144 |
|
---|
145 | ##
|
---|
146 | ## Now know where in the string the name starts.
|
---|
147 | ## The code, in hex, is before that.
|
---|
148 | ##
|
---|
149 | ## The code can be 4-6 characters long, so we've got to sort of
|
---|
150 | ## go look for it, just after the newline that comes before $off[0].
|
---|
151 | ##
|
---|
152 | ## This would be much easier if unicore/Name.pl had info in
|
---|
153 | ## a name/code order, instead of code/name order.
|
---|
154 | ##
|
---|
155 | ## The +1 after the rindex() is to skip past the newline we're finding,
|
---|
156 | ## or, if the rindex() fails, to put us to an offset of zero.
|
---|
157 | ##
|
---|
158 | my $hexstart = rindex($txt, "\n", $off[0]) + 1;
|
---|
159 |
|
---|
160 | ## we know where it starts, so turn into number -
|
---|
161 | ## the ordinal for the char.
|
---|
162 | $ord = hex substr($txt, $hexstart, $off[0] - $hexstart);
|
---|
163 | }
|
---|
164 |
|
---|
165 | if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
|
---|
166 | use bytes;
|
---|
167 | return chr $ord if $ord <= 255;
|
---|
168 | my $hex = sprintf "%04x", $ord;
|
---|
169 | if (not defined $fname) {
|
---|
170 | $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
|
---|
171 | }
|
---|
172 | croak "Character 0x$hex with name '$fname' is above 0xFF";
|
---|
173 | }
|
---|
174 |
|
---|
175 | no warnings 'utf8'; # allow even illegal characters
|
---|
176 | return pack "U", $ord;
|
---|
177 | } # charnames
|
---|
178 |
|
---|
179 | sub import
|
---|
180 | {
|
---|
181 | shift; ## ignore class name
|
---|
182 |
|
---|
183 | if (not @_) {
|
---|
184 | carp("`use charnames' needs explicit imports list");
|
---|
185 | }
|
---|
186 | $^H |= $charnames::hint_bits;
|
---|
187 | $^H{charnames} = \&charnames ;
|
---|
188 |
|
---|
189 | ##
|
---|
190 | ## fill %h keys with our @_ args.
|
---|
191 | ##
|
---|
192 | my ($promote, %h, @args) = (0);
|
---|
193 | while (my $arg = shift) {
|
---|
194 | if ($arg eq ":alias") {
|
---|
195 | @_ or
|
---|
196 | croak ":alias needs an argument in charnames";
|
---|
197 | my $alias = shift;
|
---|
198 | if (ref $alias) {
|
---|
199 | ref $alias eq "HASH" or
|
---|
200 | croak "Only HASH reference supported as argument to :alias";
|
---|
201 | alias ($alias);
|
---|
202 | next;
|
---|
203 | }
|
---|
204 | if ($alias =~ m{:(\w+)$}) {
|
---|
205 | $1 eq "full" || $1 eq "short" and
|
---|
206 | croak ":alias cannot use existing pragma :$1 (reversed order?)";
|
---|
207 | alias_file ($1) and $promote = 1;
|
---|
208 | next;
|
---|
209 | }
|
---|
210 | alias_file ($alias);
|
---|
211 | next;
|
---|
212 | }
|
---|
213 | if (substr($arg, 0, 1) eq ':' and ! ($arg eq ":full" || $arg eq ":short")) {
|
---|
214 | warn "unsupported special '$arg' in charnames";
|
---|
215 | next;
|
---|
216 | }
|
---|
217 | push @args, $arg;
|
---|
218 | }
|
---|
219 | @args == 0 && $promote and @args = (":full");
|
---|
220 | @h{@args} = (1) x @args;
|
---|
221 |
|
---|
222 | $^H{charnames_full} = delete $h{':full'};
|
---|
223 | $^H{charnames_short} = delete $h{':short'};
|
---|
224 | $^H{charnames_scripts} = [map uc, keys %h];
|
---|
225 |
|
---|
226 | ##
|
---|
227 | ## If utf8? warnings are enabled, and some scripts were given,
|
---|
228 | ## see if at least we can find one letter of each script.
|
---|
229 | ##
|
---|
230 | if (warnings::enabled('utf8') && @{$^H{charnames_scripts}}) {
|
---|
231 | $txt = do "unicore/Name.pl" unless $txt;
|
---|
232 |
|
---|
233 | for my $script (@{$^H{charnames_scripts}}) {
|
---|
234 | if (not $txt =~ m/\t\t$script (?:CAPITAL |SMALL )?LETTER /) {
|
---|
235 | warnings::warn('utf8', "No such script: '$script'");
|
---|
236 | }
|
---|
237 | }
|
---|
238 | }
|
---|
239 | } # import
|
---|
240 |
|
---|
241 | my %viacode;
|
---|
242 |
|
---|
243 | sub viacode
|
---|
244 | {
|
---|
245 | if (@_ != 1) {
|
---|
246 | carp "charnames::viacode() expects one argument";
|
---|
247 | return;
|
---|
248 | }
|
---|
249 |
|
---|
250 | my $arg = shift;
|
---|
251 |
|
---|
252 | # this comes actually from Unicode::UCD, where it is the named
|
---|
253 | # function _getcode (), but it avoids the overhead of loading it
|
---|
254 | my $hex;
|
---|
255 | if ($arg =~ /^[1-9]\d*$/) {
|
---|
256 | $hex = sprintf "%04X", $arg;
|
---|
257 | } elsif ($arg =~ /^(?:[Uu]\+|0[xX])?([[:xdigit:]]+)$/) {
|
---|
258 | $hex = $1;
|
---|
259 | } else {
|
---|
260 | carp("unexpected arg \"$arg\" to charnames::viacode()");
|
---|
261 | return;
|
---|
262 | }
|
---|
263 |
|
---|
264 | # checking the length first is slightly faster
|
---|
265 | if (length($hex) > 5 && hex($hex) > 0x10FFFF) {
|
---|
266 | carp sprintf "Unicode characters only allocated up to U+10FFFF (you asked for U+%X)", $hex;
|
---|
267 | return;
|
---|
268 | }
|
---|
269 |
|
---|
270 | return $viacode{$hex} if exists $viacode{$hex};
|
---|
271 |
|
---|
272 | $txt = do "unicore/Name.pl" unless $txt;
|
---|
273 |
|
---|
274 | return unless $txt =~ m/^$hex\t\t(.+)/m;
|
---|
275 |
|
---|
276 | $viacode{$hex} = $1;
|
---|
277 | } # viacode
|
---|
278 |
|
---|
279 | my %vianame;
|
---|
280 |
|
---|
281 | sub vianame
|
---|
282 | {
|
---|
283 | if (@_ != 1) {
|
---|
284 | carp "charnames::vianame() expects one name argument";
|
---|
285 | return ()
|
---|
286 | }
|
---|
287 |
|
---|
288 | my $arg = shift;
|
---|
289 |
|
---|
290 | return chr hex $1 if $arg =~ /^U\+([0-9a-fA-F]+)$/;
|
---|
291 |
|
---|
292 | return $vianame{$arg} if exists $vianame{$arg};
|
---|
293 |
|
---|
294 | $txt = do "unicore/Name.pl" unless $txt;
|
---|
295 |
|
---|
296 | my $pos = index $txt, "\t\t$arg\n";
|
---|
297 | if ($[ <= $pos) {
|
---|
298 | my $posLF = rindex $txt, "\n", $pos;
|
---|
299 | (my $code = substr $txt, $posLF + 1, 6) =~ tr/\t//d;
|
---|
300 | return $vianame{$arg} = hex $code;
|
---|
301 |
|
---|
302 | # If $pos is at the 1st line, $posLF must be $[ - 1 (not found);
|
---|
303 | # then $posLF + 1 equals to $[ (at the beginning of $txt).
|
---|
304 | # Otherwise $posLF is the position of "\n";
|
---|
305 | # then $posLF + 1 must be the position of the next to "\n"
|
---|
306 | # (the beginning of the line).
|
---|
307 | # substr($txt, $posLF + 1, 6) may be "0000\t\t", "00A1\t\t",
|
---|
308 | # "10300\t", "100000", etc. So we can get the code via removing TAB.
|
---|
309 | } else {
|
---|
310 | return;
|
---|
311 | }
|
---|
312 | } # vianame
|
---|
313 |
|
---|
314 |
|
---|
315 | 1;
|
---|
316 | __END__
|
---|
317 |
|
---|
318 | =head1 NAME
|
---|
319 |
|
---|
320 | charnames - define character names for C<\N{named}> string literal escapes
|
---|
321 |
|
---|
322 | =head1 SYNOPSIS
|
---|
323 |
|
---|
324 | use charnames ':full';
|
---|
325 | print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
|
---|
326 |
|
---|
327 | use charnames ':short';
|
---|
328 | print "\N{greek:Sigma} is an upper-case sigma.\n";
|
---|
329 |
|
---|
330 | use charnames qw(cyrillic greek);
|
---|
331 | print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
|
---|
332 |
|
---|
333 | use charnames ":full", ":alias" => {
|
---|
334 | e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE",
|
---|
335 | };
|
---|
336 | print "\N{e_ACUTE} is a small letter e with an acute.\n";
|
---|
337 |
|
---|
338 | use charnames ();
|
---|
339 | print charnames::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE"
|
---|
340 | printf "%04X", charnames::vianame("GOTHIC LETTER AHSA"); # prints "10330"
|
---|
341 |
|
---|
342 | =head1 DESCRIPTION
|
---|
343 |
|
---|
344 | Pragma C<use charnames> supports arguments C<:full>, C<:short>, script
|
---|
345 | names and customized aliases. If C<:full> is present, for expansion of
|
---|
346 | C<\N{CHARNAME}>, the string C<CHARNAME> is first looked up in the list of
|
---|
347 | standard Unicode character names. If C<:short> is present, and
|
---|
348 | C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
|
---|
349 | as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
|
---|
350 | with script name arguments, then for C<\N{CHARNAME}> the name
|
---|
351 | C<CHARNAME> is looked up as a letter in the given scripts (in the
|
---|
352 | specified order). Customized aliases are explained in L</CUSTOM ALIASES>.
|
---|
353 |
|
---|
354 | For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
|
---|
355 | this pragma looks for the names
|
---|
356 |
|
---|
357 | SCRIPTNAME CAPITAL LETTER CHARNAME
|
---|
358 | SCRIPTNAME SMALL LETTER CHARNAME
|
---|
359 | SCRIPTNAME LETTER CHARNAME
|
---|
360 |
|
---|
361 | in the table of standard Unicode names. If C<CHARNAME> is lowercase,
|
---|
362 | then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant
|
---|
363 | is ignored.
|
---|
364 |
|
---|
365 | Note that C<\N{...}> is compile-time, it's a special form of string
|
---|
366 | constant used inside double-quoted strings: in other words, you cannot
|
---|
367 | use variables inside the C<\N{...}>. If you want similar run-time
|
---|
368 | functionality, use charnames::vianame().
|
---|
369 |
|
---|
370 | For the C0 and C1 control characters (U+0000..U+001F, U+0080..U+009F)
|
---|
371 | as of Unicode 3.1, there are no official Unicode names but you can use
|
---|
372 | instead the ISO 6429 names (LINE FEED, ESCAPE, and so forth). In
|
---|
373 | Unicode 3.2 (as of Perl 5.8) some naming changes take place ISO 6429
|
---|
374 | has been updated, see L</ALIASES>. Also note that the U+UU80, U+0081,
|
---|
375 | U+0084, and U+0099 do not have names even in ISO 6429.
|
---|
376 |
|
---|
377 | Since the Unicode standard uses "U+HHHH", so can you: "\N{U+263a}"
|
---|
378 | is the Unicode smiley face, or "\N{WHITE SMILING FACE}".
|
---|
379 |
|
---|
380 | =head1 CUSTOM TRANSLATORS
|
---|
381 |
|
---|
382 | The mechanism of translation of C<\N{...}> escapes is general and not
|
---|
383 | hardwired into F<charnames.pm>. A module can install custom
|
---|
384 | translations (inside the scope which C<use>s the module) with the
|
---|
385 | following magic incantation:
|
---|
386 |
|
---|
387 | use charnames (); # for $charnames::hint_bits
|
---|
388 | sub import {
|
---|
389 | shift;
|
---|
390 | $^H |= $charnames::hint_bits;
|
---|
391 | $^H{charnames} = \&translator;
|
---|
392 | }
|
---|
393 |
|
---|
394 | Here translator() is a subroutine which takes C<CHARNAME> as an
|
---|
395 | argument, and returns text to insert into the string instead of the
|
---|
396 | C<\N{CHARNAME}> escape. Since the text to insert should be different
|
---|
397 | in C<bytes> mode and out of it, the function should check the current
|
---|
398 | state of C<bytes>-flag as in:
|
---|
399 |
|
---|
400 | use bytes (); # for $bytes::hint_bits
|
---|
401 | sub translator {
|
---|
402 | if ($^H & $bytes::hint_bits) {
|
---|
403 | return bytes_translator(@_);
|
---|
404 | }
|
---|
405 | else {
|
---|
406 | return utf8_translator(@_);
|
---|
407 | }
|
---|
408 | }
|
---|
409 |
|
---|
410 | =head1 CUSTOM ALIASES
|
---|
411 |
|
---|
412 | This version of charnames supports three mechanisms of adding local
|
---|
413 | or customized aliases to standard Unicode naming conventions (:full)
|
---|
414 |
|
---|
415 | =head2 Anonymous hashes
|
---|
416 |
|
---|
417 | use charnames ":full", ":alias" => {
|
---|
418 | e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE",
|
---|
419 | };
|
---|
420 | my $str = "\N{e_ACUTE}";
|
---|
421 |
|
---|
422 | =head2 Alias file
|
---|
423 |
|
---|
424 | use charnames ":full", ":alias" => "pro";
|
---|
425 |
|
---|
426 | will try to read "unicore/pro_alias.pl" from the @INC path. This
|
---|
427 | file should return a list in plain perl:
|
---|
428 |
|
---|
429 | (
|
---|
430 | A_GRAVE => "LATIN CAPITAL LETTER A WITH GRAVE",
|
---|
431 | A_CIRCUM => "LATIN CAPITAL LETTER A WITH CIRCUMFLEX",
|
---|
432 | A_DIAERES => "LATIN CAPITAL LETTER A WITH DIAERESIS",
|
---|
433 | A_TILDE => "LATIN CAPITAL LETTER A WITH TILDE",
|
---|
434 | A_BREVE => "LATIN CAPITAL LETTER A WITH BREVE",
|
---|
435 | A_RING => "LATIN CAPITAL LETTER A WITH RING ABOVE",
|
---|
436 | A_MACRON => "LATIN CAPITAL LETTER A WITH MACRON",
|
---|
437 | );
|
---|
438 |
|
---|
439 | =head2 Alias shortcut
|
---|
440 |
|
---|
441 | use charnames ":alias" => ":pro";
|
---|
442 |
|
---|
443 | works exactly the same as the alias pairs, only this time,
|
---|
444 | ":full" is inserted automatically as first argument (if no
|
---|
445 | other argument is given).
|
---|
446 |
|
---|
447 | =head1 charnames::viacode(code)
|
---|
448 |
|
---|
449 | Returns the full name of the character indicated by the numeric code.
|
---|
450 | The example
|
---|
451 |
|
---|
452 | print charnames::viacode(0x2722);
|
---|
453 |
|
---|
454 | prints "FOUR TEARDROP-SPOKED ASTERISK".
|
---|
455 |
|
---|
456 | Returns undef if no name is known for the code.
|
---|
457 |
|
---|
458 | This works only for the standard names, and does not yet apply
|
---|
459 | to custom translators.
|
---|
460 |
|
---|
461 | Notice that the name returned for of U+FEFF is "ZERO WIDTH NO-BREAK
|
---|
462 | SPACE", not "BYTE ORDER MARK".
|
---|
463 |
|
---|
464 | =head1 charnames::vianame(name)
|
---|
465 |
|
---|
466 | Returns the code point indicated by the name.
|
---|
467 | The example
|
---|
468 |
|
---|
469 | printf "%04X", charnames::vianame("FOUR TEARDROP-SPOKED ASTERISK");
|
---|
470 |
|
---|
471 | prints "2722".
|
---|
472 |
|
---|
473 | Returns undef if the name is unknown.
|
---|
474 |
|
---|
475 | This works only for the standard names, and does not yet apply
|
---|
476 | to custom translators.
|
---|
477 |
|
---|
478 | =head1 ALIASES
|
---|
479 |
|
---|
480 | A few aliases have been defined for convenience: instead of having
|
---|
481 | to use the official names
|
---|
482 |
|
---|
483 | LINE FEED (LF)
|
---|
484 | FORM FEED (FF)
|
---|
485 | CARRIAGE RETURN (CR)
|
---|
486 | NEXT LINE (NEL)
|
---|
487 |
|
---|
488 | (yes, with parentheses) one can use
|
---|
489 |
|
---|
490 | LINE FEED
|
---|
491 | FORM FEED
|
---|
492 | CARRIAGE RETURN
|
---|
493 | NEXT LINE
|
---|
494 | LF
|
---|
495 | FF
|
---|
496 | CR
|
---|
497 | NEL
|
---|
498 |
|
---|
499 | One can also use
|
---|
500 |
|
---|
501 | BYTE ORDER MARK
|
---|
502 | BOM
|
---|
503 |
|
---|
504 | and
|
---|
505 |
|
---|
506 | ZWNJ
|
---|
507 | ZWJ
|
---|
508 |
|
---|
509 | for ZERO WIDTH NON-JOINER and ZERO WIDTH JOINER.
|
---|
510 |
|
---|
511 | For backward compatibility one can use the old names for
|
---|
512 | certain C0 and C1 controls
|
---|
513 |
|
---|
514 | old new
|
---|
515 |
|
---|
516 | HORIZONTAL TABULATION CHARACTER TABULATION
|
---|
517 | VERTICAL TABULATION LINE TABULATION
|
---|
518 | FILE SEPARATOR INFORMATION SEPARATOR FOUR
|
---|
519 | GROUP SEPARATOR INFORMATION SEPARATOR THREE
|
---|
520 | RECORD SEPARATOR INFORMATION SEPARATOR TWO
|
---|
521 | UNIT SEPARATOR INFORMATION SEPARATOR ONE
|
---|
522 | PARTIAL LINE DOWN PARTIAL LINE FORWARD
|
---|
523 | PARTIAL LINE UP PARTIAL LINE BACKWARD
|
---|
524 |
|
---|
525 | but the old names in addition to giving the character
|
---|
526 | will also give a warning about being deprecated.
|
---|
527 |
|
---|
528 | =head1 ILLEGAL CHARACTERS
|
---|
529 |
|
---|
530 | If you ask by name for a character that does not exist, a warning is
|
---|
531 | given and the Unicode I<replacement character> "\x{FFFD}" is returned.
|
---|
532 |
|
---|
533 | If you ask by code for a character that does not exist, no warning is
|
---|
534 | given and C<undef> is returned. (Though if you ask for a code point
|
---|
535 | past U+10FFFF you do get a warning.)
|
---|
536 |
|
---|
537 | =head1 BUGS
|
---|
538 |
|
---|
539 | Since evaluation of the translation function happens in a middle of
|
---|
540 | compilation (of a string literal), the translation function should not
|
---|
541 | do any C<eval>s or C<require>s. This restriction should be lifted in
|
---|
542 | a future version of Perl.
|
---|
543 |
|
---|
544 | =cut
|
---|