1 | package URI::_idna;
|
---|
2 |
|
---|
3 | # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
|
---|
4 | # based on Python-2.6.4/Lib/encodings/idna.py
|
---|
5 |
|
---|
6 | use strict;
|
---|
7 | use URI::_punycode qw(encode_punycode decode_punycode);
|
---|
8 | use Carp qw(croak);
|
---|
9 |
|
---|
10 | BEGIN {
|
---|
11 | *URI::_idna::_ENV_::JOIN_LEAKS_UTF8_FLAGS = $] < 5.008_003
|
---|
12 | ? sub () { 1 }
|
---|
13 | : sub () { 0 }
|
---|
14 | ;
|
---|
15 | }
|
---|
16 |
|
---|
17 | my $ASCII = qr/^[\x00-\x7F]*\z/;
|
---|
18 |
|
---|
19 | sub encode {
|
---|
20 | my $idomain = shift;
|
---|
21 | my @labels = split(/\./, $idomain, -1);
|
---|
22 | my @last_empty;
|
---|
23 | push(@last_empty, pop @labels) if @labels > 1 && $labels[-1] eq "";
|
---|
24 | for (@labels) {
|
---|
25 | $_ = ToASCII($_);
|
---|
26 | }
|
---|
27 |
|
---|
28 | return eval 'join(".", @labels, @last_empty)' if URI::_idna::_ENV_::JOIN_LEAKS_UTF8_FLAGS;
|
---|
29 | return join(".", @labels, @last_empty);
|
---|
30 | }
|
---|
31 |
|
---|
32 | sub decode {
|
---|
33 | my $domain = shift;
|
---|
34 | return join(".", map ToUnicode($_), split(/\./, $domain, -1))
|
---|
35 | }
|
---|
36 |
|
---|
37 | sub nameprep { # XXX real implementation missing
|
---|
38 | my $label = shift;
|
---|
39 | $label = lc($label);
|
---|
40 | return $label;
|
---|
41 | }
|
---|
42 |
|
---|
43 | sub check_size {
|
---|
44 | my $label = shift;
|
---|
45 | croak "Label empty" if $label eq "";
|
---|
46 | croak "Label too long" if length($label) > 63;
|
---|
47 | return $label;
|
---|
48 | }
|
---|
49 |
|
---|
50 | sub ToASCII {
|
---|
51 | my $label = shift;
|
---|
52 | return check_size($label) if $label =~ $ASCII;
|
---|
53 |
|
---|
54 | # Step 2: nameprep
|
---|
55 | $label = nameprep($label);
|
---|
56 | # Step 3: UseSTD3ASCIIRules is false
|
---|
57 | # Step 4: try ASCII again
|
---|
58 | return check_size($label) if $label =~ $ASCII;
|
---|
59 |
|
---|
60 | # Step 5: Check ACE prefix
|
---|
61 | if ($label =~ /^xn--/) {
|
---|
62 | croak "Label starts with ACE prefix";
|
---|
63 | }
|
---|
64 |
|
---|
65 | # Step 6: Encode with PUNYCODE
|
---|
66 | $label = encode_punycode($label);
|
---|
67 |
|
---|
68 | # Step 7: Prepend ACE prefix
|
---|
69 | $label = "xn--$label";
|
---|
70 |
|
---|
71 | # Step 8: Check size
|
---|
72 | return check_size($label);
|
---|
73 | }
|
---|
74 |
|
---|
75 | sub ToUnicode {
|
---|
76 | my $label = shift;
|
---|
77 | $label = nameprep($label) unless $label =~ $ASCII;
|
---|
78 | return $label unless $label =~ /^xn--/;
|
---|
79 | my $result = decode_punycode(substr($label, 4));
|
---|
80 | my $label2 = ToASCII($result);
|
---|
81 | if (lc($label) ne $label2) {
|
---|
82 | croak "IDNA does not round-trip: '\L$label\E' vs '$label2'";
|
---|
83 | }
|
---|
84 | return $result;
|
---|
85 | }
|
---|
86 |
|
---|
87 | 1;
|
---|