[27174] | 1 | package URI::_punycode;
|
---|
| 2 |
|
---|
| 3 | use strict;
|
---|
| 4 | our $VERSION = "0.04";
|
---|
| 5 |
|
---|
| 6 | require Exporter;
|
---|
| 7 | our @ISA = qw(Exporter);
|
---|
| 8 | our @EXPORT = qw(encode_punycode decode_punycode);
|
---|
| 9 |
|
---|
| 10 | use integer;
|
---|
| 11 |
|
---|
| 12 | our $DEBUG = 0;
|
---|
| 13 |
|
---|
| 14 | use constant BASE => 36;
|
---|
| 15 | use constant TMIN => 1;
|
---|
| 16 | use constant TMAX => 26;
|
---|
| 17 | use constant SKEW => 38;
|
---|
| 18 | use constant DAMP => 700;
|
---|
| 19 | use constant INITIAL_BIAS => 72;
|
---|
| 20 | use constant INITIAL_N => 128;
|
---|
| 21 |
|
---|
| 22 | my $Delimiter = chr 0x2D;
|
---|
| 23 | my $BasicRE = qr/[\x00-\x7f]/;
|
---|
| 24 |
|
---|
| 25 | sub _croak { require Carp; Carp::croak(@_); }
|
---|
| 26 |
|
---|
| 27 | sub digit_value {
|
---|
| 28 | my $code = shift;
|
---|
| 29 | return ord($code) - ord("A") if $code =~ /[A-Z]/;
|
---|
| 30 | return ord($code) - ord("a") if $code =~ /[a-z]/;
|
---|
| 31 | return ord($code) - ord("0") + 26 if $code =~ /[0-9]/;
|
---|
| 32 | return;
|
---|
| 33 | }
|
---|
| 34 |
|
---|
| 35 | sub code_point {
|
---|
| 36 | my $digit = shift;
|
---|
| 37 | return $digit + ord('a') if 0 <= $digit && $digit <= 25;
|
---|
| 38 | return $digit + ord('0') - 26 if 26 <= $digit && $digit <= 36;
|
---|
| 39 | die 'NOT COME HERE';
|
---|
| 40 | }
|
---|
| 41 |
|
---|
| 42 | sub adapt {
|
---|
| 43 | my($delta, $numpoints, $firsttime) = @_;
|
---|
| 44 | $delta = $firsttime ? $delta / DAMP : $delta / 2;
|
---|
| 45 | $delta += $delta / $numpoints;
|
---|
| 46 | my $k = 0;
|
---|
| 47 | while ($delta > ((BASE - TMIN) * TMAX) / 2) {
|
---|
| 48 | $delta /= BASE - TMIN;
|
---|
| 49 | $k += BASE;
|
---|
| 50 | }
|
---|
| 51 | return $k + (((BASE - TMIN + 1) * $delta) / ($delta + SKEW));
|
---|
| 52 | }
|
---|
| 53 |
|
---|
| 54 | sub decode_punycode {
|
---|
| 55 | my $code = shift;
|
---|
| 56 |
|
---|
| 57 | my $n = INITIAL_N;
|
---|
| 58 | my $i = 0;
|
---|
| 59 | my $bias = INITIAL_BIAS;
|
---|
| 60 | my @output;
|
---|
| 61 |
|
---|
| 62 | if ($code =~ s/(.*)$Delimiter//o) {
|
---|
| 63 | push @output, map ord, split //, $1;
|
---|
| 64 | return _croak('non-basic code point') unless $1 =~ /^$BasicRE*$/o;
|
---|
| 65 | }
|
---|
| 66 |
|
---|
| 67 | while ($code) {
|
---|
| 68 | my $oldi = $i;
|
---|
| 69 | my $w = 1;
|
---|
| 70 | LOOP:
|
---|
| 71 | for (my $k = BASE; 1; $k += BASE) {
|
---|
| 72 | my $cp = substr($code, 0, 1, '');
|
---|
| 73 | my $digit = digit_value($cp);
|
---|
| 74 | defined $digit or return _croak("invalid punycode input");
|
---|
| 75 | $i += $digit * $w;
|
---|
| 76 | my $t = ($k <= $bias) ? TMIN
|
---|
| 77 | : ($k >= $bias + TMAX) ? TMAX : $k - $bias;
|
---|
| 78 | last LOOP if $digit < $t;
|
---|
| 79 | $w *= (BASE - $t);
|
---|
| 80 | }
|
---|
| 81 | $bias = adapt($i - $oldi, @output + 1, $oldi == 0);
|
---|
| 82 | warn "bias becomes $bias" if $DEBUG;
|
---|
| 83 | $n += $i / (@output + 1);
|
---|
| 84 | $i = $i % (@output + 1);
|
---|
| 85 | splice(@output, $i, 0, $n);
|
---|
| 86 | warn join " ", map sprintf('%04x', $_), @output if $DEBUG;
|
---|
| 87 | $i++;
|
---|
| 88 | }
|
---|
| 89 | return join '', map chr, @output;
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 | sub encode_punycode {
|
---|
| 93 | my $input = shift;
|
---|
| 94 | my @input = split //, $input;
|
---|
| 95 |
|
---|
| 96 | my $n = INITIAL_N;
|
---|
| 97 | my $delta = 0;
|
---|
| 98 | my $bias = INITIAL_BIAS;
|
---|
| 99 |
|
---|
| 100 | my @output;
|
---|
| 101 | my @basic = grep /$BasicRE/, @input;
|
---|
| 102 | my $h = my $b = @basic;
|
---|
| 103 | push @output, @basic;
|
---|
| 104 | push @output, $Delimiter if $b && $h < @input;
|
---|
| 105 | warn "basic codepoints: (@output)" if $DEBUG;
|
---|
| 106 |
|
---|
| 107 | while ($h < @input) {
|
---|
| 108 | my $m = min(grep { $_ >= $n } map ord, @input);
|
---|
| 109 | warn sprintf "next code point to insert is %04x", $m if $DEBUG;
|
---|
| 110 | $delta += ($m - $n) * ($h + 1);
|
---|
| 111 | $n = $m;
|
---|
| 112 | for my $i (@input) {
|
---|
| 113 | my $c = ord($i);
|
---|
| 114 | $delta++ if $c < $n;
|
---|
| 115 | if ($c == $n) {
|
---|
| 116 | my $q = $delta;
|
---|
| 117 | LOOP:
|
---|
| 118 | for (my $k = BASE; 1; $k += BASE) {
|
---|
| 119 | my $t = ($k <= $bias) ? TMIN :
|
---|
| 120 | ($k >= $bias + TMAX) ? TMAX : $k - $bias;
|
---|
| 121 | last LOOP if $q < $t;
|
---|
| 122 | my $cp = code_point($t + (($q - $t) % (BASE - $t)));
|
---|
| 123 | push @output, chr($cp);
|
---|
| 124 | $q = ($q - $t) / (BASE - $t);
|
---|
| 125 | }
|
---|
| 126 | push @output, chr(code_point($q));
|
---|
| 127 | $bias = adapt($delta, $h + 1, $h == $b);
|
---|
| 128 | warn "bias becomes $bias" if $DEBUG;
|
---|
| 129 | $delta = 0;
|
---|
| 130 | $h++;
|
---|
| 131 | }
|
---|
| 132 | }
|
---|
| 133 | $delta++;
|
---|
| 134 | $n++;
|
---|
| 135 | }
|
---|
| 136 | return join '', @output;
|
---|
| 137 | }
|
---|
| 138 |
|
---|
| 139 | sub min {
|
---|
| 140 | my $min = shift;
|
---|
| 141 | for (@_) { $min = $_ if $_ <= $min }
|
---|
| 142 | return $min;
|
---|
| 143 | }
|
---|
| 144 |
|
---|
| 145 | 1;
|
---|
| 146 | __END__
|
---|
| 147 |
|
---|
| 148 | =head1 NAME
|
---|
| 149 |
|
---|
| 150 | URI::_punycode - encodes Unicode string in Punycode
|
---|
| 151 |
|
---|
| 152 | =head1 SYNOPSIS
|
---|
| 153 |
|
---|
| 154 | use URI::_punycode;
|
---|
| 155 | $punycode = encode_punycode($unicode);
|
---|
| 156 | $unicode = decode_punycode($punycode);
|
---|
| 157 |
|
---|
| 158 | =head1 DESCRIPTION
|
---|
| 159 |
|
---|
| 160 | URI::_punycode is a module to encode / decode Unicode strings into
|
---|
| 161 | Punycode, an efficient encoding of Unicode for use with IDNA.
|
---|
| 162 |
|
---|
| 163 | This module requires Perl 5.6.0 or over to handle UTF8 flagged Unicode
|
---|
| 164 | strings.
|
---|
| 165 |
|
---|
| 166 | =head1 FUNCTIONS
|
---|
| 167 |
|
---|
| 168 | This module exports following functions by default.
|
---|
| 169 |
|
---|
| 170 | =over 4
|
---|
| 171 |
|
---|
| 172 | =item encode_punycode
|
---|
| 173 |
|
---|
| 174 | $punycode = encode_punycode($unicode);
|
---|
| 175 |
|
---|
| 176 | takes Unicode string (UTF8-flagged variable) and returns Punycode
|
---|
| 177 | encoding for it.
|
---|
| 178 |
|
---|
| 179 | =item decode_punycode
|
---|
| 180 |
|
---|
| 181 | $unicode = decode_punycode($punycode)
|
---|
| 182 |
|
---|
| 183 | takes Punycode encoding and returns original Unicode string.
|
---|
| 184 |
|
---|
| 185 | =back
|
---|
| 186 |
|
---|
| 187 | These functions throw exceptions on failure. You can catch 'em via
|
---|
| 188 | C<eval>.
|
---|
| 189 |
|
---|
| 190 | =head1 AUTHOR
|
---|
| 191 |
|
---|
| 192 | Tatsuhiko Miyagawa E<lt>[email protected]<gt> is the author of
|
---|
| 193 | IDNA::Punycode v0.02 which was the basis for this module.
|
---|
| 194 |
|
---|
| 195 | This library is free software; you can redistribute it and/or modify
|
---|
| 196 | it under the same terms as Perl itself.
|
---|
| 197 |
|
---|
| 198 | =head1 SEE ALSO
|
---|
| 199 |
|
---|
| 200 | L<IDNA::Punycode>, RFC 3492
|
---|
| 201 |
|
---|
| 202 | =cut
|
---|