source: main/trunk/greenstone2/perllib/cpan/URI/_punycode.pm@ 27174

Last change on this file since 27174 was 27174, checked in by davidb, 11 years ago

Perl modules from CPAN that are used in supporting activate.pl, but not part of the Perl core. Only PMs included.

File size: 4.5 KB
Line 
1package URI::_punycode;
2
3use strict;
4our $VERSION = "0.04";
5
6require Exporter;
7our @ISA = qw(Exporter);
8our @EXPORT = qw(encode_punycode decode_punycode);
9
10use integer;
11
12our $DEBUG = 0;
13
14use constant BASE => 36;
15use constant TMIN => 1;
16use constant TMAX => 26;
17use constant SKEW => 38;
18use constant DAMP => 700;
19use constant INITIAL_BIAS => 72;
20use constant INITIAL_N => 128;
21
22my $Delimiter = chr 0x2D;
23my $BasicRE = qr/[\x00-\x7f]/;
24
25sub _croak { require Carp; Carp::croak(@_); }
26
27sub digit_value {
28 my $code = shift;
29 return ord($code) - ord("A") if $code =~ /[A-Z]/;
30 return ord($code) - ord("a") if $code =~ /[a-z]/;
31 return ord($code) - ord("0") + 26 if $code =~ /[0-9]/;
32 return;
33}
34
35sub code_point {
36 my $digit = shift;
37 return $digit + ord('a') if 0 <= $digit && $digit <= 25;
38 return $digit + ord('0') - 26 if 26 <= $digit && $digit <= 36;
39 die 'NOT COME HERE';
40}
41
42sub adapt {
43 my($delta, $numpoints, $firsttime) = @_;
44 $delta = $firsttime ? $delta / DAMP : $delta / 2;
45 $delta += $delta / $numpoints;
46 my $k = 0;
47 while ($delta > ((BASE - TMIN) * TMAX) / 2) {
48 $delta /= BASE - TMIN;
49 $k += BASE;
50 }
51 return $k + (((BASE - TMIN + 1) * $delta) / ($delta + SKEW));
52}
53
54sub decode_punycode {
55 my $code = shift;
56
57 my $n = INITIAL_N;
58 my $i = 0;
59 my $bias = INITIAL_BIAS;
60 my @output;
61
62 if ($code =~ s/(.*)$Delimiter//o) {
63 push @output, map ord, split //, $1;
64 return _croak('non-basic code point') unless $1 =~ /^$BasicRE*$/o;
65 }
66
67 while ($code) {
68 my $oldi = $i;
69 my $w = 1;
70 LOOP:
71 for (my $k = BASE; 1; $k += BASE) {
72 my $cp = substr($code, 0, 1, '');
73 my $digit = digit_value($cp);
74 defined $digit or return _croak("invalid punycode input");
75 $i += $digit * $w;
76 my $t = ($k <= $bias) ? TMIN
77 : ($k >= $bias + TMAX) ? TMAX : $k - $bias;
78 last LOOP if $digit < $t;
79 $w *= (BASE - $t);
80 }
81 $bias = adapt($i - $oldi, @output + 1, $oldi == 0);
82 warn "bias becomes $bias" if $DEBUG;
83 $n += $i / (@output + 1);
84 $i = $i % (@output + 1);
85 splice(@output, $i, 0, $n);
86 warn join " ", map sprintf('%04x', $_), @output if $DEBUG;
87 $i++;
88 }
89 return join '', map chr, @output;
90}
91
92sub encode_punycode {
93 my $input = shift;
94 my @input = split //, $input;
95
96 my $n = INITIAL_N;
97 my $delta = 0;
98 my $bias = INITIAL_BIAS;
99
100 my @output;
101 my @basic = grep /$BasicRE/, @input;
102 my $h = my $b = @basic;
103 push @output, @basic;
104 push @output, $Delimiter if $b && $h < @input;
105 warn "basic codepoints: (@output)" if $DEBUG;
106
107 while ($h < @input) {
108 my $m = min(grep { $_ >= $n } map ord, @input);
109 warn sprintf "next code point to insert is %04x", $m if $DEBUG;
110 $delta += ($m - $n) * ($h + 1);
111 $n = $m;
112 for my $i (@input) {
113 my $c = ord($i);
114 $delta++ if $c < $n;
115 if ($c == $n) {
116 my $q = $delta;
117 LOOP:
118 for (my $k = BASE; 1; $k += BASE) {
119 my $t = ($k <= $bias) ? TMIN :
120 ($k >= $bias + TMAX) ? TMAX : $k - $bias;
121 last LOOP if $q < $t;
122 my $cp = code_point($t + (($q - $t) % (BASE - $t)));
123 push @output, chr($cp);
124 $q = ($q - $t) / (BASE - $t);
125 }
126 push @output, chr(code_point($q));
127 $bias = adapt($delta, $h + 1, $h == $b);
128 warn "bias becomes $bias" if $DEBUG;
129 $delta = 0;
130 $h++;
131 }
132 }
133 $delta++;
134 $n++;
135 }
136 return join '', @output;
137}
138
139sub min {
140 my $min = shift;
141 for (@_) { $min = $_ if $_ <= $min }
142 return $min;
143}
144
1451;
146__END__
147
148=head1 NAME
149
150URI::_punycode - encodes Unicode string in Punycode
151
152=head1 SYNOPSIS
153
154 use URI::_punycode;
155 $punycode = encode_punycode($unicode);
156 $unicode = decode_punycode($punycode);
157
158=head1 DESCRIPTION
159
160URI::_punycode is a module to encode / decode Unicode strings into
161Punycode, an efficient encoding of Unicode for use with IDNA.
162
163This module requires Perl 5.6.0 or over to handle UTF8 flagged Unicode
164strings.
165
166=head1 FUNCTIONS
167
168This module exports following functions by default.
169
170=over 4
171
172=item encode_punycode
173
174 $punycode = encode_punycode($unicode);
175
176takes Unicode string (UTF8-flagged variable) and returns Punycode
177encoding for it.
178
179=item decode_punycode
180
181 $unicode = decode_punycode($punycode)
182
183takes Punycode encoding and returns original Unicode string.
184
185=back
186
187These functions throw exceptions on failure. You can catch 'em via
188C<eval>.
189
190=head1 AUTHOR
191
192Tatsuhiko Miyagawa E<lt>[email protected]<gt> is the author of
193IDNA::Punycode v0.02 which was the basis for this module.
194
195This library is free software; you can redistribute it and/or modify
196it under the same terms as Perl itself.
197
198=head1 SEE ALSO
199
200L<IDNA::Punycode>, RFC 3492
201
202=cut
Note: See TracBrowser for help on using the repository browser.