package Encode::CN;

# Refuse to load on EBCDIC platforms.  The check lives in a BEGIN block so
# that it runs at compile time, ahead of the `use` statements further down.
BEGIN {
    # ord("A") is 193 on EBCDIC code pages (it is 65 on ASCII platforms).
    if ( ord("A") == 193 ) {
        die "Encode::CN not supported on EBCDIC\n";
    }
}

use strict;
use warnings;

# Build the version string from the RCS "Revision" keyword: the first number
# is printed as-is and each following number as two digits ("2.0" -> "2.00").
# Kept on a single line so that tools which extract $VERSION by scanning the
# source still see the whole expression.
our $VERSION = do { my @r = ( q$Revision: 2.0 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

use Encode;
use XSLoader;

# Load the compiled (XS) part of this module.
XSLoader::load( __PACKAGE__, $VERSION );

# Relocated from Encode.pm

use Encode::CN::HZ;

# use Encode::CN::2022_CN;

1;
__END__

=head1 NAME

Encode::CN - China-based Chinese Encodings

=head1 SYNOPSIS

    use Encode qw/encode decode/;
    $euc_cn = encode("euc-cn", $utf8);   # loads Encode::CN implicitly
    $utf8   = decode("euc-cn", $euc_cn); # ditto

=head1 DESCRIPTION

This module implements China-based Chinese charset encodings.
The supported encodings are as follows.

  Canonical   Alias             Description
  --------------------------------------------------------------------
  euc-cn      /\beuc.*cn$/i     EUC (Extended Unix Code)
              /\bcn.*euc$/i
              /\bGB[-_ ]?2312(?:\D.*$|$)/i (see below)
  gb2312-raw                    The raw (low-bit) GB2312 character map
  gb12345-raw                   Traditional Chinese counterpart to
                                GB2312 (raw)
  iso-ir-165                    GB2312 + GB6345 + GB8565 + additions
  MacChineseSimp                GB2312 + Apple Additions
  cp936                         Code Page 936, also known as GBK
                                (Extended GuoBiao)
  hz                            7-bit escaped GB2312 encoding
  --------------------------------------------------------------------

For details on how to use this module, see L<Encode>.

=head1 NOTES

Due to size concerns, C<GB 18030> (an extension to C<GBK>) is distributed
separately on CPAN, under the name L<Encode::HanExtra>.  That module
also contains extra Taiwan-based encodings.

=head1 BUGS

When you see C<charset=gb2312> in e-mail messages and on web pages, it
really means the C<euc-cn> encoding.  To work around that, C<gb2312> is
aliased to C<euc-cn>.  Use C<gb2312-raw> when you really mean the raw
GB2312 character map.

The ASCII region (0x00-0x7f) is preserved for all encodings, even though
this conflicts with mappings by the Unicode Consortium.  See

L<http://www.debian.or.jp/~kubota/unicode-symbols.html.en>

to find out why it is implemented that way.

=head1 SEE ALSO

L<Encode>

=cut