1 | #
|
---|
2 | # Locale::Script - ISO codes for script identification (ISO 15924)
|
---|
3 | #
|
---|
4 | # $Id: Script.pm,v 2.7 2004/06/10 21:19:34 neilb Exp $
|
---|
5 | #
|
---|
6 |
|
---|
7 | package Locale::Script;
|
---|
8 | use strict;
|
---|
9 | require 5.002;
|
---|
10 |
|
---|
11 | require Exporter;
|
---|
12 | use Carp;
|
---|
13 | use Locale::Constants;
|
---|
14 |
|
---|
15 |
|
---|
16 | #-----------------------------------------------------------------------
|
---|
17 | # Public Global Variables
|
---|
18 | #-----------------------------------------------------------------------
|
---|
19 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
|
---|
20 | $VERSION = sprintf("%d.%02d", q$Revision: 2.7 $ =~ /(\d+)\.(\d+)/);
|
---|
21 | @ISA = qw(Exporter);
|
---|
22 | @EXPORT = qw(code2script script2code
|
---|
23 | all_script_codes all_script_names
|
---|
24 | script_code2code
|
---|
25 | LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC);
|
---|
26 |
|
---|
27 | #-----------------------------------------------------------------------
|
---|
28 | # Private Global Variables
|
---|
29 | #-----------------------------------------------------------------------
|
---|
30 | my $CODES = [];
|
---|
31 | my $COUNTRIES = [];
|
---|
32 |
|
---|
33 |
|
---|
34 | #=======================================================================
|
---|
35 | #
|
---|
36 | # code2script ( CODE [, CODESET ] )
|
---|
37 | #
|
---|
38 | #=======================================================================
|
---|
39 | sub code2script
|
---|
40 | {
|
---|
41 | my $code = shift;
|
---|
42 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
|
---|
43 |
|
---|
44 |
|
---|
45 | return undef unless defined $code;
|
---|
46 |
|
---|
47 | #-------------------------------------------------------------------
|
---|
48 | # Make sure the code is in the right form before we use it
|
---|
49 | # to look up the corresponding script.
|
---|
50 | # We have to sprintf because the codes are given as 3-digits,
|
---|
51 | # with leading 0's. Eg 070 for Egyptian demotic.
|
---|
52 | #-------------------------------------------------------------------
|
---|
53 | if ($codeset == LOCALE_CODE_NUMERIC)
|
---|
54 | {
|
---|
55 | return undef if ($code =~ /\D/);
|
---|
56 | $code = sprintf("%.3d", $code);
|
---|
57 | }
|
---|
58 | else
|
---|
59 | {
|
---|
60 | $code = lc($code);
|
---|
61 | }
|
---|
62 |
|
---|
63 | if (exists $CODES->[$codeset]->{$code})
|
---|
64 | {
|
---|
65 | return $CODES->[$codeset]->{$code};
|
---|
66 | }
|
---|
67 | else
|
---|
68 | {
|
---|
69 | #---------------------------------------------------------------
|
---|
70 | # no such script code!
|
---|
71 | #---------------------------------------------------------------
|
---|
72 | return undef;
|
---|
73 | }
|
---|
74 | }
|
---|
75 |
|
---|
76 |
|
---|
77 | #=======================================================================
|
---|
78 | #
|
---|
79 | # script2code ( SCRIPT [, CODESET ] )
|
---|
80 | #
|
---|
81 | #=======================================================================
|
---|
82 | sub script2code
|
---|
83 | {
|
---|
84 | my $script = shift;
|
---|
85 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
|
---|
86 |
|
---|
87 |
|
---|
88 | return undef unless defined $script;
|
---|
89 | $script = lc($script);
|
---|
90 | if (exists $COUNTRIES->[$codeset]->{$script})
|
---|
91 | {
|
---|
92 | return $COUNTRIES->[$codeset]->{$script};
|
---|
93 | }
|
---|
94 | else
|
---|
95 | {
|
---|
96 | #---------------------------------------------------------------
|
---|
97 | # no such script!
|
---|
98 | #---------------------------------------------------------------
|
---|
99 | return undef;
|
---|
100 | }
|
---|
101 | }
|
---|
102 |
|
---|
103 |
|
---|
104 | #=======================================================================
|
---|
105 | #
|
---|
106 | # script_code2code ( CODE, IN-CODESET, OUT-CODESET )
|
---|
107 | #
|
---|
108 | #=======================================================================
|
---|
109 | sub script_code2code
|
---|
110 | {
|
---|
111 | (@_ == 3) or croak "script_code2code() takes 3 arguments!";
|
---|
112 |
|
---|
113 | my $code = shift;
|
---|
114 | my $inset = shift;
|
---|
115 | my $outset = shift;
|
---|
116 | my $outcode;
|
---|
117 | my $script;
|
---|
118 |
|
---|
119 |
|
---|
120 | return undef if $inset == $outset;
|
---|
121 | $script = code2script($code, $inset);
|
---|
122 | return undef if not defined $script;
|
---|
123 | $outcode = script2code($script, $outset);
|
---|
124 | return $outcode;
|
---|
125 | }
|
---|
126 |
|
---|
127 |
|
---|
128 | #=======================================================================
|
---|
129 | #
|
---|
130 | # all_script_codes()
|
---|
131 | #
|
---|
132 | #=======================================================================
|
---|
133 | sub all_script_codes
|
---|
134 | {
|
---|
135 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
|
---|
136 |
|
---|
137 | return keys %{ $CODES->[$codeset] };
|
---|
138 | }
|
---|
139 |
|
---|
140 |
|
---|
141 | #=======================================================================
|
---|
142 | #
|
---|
143 | # all_script_names()
|
---|
144 | #
|
---|
145 | #=======================================================================
|
---|
146 | sub all_script_names
|
---|
147 | {
|
---|
148 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
|
---|
149 |
|
---|
150 | return values %{ $CODES->[$codeset] };
|
---|
151 | }
|
---|
152 |
|
---|
153 |
|
---|
154 | #=======================================================================
|
---|
155 | #
|
---|
156 | # initialisation code - stuff the DATA into the ALPHA2 hash
|
---|
157 | #
|
---|
158 | #=======================================================================
|
---|
159 | {
|
---|
160 | my ($alpha2, $alpha3, $numeric);
|
---|
161 | my $script;
|
---|
162 | local $_;
|
---|
163 |
|
---|
164 |
|
---|
165 | while (<DATA>)
|
---|
166 | {
|
---|
167 | next unless /\S/;
|
---|
168 | chop;
|
---|
169 | ($alpha2, $alpha3, $numeric, $script) = split(/:/, $_, 4);
|
---|
170 |
|
---|
171 | $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $script;
|
---|
172 | $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$script"} = $alpha2;
|
---|
173 |
|
---|
174 | if ($alpha3)
|
---|
175 | {
|
---|
176 | $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $script;
|
---|
177 | $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$script"} = $alpha3;
|
---|
178 | }
|
---|
179 |
|
---|
180 | if ($numeric)
|
---|
181 | {
|
---|
182 | $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $script;
|
---|
183 | $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$script"} = $numeric;
|
---|
184 | }
|
---|
185 |
|
---|
186 | }
|
---|
187 |
|
---|
188 | close(DATA);
|
---|
189 | }
|
---|
190 |
|
---|
191 | 1;
|
---|
192 |
|
---|
193 | __DATA__
|
---|
194 | am:ama:130:Aramaic
|
---|
195 | ar:ara:160:Arabic
|
---|
196 | av:ave:151:Avestan
|
---|
197 | bh:bhm:300:Brahmi (Ashoka)
|
---|
198 | bi:bid:372:Buhid
|
---|
199 | bn:ben:325:Bengali
|
---|
200 | bo:bod:330:Tibetan
|
---|
201 | bp:bpm:285:Bopomofo
|
---|
202 | br:brl:570:Braille
|
---|
203 | bt:btk:365:Batak
|
---|
204 | bu:bug:367:Buginese (Makassar)
|
---|
205 | by:bys:550:Blissymbols
|
---|
206 | ca:cam:358:Cham
|
---|
207 | ch:chu:221:Old Church Slavonic
|
---|
208 | ci:cir:291:Cirth
|
---|
209 | cm:cmn:402:Cypro-Minoan
|
---|
210 | co:cop:205:Coptic
|
---|
211 | cp:cpr:403:Cypriote syllabary
|
---|
212 | cy:cyr:220:Cyrillic
|
---|
213 | ds:dsr:250:Deserel (Mormon)
|
---|
214 | dv:dvn:315:Devanagari (Nagari)
|
---|
215 | ed:egd:070:Egyptian demotic
|
---|
216 | eg:egy:050:Egyptian hieroglyphs
|
---|
217 | eh:egh:060:Egyptian hieratic
|
---|
218 | el:ell:200:Greek
|
---|
219 | eo:eos:210:Etruscan and Oscan
|
---|
220 | et:eth:430:Ethiopic
|
---|
221 | gl:glg:225:Glagolitic
|
---|
222 | gm:gmu:310:Gurmukhi
|
---|
223 | gt:gth:206:Gothic
|
---|
224 | gu:guj:320:Gujarati
|
---|
225 | ha:han:500:Han ideographs
|
---|
226 | he:heb:125:Hebrew
|
---|
227 | hg:hgl:420:Hangul
|
---|
228 | hm:hmo:450:Pahawh Hmong
|
---|
229 | ho:hoo:371:Hanunoo
|
---|
230 | hr:hrg:410:Hiragana
|
---|
231 | hu:hun:176:Old Hungarian runic
|
---|
232 | hv:hvn:175:Kok Turki runic
|
---|
233 | hy:hye:230:Armenian
|
---|
234 | iv:ivl:610:Indus Valley
|
---|
235 | ja:jap:930:(alias for Han + Hiragana + Katakana)
|
---|
236 | jl:jlg:445:Cherokee syllabary
|
---|
237 | jw:jwi:360:Javanese
|
---|
238 | ka:kam:241:Georgian (Mxedruli)
|
---|
239 | kh:khn:931:(alias for Hangul + Han)
|
---|
240 | kk:kkn:411:Katakana
|
---|
241 | km:khm:354:Khmer
|
---|
242 | kn:kan:345:Kannada
|
---|
243 | kr:krn:357:Karenni (Kayah Li)
|
---|
244 | ks:kst:305:Kharoshthi
|
---|
245 | kx:kax:240:Georgian (Xucuri)
|
---|
246 | la:lat:217:Latin
|
---|
247 | lf:laf:215:Latin (Fraktur variant)
|
---|
248 | lg:lag:216:Latin (Gaelic variant)
|
---|
249 | lo:lao:356:Lao
|
---|
250 | lp:lpc:335:Lepcha (Rong)
|
---|
251 | md:mda:140:Mandaean
|
---|
252 | me:mer:100:Meroitic
|
---|
253 | mh:may:090:Mayan hieroglyphs
|
---|
254 | ml:mlm:347:Malayalam
|
---|
255 | mn:mon:145:Mongolian
|
---|
256 | my:mya:350:Burmese
|
---|
257 | na:naa:400:Linear A
|
---|
258 | nb:nbb:401:Linear B
|
---|
259 | og:ogm:212:Ogham
|
---|
260 | or:ory:327:Oriya
|
---|
261 | os:osm:260:Osmanya
|
---|
262 | ph:phx:115:Phoenician
|
---|
263 | ph:pah:150:Pahlavi
|
---|
264 | pl:pld:282:Pollard Phonetic
|
---|
265 | pq:pqd:295:Klingon plQaD
|
---|
266 | pr:prm:227:Old Permic
|
---|
267 | ps:pst:600:Phaistos Disk
|
---|
268 | rn:rnr:211:Runic (Germanic)
|
---|
269 | rr:rro:620:Rongo-rongo
|
---|
270 | sa:sar:110:South Arabian
|
---|
271 | si:sin:348:Sinhala
|
---|
272 | sj:syj:137:Syriac (Jacobite variant)
|
---|
273 | sl:slb:440:Unified Canadian Aboriginal Syllabics
|
---|
274 | sn:syn:136:Syriac (Nestorian variant)
|
---|
275 | sw:sww:281:Shavian (Shaw)
|
---|
276 | sy:syr:135:Syriac (Estrangelo)
|
---|
277 | ta:tam:346:Tamil
|
---|
278 | tb:tbw:373:Tagbanwa
|
---|
279 | te:tel:340:Telugu
|
---|
280 | tf:tfn:120:Tifnagh
|
---|
281 | tg:tag:370:Tagalog
|
---|
282 | th:tha:352:Thai
|
---|
283 | tn:tna:170:Thaana
|
---|
284 | tw:twr:290:Tengwar
|
---|
285 | va:vai:470:Vai
|
---|
286 | vs:vsp:280:Visible Speech
|
---|
287 | xa:xas:000:Cuneiform, Sumero-Akkadian
|
---|
288 | xf:xfa:105:Cuneiform, Old Persian
|
---|
289 | xk:xkn:412:(alias for Hiragana + Katakana)
|
---|
290 | xu:xug:106:Cuneiform, Ugaritic
|
---|
291 | yi:yii:460:Yi
|
---|
292 | zx:zxx:997:Unwritten language
|
---|
293 | zy:zyy:998:Undetermined script
|
---|
294 | zz:zzz:999:Uncoded script
|
---|