source: for-distributions/trunk/bin/windows/perl/lib/I18N/LangTags/List.pm@ 14489

Last change on this file since 14489 was 14489, checked in by oranfry, 17 years ago

upgrading to perl 5.8

File size: 28.2 KB
Line 
1
2require 5;
3package I18N::LangTags::List;
4# Time-stamp: "2004-10-06 23:26:21 ADT"
5use strict;
6use vars qw(%Name %Is_Disrec $Debug $VERSION);
7$VERSION = '0.35';
8# POD at the end.
9
10#----------------------------------------------------------------------
{
  # Build %Name (tag => English name) and %Is_Disrec (tag => 1 for tags
  # that are not for general use) by scanning this module's own POD,
  # which is available on the DATA filehandle.
  #
  # Only the region between "=for woohah START" and "=for woohah END"
  # is a table.  Inside it:
  #   {tag} : Name      registers a tag
  #   [{tag} : Name]    registers a tag and marks it disrecommended
  #   Formerly "old"    registers "old" as a disrecommended alias of the
  #                     most recently registered name
  # Dies at load time if no tag at all could be read.
  my $seeking = 1;      # true until the START marker has been seen
  my $count = 0;        # number of table rows accepted
  my($disrec,$tag,$name);
  my $last_name = '';   # name from the most recent row, for "Formerly" lines
  while(<I18N::LangTags::List::DATA>) {
    if($seeking) {
      $seeking = 0 if m/=for woohah/;
    } elsif(m/^=for woohah END/) {
      # Stop at the end of the table so that whatever follows it in
      # the DATA section can never be mistaken for a table row.
      last;
    } elsif( ($disrec, $tag, $name) =
          m/(\[?)\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/
    ) {
      # Drop the trailing ";" or "." that ends items in a "Notable forms" list.
      $name =~ s/\s*[;\.]*\s*$//g;
      next unless $name;
      ++$count;
      print "<$tag> <$name>\n" if $Debug;
      # name() and is_decent() lowercase their argument before looking
      # it up, so a tag written with capitals in the table (such as
      # "az-Arab") must also be indexed in lowercase or it can never be
      # found.  The as-written spelling is kept as well for code that
      # reads %Name directly.
      foreach my $key ($tag, lc $tag) {
        $Name{$key} = $name;
        $Is_Disrec{$key} = 1 if $disrec;
      }
      $last_name = $name;
    } elsif (m/[Ff]ormerly \"([-a-z0-9]+)\"/) {
      $Name{$1} = "$last_name (old tag)" if $last_name;
      $Is_Disrec{$1} = 1;
    }
  }
  die "No tags read??" unless $count;
}
36#----------------------------------------------------------------------
37
sub name {
  # Return an English name for the language tag given as the first
  # argument, or nothing (undef in scalar context) if the argument is
  # false or no name can be found.
  #
  # The tag is lowercased and stripped of surrounding whitespace.  If
  # the full tag is not in %Name, trailing subtags are removed one at a
  # time until a known tag remains; the removed part is then reported
  # as:  Name (Subform "removed-part").
  # An "x-..." tag is also tried as "i-..." and vice versa.
  my $wanted = lc($_[0] || return);
  $wanted =~ s/^\s+//s;
  $wanted =~ s/\s+$//s;

  # The same tag with its x-/i- prefix swapped; empty if it has neither.
  my $twin = $wanted =~ m/^x-(.+)/ ? "i-$1"
           : $wanted =~ m/^i-(.+)/ ? "x-$1"
           :                         '';

  my $shaved_off = '';   # subtags removed so far, each with its leading "-"
  my $found = '';
  print "Input: {$wanted}\n" if $Debug;
  while(length $wanted) {
    $found = $Name{$wanted};
    last if $found;
    $found = $Name{$twin};
    last if $found;

    if($wanted =~ s/(-[a-z0-9]+)$//s) {
      my $piece = $1;
      print "Shaving off: $piece leaving $wanted\n" if $Debug;
      $shaved_off = $piece . $shaved_off;
      # Shorten the twin in step, then go around again.
      if($twin =~ s/(-[a-z0-9]+)$//s) {
        print " alt -> $twin\n" if $Debug;
      }
    } else {
      # Nothing left to remove: this is an unknown primary tag.
      print "Aborting on: {$found}{$shaved_off}\n" if $Debug;
      last;
    }
  }
  print "Output: {$found}{$shaved_off}\n" if $Debug;

  if(!$found)      { return }          # Failure
  if(!$shaved_off) { return $found }   # Exact match
  $shaved_off =~ s/^-//s;
  $shaved_off =~ s/-$//s;
  return sprintf('%s (Subform "%s")', $found, $shaved_off);
}
78
79#--------------------------------------------------------------------------
80
sub is_decent {
  # Judge whether the language tag given as the first argument is
  # well-formed and suitable for general use.  Returns:
  #   0  if the argument is false, is not well-formed, is a bare
  #      "i", "x" or "sgn", or if a disrecommended tag (one listed in
  #      %Is_Disrec) is reached before any known one
  #   2  if the tag, or one of its leading parts, is in %Name
  #   1  if the tag is well-formed but nothing about it is known
  my $tag = lc($_[0] || return 0);

  # Anchored with \A and \z rather than ^ and $: a "$" would also
  # match before a trailing newline and let "en\n" through as valid.
  return 0 unless
    $tag =~
    /\A(?:  # First subtag
         [xi] | [a-z]{2,3}
      )
      (?:  # Subtags thereafter
         -              # separator
         [a-z0-9]{1,8}  # subtag
      )*
    \z/xs;

  # Every leading part of the tag, shortest first: for "a-b-c" this is
  # "a", "a-b", "a-b-c".
  my @supers = ();
  foreach my $bit (split('-', $tag)) {
    push @supers,
      scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit;
  }
  return 0 unless @supers;
  # "i", "x" and "sgn" mean nothing without a following subtag.
  shift @supers if $supers[0] =~ m<^(i|x|sgn)$>s;
  return 0 unless @supers;

  # The full tag is tested first, so that decent subforms of indecent
  # tags are decent; a known full tag therefore returns 2 from here.
  foreach my $f ($tag, @supers) {
    return 0 if $Is_Disrec{$f};
    return 2 if $Name{$f};
  }
  return 1;
}
113
114#--------------------------------------------------------------------------
1151;
116
117__DATA__
118
119=head1 NAME
120
121I18N::LangTags::List -- tags and names for human languages
122
123=head1 SYNOPSIS
124
125 use I18N::LangTags::List;
126 print "Parlez-vous... ", join(', ',
127 I18N::LangTags::List::name('elx') || 'unknown_language',
128 I18N::LangTags::List::name('ar-Kw') || 'unknown_language',
129 I18N::LangTags::List::name('en') || 'unknown_language',
130 I18N::LangTags::List::name('en-CA') || 'unknown_language',
131 ), "?\n";
132
133prints:
134
135 Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English?
136
137=head1 DESCRIPTION
138
139This module provides a function
140C<I18N::LangTags::List::name( I<langtag> ) > that takes
141a language tag (see L<I18N::LangTags|I18N::LangTags>)
142and returns the best attempt at an English name for it, or
143undef if it can't make sense of the tag.
144
145The function I18N::LangTags::List::name(...) is not exported.
146
147This module also provides a function
148C<I18N::LangTags::List::is_decent( I<langtag> )> that returns true iff
149the language tag is syntactically valid and is for general use (like
150"fr" or "fr-ca", below). That is, it returns false for tags that are
151syntactically invalid and for tags, like "aus", that are listed in
152brackets below. This function is not exported.
153
154The map of tags-to-names that it uses is accessible as
155%I18N::LangTags::List::Name, and it's the same as the list
156that follows in this documentation, which should be useful
157to you even if you don't use this module.
158
159=head1 ABOUT LANGUAGE TAGS
160
161Internet language tags, as defined in RFC 3066, are a formalism
162for denoting human languages. The two-letter ISO 639-1 language
163codes are well known (as "en" for English), as are their forms
164when qualified by a country code ("en-US"). Less well-known are the
165arbitrary-length non-ISO codes (like "i-mingo"), and the
166recently (in 2001) introduced three-letter ISO-639-2 codes.
167
168Remember these important facts:
169
170=over
171
172=item *
173
174Language tags are not locale IDs. A locale ID is written with a "_"
175instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and
176I<means> something different than a language tag. A language tag
177denotes a language. A locale ID denotes a language I<as used in>
178a particular place, in combination with non-linguistic
179location-specific information such as what currency is used
180there. Locales I<also> often denote character set information,
181as in "en_US.ISO8859-1".
182
183=item *
184
185Language tags are not for computer languages.
186
187=item *
188
189"Dialect" is not a useful term, since there is no objective
190criterion for establishing when two language-forms are
191dialects of each other, or are separate languages.
192
193=item *
194
195Language tags are not case-sensitive. en-US, en-us, En-Us, etc.,
196are all the same tag, and denote the same language.
197
198=item *
199
200Not every language tag really refers to a single language. Some
201language tags refer to conditions: i-default (system-message text
202in English plus maybe other languages), und (undetermined
203language). Others (notably lots of the three-letter codes) are
204bibliographic tags that classify whole groups of languages, as
205with cus "Cushitic (Other)" (i.e., a
206language that has been classed as Cushitic, but which has no more
207specific code) or the even less linguistically coherent
208sai for "South American Indian (Other)". Though useful in
209bibliography, B<SUCH TAGS ARE NOT
210FOR GENERAL USE>. For further guidance, email me.
211
212=item *
213
214Language tags are not country codes. In fact, they are often
215distinct codes, as with language tag ja for Japanese, and
216ISO 3166 country code C<.jp> for Japan.
217
218=back
219
220=head1 LIST OF LANGUAGES
221
222The first part of each item is the language tag, between
223{...}. It
224is followed by an English name for the language or language-group.
225Language tags that I judge to be not for general use, are bracketed.
226
227This list is in alphabetical order by English name of the language.
228
229=for reminder
230 The name in the =item line MUST NOT have E<...>'s in it!!
231
232=for woohah START
233
234=over
235
236=item {ab} : Abkhazian
237
238eq Abkhaz
239
240=item {ace} : Achinese
241
242=item {ach} : Acoli
243
244=item {ada} : Adangme
245
246=item {ady} : Adyghe
247
248eq Adygei
249
250=item {aa} : Afar
251
252=item {afh} : Afrihili
253
254(Artificial)
255
256=item {af} : Afrikaans
257
258=item [{afa} : Afro-Asiatic (Other)]
259
260=item {ak} : Akan
261
262(Formerly "aka".)
263
264=item {akk} : Akkadian
265
266(Historical)
267
268=item {sq} : Albanian
269
270=item {ale} : Aleut
271
272=item [{alg} : Algonquian languages]
273
274NOT Algonquin!
275
276=item [{tut} : Altaic (Other)]
277
278=item {am} : Amharic
279
280NOT Aramaic!
281
282=item {i-ami} : Ami
283
284eq Amis. eq 'Amis. eq Pangca.
285
286=item [{apa} : Apache languages]
287
288=item {ar} : Arabic
289
290Many forms are mutually un-intelligible in spoken media.
291Notable forms:
292{ar-ae} UAE Arabic;
293{ar-bh} Bahrain Arabic;
294{ar-dz} Algerian Arabic;
295{ar-eg} Egyptian Arabic;
296{ar-iq} Iraqi Arabic;
297{ar-jo} Jordanian Arabic;
298{ar-kw} Kuwait Arabic;
299{ar-lb} Lebanese Arabic;
300{ar-ly} Libyan Arabic;
301{ar-ma} Moroccan Arabic;
302{ar-om} Omani Arabic;
303{ar-qa} Qatari Arabic;
304{ar-sa} Saudi Arabic;
305{ar-sy} Syrian Arabic;
306{ar-tn} Tunisian Arabic;
307{ar-ye} Yemen Arabic.
308
309=item {arc} : Aramaic
310
311NOT Amharic! NOT Samaritan Aramaic!
312
313=item {arp} : Arapaho
314
315=item {arn} : Araucanian
316
317=item {arw} : Arawak
318
319=item {hy} : Armenian
320
321=item {an} : Aragonese
322
323=item [{art} : Artificial (Other)]
324
325=item {ast} : Asturian
326
327eq Bable.
328
329=item {as} : Assamese
330
331=item [{ath} : Athapascan languages]
332
333eq Athabaskan. eq Athapaskan. eq Athabascan.
334
335=item [{aus} : Australian languages]
336
337=item [{map} : Austronesian (Other)]
338
339=item {av} : Avaric
340
341(Formerly "ava".)
342
343=item {ae} : Avestan
344
345eq Zend
346
347=item {awa} : Awadhi
348
349=item {ay} : Aymara
350
351=item {az} : Azerbaijani
352
353eq Azeri
354
355Notable forms:
356{az-Arab} Azerbaijani in Arabic script;
357{az-Cyrl} Azerbaijani in Cyrillic script;
358{az-Latn} Azerbaijani in Latin script.
359
360=item {ban} : Balinese
361
362=item [{bat} : Baltic (Other)]
363
364=item {bal} : Baluchi
365
366=item {bm} : Bambara
367
368(Formerly "bam".)
369
370=item [{bai} : Bamileke languages]
371
372=item {bad} : Banda
373
374=item [{bnt} : Bantu (Other)]
375
376=item {bas} : Basa
377
378=item {ba} : Bashkir
379
380=item {eu} : Basque
381
382=item {btk} : Batak (Indonesia)
383
384=item {bej} : Beja
385
386=item {be} : Belarusian
387
388eq Belarussian. eq Byelarussian.
389eq Belorussian. eq Byelorussian.
390eq White Russian. eq White Ruthenian.
391NOT Ruthenian!
392
393=item {bem} : Bemba
394
395=item {bn} : Bengali
396
397eq Bangla.
398
399=item [{ber} : Berber (Other)]
400
401=item {bho} : Bhojpuri
402
403=item {bh} : Bihari
404
405=item {bik} : Bikol
406
407=item {bin} : Bini
408
409=item {bi} : Bislama
410
411eq Bichelamar.
412
413=item {bs} : Bosnian
414
415=item {bra} : Braj
416
417=item {br} : Breton
418
419=item {bug} : Buginese
420
421=item {bg} : Bulgarian
422
423=item {i-bnn} : Bunun
424
425=item {bua} : Buriat
426
427=item {my} : Burmese
428
429=item {cad} : Caddo
430
431=item {car} : Carib
432
433=item {ca} : Catalan
434
435eq CatalE<aacute>n. eq Catalonian.
436
437=item [{cau} : Caucasian (Other)]
438
439=item {ceb} : Cebuano
440
441=item [{cel} : Celtic (Other)]
442
443Notable forms:
444{cel-gaulish} Gaulish (Historical)
445
446=item [{cai} : Central American Indian (Other)]
447
448=item {chg} : Chagatai
449
450(Historical?)
451
452=item [{cmc} : Chamic languages]
453
454=item {ch} : Chamorro
455
456=item {ce} : Chechen
457
458=item {chr} : Cherokee
459
460eq Tsalagi
461
462=item {chy} : Cheyenne
463
464=item {chb} : Chibcha
465
466(Historical) NOT Chibchan (which is a language family).
467
468=item {ny} : Chichewa
469
470eq Nyanja. eq Chinyanja.
471
472=item {zh} : Chinese
473
474Many forms are mutually un-intelligible in spoken media.
475Notable forms:
476{zh-Hans} Chinese, in simplified script;
477{zh-Hant} Chinese, in traditional script;
478{zh-tw} Taiwan Chinese;
479{zh-cn} PRC Chinese;
480{zh-sg} Singapore Chinese;
481{zh-mo} Macau Chinese;
482{zh-hk} Hong Kong Chinese;
483{zh-guoyu} Mandarin [Putonghua/Guoyu];
484{zh-hakka} Hakka [formerly "i-hakka"];
485{zh-min} Hokkien;
486{zh-min-nan} Southern Hokkien;
487{zh-wuu} Shanghaiese;
488{zh-xiang} Hunanese;
489{zh-gan} Gan;
490{zh-yue} Cantonese.
491
492=for etc
493{i-hakka} Hakka (old tag)
494
495=item {chn} : Chinook Jargon
496
497eq Chinook Wawa.
498
499=item {chp} : Chipewyan
500
501=item {cho} : Choctaw
502
503=item {cu} : Church Slavic
504
505eq Old Church Slavonic.
506
507=item {chk} : Chuukese
508
509eq Trukese. eq Chuuk. eq Truk. eq Ruk.
510
511=item {cv} : Chuvash
512
513=item {cop} : Coptic
514
515=item {kw} : Cornish
516
517=item {co} : Corsican
518
519eq Corse.
520
521=item {cr} : Cree
522
523NOT Creek! (Formerly "cre".)
524
525=item {mus} : Creek
526
527NOT Cree!
528
529=item [{cpe} : English-based Creoles and pidgins (Other)]
530
531=item [{cpf} : French-based Creoles and pidgins (Other)]
532
533=item [{cpp} : Portuguese-based Creoles and pidgins (Other)]
534
535=item [{crp} : Creoles and pidgins (Other)]
536
537=item {hr} : Croatian
538
539eq Croat.
540
541=item [{cus} : Cushitic (Other)]
542
543=item {cs} : Czech
544
545=item {dak} : Dakota
546
547eq Nakota. eq Lakota.
548
549=item {da} : Danish
550
551=item {dar} : Dargwa
552
553=item {day} : Dayak
554
555=item {i-default} : Default (Fallthru) Language
556
557Defined in RFC 2277, this is for tagging text
558(which must include English text, and might/should include text
559in other appropriate languages) that is emitted in a context
560where language-negotiation wasn't possible -- in SMTP mail failure
561messages, for example.
562
563=item {del} : Delaware
564
565=item {din} : Dinka
566
567=item {dv} : Divehi
568
569eq Maldivian. (Formerly "div".)
570
571=item {doi} : Dogri
572
573NOT Dogrib!
574
575=item {dgr} : Dogrib
576
577NOT Dogri!
578
579=item [{dra} : Dravidian (Other)]
580
581=item {dua} : Duala
582
583=item {nl} : Dutch
584
585eq Netherlander. Notable forms:
586{nl-nl} Netherlands Dutch;
587{nl-be} Belgian Dutch.
588
589=item {dum} : Middle Dutch (ca.1050-1350)
590
591(Historical)
592
593=item {dyu} : Dyula
594
595=item {dz} : Dzongkha
596
597=item {efi} : Efik
598
599=item {egy} : Ancient Egyptian
600
601(Historical)
602
603=item {eka} : Ekajuk
604
605=item {elx} : Elamite
606
607(Historical)
608
609=item {en} : English
610
611Notable forms:
612{en-au} Australian English;
613{en-bz} Belize English;
614{en-ca} Canadian English;
615{en-gb} UK English;
616{en-ie} Irish English;
617{en-jm} Jamaican English;
618{en-nz} New Zealand English;
619{en-ph} Philippine English;
620{en-tt} Trinidad English;
621{en-us} US English;
622{en-za} South African English;
623{en-zw} Zimbabwe English.
624
625=item {enm} : Middle English (1100-1500)
626
627(Historical)
628
629=item {ang} : Old English (ca.450-1100)
630
631eq Anglo-Saxon. (Historical)
632
633=item {i-enochian} : Enochian (Artificial)
634
635=item {myv} : Erzya
636
637=item {eo} : Esperanto
638
639(Artificial)
640
641=item {et} : Estonian
642
643=item {ee} : Ewe
644
645(Formerly "ewe".)
646
647=item {ewo} : Ewondo
648
649=item {fan} : Fang
650
651=item {fat} : Fanti
652
653=item {fo} : Faroese
654
655=item {fj} : Fijian
656
657=item {fi} : Finnish
658
659=item [{fiu} : Finno-Ugrian (Other)]
660
661eq Finno-Ugric. NOT Ugaritic!
662
663=item {fon} : Fon
664
665=item {fr} : French
666
667Notable forms:
668{fr-fr} France French;
669{fr-be} Belgian French;
670{fr-ca} Canadian French;
671{fr-ch} Swiss French;
672{fr-lu} Luxembourg French;
673{fr-mc} Monaco French.
674
675=item {frm} : Middle French (ca.1400-1600)
676
677(Historical)
678
679=item {fro} : Old French (842-ca.1400)
680
681(Historical)
682
683=item {fy} : Frisian
684
685=item {fur} : Friulian
686
687=item {ff} : Fulah
688
689(Formerly "ful".)
690
691=item {gaa} : Ga
692
693=item {gd} : Scots Gaelic
694
695NOT Scots!
696
697=item {gl} : Gallegan
698
699eq Galician
700
701=item {lg} : Ganda
702
703(Formerly "lug".)
704
705=item {gay} : Gayo
706
707=item {gba} : Gbaya
708
709=item {gez} : Geez
710
711eq Ge'ez
712
713=item {ka} : Georgian
714
715=item {de} : German
716
717Notable forms:
718{de-at} Austrian German;
719{de-be} Belgian German;
720{de-ch} Swiss German;
721{de-de} Germany German;
722{de-li} Liechtenstein German;
723{de-lu} Luxembourg German.
724
725=item {gmh} : Middle High German (ca.1050-1500)
726
727(Historical)
728
729=item {goh} : Old High German (ca.750-1050)
730
731(Historical)
732
733=item [{gem} : Germanic (Other)]
734
735=item {gil} : Gilbertese
736
737=item {gon} : Gondi
738
739=item {gor} : Gorontalo
740
741=item {got} : Gothic
742
743(Historical)
744
745=item {grb} : Grebo
746
747=item {grc} : Ancient Greek
748
749(Historical) (Until 15th century or so.)
750
751=item {el} : Modern Greek
752
753(Since 15th century or so.)
754
755=item {gn} : Guarani
756
757GuaranE<iacute>
758
759=item {gu} : Gujarati
760
761=item {gwi} : Gwich'in
762
763eq Gwichin
764
765=item {hai} : Haida
766
767=item {ht} : Haitian
768
769eq Haitian Creole
770
771=item {ha} : Hausa
772
773=item {haw} : Hawaiian
774
775Hawai'ian
776
777=item {he} : Hebrew
778
779(Formerly "iw".)
780
781=for etc
782{iw} Hebrew (old tag)
783
784=item {hz} : Herero
785
786=item {hil} : Hiligaynon
787
788=item {him} : Himachali
789
790=item {hi} : Hindi
791
792=item {ho} : Hiri Motu
793
794=item {hit} : Hittite
795
796(Historical)
797
798=item {hmn} : Hmong
799
800=item {hu} : Hungarian
801
802=item {hup} : Hupa
803
804=item {iba} : Iban
805
806=item {is} : Icelandic
807
808=item {io} : Ido
809
810(Artificial)
811
812=item {ig} : Igbo
813
814(Formerly "ibo".)
815
816=item {ijo} : Ijo
817
818=item {ilo} : Iloko
819
820=item [{inc} : Indic (Other)]
821
822=item [{ine} : Indo-European (Other)]
823
824=item {id} : Indonesian
825
826(Formerly "in".)
827
828=for etc
829{in} Indonesian (old tag)
830
831=item {inh} : Ingush
832
833=item {ia} : Interlingua (International Auxiliary Language Association)
834
835(Artificial) NOT Interlingue!
836
837=item {ie} : Interlingue
838
839(Artificial) NOT Interlingua!
840
841=item {iu} : Inuktitut
842
843A subform of "Eskimo".
844
845=item {ik} : Inupiaq
846
847A subform of "Eskimo".
848
849=item [{ira} : Iranian (Other)]
850
851=item {ga} : Irish
852
853=item {mga} : Middle Irish (900-1200)
854
855(Historical)
856
857=item {sga} : Old Irish (to 900)
858
859(Historical)
860
861=item [{iro} : Iroquoian languages]
862
863=item {it} : Italian
864
865Notable forms:
866{it-it} Italy Italian;
867{it-ch} Swiss Italian.
868
869=item {ja} : Japanese
870
871(NOT "jp"!)
872
873=item {jv} : Javanese
874
875(Formerly "jw" because of a typo.)
876
877=item {jrb} : Judeo-Arabic
878
879=item {jpr} : Judeo-Persian
880
881=item {kbd} : Kabardian
882
883=item {kab} : Kabyle
884
885=item {kac} : Kachin
886
887=item {kl} : Kalaallisut
888
889eq Greenlandic "Eskimo"
890
891=item {xal} : Kalmyk
892
893=item {kam} : Kamba
894
895=item {kn} : Kannada
896
897eq Kanarese. NOT Canadian!
898
899=item {kr} : Kanuri
900
901(Formerly "kau".)
902
903=item {krc} : Karachay-Balkar
904
905=item {kaa} : Kara-Kalpak
906
907=item {kar} : Karen
908
909=item {ks} : Kashmiri
910
911=item {csb} : Kashubian
912
913eq Kashub
914
915=item {kaw} : Kawi
916
917=item {kk} : Kazakh
918
919=item {kha} : Khasi
920
921=item {km} : Khmer
922
923eq Cambodian. eq Kampuchean.
924
925=item [{khi} : Khoisan (Other)]
926
927=item {kho} : Khotanese
928
929=item {ki} : Kikuyu
930
931eq Gikuyu.
932
933=item {kmb} : Kimbundu
934
935=item {rw} : Kinyarwanda
936
937=item {ky} : Kirghiz
938
939=item {i-klingon} : Klingon
940
941=item {kv} : Komi
942
943=item {kg} : Kongo
944
945(Formerly "kon".)
946
947=item {kok} : Konkani
948
949=item {ko} : Korean
950
951=item {kos} : Kosraean
952
953=item {kpe} : Kpelle
954
955=item {kro} : Kru
956
957=item {kj} : Kuanyama
958
959=item {kum} : Kumyk
960
961=item {ku} : Kurdish
962
963=item {kru} : Kurukh
964
965=item {kut} : Kutenai
966
967=item {lad} : Ladino
968
969eq Judeo-Spanish. NOT Ladin (a minority language in Italy).
970
971=item {lah} : Lahnda
972
973NOT Lamba!
974
975=item {lam} : Lamba
976
977NOT Lahnda!
978
979=item {lo} : Lao
980
981eq Laotian.
982
983=item {la} : Latin
984
985(Historical) NOT Ladin! NOT Ladino!
986
987=item {lv} : Latvian
988
989eq Lettish.
990
991=item {lb} : Letzeburgesch
992
993eq Luxemburgian, eq Luxemburger. (Formerly "i-lux".)
994
995=for etc
996{i-lux} Letzeburgesch (old tag)
997
998=item {lez} : Lezghian
999
1000=item {li} : Limburgish
1001
1002eq Limburger, eq Limburgan. NOT Letzeburgesch!
1003
1004=item {ln} : Lingala
1005
1006=item {lt} : Lithuanian
1007
1008=item {nds} : Low German
1009
1010eq Low Saxon. eq Low German. eq Low Saxon.
1011
1012=item {art-lojban} : Lojban (Artificial)
1013
1014=item {loz} : Lozi
1015
1016=item {lu} : Luba-Katanga
1017
1018(Formerly "lub".)
1019
1020=item {lua} : Luba-Lulua
1021
1022=item {lui} : Luiseno
1023
1024eq LuiseE<ntilde>o.
1025
1026=item {lun} : Lunda
1027
1028=item {luo} : Luo (Kenya and Tanzania)
1029
1030=item {lus} : Lushai
1031
1032=item {mk} : Macedonian
1033
1034eq the modern Slavic language spoken in what was Yugoslavia.
1035NOT the form of Greek spoken in Greek Macedonia!
1036
1037=item {mad} : Madurese
1038
1039=item {mag} : Magahi
1040
1041=item {mai} : Maithili
1042
1043=item {mak} : Makasar
1044
1045=item {mg} : Malagasy
1046
1047=item {ms} : Malay
1048
1049NOT Malayalam!
1050
1051=item {ml} : Malayalam
1052
1053NOT Malay!
1054
1055=item {mt} : Maltese
1056
1057=item {mnc} : Manchu
1058
1059=item {mdr} : Mandar
1060
1061NOT Mandarin!
1062
1063=item {man} : Mandingo
1064
1065=item {mni} : Manipuri
1066
1067eq Meithei.
1068
1069=item [{mno} : Manobo languages]
1070
1071=item {gv} : Manx
1072
1073=item {mi} : Maori
1074
1075NOT Mari!
1076
1077=item {mr} : Marathi
1078
1079=item {chm} : Mari
1080
1081NOT Maori!
1082
1083=item {mh} : Marshall
1084
1085eq Marshallese.
1086
1087=item {mwr} : Marwari
1088
1089=item {mas} : Masai
1090
1091=item [{myn} : Mayan languages]
1092
1093=item {men} : Mende
1094
1095=item {mic} : Micmac
1096
1097=item {min} : Minangkabau
1098
1099=item {i-mingo} : Mingo
1100
1101eq the Iroquoian language West Virginia Seneca. NOT New York Seneca!
1102
1103=item [{mis} : Miscellaneous languages]
1104
1105Don't use this.
1106
1107=item {moh} : Mohawk
1108
1109=item {mdf} : Moksha
1110
1111=item {mo} : Moldavian
1112
1113eq Moldovan.
1114
1115=item [{mkh} : Mon-Khmer (Other)]
1116
1117=item {lol} : Mongo
1118
1119=item {mn} : Mongolian
1120
1121eq Mongol.
1122
1123=item {mos} : Mossi
1124
1125=item [{mul} : Multiple languages]
1126
1127Not for normal use.
1128
1129=item [{mun} : Munda languages]
1130
1131=item {nah} : Nahuatl
1132
1133=item {nap} : Neapolitan
1134
1135=item {na} : Nauru
1136
1137=item {nv} : Navajo
1138
1139eq Navaho. (Formerly "i-navajo".)
1140
1141=for etc
1142{i-navajo} Navajo (old tag)
1143
1144=item {nd} : North Ndebele
1145
1146=item {nr} : South Ndebele
1147
1148=item {ng} : Ndonga
1149
1150=item {ne} : Nepali
1151
1152eq Nepalese. Notable forms:
1153{ne-np} Nepal Nepali;
1154{ne-in} India Nepali.
1155
1156=item {new} : Newari
1157
1158=item {nia} : Nias
1159
1160=item [{nic} : Niger-Kordofanian (Other)]
1161
1162=item [{ssa} : Nilo-Saharan (Other)]
1163
1164=item {niu} : Niuean
1165
1166=item {nog} : Nogai
1167
1168=item {non} : Old Norse
1169
1170(Historical)
1171
1172=item [{nai} : North American Indian]
1173
1174Do not use this.
1175
1176=item {no} : Norwegian
1177
1178Note the two following forms:
1179
1180=item {nb} : Norwegian Bokmal
1181
1182eq BokmE<aring>l, (A form of Norwegian.) (Formerly "no-bok".)
1183
1184=for etc
1185{no-bok} Norwegian Bokmal (old tag)
1186
1187=item {nn} : Norwegian Nynorsk
1188
1189(A form of Norwegian.) (Formerly "no-nyn".)
1190
1191=for etc
1192{no-nyn} Norwegian Nynorsk (old tag)
1193
1194=item [{nub} : Nubian languages]
1195
1196=item {nym} : Nyamwezi
1197
1198=item {nyn} : Nyankole
1199
1200=item {nyo} : Nyoro
1201
1202=item {nzi} : Nzima
1203
1204=item {oc} : Occitan (post 1500)
1205
1206eq ProvenE<ccedil>al, eq Provencal
1207
1208=item {oj} : Ojibwa
1209
1210eq Ojibwe. (Formerly "oji".)
1211
1212=item {or} : Oriya
1213
1214=item {om} : Oromo
1215
1216=item {osa} : Osage
1217
1218=item {os} : Ossetian; Ossetic
1219
1220=item [{oto} : Otomian languages]
1221
1222Group of languages collectively called "OtomE<iacute>".
1223
1224=item {pal} : Pahlavi
1225
1226eq Pahlevi
1227
1228=item {i-pwn} : Paiwan
1229
1230eq Pariwan
1231
1232=item {pau} : Palauan
1233
1234=item {pi} : Pali
1235
1236(Historical?)
1237
1238=item {pam} : Pampanga
1239
1240=item {pag} : Pangasinan
1241
1242=item {pa} : Panjabi
1243
1244eq Punjabi
1245
1246=item {pap} : Papiamento
1247
1248eq Papiamentu.
1249
1250=item [{paa} : Papuan (Other)]
1251
1252=item {fa} : Persian
1253
1254eq Farsi. eq Iranian.
1255
1256=item {peo} : Old Persian (ca.600-400 B.C.)
1257
1258=item [{phi} : Philippine (Other)]
1259
1260=item {phn} : Phoenician
1261
1262(Historical)
1263
1264=item {pon} : Pohnpeian
1265
1266NOT Pompeiian!
1267
1268=item {pl} : Polish
1269
1270=item {pt} : Portuguese
1271
1272eq Portugese. Notable forms:
1273{pt-pt} Portugal Portuguese;
1274{pt-br} Brazilian Portuguese.
1275
1276=item [{pra} : Prakrit languages]
1277
1278=item {pro} : Old Provencal (to 1500)
1279
1280eq Old ProvenE<ccedil>al. (Historical.)
1281
1282=item {ps} : Pushto
1283
1284eq Pashto. eq Pushtu.
1285
1286=item {qu} : Quechua
1287
1288eq Quecha.
1289
1290=item {rm} : Raeto-Romance
1291
1292eq Romansh.
1293
1294=item {raj} : Rajasthani
1295
1296=item {rap} : Rapanui
1297
1298=item {rar} : Rarotongan
1299
1300=item [{qaa - qtz} : Reserved for local use.]
1301
1302=item [{roa} : Romance (Other)]
1303
1304NOT Romanian! NOT Romany! NOT Romansh!
1305
1306=item {ro} : Romanian
1307
1308eq Rumanian. NOT Romany!
1309
1310=item {rom} : Romany
1311
1312eq Rom. NOT Romanian!
1313
1314=item {rn} : Rundi
1315
1316=item {ru} : Russian
1317
1318NOT White Russian! NOT Rusyn!
1319
1320=item [{sal} : Salishan languages]
1321
1322Large language group.
1323
1324=item {sam} : Samaritan Aramaic
1325
1326NOT Aramaic!
1327
1328=item {se} : Northern Sami
1329
1330eq Lappish. eq Lapp. eq (Northern) Saami.
1331
1332=item {sma} : Southern Sami
1333
1334=item {smn} : Inari Sami
1335
1336=item {smj} : Lule Sami
1337
1338=item {sms} : Skolt Sami
1339
1340=item [{smi} : Sami languages (Other)]
1341
1342=item {sm} : Samoan
1343
1344=item {sad} : Sandawe
1345
1346=item {sg} : Sango
1347
1348=item {sa} : Sanskrit
1349
1350(Historical)
1351
1352=item {sat} : Santali
1353
1354=item {sc} : Sardinian
1355
1356eq Sard.
1357
1358=item {sas} : Sasak
1359
1360=item {sco} : Scots
1361
1362NOT Scots Gaelic!
1363
1364=item {sel} : Selkup
1365
1366=item [{sem} : Semitic (Other)]
1367
1368=item {sr} : Serbian
1369
1370eq Serb. NOT Sorbian.
1371
1372Notable forms:
1373{sr-Cyrl} : Serbian in Cyrillic script;
1374{sr-Latn} : Serbian in Latin script.
1375
1376=item {srr} : Serer
1377
1378=item {shn} : Shan
1379
1380=item {sn} : Shona
1381
1382=item {sid} : Sidamo
1383
1384=item {sgn-...} : Sign Languages
1385
1386Always use with a subtag. Notable forms:
1387{sgn-gb} British Sign Language (BSL);
1388{sgn-ie} Irish Sign Language (ESL);
1389{sgn-ni} Nicaraguan Sign Language (ISN);
1390{sgn-us} American Sign Language (ASL).
1391
1392(And so on with other country codes as the subtag.)
1393
1394=item {bla} : Siksika
1395
1396eq Blackfoot. eq Pikanii.
1397
1398=item {sd} : Sindhi
1399
1400=item {si} : Sinhalese
1401
1402eq Sinhala.
1403
1404=item [{sit} : Sino-Tibetan (Other)]
1405
1406=item [{sio} : Siouan languages]
1407
1408=item {den} : Slave (Athapascan)
1409
1410("Slavey" is a subform.)
1411
1412=item [{sla} : Slavic (Other)]
1413
1414=item {sk} : Slovak
1415
1416eq Slovakian.
1417
1418=item {sl} : Slovenian
1419
1420eq Slovene.
1421
1422=item {sog} : Sogdian
1423
1424=item {so} : Somali
1425
1426=item {son} : Songhai
1427
1428=item {snk} : Soninke
1429
1430=item {wen} : Sorbian languages
1431
1432eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian!
1433
1434=item {nso} : Northern Sotho
1435
1436=item {st} : Southern Sotho
1437
1438eq Sutu. eq Sesotho.
1439
1440=item [{sai} : South American Indian (Other)]
1441
1442=item {es} : Spanish
1443
1444Notable forms:
1445{es-ar} Argentine Spanish;
1446{es-bo} Bolivian Spanish;
1447{es-cl} Chilean Spanish;
1448{es-co} Colombian Spanish;
1449{es-do} Dominican Spanish;
1450{es-ec} Ecuadorian Spanish;
1451{es-es} Spain Spanish;
1452{es-gt} Guatemalan Spanish;
1453{es-hn} Honduran Spanish;
1454{es-mx} Mexican Spanish;
1455{es-pa} Panamanian Spanish;
1456{es-pe} Peruvian Spanish;
1457{es-pr} Puerto Rican Spanish;
1458{es-py} Paraguay Spanish;
1459{es-sv} Salvadoran Spanish;
1460{es-us} US Spanish;
1461{es-uy} Uruguayan Spanish;
1462{es-ve} Venezuelan Spanish.
1463
1464=item {suk} : Sukuma
1465
1466=item {sux} : Sumerian
1467
1468(Historical)
1469
1470=item {su} : Sundanese
1471
1472=item {sus} : Susu
1473
1474=item {sw} : Swahili
1475
1476eq Kiswahili
1477
1478=item {ss} : Swati
1479
1480=item {sv} : Swedish
1481
1482Notable forms:
1483{sv-se} Sweden Swedish;
1484{sv-fi} Finland Swedish.
1485
1486=item {syr} : Syriac
1487
1488=item {tl} : Tagalog
1489
1490=item {ty} : Tahitian
1491
1492=item [{tai} : Tai (Other)]
1493
1494NOT Thai!
1495
1496=item {tg} : Tajik
1497
1498=item {tmh} : Tamashek
1499
1500=item {ta} : Tamil
1501
1502=item {i-tao} : Tao
1503
1504eq Yami.
1505
1506=item {tt} : Tatar
1507
1508=item {i-tay} : Tayal
1509
1510eq Atayal. eq Atayan.
1511
1512=item {te} : Telugu
1513
1514=item {ter} : Tereno
1515
1516=item {tet} : Tetum
1517
1518=item {th} : Thai
1519
1520NOT Tai!
1521
1522=item {bo} : Tibetan
1523
1524=item {tig} : Tigre
1525
1526=item {ti} : Tigrinya
1527
1528=item {tem} : Timne
1529
1530eq Themne. eq Timene.
1531
1532=item {tiv} : Tiv
1533
1534=item {tli} : Tlingit
1535
1536=item {tpi} : Tok Pisin
1537
1538=item {tkl} : Tokelau
1539
1540=item {tog} : Tonga (Nyasa)
1541
1542NOT Tsonga!
1543
1544=item {to} : Tonga (Tonga Islands)
1545
1546(Pronounced "Tong-a", not "Tong-ga")
1547
1548NOT Tsonga!
1549
1550=item {tsi} : Tsimshian
1551
1552eq Sm'algyax
1553
1554=item {ts} : Tsonga
1555
1556NOT Tonga!
1557
1558=item {i-tsu} : Tsou
1559
1560=item {tn} : Tswana
1561
1562Same as Setswana.
1563
1564=item {tum} : Tumbuka
1565
1566=item [{tup} : Tupi languages]
1567
1568=item {tr} : Turkish
1569
1570(Typically in Roman script)
1571
1572=item {ota} : Ottoman Turkish (1500-1928)
1573
1574(Typically in Arabic script) (Historical)
1575
1576=item {crh} : Crimean Turkish
1577
1578eq Crimean Tatar
1579
1580=item {tk} : Turkmen
1581
1582eq Turkmeni.
1583
1584=item {tvl} : Tuvalu
1585
1586=item {tyv} : Tuvinian
1587
1588eq Tuvan. eq Tuvin.
1589
1590=item {tw} : Twi
1591
1592=item {udm} : Udmurt
1593
1594=item {uga} : Ugaritic
1595
1596NOT Ugric!
1597
1598=item {ug} : Uighur
1599
1600=item {uk} : Ukrainian
1601
1602=item {umb} : Umbundu
1603
1604=item {und} : Undetermined
1605
1606Not a tag for normal use.
1607
1608=item {ur} : Urdu
1609
1610=item {uz} : Uzbek
1611
1612eq E<Ouml>zbek
1613
1614Notable forms:
1615{uz-Cyrl} Uzbek in Cyrillic script;
1616{uz-Latn} Uzbek in Latin script.
1617
1618=item {vai} : Vai
1619
1620=item {ve} : Venda
1621
1622NOT Wendish! NOT Wend! NOT Avestan! (Formerly "ven".)
1623
1624=item {vi} : Vietnamese
1625
1626eq Viet.
1627
1628=item {vo} : Volapuk
1629
1630eq VolapE<uuml>k. (Artificial)
1631
1632=item {vot} : Votic
1633
1634eq Votian. eq Vod.
1635
1636=item [{wak} : Wakashan languages]
1637
1638=item {wa} : Walloon
1639
1640=item {wal} : Walamo
1641
1642eq Wolaytta.
1643
1644=item {war} : Waray
1645
1646Presumably the Philippine language Waray-Waray (SamareE<ntilde>o),
1647not the smaller Philippine language Waray Sorsogon, nor the extinct
1648Australian language Waray.
1649
1650=item {was} : Washo
1651
1652eq Washoe
1653
1654=item {cy} : Welsh
1655
1656=item {wo} : Wolof
1657
1658=item {x-...} : Unregistered (Semi-Private Use)
1659
1660"x-" is a prefix for language tags that are not registered with ISO
1661or IANA. Example, x-double-dutch
1662
1663=item {xh} : Xhosa
1664
1665=item {sah} : Yakut
1666
1667=item {yao} : Yao
1668
1669(The Yao in Malawi?)
1670
1671=item {yap} : Yapese
1672
1673eq Yap
1674
1675=item {ii} : Sichuan Yi
1676
1677=item {yi} : Yiddish
1678
1679Formerly "ji". Usually in Hebrew script.
1680
1681Notable forms:
1682{yi-latn} Yiddish in Latin script
1683
1684=item {yo} : Yoruba
1685
1686=item [{ypk} : Yupik languages]
1687
1688Several "Eskimo" languages.
1689
1690=item {znd} : Zande
1691
1692=item [{zap} : Zapotec]
1693
1694(A group of languages.)
1695
1696=item {zen} : Zenaga
1697
1698NOT Zend.
1699
1700=item {za} : Zhuang
1701
1702=item {zu} : Zulu
1703
1704=item {zun} : Zuni
1705
1706eq ZuE<ntilde>i
1707
1708=back
1709
1710=for woohah END
1711
1712=head1 SEE ALSO
1713
1714L<I18N::LangTags|I18N::LangTags> and its "See Also" section.
1715
1716=head1 COPYRIGHT AND DISCLAIMER
1717
1718Copyright (c) 2001+ Sean M. Burke. All rights reserved.
1719
1720You can redistribute and/or
1721modify this document under the same terms as Perl itself.
1722
1723This document is provided in the hope that it will be
1724useful, but without any warranty;
1725without even the implied warranty of accuracy, authoritativeness,
1726completeness, merchantability, or fitness for a particular purpose.
1727
1728Email any corrections or questions to me.
1729
1730=head1 AUTHOR
1731
1732Sean M. Burke, sburkeE<64>cpan.org
1733
1734=cut
1735
1736
1737# To generate a list of just the two and three-letter codes:
1738
1739#!/usr/local/bin/perl -w
1740
1741require 5; # Time-stamp: "2001-03-13 21:53:39 MST"
1742 # Sean M. Burke, sburke@cpan.org
1743 # This program is for generating the language_codes.txt file
1744use strict;
1745use LWP::Simple;
1746use HTML::TreeBuilder 3.10;
1747my $root = HTML::TreeBuilder->new();
1748my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html';
1749$root->parse(get($url) || die "Can't get $url");
1750$root->eof();
1751
1752my @codes;
1753
1754foreach my $tr ($root->find_by_tag_name('tr')) {
1755 my @f = map $_->as_text(), $tr->content_list();
1756 #print map("<$_> ", @f), "\n";
1757 next unless @f == 5;
1758 pop @f; # nix the French name
1759 next if $f[-1] eq 'Language Name (English)'; # it's a header line
1760 my $xx = splice(@f, 2,1); # pull out the two-letter code
1761 $f[-1] =~ s/^\s+//;
1762 $f[-1] =~ s/\s+$//;
1763 if($xx =~ m/[a-zA-Z]/) { # there's a two-letter code for it
1764 push @codes, [ lc($f[-1]), "$xx\t$f[-1]\n" ];
1765 } else { # print the three-letter codes.
1766 if($f[0] eq $f[1]) {
1767 push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ];
1768 } else { # shouldn't happen
1769 push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ];
1770 }
1771 }
1772}
1773
1774print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes;
1775print "[ based on $url\n at ", scalar(localtime), "]\n",
1776 "[Note: doesn't include IANA-registered codes.]\n";
1777exit;
1778__END__
1779
Note: See TracBrowser for help on using the repository browser.