source: gsdl/trunk/perllib/encodings.pm@ 15073

Last change on this file since 15073 was 12604, checked in by mdewsnip, 18 years ago

Added definition for new DOS codepage 852 (Central European) encoding.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
Line 
1###########################################################################
2#
3# encodings.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2001 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# Each encoding supported by the Greenstone build-time software should be
27# specified in the following hash table ($encodings).
28
package encodings;

use strict;
use warnings;

# $encodings::encodings is a reference to a hash keyed by encoding
# identifier (for example 'iso_8859_1').  Each value is itself a hash
# reference describing that encoding, using the following keys:
#
#   name      --> The full display name of the encoding.
#   mapfile   --> The ump file associated with the encoding.
#   double    --> 1 if it's a double byte encoding.
#   converter --> If the encoding needs a specialized conversion
#                 routine this is the name of that routine.
#
# Every entry below supplies 'name' and 'mapfile'; only 'shift_jis'
# supplies 'converter'.
#
# NOTE(review): no entry in this table sets 'double', not even the
# Japanese/Korean/Chinese ones -- confirm whether any consumer still
# reads that key before relying on it.
# NOTE(review): 'iso_8859_5' and 'koi8_r' share the display name
# 'Cyrillic'; confirm whether the display name is expected to be unique.

$encodings::encodings = {

    # ISO 8859 family
    'iso_8859_1'  => {'name'    => 'Latin1 (western languages)',
                      'mapfile' => '8859_1.ump'},
    'iso_8859_2'  => {'name'    => 'Latin2 (central and eastern european languages)',
                      'mapfile' => '8859_2.ump'},
    'iso_8859_3'  => {'name' => 'Latin3',   'mapfile' => '8859_3.ump'},
    'iso_8859_4'  => {'name' => 'Latin4',   'mapfile' => '8859_4.ump'},
    'iso_8859_5'  => {'name' => 'Cyrillic', 'mapfile' => '8859_5.ump'},
    'iso_8859_6'  => {'name' => 'Arabic',   'mapfile' => '8859_6.ump'},
    'iso_8859_7'  => {'name' => 'Greek',    'mapfile' => '8859_7.ump'},
    'iso_8859_8'  => {'name' => 'Hebrew',   'mapfile' => '8859_8.ump'},
    'iso_8859_9'  => {'name' => 'Turkish',  'mapfile' => '8859_9.ump'},
    'iso_8859_15' => {'name'    => 'Latin15 (revised western)',
                      'mapfile' => '8859_15.ump'},

    # Windows codepages
    'windows_1250' => {'name'    => 'Windows codepage 1250 (WinLatin2)',
                       'mapfile' => 'win1250.ump'},
    'windows_1251' => {'name'    => 'Windows codepage 1251 (WinCyrillic)',
                       'mapfile' => 'win1251.ump'},
    'windows_1252' => {'name'    => 'Windows codepage 1252 (WinLatin1)',
                       'mapfile' => 'win1252.ump'},
    'windows_1253' => {'name'    => 'Windows codepage 1253 (WinGreek)',
                       'mapfile' => 'win1253.ump'},
    'windows_1254' => {'name'    => 'Windows codepage 1254 (WinTurkish)',
                       'mapfile' => 'win1254.ump'},
    'windows_1255' => {'name'    => 'Windows codepage 1255 (WinHebrew)',
                       'mapfile' => 'win1255.ump'},
    'windows_1256' => {'name'    => 'Windows codepage 1256 (WinArabic)',
                       'mapfile' => 'win1256.ump'},
    'windows_1257' => {'name'    => 'Windows codepage 1257 (WinBaltic)',
                       'mapfile' => 'win1257.ump'},
    'windows_1258' => {'name'    => 'Windows codepage 1258 (Vietnamese)',
                       'mapfile' => 'win1258.ump'},
    'windows_874'  => {'name'    => 'Windows codepage 874 (Thai)',
                       'mapfile' => 'win874.ump'},

    # DOS codepages
    'dos_437' => {'name'    => 'DOS codepage 437 (US English)',
                  'mapfile' => 'dos437.ump'},
    'dos_850' => {'name'    => 'DOS codepage 850 (Latin 1)',
                  'mapfile' => 'dos850.ump'},
    'dos_852' => {'name'    => 'DOS codepage 852 (Central European)',
                  'mapfile' => 'dos852.ump'},
    'dos_866' => {'name'    => 'DOS codepage 866 (Cyrillic)',
                  'mapfile' => 'dos866.ump'},

    # KOI8 and ISCII
    'koi8_r'   => {'name' => 'Cyrillic',             'mapfile' => 'koi8_r.ump'},
    'koi8_u'   => {'name' => 'Cyrillic (Ukrainian)', 'mapfile' => 'koi8_u.ump'},
    'iscii_de' => {'name' => 'ISCII Devanagari',     'mapfile' => 'iscii_de.ump'},

    # Japanese, Korean and Chinese
    'shift_jis' => {'name'      => 'Japanese (Shift-JIS)',
                    'mapfile'   => 'shiftjis.ump',
                    'converter' => 'shiftjis2unicode'},
    'euc_jp'    => {'name' => 'Japanese (EUC)', 'mapfile' => 'euc_jp.ump'},
    'korean'    => {'name'    => 'Korean (Unified Hangul Code - i.e. a superset of EUC-KR)',
                    'mapfile' => 'uhc.ump'},
    'gb'        => {'name' => 'Chinese Simplified (GB)',    'mapfile' => 'gbk.ump'},
    'big5'      => {'name' => 'Chinese Traditional (Big5)', 'mapfile' => 'big5.ump'},
};

# Explicit true value so that loading this file with require/use does not
# depend on what the final statement above happens to evaluate to.
1;
Note: See TracBrowser for help on using the repository browser.