source: trunk/gsdl/bin/script/makemapfile.pl@ 1870

Last change on this file since 1870 was 1870, checked in by sjboddie, 23 years ago

Tidied up language support stuff.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.3 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# makemapfile.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2001 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28BEGIN {
29 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
30 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
31 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
32}
33
34use parsargv;
35use util;
36
37# %translations is of the form:
38#
39# encodings{encodingname-encodingname}->blocktranslation
40# blocktranslation->[[0-255],[256-511], ..., [65280-65535]]
41#
42# Any of the top translation blocks can point to an undefined
43# value. This data structure aims to allow fast translation and
44# efficient storage.
45%translations = ();
46
47# @array256 is used for initialisation, there must be
48# a better way...
49@array256 = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
50 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
51 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
52 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
53 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
54 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
55 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
56 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
57 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
58 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
59 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
60 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
61 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
62 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
63 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
64 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
65
66&main();
67
68sub print_usage {
69 print STDERR "\n usage: $0 [options]\n\n";
70 print STDERR " options:\n";
71 print STDERR " -encoding name\n";
72 print STDERR " -mapfile text file from which to create binary\n\n";
73}
74
75sub main {
76 if (!parsargv::parse(\@ARGV,
77 'encoding/.+', \$encoding,
78 'mapfile/.+', \$mapfile)) {
79 &print_usage();
80 die "\n";
81 }
82
83 if (!&loadencoding ($encoding, $mapfile)) {
84 die "couldn't load encoding $encoding";
85 }
86
87 # write out map files
88 &writemapfile ("$encoding-unicode", $encoding, 1);
89 &writemapfile ("unicode-$encoding", $encoding, 0);
90}
91
92sub writemapfile {
93 my ($encoding, $filename, $tounicode) = @_;
94
95 $filename .= ".ump"; # unicode map file
96 if ($tounicode) {
97 $filename = &util::filename_cat ($ENV{'GSDLHOME'}, "mappings", "to_uc", $filename);
98 } else {
99 $filename = &util::filename_cat ($ENV{'GSDLHOME'}, "mappings", "from_uc", $filename);
100 }
101
102 die "translation not defined" if (!defined $translations{$encoding});
103 my $block = $translations{$encoding};
104
105 print "writing $filename\n";
106 open (MAPFILE, ">" . $filename) || die;
107 binmode (MAPFILE);
108
109 my ($i, $j);
110 for ($i=0; $i<256; $i++) {
111 if (ref ($block->[$i]) eq "ARRAY") {
112 print MAPFILE pack ("C", $i);
113 for ($j=0; $j<256; $j++) {
114 # unsigned short in network order
115 print MAPFILE pack ("CC", int($block->[$i]->[$j] / 256),
116 $block->[$i]->[$j] % 256);
117 }
118 }
119 }
120 close (MAPFILE);
121}
122
123# loadencoding expects the mapfile to contain (at least) two
124# tab-separated fields. The first field is the mapped value
125# and the second field is the unicode value.
126#
127# It returns 1 if successful, 0 if unsuccessful
128sub loadencoding {
129 my ($encoding, $mapfile) = @_;
130
131 my $to = "$encoding-unicode";
132 my $from = "unicode-$encoding";
133
134 # check to see if the encoding has already been loaded
135 if (defined $translations{$to} && defined $translations{$from}) {
136 return 1;
137 }
138
139 return 0 unless open (MAPFILE, $mapfile);
140
141 my ($line, @line);
142 $translations{$to} = [@array256];
143 $translations{$from} = [@array256];
144 while (defined ($line = <MAPFILE>)) {
145 chomp $line;
146 # remove comments
147 $line =~ s/\#.*$//;
148 next unless $line =~ /\S/;
149
150 # split the line into fields and do a few
151 # simple sanity checks
152 @line = split (/\t/, $line);
153 next unless (scalar(@line) >= 2 &&
154 $line[0] =~ /^0x/ &&
155 $line[1] =~ /^0x/);
156
157 my $char = hex($line[0]);
158 my $unic = hex($line[1]);
159
160 # might need this for some versions of gb but not gbk
161# $char = $char | 0x8080 unless ($encoding =~ /gbk/i);
162
163 &addchartrans ($translations{$to}, $char, $unic);
164 &addchartrans ($translations{$from}, $unic, $char);
165 }
166
167 close (MAPFILE);
168
169 return 1;
170}
171
172# addchartrans adds one character translation to a translation block.
173# It also simplifies the translation block if possible.
174sub addchartrans {
175 my ($block, $from, $to) = @_;
176 my $i = 0;
177
178 my $high = ($from / 256) % 256;
179 my $low = $from % 256;
180
181 if (ref ($block->[$high]) ne "ARRAY") {
182 $block->[$high] = [@array256];
183 }
184 $block->[$high]->[$low] = $to;
185}
Note: See TracBrowser for help on using the repository browser.