source: main/trunk/greenstone2/bin/script/makemapfile.pl@ 22642

Last change on this file since 22642 was 17793, checked in by ak19, 15 years ago

Missing execute permission on some perl scripts

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# makemapfile.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2001 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28BEGIN {
29 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
30 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
31 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
32}
33
34use parsargv;
35use util;
36
37# %translations is of the form:
38#
39# encodings{encodingname-encodingname}->blocktranslation
40# blocktranslation->[[0-255],[256-511], ..., [65280-65535]]
41#
42# Any of the top translation blocks can point to an undefined
43# value. This data structure aims to allow fast translation and
44# efficient storage.
45%translations = ();
46
47# @array256 is used for initialisation, there must be
48# a better way...
49@array256 = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
50 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
51 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
52 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
53 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
54 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
55 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
56 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
57 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
58 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
59 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
60 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
61 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
62 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
63 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
64 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
65
66&main();
67
68sub print_usage {
69 print STDERR "\n";
70 print STDERR "makemapfile.pl: Creates unicode map (.ump) files from plain\n";
71 print STDERR " text code pages.\n\n";
72 print STDERR " usage: $0 [options]\n\n";
73 print STDERR " options:\n";
74 print STDERR " -encoding name\n";
75 print STDERR " -mapfile text file from which to create binary ump file\n\n";
76}
77
78sub main {
79 if (!parsargv::parse(\@ARGV,
80 'encoding/.+', \$encoding,
81 'mapfile/.+', \$mapfile)) {
82 &print_usage();
83 die "\n";
84 }
85
86 if (!&loadencoding ($encoding, $mapfile)) {
87 die "couldn't load encoding $encoding";
88 }
89
90 # write out map files
91 &writemapfile ("$encoding-unicode", $encoding, 1);
92 &writemapfile ("unicode-$encoding", $encoding, 0);
93}
94
95sub writemapfile {
96 my ($encoding, $filename, $tounicode) = @_;
97
98 $filename .= ".ump"; # unicode map file
99 if ($tounicode) {
100 $filename = &util::filename_cat ($ENV{'GSDLHOME'}, "mappings", "to_uc", $filename);
101 } else {
102 $filename = &util::filename_cat ($ENV{'GSDLHOME'}, "mappings", "from_uc", $filename);
103 }
104
105 die "translation not defined" if (!defined $translations{$encoding});
106 my $block = $translations{$encoding};
107
108 print "writing $filename\n";
109 open (MAPFILE, ">" . $filename) || die;
110 binmode (MAPFILE);
111
112 my ($i, $j);
113 for ($i=0; $i<256; $i++) {
114 if (ref ($block->[$i]) eq "ARRAY") {
115 print MAPFILE pack ("C", $i);
116 for ($j=0; $j<256; $j++) {
117 # unsigned short in network order
118 print MAPFILE pack ("CC", int($block->[$i]->[$j] / 256),
119 $block->[$i]->[$j] % 256);
120 }
121 }
122 }
123 close (MAPFILE);
124}
125
126# loadencoding expects the mapfile to contain (at least) two
127# tab-separated fields. The first field is the mapped value
128# and the second field is the unicode value.
129#
130# It returns 1 if successful, 0 if unsuccessful
131sub loadencoding {
132 my ($encoding, $mapfile) = @_;
133
134 my $to = "$encoding-unicode";
135 my $from = "unicode-$encoding";
136
137 # check to see if the encoding has already been loaded
138 if (defined $translations{$to} && defined $translations{$from}) {
139 return 1;
140 }
141
142 return 0 unless open (MAPFILE, $mapfile);
143
144 my ($line, @line);
145 $translations{$to} = [@array256];
146 $translations{$from} = [@array256];
147 while (defined ($line = <MAPFILE>)) {
148 chomp $line;
149 # remove comments
150 $line =~ s/\#.*$//;
151 next unless $line =~ /\S/;
152
153 # split the line into fields and do a few
154 # simple sanity checks
155 @line = split (/\t/, $line);
156 next unless (scalar(@line) >= 2 &&
157 $line[0] =~ /^0x/ &&
158 $line[1] =~ /^0x/);
159
160 my $char = hex($line[0]);
161 my $unic = hex($line[1]);
162
163 # might need this for some versions of gb but not gbk
164# $char = $char | 0x8080 unless ($encoding =~ /gbk/i);
165
166 &addchartrans ($translations{$to}, $char, $unic);
167 &addchartrans ($translations{$from}, $unic, $char);
168 }
169
170 close (MAPFILE);
171
172 return 1;
173}
174
175# addchartrans adds one character translation to a translation block.
176# It also simplifies the translation block if possible.
177sub addchartrans {
178 my ($block, $from, $to) = @_;
179 my $i = 0;
180
181 my $high = ($from / 256) % 256;
182 my $low = $from % 256;
183
184 if (ref ($block->[$high]) ne "ARRAY") {
185 $block->[$high] = [@array256];
186 }
187 $block->[$high]->[$low] = $to;
188}
Note: See TracBrowser for help on using the repository browser.