Changeset 1870 for trunk/gsdl/bin/script
- Timestamp:
- 2001-01-29T14:54:58+13:00 (23 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/makemapfile.pl
r1868 r1870 26 26 ########################################################################### 27 27 28 # Creates a binary map file for use by complex character encodings29 # (e.g. CJK encodings like GBK and Shift-JIS). The map file is written to30 # the $GSDLHOME/unicode directory.31 32 28 BEGIN { 33 29 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'}; … … 37 33 38 34 use parsargv; 39 use cjk; 35 use util; 36 37 # %translations is of the form: 38 # 39 # encodings{encodingname-encodingname}->blocktranslation 40 # blocktranslation->[[0-255],[256-511], ..., [65280-65535]] 41 # 42 # Any of the top translation blocks can point to an undefined 43 # value. This data structure aims to allow fast translation and 44 # efficient storage. 45 %translations = (); 46 47 # @array256 is used for initialisation, there must be 48 # a better way... 49 @array256 = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 50 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 51 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 52 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 53 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 54 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 55 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 56 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 57 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 58 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 59 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 60 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 61 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 62 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 63 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 64 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0); 65 40 66 &main(); 41 67 … … 55 81 } 56 82 57 &cjk::makeencodingmapfile ($encoding, $mapfile); 83 if (!&loadencoding ($encoding, $mapfile)) { 84 die "couldn't load encoding $encoding"; 85 } 86 87 # write out map files 88 &writemapfile ("$encoding-unicode", $encoding, 1); 89 &writemapfile ("unicode-$encoding", $encoding, 0); 58 90 } 91 92 sub writemapfile { 93 my ($encoding, $filename, $tounicode) = @_; 94 95 $filename .= ".ump"; # unicode map file 96 if ($tounicode) { 97 $filename = &util::filename_cat ($ENV{'GSDLHOME'}, "mappings", "to_uc", $filename); 98 } else { 99 $filename = &util::filename_cat ($ENV{'GSDLHOME'}, "mappings", "from_uc", $filename); 100 } 101 102 die "translation not defined" if (!defined $translations{$encoding}); 103 my $block = $translations{$encoding}; 104 105 print "writing $filename\n"; 106 open (MAPFILE, ">" . $filename) || die; 107 binmode (MAPFILE); 108 109 my ($i, $j); 110 for ($i=0; $i<256; $i++) { 111 if (ref ($block->[$i]) eq "ARRAY") { 112 print MAPFILE pack ("C", $i); 113 for ($j=0; $j<256; $j++) { 114 # unsigned short in network order 115 print MAPFILE pack ("CC", int($block->[$i]->[$j] / 256), 116 $block->[$i]->[$j] % 256); 117 } 118 } 119 } 120 close (MAPFILE); 121 } 122 123 # loadencoding expects the mapfile to contain (at least) two 124 # tab-separated fields. The first field is the mapped value 125 # and the second field is the unicode value. 126 # 127 # It returns 1 if successful, 0 if unsuccessful 128 sub loadencoding { 129 my ($encoding, $mapfile) = @_; 130 131 my $to = "$encoding-unicode"; 132 my $from = "unicode-$encoding"; 133 134 # check to see if the encoding has already been loaded 135 if (defined $translations{$to} && defined $translations{$from}) { 136 return 1; 137 } 138 139 return 0 unless open (MAPFILE, $mapfile); 140 141 my ($line, @line); 142 $translations{$to} = [@array256]; 143 $translations{$from} = [@array256]; 144 while (defined ($line = <MAPFILE>)) { 145 chomp $line; 146 # remove comments 147 $line =~ s/\#.*$//; 148 next unless $line =~ /\S/; 149 150 # split the line into fields and do a few 151 # simple sanity checks 152 @line = split (/\t/, $line); 153 next unless (scalar(@line) >= 2 && 154 $line[0] =~ /^0x/ && 155 $line[1] =~ /^0x/); 156 157 my $char = hex($line[0]); 158 my $unic = hex($line[1]); 159 160 # might need this for some versions of gb but not gbk 161 # $char = $char | 0x8080 unless ($encoding =~ /gbk/i); 162 163 &addchartrans ($translations{$to}, $char, $unic); 164 &addchartrans ($translations{$from}, $unic, $char); 165 } 166 167 close (MAPFILE); 168 169 return 1; 170 } 171 172 # addchartrans adds one character translation to a translation block. 173 # It also simplifies the translation block if possible. 174 sub addchartrans { 175 my ($block, $from, $to) = @_; 176 my $i = 0; 177 178 my $high = ($from / 256) % 256; 179 my $low = $from % 256; 180 181 if (ref ($block->[$high]) ne "ARRAY") { 182 $block->[$high] = [@array256]; 183 } 184 $block->[$high]->[$low] = $to; 185 }
Note:
See TracChangeset
for help on using the changeset viewer.