- Timestamp:
- 2000-07-13T10:21:53+12:00 (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/New_Config_Format-branch/gsdl/perllib/unicode.pm
 r537 r1279
   31    31   package unicode;
   32    32
   33       -
   34       -
   35       - # ascii2unicode takes a (extended) ascii string and
   36       - # returns a unicode array.
         33 + %translations = ();
         34 +
         35 + # ascii2unicode takes an (extended) ascii string (ISO-8859-1)
         36 + # and returns a unicode array.
   37    37   sub ascii2unicode {
   38    38       my ($in) = @_;
    …     …
   43    43       while ($i < $len) {
   44    44           push (@$out, ord(substr ($in, $i, 1)));
         45 +         $i++;
         46 +     }
         47 +
         48 +     return $out;
         49 + }
         50 +
         51 + # arabic2unicode takes an 8 bit Arabic string (ISO-8859-6)
         52 + # and returns a unicode array
         53 + sub arabic2unicode {
         54 +     my ($in) = @_;
         55 +     my $out = [];
         56 +
         57 +     my $i = 0;
         58 +     my $len = length($in);
         59 +     while ($i < $len) {
         60 +         my $c = ord(substr ($in, $i, 1));
         61 +         $c += (1567-191) if ($c >= 0x80);
         62 +         push (@$out, $c);
         63 +         $i++;
         64 +     }
         65 +
         66 +     return $out;
         67 + }
         68 +
         69 + # windows2unicode takes a windows encoding (e.g. Windows 1256 (Arabic))
         70 + # and returns a unicode array. These encodings are similar to but not
         71 + # identical to the corresponding ISO-8859 encodings.
         72 + #
         73 + # The map files for these encodings should be in unicode/MAPPINGS/WINDOWS
         74 + sub windows2unicode {
         75 +     my ($encoding, $in) = @_;
         76 +     my $out = [];
         77 +
         78 +     my $mapfile = &util::filename_cat($ENV{'GSDLHOME'}, "unicode", "MAPPINGS",
         79 +         "WINDOWS", "$encoding.TXT");
         80 +     return $out unless &loadmapping ($encoding, $mapfile);
         81 +
         82 +     my $i = 0;
         83 +     my $len = length($in);
         84 +     while ($i < $len) {
         85 +         my $c = ord(substr ($in, $i, 1));
         86 +         $c = $translations{"$encoding-unicode"}->{$c} if ($c >= 0x80);
         87 +         push (@$out, $c);
   45    88           $i++;
   46    89       }
    …     …
  193   236   }
  194   237
        238 + # loadmapping expects the mapfile to contain (at least) two
        239 + # tab-separated fields. The first field is the mapped value
        240 + # and the second field is the unicode value.
        241 + #
        242 + # It returns 1 if successful, 0 if unsuccessful
        243 + sub loadmapping {
        244 +     my ($encoding, $mapfile) = @_;
        245 +
        246 +     my $to = "$encoding-unicode";
        247 +     my $from = "unicode-$encoding";
        248 +
        249 +     # check to see if the encoding has already been loaded
        250 +     if (defined $translations{$to} && defined $translations{$from}) {
        251 +         return 1;
        252 +     }
        253 +
        254 +     if (!open (MAPFILE, $mapfile)) {
        255 +         print STDERR "ERROR: unable to load mapfile $mapfile\n";
        256 +         return 0;
        257 +     }
        258 +
        259 +     my ($line, @line);
        260 +     $translations{$to} = {};
        261 +     $translations{$from} = {};
        262 +     while (defined ($line = <MAPFILE>)) {
        263 +         # remove comments
        264 +         $line =~ s/\#.*$//;
        265 +         next unless $line =~ /\S/;
        266 +
        267 +         # split the line into fields and do a few
        268 +         # simple sanity checks
        269 +         @line = split (/\t/, $line);
        270 +         next unless (scalar(@line) >= 2 &&
        271 +             $line[0] =~ /^0x/ &&
        272 +             $line[1] =~ /^0x/);
        273 +
        274 +         my $a = hex($line[0]);
        275 +         my $b = hex($line[1]);
        276 +
        277 +         $translations{$to}->{$a} = $b;
        278 +         $translations{$from}->{$b} = $a;
        279 +     }
        280 +
        281 +     close (MAPFILE);
        282 +
        283 +     return 1;
        284 + }
        285 +
  195   286
  196   287   1;
  197       -
Note:
See TracChangeset
for help on using the changeset viewer.