Changeset 1870 for trunk/gsdl/bin/script


Ignore:
Timestamp:
2001-01-29T14:54:58+13:00 (23 years ago)
Author:
sjboddie
Message:

Tidied up language support stuff.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/makemapfile.pl

    r1868 r1870  
    2626###########################################################################
    2727
    28 # Creates a binary map file for use by complex character encodings
    29 # (e.g. CJK encodings like GBK and Shift-JIS). The map file is written to
    30 # the $GSDLHOME/unicode directory.
    31 
    3228BEGIN {
    3329    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
     
    3733
    3834use parsargv;
    39 use cjk;
     35use util;
     36
     37# %translations is of the form:
     38#
     39# encodings{encodingname-encodingname}->blocktranslation
     40# blocktranslation->[[0-255],[256-511], ..., [65280-65535]]
     41#
     42# Any of the top translation blocks can point to an undefined
     43# value. This data structure aims to allow fast translation and
     44# efficient storage.
     45%translations = ();
     46
     47# @array256 is used for initialisation, there must be
     48# a better way...
     49@array256 = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     50         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     51         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     52         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     53         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     54         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     55         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     56         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     57         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     58         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     59         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     60         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     61         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     62         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     63         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     64         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
     65
    4066&main();
    4167
     
    5581    }
    5682
    57     &cjk::makeencodingmapfile ($encoding, $mapfile);
     83    if (!&loadencoding ($encoding, $mapfile)) {
     84    die "couldn't load encoding $encoding";
     85    }
     86
     87    # write out map files
     88    &writemapfile ("$encoding-unicode", $encoding, 1);
     89    &writemapfile ("unicode-$encoding", $encoding, 0);
    5890}
     91
     92sub writemapfile {
     93    my ($encoding, $filename, $tounicode) = @_;
     94
     95    $filename .= ".ump"; # unicode map file
     96    if ($tounicode) {
     97    $filename = &util::filename_cat ($ENV{'GSDLHOME'}, "mappings", "to_uc", $filename);
     98    } else {
     99    $filename = &util::filename_cat ($ENV{'GSDLHOME'}, "mappings", "from_uc", $filename);
     100    }
     101
     102    die "translation not defined" if (!defined $translations{$encoding});
     103    my $block = $translations{$encoding};
     104
     105    print "writing $filename\n";
     106    open (MAPFILE, ">" . $filename) || die;
     107    binmode (MAPFILE);
     108
     109    my ($i, $j);
     110    for ($i=0; $i<256; $i++) {
     111    if (ref ($block->[$i]) eq "ARRAY") {
     112        print MAPFILE pack ("C", $i);
     113        for ($j=0; $j<256; $j++) {
     114        # unsigned short in network order
     115        print MAPFILE pack ("CC", int($block->[$i]->[$j] / 256),
     116                    $block->[$i]->[$j] % 256);
     117        }
     118    }
     119    }
     120    close (MAPFILE);
     121}
     122
     123# loadencoding expects the mapfile to contain (at least) two
     124# tab-separated fields. The first field is the mapped value
     125# and the second field is the unicode value.
     126#
     127# It returns 1 if successful, 0 if unsuccessful
     128sub loadencoding {
     129    my ($encoding, $mapfile) = @_;
     130   
     131    my $to = "$encoding-unicode";
     132    my $from = "unicode-$encoding";
     133
     134    # check to see if the encoding has already been loaded
     135    if (defined $translations{$to} && defined $translations{$from}) {
     136    return 1;
     137    }
     138
     139    return 0 unless open (MAPFILE, $mapfile);
     140
     141    my ($line, @line);
     142    $translations{$to} = [@array256];
     143    $translations{$from} = [@array256];
     144    while (defined ($line = <MAPFILE>)) {
     145    chomp $line;
     146    # remove comments
     147    $line =~ s/\#.*$//;
     148    next unless $line =~ /\S/;
     149
     150    # split the line into fields and do a few
     151    # simple sanity checks
     152    @line = split (/\t/, $line);
     153    next unless (scalar(@line) >= 2 &&
     154             $line[0] =~ /^0x/ &&
     155             $line[1] =~ /^0x/);
     156
     157    my $char = hex($line[0]);
     158    my $unic = hex($line[1]);
     159
     160    # might need this for some versions of gb but not gbk
     161#   $char = $char | 0x8080 unless ($encoding =~ /gbk/i);
     162
     163    &addchartrans ($translations{$to}, $char, $unic);
     164    &addchartrans ($translations{$from}, $unic, $char);
     165    }
     166
     167    close (MAPFILE);
     168
     169    return 1;
     170}
     171
     172# addchartrans adds one character translation to a translation block.
     173# It also simplifies the translation block if possible.
     174sub addchartrans {
     175    my ($block, $from, $to) = @_;
     176    my $i = 0;
     177
     178    my $high = ($from / 256) % 256;
     179    my $low = $from % 256;
     180
     181    if (ref ($block->[$high]) ne "ARRAY") {
     182    $block->[$high] = [@array256];
     183    }
     184    $block->[$high]->[$low] = $to;
     185}
Note: See TracChangeset for help on using the changeset viewer.