Changeset 1227 for trunk/gsdl/bin/script


Ignore:
Timestamp:
2000-06-21T15:58:49+12:00 (24 years ago)
Author:
sjboddie
Message:

Modified the Perl code for importing Arabic-encoded documents. Plugins
now support both a windows_1256 and an iso_8859_6 encoding. I was briefly
under the impression that these two encodings were similar enough to be
treated the same. It turns out they're not. It appears that the Windows
codepage 1256 is the most commonly used Arabic encoding, so "arabic" is a
synonym for windows_1256.

Location:
trunk/gsdl/bin/script
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/togb.pl

    r1226 r1227  
    3636if (!parsargv::parse(\@ARGV,
    3737             'unicode', \$unicode,
    38              'latin1', \$latin1,
     38             'iso_8859_1', \$iso_8859_1,
    3939             'gb', \$gb)) {
    4040    print STDERR "\n  usage: $0 [options]\n\n";
    4141    print STDERR "  options:\n";
    4242    print STDERR "   -unicode    input is in utf-8 or unicode (default)\n";
    43     print STDERR "   -latin1     input is in extended ascii (ISO-8859-1)\n";
     43    print STDERR "   -iso_8859_1 input is in extended ascii (ISO-8859-1 Latin 1)\n";
    4444    print STDERR "   -gb         input is in GB or GBK (simplified Chinese)\n\n";
    4545    die "\n";
    4646}
    4747
    4848$encoding = "utf8" if $unicode;
    49 $encoding = "latin1" if $latin1;
     49$encoding = "iso_8859_1" if $iso_8859_1;
    5050$encoding = "gb" if $gb;
    5151
  • trunk/gsdl/bin/script/touc.pl

    r1226 r1227  
    3636if (!parsargv::parse(\@ARGV,
    3737             'unicode', \$unicode,
    38              'latin1', \$latin1,
    39              'arabic', \$arabic,
     38             'iso_8859_1', \$iso_8859_1,
     39             'iso_8859_6', \$iso_8859_6,
     40             'windows_1256', \$windows_1256,
    4041             'gb', \$gb)) {
    4142    print STDERR "\n  usage: $0 [options]\n\n";
    4243    print STDERR "  options:\n";
    43     print STDERR "   -unicode    input is in utf-8 or unicode (default)\n";
    44     print STDERR "   -latin1     input is in extended ascii (ISO-8859-1)\n";
    45     print STDERR "   -arabic     input is in 8 bit Arabic (ISO-8859-6)\n";
    46     print STDERR "   -gb         input is in GB or GBK (simplified Chinese)\n\n";
     44    print STDERR "   -unicode      input is in utf-8 or unicode (default)\n";
     45    print STDERR "   -iso_8859_1   input is in extended ascii (ISO-8859-1 Latin 1)\n";
     46    print STDERR "   -iso_8859_6   input is in 8 bit Arabic (ISO-8859-6)\n";
     47    print STDERR "   -windows_1256 input is in Windows 1256 (Arabic)\n";
     48    print STDERR "   -gb           input is in GB or GBK (simplified Chinese)\n\n";
    4749    die "\n";
    4850}
    4951
    5052$encoding = "utf8" if $unicode;
    51 $encoding = "latin1" if $latin1;
    52 $encoding = "arabic" if $arabic;
     53$encoding = "iso_8859_1" if $iso_8859_1;
     54$encoding = "iso_8859_6" if $iso_8859_6;
     55$encoding = "windows_1256" if $windows_1256;
    5356$encoding = "gb" if $gb;
    5457
  • trunk/gsdl/bin/script/toutf8.pl

    r1226 r1227  
    3636if (!parsargv::parse(\@ARGV,
    3737             'unicode', \$unicode,
    38              'latin1', \$latin1,
    39              'arabic', \$arabic,
     38             'iso_8859_1', \$iso_8859_1,
     39             'iso_8859_6', \$iso_8859_6,
     40             'windows_1256', \$windows_1256,
    4041             'gb', \$gb)) {
    4142    print STDERR "\n  usage: $0 [options]\n\n";
    4243    print STDERR "  options:\n";
    43     print STDERR "   -unicode    input is in utf-8 or unicode (default)\n";
    44     print STDERR "   -latin1     input is in extended ascii (ISO-8859-1)\n";
    45     print STDERR "   -arabic     input is in 8 bit Arabic (ISO-8859-6)\n";
    46     print STDERR "   -gb         input is in GB or GBK (simplified Chinese)\n\n";
     44    print STDERR "   -unicode      input is in utf-8 or unicode (default)\n";
     45    print STDERR "   -iso_8859_1   input is in extended ascii (ISO-8859-1 Latin 1)\n";
     46    print STDERR "   -iso_8859_6   input is in 8 bit Arabic (ISO-8859-6)\n";
     47    print STDERR "   -windows_1256 input is in Windows 1256 (Arabic)\n";
     48    print STDERR "   -gb           input is in GB or GBK (simplified Chinese)\n\n";
    4749    die "\n";
    4850}
    4951
    5052$encoding = "utf8" if $unicode;
    51 $encoding = "latin1" if $latin1;
    52 $encoding = "arabic" if $arabic;
     53$encoding = "iso_8859_1" if $iso_8859_1;
     54$encoding = "iso_8859_6" if $iso_8859_6;
     55$encoding = "windows_1256" if $windows_1256;
    5356$encoding = "gb" if $gb;
    5457
Note: See TracChangeset for help on using the changeset viewer.