Changeset 7818


Ignore:
Timestamp: 2004-07-23T12:58:37+12:00 (20 years ago)
Author: jrm21
Message:

improvements to the handling of textcat's guessed encoding

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r7668 r7818  
    680680    }
    681681
     682
     683    # check for equivalents where textcat doesn't have some encodings...
     684    # eg MS versions of standard encodings
     685    if ($encoding =~ /^iso_8859_(\d+)/) {
     686    my $iso = $1; # which variant of the iso standard?
     687    # iso-8859 sets don't use chars 0x80-0x9f, windows codepages do
     688    if ($text =~ /[\x80-\x9f]/) {
     689        # Western Europe
     690        if ($iso == 1 or $iso == 15) { $encoding = 'windows_1252' }
     691        elsif ($iso == 2) { $encoding = 'windows_1250' } # Central Europe
     692        elsif ($iso == 5) { $encoding = 'windows_1251' } # Cyrillic
     693        elsif ($iso == 6) { $encoding = 'windows_1256' } # Arabic
     694        elsif ($iso == 7) { $encoding = 'windows_1253' } # Greek
     695        elsif ($iso == 8) { $encoding = 'windows_1255' } # Hebrew
     696        elsif ($iso == 9) { $encoding = 'windows_1254' } # Turkish
     697    }
     698    }
     699
    682700    if ($encoding !~ /^(ascii|utf8|unicode)$/ &&
    683701    !defined $encodings::encodings->{$encoding}) {
    684702    if ($self->{'verbosity'}) {
    685         &gsprintf($outhandle, "BasPlug: {BasPlug.unsupported_encoding}\n", $filename, $encoding, $self->{'default_encoding'});
    686         # print $outhandle "BasPlug: WARNING: $filename appears to be encoded in an unsupported encoding ($encoding) - ";
    687         # print $outhandle "using $self->{'default_encoding'}\n";
     703        gsprintf($outhandle, "BasPlug: {BasPlug.unsupported_encoding}\n",
     704             $filename, $encoding, $self->{'default_encoding'});
    688705    }
    689706    $encoding = $self->{'default_encoding'};
Note: See TracChangeset for help on using the changeset viewer.