Changeset 3731


Ignore:
Timestamp:
2003-02-13T17:42:57+13:00 (21 years ago)
Author:
jrm21
Message:

If textcat returns too many possibilities, use the default language but
choose textcat's most frequently guessed encoding instead of the default
encoding. Otherwise we might generate bad archive files that aren't UTF-8.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r3540 r3731  
    595595    # first one in the list - otherwise use the defaults
    596596    if (scalar @$results > 3) {
    597    
     597    # changed 12 Feb 2003 by jrm21
     598    # use the most popular encoding at least... otherwise we might
     599    # generate invalid archive files!
     600    my %guessed_encodings = ();
     601    foreach my $result (@$results) {
     602        $result =~ /([^\-]+)$/;
     603        my $enc=$1;
     604        if (!defined($guessed_encodings{$enc})) {
     605        $guessed_encodings{$enc}=0;
     606        }
     607        $guessed_encodings{$enc}++;
     608    }
     609    my $best_encoding="";
     610    $guessed_encodings{""}=-1;
     611    foreach my $enc (keys %guessed_encodings) {
     612        if ($guessed_encodings{$enc} > $guessed_encodings{$best_encoding}){
     613        $best_encoding=$enc;
     614        }
     615    }
     616
    598617    if ($self->{'input_encoding'} ne 'auto') {
    599618        if ($self->{'extract_language'} && $self->{'verbosity'}) {
     
    605624    } else {
    606625        if ($self->{'verbosity'}) {
    607         print $outhandle "BASPlug: WARNING: language/encoding could not be extracted from $filename - ";
    608         print $outhandle "defaulting to $self->{'default_language'}/$self->{'default_encoding'}\n";
     626        print $outhandle "BASPlug: WARNING: language could not be extracted from $filename - ";
     627        print $outhandle "defaulting to $self->{'default_language'}.\n";
    609628        }
    610         return ($self->{'default_language'}, $self->{'default_encoding'});
     629        return ($self->{'default_language'}, $best_encoding);
    611630    }
    612631    }
Note: See TracChangeset for help on using the changeset viewer.