Ignore:
Timestamp:
2001-10-31T19:41:49+13:00 (23 years ago)
Author:
sjboddie
Message:

*** empty log message ***

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r2796 r2811  
    272 272    return undef;
    273 273    }
    274     my $plugin_name = ref ($self);
    275 274    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    276 275
    277     my ($language, $encoding);
    278     if ($self->{'input_encoding'} eq "auto") {
    279     # use textcat to automatically work out the input encoding and language
    280     ($language, $encoding) = $self->get_language_encoding ($filename);
    281 
    282     } elsif ($self->{'extract_language'}) {
    283     # use textcat to get language metadata
    284     ($language, $extracted_encoding) = $self->get_language_encoding ($filename);
    285     $encoding = $self->{'input_encoding'};
    286 
    287     if ($extracted_encoding ne $encoding && $self->{'verbosity'}) {
    288         print $outhandle "$plugin_name: WARNING: $file was read using $encoding encoding but ";
    289         print $outhandle "appears to be encoded as $extracted_encoding.\n";
    290     }
    291 
    292     } else {
    293     $language = $self->{'default_language'};
    294     $encoding = $self->{'input_encoding'};
    295     }
     276    # Do encoding stuff
     277    my ($language, $encoding) = $self->textcat_get_language_encoding ($filename);
    296 278
    297 279    # create a new document
     
    308 290
    309 291    if (!length ($text)) {
     292    my $plugin_name = ref ($self);
    310 293    print $outhandle "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'};
    311 294
     
    384 367
    385 368    close FILE;
     369 }
     370
     371 sub textcat_get_language_encoding {
     372    my $self = shift (@_);
     373    my ($filename) = @_;
     374
     375    my ($language, $encoding, $extracted_encoding);
     376    if ($self->{'input_encoding'} eq "auto") {
     377        # use textcat to automatically work out the input encoding and language
     378        ($language, $encoding) = $self->get_language_encoding ($filename);
     379    } elsif ($self->{'extract_language'}) {
     380        # use textcat to get language metadata
     381        ($language, $extracted_encoding) = $self->get_language_encoding ($filename);
     382        $encoding = $self->{'input_encoding'};
     383        if ($extracted_encoding ne $encoding && $self->{'verbosity'}) {
     384        my $plugin_name = ref ($self);
     385        my $outhandle = $self->{'outhandle'};
     386            print $outhandle "$plugin_name: WARNING: $filename was read using $encoding encoding but ";
     387            print $outhandle "appears to be encoded as $extracted_encoding.\n";
     388        }
     389    } else {
     390        $language = $self->{'default_language'};
     391        $encoding = $self->{'input_encoding'};
     392    }
     393    return ($language, $encoding);
    386 394 }
    387 395
Note: See TracChangeset for help on using the changeset viewer.