Changeset 2811 for trunk/gsdl/perllib/plugins/BasPlug.pm
- Timestamp: 2001-10-31T19:41:49+13:00 (23 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/BasPlug.pm
r2796 r2811   (rows with only an r2796 number and "-" were removed; rows with only an r2811 number and "+" were added)
  272   272           return undef;
  273   273       }
  274         -     my $plugin_name = ref ($self);
  275   274       $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
  276   275
  277         -     my ($language, $encoding);
  278         -     if ($self->{'input_encoding'} eq "auto") {
  279         -         # use textcat to automatically work out the input encoding and language
  280         -         ($language, $encoding) = $self->get_language_encoding ($filename);
  281         -
  282         -     } elsif ($self->{'extract_language'}) {
  283         -         # use textcat to get language metadata
  284         -         ($language, $extracted_encoding) = $self->get_language_encoding ($filename);
  285         -         $encoding = $self->{'input_encoding'};
  286         -
  287         -         if ($extracted_encoding ne $encoding && $self->{'verbosity'}) {
  288         -             print $outhandle "$plugin_name: WARNING: $file was read using $encoding encoding but ";
  289         -             print $outhandle "appears to be encoded as $extracted_encoding.\n";
  290         -         }
  291         -
  292         -     } else {
  293         -         $language = $self->{'default_language'};
  294         -         $encoding = $self->{'input_encoding'};
  295         -     }
          276 +     # Do encoding stuff
          277 +     my ($language, $encoding) = $self->textcat_get_language_encoding ($filename);
  296   278
  297   279       # create a new document
    …     …
  308   290
  309   291       if (!length ($text)) {
          292 +         my $plugin_name = ref ($self);
  310   293           print $outhandle "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'};
  311   294
    …     …
  384   367
  385   368       close FILE;
          369 + }
          370 +
          371 + sub textcat_get_language_encoding {
          372 +     my $self = shift (@_);
          373 +     my ($filename) = @_;
          374 +
          375 +     my ($language, $encoding, $extracted_encoding);
          376 +     if ($self->{'input_encoding'} eq "auto") {
          377 +         # use textcat to automatically work out the input encoding and language
          378 +         ($language, $encoding) = $self->get_language_encoding ($filename);
          379 +     } elsif ($self->{'extract_language'}) {
          380 +         # use textcat to get language metadata
          381 +         ($language, $extracted_encoding) = $self->get_language_encoding ($filename);
          382 +         $encoding = $self->{'input_encoding'};
          383 +         if ($extracted_encoding ne $encoding && $self->{'verbosity'}) {
          384 +             my $plugin_name = ref ($self);
          385 +             my $outhandle = $self->{'outhandle'};
          386 +             print $outhandle "$plugin_name: WARNING: $filename was read using $encoding encoding but ";
          387 +             print $outhandle "appears to be encoded as $extracted_encoding.\n";
          388 +         }
          389 +     } else {
          390 +         $language = $self->{'default_language'};
          391 +         $encoding = $self->{'input_encoding'};
          392 +     }
          393 +     return ($language, $encoding);
  386   394   }
  387   395
Note: See TracChangeset for help on using the changeset viewer.