Ignore:
Timestamp:
2010-11-19T13:29:29+13:00 (13 years ago)
Author:
davidb
Message:

Work done on improving handling of filenames when the actual filename encoding used is not necessarily known. Tested for Linux. Work currently includes some debug statements that will be removed once testing for Windows and Mac is done.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/ReadTextFile.pm

    r22857 r23335  
    128128
    129129    # Do encoding stuff
    130     my ($language, $encoding) = $self->textcat_get_language_encoding ($filename_full_path);
     130    my ($language, $content_encoding) = $self->textcat_get_language_encoding ($filename_full_path);
    131131    if ($self->{'verbosity'} > 2) {
    132     print $outhandle "ReadTextFile: reading $file as ($encoding,$language)\n";
     132    print $outhandle "ReadTextFile: reading $file as ($content_encoding,$language)\n";
    133133    }
    134134
     
    140140    $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");
    141141    $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename_full_path));
    142     $self->set_Source_metadata($doc_obj, $filename_no_path, $encoding);
     142
     143    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata);
     144    $self->set_Source_metadata($doc_obj, $filename_no_path, $filename_encoding);
    143145
    144146    $doc_obj->add_utf8_metadata($top_section, "Language", $language);
    145     $doc_obj->add_utf8_metadata($top_section, "Encoding", $encoding);
     147    $doc_obj->add_utf8_metadata($top_section, "Encoding", $content_encoding);
    146148   
    147149    # read in file ($text will be in utf8)
    148150    my $text = "";
    149     $self->read_file ($filename_full_path, $encoding, $language, \$text);
     151    $self->read_file ($filename_full_path, $content_encoding, $language, \$text);
    150152
    151153    if (!length ($text)) {
Note: See TracChangeset for help on using the changeset viewer.