Changeset 22857


Ignore:
Timestamp:
2010-09-06T19:08:11+12:00 (14 years ago)
Author:
davidb
Message:

Further adjustments to our reading in of text files/data to be Unicode aware. These changes address the issue of when the data being read in is in a different encoding. The key 'problem' was that the (now) Unicode-aware data was erroneously being re-encoded within HTMLPlugin.

Location:
main/trunk/greenstone2/perllib/plugins
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm

    r22842 r22857  
    12961296
    12971297    $$text =~ s/\s+/ /g; # remove \n's
     1298
     1299    # At this point $$text is a binary byte string
     1300    # => turn it into a Unicode aware string, so full
     1301    # Unicode aware pattern matching can be used.
     1302    # For instance: 's/\x{0101}//g' or '[[:upper:]]'
     1303    #
     1304
     1305    $$text = decode("utf8",$$text);
    12981306}
    12991307
  • main/trunk/greenstone2/perllib/plugins/MetadataXMLPlugin.pm

    r22853 r22857  
    8989use strict;
    9090no strict 'refs';
     91
     92use Encode;
     93
    9194use BasePlugin;
    9295use util;
     
    339342    }
    340343    elsif ($element eq "Metadata") {
    341     &metadatautil::store_saved_metadata($self,$self->{'metadata_name'}, $self->{'metadata_value'}, $self->{'metadata_accumulate'});
     344    my $metadata_name = decode("utf8",$self->{'metadata_name'});
     345    my $metadata_value = decode("utf8",$self->{'metadata_value'});
     346   
     347    &metadatautil::store_saved_metadata($self,
     348                        $metadata_name, $metadata_value,
     349                        $self->{'metadata_accumulate'});
    342350    $self->{'metadata_name'} = "";
    343351    }
  • main/trunk/greenstone2/perllib/plugins/ReadTextFile.pm

    r22844 r22857  
    222222    }
    223223
    224     # At this point $$testref is a binary byte string
     224    # At this point $$textref is a binary byte string
    225225    # => turn it into a Unicode aware string, so full
    226226    # Unicode aware pattern matching can be used.
     
    235235
    236236# Not currently used
    237 sub read_file_usingPerlsEncodeModule {
     237sub UNUSED_read_file_usingPerlsEncodeModule {
     238##sub read_file {
    238239    my $self = shift (@_);
    239240    my ($filename, $encoding, $language, $textref) = @_;
Note: See TracChangeset for help on using the changeset viewer.