Changeset 22857

Show
Ignore:
Timestamp:
06.09.2010 19:08:11 (9 years ago)
Author:
davidb
Message:

Further adjustments to our reading in of text files/data to be Unicode aware. These changes address the issue of when the data being read in is in a different encoding. The key 'problem' was that the (now) Unicode-aware text was erroneously being re-encoded within HTMLPlugin.

Location:
main/trunk/greenstone2/perllib/plugins
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm

    r22842 r22857  
    12961296 
    12971297    $$text =~ s/\s+/ /g; # remove \n's 
     1298 
     1299    # At this point $$text is a binary byte string 
     1300    # => turn it into a Unicode aware string, so full 
     1301    # Unicode aware pattern matching can be used. 
     1302    # For instance: 's/\x{0101}//g' or '[[:upper:]]' 
     1303    #  
     1304 
     1305    $$text = decode("utf8",$$text); 
    12981306} 
    12991307 
  • main/trunk/greenstone2/perllib/plugins/MetadataXMLPlugin.pm

    r22853 r22857  
    8989use strict; 
    9090no strict 'refs'; 
     91 
     92use Encode; 
     93 
    9194use BasePlugin; 
    9295use util; 
     
    339342    } 
    340343    elsif ($element eq "Metadata") { 
    341     &metadatautil::store_saved_metadata($self,$self->{'metadata_name'}, $self->{'metadata_value'}, $self->{'metadata_accumulate'}); 
     344    my $metadata_name = decode("utf8",$self->{'metadata_name'}); 
     345    my $metadata_value = decode("utf8",$self->{'metadata_value'}); 
     346     
     347    &metadatautil::store_saved_metadata($self, 
     348                        $metadata_name, $metadata_value,  
     349                        $self->{'metadata_accumulate'}); 
    342350    $self->{'metadata_name'} = ""; 
    343351    } 
  • main/trunk/greenstone2/perllib/plugins/ReadTextFile.pm

    r22844 r22857  
    222222    } 
    223223 
    224     # At this point $$testref is a binary byte string 
     224    # At this point $$textref is a binary byte string 
    225225    # => turn it into a Unicode aware string, so full 
    226226    # Unicode aware pattern matching can be used. 
     
    235235 
    236236# Not currently used 
    237 sub read_file_usingPerlsEncodeModule { 
     237sub UNUSED_read_file_usingPerlsEncodeModule { 
     238##sub read_file { 
    238239    my $self = shift (@_); 
    239240    my ($filename, $encoding, $language, $textref) = @_;