Ignore:
Timestamp:
2014-02-12T18:03:06+13:00 (10 years ago)
Author:
ak19
Message:

A question on the mailing list involved accented characters in custom metadata set names (not metadata set values). This exposed an issue in Greenstone, which could not cope with UTF-8 characters in metadata set names. The cause was the 'use bytes;' line inside sub Char { ... }, which is called when reading XML. That line needed to be commented out in both MetadataXMLPlugin and ReadXMLFile (as GreenstoneXMLPlugin inherits from ReadXMLFile). Doing so showed that the extra Encode::decode() operations that decoded strings read in from XML into UTF-8 were no longer needed. As a result, MetadataXMLPlugin and GreenstoneXMLPlugin no longer call decode on the metadata name and value read in from XML, or on the full text, since GreenstoneXMLPlugin in its entirety no longer does the 'use bytes' part. Tested with text and HTML collections where the metadata set names created in custom .mds files, their assigned metadata values and a document's full text all contained the UTF-8-specific character a-macron.

Location:
main/trunk/greenstone2/perllib/plugins
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/GreenstoneXMLPlugin.pm

    r28267 r28836  
    224224
    225225
    226     my $metadata_name = decode("utf-8",$self->{'metadata_name'});
    227     my $metadata_value = decode("utf-8",$self->{'metadata_value'});
     226    my $metadata_name = $self->{'metadata_name'};
     227    my $metadata_value = $self->{'metadata_value'};
     228    #my $metadata_name = decode("utf-8",$self->{'metadata_name'});
     229    #my $metadata_value = decode("utf-8",$self->{'metadata_value'});
    228230
    229231    $self->{'doc_obj'}->add_utf8_metadata($self->{'section'},
     
    271273    # text read in by XML::Parser is in Perl's binary byte value
    272274    # form ... need to explicitly make it UTF-8
    273     my $content = decode("utf-8",$self->{'content'});
     275    #my $content = decode("utf-8",$self->{'content'});
     276    my $content = $self->{'content'};
    274277
    275278    $self->{'doc_obj'}->add_utf8_text($self->{'section'}, $content);
  • main/trunk/greenstone2/perllib/plugins/MetadataXMLPlugin.pm

    r28489 r28836  
    364364    # form ... need to explicitly make it UTF-8
    365365   
    366     my $metadata_name = decode("utf-8",$self->{'metadata_name'});
    367     my $metadata_value = decode("utf-8",$self->{'metadata_value'});
     366    my $metadata_name = $self->{'metadata_name'};
     367    my $metadata_value = $self->{'metadata_value'};
     368    #my $metadata_name = decode("utf-8",$self->{'metadata_name'});
     369    #my $metadata_value = decode("utf-8",$self->{'metadata_value'});
    368370   
    369371    &metadatautil::store_saved_metadata($self,
     
    392394# things down significantly in some cases.
    393395sub Char {
    394     use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+
     396#    use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+
    395397
    396398#    if ($]<5.008) {
  • main/trunk/greenstone2/perllib/plugins/ReadXMLFile.pm

    r24348 r28836  
    291291# things down significantly in some cases.
    292292sub Char {
    293     use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+
     293#    use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+
    294294    $_[0]->{'Text'} .= $_[1];
    295295    return undef;
Note: See TracChangeset for help on using the changeset viewer.