Changeset 22952


Ignore:
Timestamp:
2010-09-23T17:22:49+12:00 (12 years ago)
Author:
davidb
Message:

Encode::decode() cannot be applied to every value returned by ghtml::getcharequiv(). If getcharequiv() does not recognize an entity, it returns the original "&entity;" text without encoding it, so decode() must not be applied to that result. getcharequiv() has been upgraded to take an optional extra parameter ($and_decode) that says whether or not to decode the equivalent character when the entity can be mapped to one.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/ghtml.pm

    r22653 r22952  
    202202# & and ; have been stripped off the string.
    203203sub getcharequiv {
    204     my ($entity, $convertsymbols) = @_;
     204    my ($entity, $convertsymbols, $and_decode) = @_;
     205
     206    my $char_equiv = undef;
    205207
    206208    # a numeric entity
     
    214216        elsif ($code == 0x94) {$code=0x201d} # 148 = double right quote
    215217        # ...
    216     }
    217     return &unicode::unicode2utf8([$code]);
     218    }   
     219    $char_equiv = &unicode::unicode2utf8([$code]);
    218220    }
    219221   
    220222    # a named character entity
    221     if (defined $charnetosf{$entity}) {
    222     return &unicode::unicode2utf8([$charnetosf{$entity}]);
     223    elsif (defined $charnetosf{$entity}) {
     224    $char_equiv = &unicode::unicode2utf8([$charnetosf{$entity}]);
    223225    }
    224226
    225227    # a named symbol entity
    226     if ($convertsymbols && defined $symnetosf{$entity}) {
    227     return &unicode::unicode2utf8([$symnetosf{$entity}]);
    228     }
    229 
    230     return "&$entity;"; # unknown character
     228    elsif ($convertsymbols && defined $symnetosf{$entity}) {
     229    $char_equiv = &unicode::unicode2utf8([$symnetosf{$entity}]);
     230    }
     231
     232    if (!defined $char_equiv) {
     233    return "&$entity;"; # unknown character
     234    }
     235    else {
     236    if ((defined $and_decode) && ($and_decode)) {
     237        $char_equiv = Encode::decode("utf8",$char_equiv);
     238    }
     239    return $char_equiv;
     240    }
    231241}
    232242
Note: See TracChangeset for help on using the changeset viewer.