Changeset 22952

Show
Ignore:
Timestamp:
23.09.2010 17:22:49 (9 years ago)
Author:
davidb
Message:

Encode::decode() cannot be applied to every value returned by ghtml::getcharequiv(). If getcharequiv() does not recognize a character entity, it returns it unencoded (as the literal "&entity;" text), so decode() must not be applied to that result. getcharequiv() has been upgraded to take an optional extra parameter that says whether or not to decode the equivalent character, which is only done when the entity can actually be mapped to one.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/ghtml.pm

    r22653 r22952  
    202202# & and ; have been stripped off the string. 
    203203sub getcharequiv { 
    204     my ($entity, $convertsymbols) = @_; 
     204    my ($entity, $convertsymbols, $and_decode) = @_; 
     205 
     206    my $char_equiv = undef; 
    205207 
    206208    # a numeric entity 
     
    214216        elsif ($code == 0x94) {$code=0x201d} # 148 = double right quote 
    215217        # ... 
    216     } 
    217     return &unicode::unicode2utf8([$code]); 
     218    }    
     219    $char_equiv = &unicode::unicode2utf8([$code]); 
    218220    } 
    219221     
    220222    # a named character entity 
    221     if (defined $charnetosf{$entity}) { 
    222     return &unicode::unicode2utf8([$charnetosf{$entity}]); 
     223    elsif (defined $charnetosf{$entity}) { 
     224    $char_equiv = &unicode::unicode2utf8([$charnetosf{$entity}]); 
    223225    } 
    224226 
    225227    # a named symbol entity 
    226     if ($convertsymbols && defined $symnetosf{$entity}) { 
    227     return &unicode::unicode2utf8([$symnetosf{$entity}]); 
    228     } 
    229  
    230     return "&$entity;"; # unknown character 
     228    elsif ($convertsymbols && defined $symnetosf{$entity}) { 
     229    $char_equiv = &unicode::unicode2utf8([$symnetosf{$entity}]); 
     230    } 
     231 
     232    if (!defined $char_equiv) { 
     233    return "&$entity;"; # unknown character 
     234    } 
     235    else { 
     236    if ((defined $and_decode) && ($and_decode)) { 
     237        $char_equiv = Encode::decode("utf8",$char_equiv); 
     238    } 
     239    return $char_equiv; 
     240    } 
    231241} 
    232242