Changeset 23362 for main/trunk/greenstone2/perllib/unicode.pm
- Timestamp:
- 2010-12-01T11:40:36+13:00 (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/unicode.pm
r23304 r23362   (- removed, + added)
   619    619
   620    620     if (!&is_url_encoded($text)) {
   621          - $text =~ s/([^ A-Z0-9\ \.\-\_])/sprintf("%%%02X", ord($1))/iseg;
          621   + $text =~ s/([^0-9A-Z\ \.\-\_])/sprintf("%%%02X", ord($1))/iseg;
   622    622     # return the url-encoded character entity for underscore back to the entity
   623    623     $text =~ s/%26%23095%3B/&\#095;/g;
     …      …
   629    629     my ($text) = @_;
   630    630
   631          - $text =~ s/\%([A-F0-9]{2})/pack('C', hex($1))/ige;
          631   + $text =~ s/\%([0-9A-F]{2})/pack('C', hex($1))/ige;
          632   + $text =~ s/\&\#x([0-9A-F]+);/pack('C', hex($1))/ige;
          633   + $text =~ s/\&\#([0-9]+);/pack('C', $1)/ige;
          634   +
   632    635     return $text;
   633    636     }
     …      …
   635    638     sub is_url_encoded {
   636    639     my ($text) = @_;
   637          - return ($text =~ m/\%([ A-F0-9]{2})/);
          640   + return ($text =~ m/\%([0-9A-F]{2})/i) || ($text =~ m/\&\#x([0-9A-F]+;)/i) || ($text =~ m/\&\#([0-9]+;)/i);
   638    641     }
   639    642
     …      …
   756    759
   757    760     my @url_encoded_chars
   758          - = map { $_ > 128 ? # if wide character...
   759          - sprintf("%%%2X", $_) : # \x{...}
   760          - chr($_)
   761          - } unpack("U*", $str_in); # unpack Unicode characters
          761   + = map { $_ > 255 ? # Needs to be represent in entity form
          762   + sprintf("&#x%X;",$_) :
          763   + $_ > 128 ? # Representable in %XX form
          764   + sprintf("%%%2X", $_) :
          765   + chr($_) # otherwise, Ascii char
          766   + } unpack("U*", $str_in); # Unpack Unicode characters
   762    767
   763    768
     …      …
   775    780     my $str_out = $str_in;
   776    781
   777          - $str_out =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
          782   + $str_out =~ s/&#x([0-9A-F]+);/chr(hex($1))/eig;
          783   + $str_out =~ s/&#([0-9]+);/chr($1)/eig;
          784   + $str_out =~ s/%([0-9A-F]{2})/chr(hex($1))/eig;
   778    785
   779    786     return $str_out;
Note:
See TracChangeset
for help on using the changeset viewer.