Changeset 23387 for main/trunk/greenstone2/perllib/unicode.pm
- Timestamp: 2010-12-06T13:15:10+13:00 (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/perllib/unicode.pm
r23371  r23387      (columns: old line, new line; "-" = removed, "+" = added, "…" = lines omitted by the viewer)
   627     627
   628     628      sub url_decode {
   629           -      my ($text ) = @_;
           629   +      my ($text,$and_numeric_entities) = @_;
   630     630
   631     631          $text =~ s/\%([0-9A-F]{2})/pack('C', hex($1))/ige;
   632           -      $text =~ s/\&\#x([0-9A-F]+);/pack('C', hex($1))/ige;
   633           -      $text =~ s/\&\#([0-9]+);/pack('C', $1)/ige;
           632   +
           633   +      if ((defined $and_numeric_entities) && ($and_numeric_entities)) {
           634   +          $text =~ s/\&\#x([0-9A-F]+);/pack('C', hex($1))/ige;
           635   +          $text =~ s/\&\#([0-9]+);/pack('C', $1)/ige;
           636   +      }
   634     637
   635     638          return $text;
     …       …
   773     776      }
   774     777
   775           -
   776     778      sub url_encoded_to_raw_filename
   777     779      {
     …       …
   787     789      }
   788     790
           791   +
           792   +  sub raw_filename_to_utf8_url_encoded
           793   +  {
           794   +      my ($str_in) = @_;
           795   +
           796   +      $str_in = Encode::encode("utf8",$str_in) if !check_is_utf8($str_in);
           797   +
           798   +      my @url_encoded_chars
           799   +          = map { $_ > 128 ? # Representable in %XX form
           800   +                  sprintf("%%%2X", $_) :
           801   +                  chr($_) # otherwise, Ascii char
           802   +              } unpack("U*", $str_in); # Unpack utf8 characters
           803   +
           804   +
           805   +      my $str_out = join("", @url_encoded_chars);
           806   +
           807   +      return $str_out;
           808   +
           809   +  }
           810   +
           811   +  sub utf8_url_encoded_to_raw_filename
           812   +  {
           813   +      my ($str_in) = @_;
           814   +
           815   +      my $utf8_str_out = $str_in;
           816   +
           817   +      $utf8_str_out =~ s/%([0-9A-F]{2})/chr(hex($1))/eig;
           818   +
           819   +      my $unicode_str_out = decode("utf8",$utf8_str_out);
           820   +      my $raw_str_out = utf8::downgrade($unicode_str_out);
           821   +
           822   +      return $raw_str_out;
           823   +  }
           824   +
           825   +  sub analyze_raw_string
           826   +  {
           827   +      my ($str_in) = @_;
           828   +
           829   +      my $uses_bytecodes = 0;
           830   +      my $exceeds_bytecodes = 0;
           831   +
           832   +      map { $exceeds_bytecodes = 1 if ($_ >= 256);
           833   +            $uses_bytecodes = 1 if (($_ >= 128) && ($_ < 256));
           834   +          } unpack("U*", $str_in); # Unpack Unicode characters
           835   +
           836   +      return ($uses_bytecodes,$exceeds_bytecodes);
           837   +  }
           838   +
           839   +
   789     840      1;
Note: See TracChangeset for help on using the changeset viewer.