Changeset 23304 for main/trunk
- Timestamp:
- 2010-11-09T14:48:28+13:00 (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/unicode.pm
# main/trunk/greenstone2/perllib/unicode.pm, r23285 -> r23304 (changeset excerpt).
# Only the regions touched by the changeset are reproduced here; the
# changeset viewer elides everything else.

no strict 'refs';


# utf8decomp($str)
#
# Returns a printable, escaped rendering of $str for debugging:
#   - characters above 255 become \x{HHHH}
#   - control characters become \xHH
#   - everything else is passed through quotemeta()
# Returns nothing (undef in scalar context) when $str is undefined, and ""
# when $str is the empty string.
sub utf8decomp
{
    my ($str) = @_;

    return if (!defined $str);
    return "" if ($str eq "");

    # Walk the string one character at a time.  unpack("C*") was used here
    # before, but "C" yields octets: values above 255 could never be seen
    # (so the wide-character branch below was dead) and newer perls wrap
    # such characters modulo 256.  ord() gives the same values as "C" for
    # byte strings and the real ordinal for wide characters.
    my @char_codes = map { ord($_) } split(//, $str);

    my @each_char;
    foreach my $code (@char_codes) {
        if ($code > 255) {
            # wide character: \x{...}
            push(@each_char, sprintf("\\x{%04X}", $code));
        }
        elsif (chr($code) =~ m/[[:cntrl:]]/) {
            # control character: \x..
            push(@each_char, sprintf("\\x%02X", $code));
        }
        else {
            # quoted, or as itself
            push(@each_char, quotemeta(chr($code)));
        }
    }

    return join("", @each_char);
}


# hex_codepoint($char)
#
# Returns the code point of the first character of $char as lower-case
# hexadecimal, at least two digits wide.  Returns nothing when $char is
# undefined or empty.
sub hex_codepoint {
    my ($char) = @_;

    # Check for "no input" explicitly: a plain truth test would also
    # reject the perfectly valid character "0".
    return if (!defined $char || $char eq "");

    # Only the first unpacked value is formatted, as before.
    my ($codepoint) = unpack('U0U*', $char);

    return sprintf('%2.2x', $codepoint);
}


# ascii2unicode takes an (extended) ascii string (ISO-8859-1)

# ... r23285 lines 43-713 / r23304 lines 79-749 are not shown by the
# changeset viewer.  The excerpt resumes on the closing "}" of a sub
# (r23285:714 / r23304:750) whose body lies entirely in the elided region,
# so that sub is not reproduced here ...


# raw_filename_to_url_encoded($str_in)
#
# Percent-encodes a raw (byte) filename so it can be embedded in a URL:
# every byte >= 0x80 and every literal "%" is written as %HH (two
# upper-case hex digits); all other bytes are copied unchanged.
# The result can be turned back into the original bytes with
# url_encoded_to_raw_filename().
# Returns "" when $str_in is undefined.
sub raw_filename_to_url_encoded
{
    my ($str_in) = @_;

    return "" if (!defined $str_in);

    # Work on octets.  A string that perl holds as characters is narrowed
    # to bytes when every character fits in one byte; if it contains wide
    # characters it is converted to its UTF-8 byte sequence instead, so
    # every value below is guaranteed to fit in a single %HH escape.
    my $bytes = $str_in;
    if (!utf8::downgrade($bytes, 1)) {
        utf8::encode($bytes);
    }

    my $percent = ord("%");

    my @url_encoded_chars;
    foreach my $byte (unpack("C*", $bytes)) {
        if ($byte > 127 || $byte == $percent) {
            # "%" itself must be escaped, otherwise decoding would treat a
            # literal "%41" in a filename as an escape sequence.
            push(@url_encoded_chars, sprintf("%%%02X", $byte));
        }
        else {
            push(@url_encoded_chars, chr($byte));
        }
    }

    my $str_out = join("", @url_encoded_chars);

    return $str_out;
}


# url_encoded_to_raw_filename($str_in)
#
# Reverses percent-encoding: every %HH sequence (two hex digits, either
# case) in $str_in is replaced by the single character with that value.
# Anything else is left untouched.
sub url_encoded_to_raw_filename
{
    my ($str_in) = @_;

    my $str_out = $str_in;

    $str_out =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;

    return $str_out;
}

1;
Note:
See TracChangeset
for help on using the changeset viewer.