Changeset 16553
- Timestamp:
- 2008-07-25T16:37:50+12:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/unicode.pm
# Returns true (1) if the given string is well-formed utf8 and false (0)
# if it isn't.  Does not modify the string parameter.
#
# An undefined value is reported as not utf8.  Characters outside the range
# \x80-\xff (plain ASCII included) are never inspected, so a string without
# any high bytes -- the empty string too -- is reported as utf8.
#
# Well-formedness follows RFC 3629: overlong encodings, UTF-16 surrogates
# (U+D800..U+DFFF), the obsolete 5 and 6 byte forms and anything above
# U+10FFFF are all rejected.
sub check_is_utf8 {
    my $value = shift;    # private copy; pos() is only ever set on the copy

    if (!defined($value)) {
        return 0;    # not utf8 because it is undefined
    }

    # Exactly one well-formed multi-byte sequence, anchored with \G at the
    # current pos() of the string being scanned.
    my $utf8_sequence = qr{
        \G (?:
            [\xc2-\xdf]          [\x80-\xbf]       # 2 byte, U+0080..U+07FF
          | \xe0                 [\xa0-\xbf] [\x80-\xbf]      # 3 byte, overlongs excluded
          | [\xe1-\xec\xee\xef]  [\x80-\xbf]{2}    # 3 byte
          | \xed                 [\x80-\x9f] [\x80-\xbf]      # 3 byte, surrogates excluded
          | \xf0                 [\x90-\xbf] [\x80-\xbf]{2}   # 4 byte, overlongs excluded
          | [\xf1-\xf3]          [\x80-\xbf]{3}    # 4 byte
          | \xf4                 [\x80-\x8f] [\x80-\xbf]{2}   # 4 byte, up to U+10FFFF
        )
    }x;

    # Visit every maximal run of high bytes; ASCII in between needs no check.
    while ($value =~ m/([\x80-\xff]+)/g) {
        my $highbytes = $1;

        # Consume well-formed sequences from the front of the run.  With /c
        # a failed match leaves pos() where the last good sequence ended.
        pos($highbytes) = 0;
        1 while $highbytes =~ /$utf8_sequence/gc;

        # Anything left over is a stray, truncated or forbidden byte.
        if (pos($highbytes) < length($highbytes)) {
            return 0;    # non-utf8 found
        }
    }

    return 1;
}
Note:
See TracChangeset
for help on using the changeset viewer.