Changeset 9125


Ignore:
Timestamp:
2005-02-22T10:21:35+13:00 (19 years ago)
Author:
mdewsnip
Message:

Added a substr function to unicode.pm that should work correctly on Unicode strings. It might be slow, but it should be more reliable than "use utf8; substr(...)", which has never worked for me and causes problems on Windows with Perl 5.6.

Location:
trunk/gsdl/perllib
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/HTMLPlug.pm

    r9067 r9125  
    630630    $tmptext =~ s/\s+$//;
    631631    $tmptext =~ s/\s+/ /gs;
    632     # with perl 5.6 at least, substr might segment at a multi-byte char...
    633     use utf8;
    634     $tmptext = substr ($tmptext, 0, $size);
     632    $tmptext = &unicode::substr ($tmptext, 0, $size);
    635633    $tmptext =~ s/\s\S*$/…/; # adds an ellipse (...)
    636634    $doc_obj->add_utf8_metadata ($thissection, "First$size", $tmptext);
  • trunk/gsdl/perllib/unicode.pm

    r8903 r9125  
    527527}
    528528
     529
     530sub substr
     531{
     532    my ($utf8_string, $offset, $length) = @_;
     533
     534    my @unicode_string = @{&utf82unicode($utf8_string)};
     535    my $unicode_string_length = scalar(@unicode_string);
     536
     537    my $substr_start = $offset;
     538    if ($substr_start >= $unicode_string_length) {
     539    return "";
     540    }
     541
     542    my $substr_end = $offset + $length - 1;
     543    if ($substr_end >= $unicode_string_length) {
     544    $substr_end = $unicode_string_length - 1;
     545    }
     546
     547    my @unicode_substring = @unicode_string[$substr_start..$substr_end];
     548    return &unicode2utf8(\@unicode_substring);
     549}
     550
     551
    5295521;
Note: See TracChangeset for help on using the changeset viewer.