Ignore:
Timestamp:
2007-01-10T14:00:02+13:00 (17 years ago)
Author:
mdewsnip
Message:

Improvements to Unicode handling in preparation for adding the Unicode collation code. All metadata values are now converted from UTF-8 strings to Unicode strings, which means that the normal Perl length() and substr() functions work properly on them.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/classify/GenericList.pm

    r13340 r13550  
    233233            $metadata_value =~ s/^\s*//;
    234234            $metadata_value =~ s/\s*$//;
    235             push(@{$self->{$metadata_group . ".list"}->{$section_OID}}, $metadata_value);
     235
     236            # Convert the metadata value from a UTF-8 string to a Unicode string
     237            # This means that length() and substr() work properly
     238            # We need to be careful to convert classifier node title values back to UTF-8, however
     239            my $metadata_value_unicode_string = &convert_utf8_string_to_unicode_string($metadata_value);
     240
     241            # Add the metadata value into the list for this combination of metadata group and section
     242            push(@{$self->{$metadata_group . ".list"}->{$section_OID}}, $metadata_value_unicode_string);
    236243        }
    237244        last if (@metadata_values > 0);
     
    305312    my %metavaluetoOIDsubhash = ();
    306313
    307     my $lastpartition = &unicode::substr($sortedmetavalues[0], 0, 1);
     314    my $lastpartition = substr($sortedmetavalues[0], 0, 1);
    308315    foreach my $metavalue (@sortedmetavalues) {
    309         my $metavaluepartition = &unicode::substr($metavalue, 0, 1);
     316        my $metavaluepartition = substr($metavalue, 0, 1);
    310317
    311318        # Is this the start of a new partition?
     
    371378
    372379
    373 sub unicode_length
    374 {
    375     my $utf8string = shift(@_);
    376 
    377     my @unicodestring = @{&unicode::utf82unicode($utf8string)};
    378     return scalar(@unicodestring);
     380sub convert_utf8_string_to_unicode_string
     381{
     382    my $utf8_string = shift(@_);
     383
     384    my $unicode_string = "";
     385    foreach my $unicode_value (@{&unicode::utf82unicode($utf8_string)}) {
     386    $unicode_string .= chr($unicode_value);
     387    }
     388    return $unicode_string;
     389}
     390
     391
     392sub convert_unicode_string_to_utf8_string
     393{
     394    my $unicode_string = shift(@_);
     395
     396    my @unicode_array;
     397    for (my $i = 0; $i < length($unicode_string); $i++) {
     398    push(@unicode_array, ord(substr($unicode_string, $i, 1)));
     399    }
     400    return &unicode::unicode2utf8(\@unicode_array);
    379401}
    380402
     
    385407    my $lastpartitionend = shift(@_);
    386408
    387     my $partitionstart = &unicode::substr($metavalue, 0, 1);
     409    my $partitionstart = substr($metavalue, 0, 1);
    388410    if ($partitionstart le $lastpartitionend) {
    389     $partitionstart = &unicode::substr($metavalue, 0, 2);
     411    $partitionstart = substr($metavalue, 0, 2);
    390412    # Give up after three characters
    391413    if ($partitionstart le $lastpartitionend) {
    392         $partitionstart = &unicode::substr($metavalue, 0, 3);
     414        $partitionstart = substr($metavalue, 0, 3);
    393415    }
    394416    }
     
    403425    my $partitionstart = shift(@_);
    404426
    405     my $partitionend = &unicode::substr($metavalue, 0, &unicode_length($partitionstart));
     427    my $partitionend = substr($metavalue, 0, length($partitionstart));
    406428    if ($partitionend gt $partitionstart) {
    407     $partitionend = &unicode::substr($metavalue, 0, 1);
     429    $partitionend = substr($metavalue, 0, 1);
    408430    if ($partitionend le $partitionstart) {
    409         $partitionend = &unicode::substr($metavalue, 0, 2);
     431        $partitionend = substr($metavalue, 0, 2);
    410432        # Give up after three characters
    411433        if ($partitionend le $partitionstart) {
    412         $partitionend = &unicode::substr($metavalue, 0, 3);
     434        $partitionend = substr($metavalue, 0, 3);
    413435        }
    414436    }
     
    428450
    429451    # Create an hlist partition
    430     my %child_classifier_node = ( 'Title' => $partitionname,
     452    my %child_classifier_node = ( 'Title' => &convert_unicode_string_to_utf8_string($partitionname),
    431453                  'childtype' => "VList",
    432454                  'contains' => [] );
     
    471493    # Otherwise create a sublist (bookshelf) for the metadata value
    472494    else {
    473         my %child_classifier_node = ( 'Title' => $metavalue,
     495        my %child_classifier_node = ( 'Title' => &convert_unicode_string_to_utf8_string($metavalue),
    474496                      'childtype' => "VList",
    475497                      'contains' => [] );
Note: See TracChangeset for help on using the changeset viewer.