Changeset 33898 for main/trunk


Ignore:
Timestamp:
2020-02-04T12:59:00+13:00 (4 years ago)
Author:
kjdon
Message:

format_metadata_for_sorting now takes two additional args, casefold and accentfold. If casefold is set to "true", the metadata value is lowercased; if accentfold is set to "true", accents are stripped from the metadata value.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/sorttools.pm

    r33476 r33898  
    3333
    3434use strict;
     35use Unicode::Normalize;
    3536
    3637my $has_custom_sort = 0;
     
    6566# moved here from BasClas so import can share it
    6667sub format_metadata_for_sorting {
    67     my ($metaname, $metavalue, $doc_obj) = @_;
    68 
    69     if ($has_custom_sort && defined (&customsorttools::format_metadata_for_sorting)) {
    70     return &customsorttools::format_metadata_for_sorting($metaname, $metavalue, $doc_obj);
    71     }
     68    my ($metaname, $metavalue, $doc_obj, $casefold, $accentfold) = @_;
    7269   
    7370    if (!defined $metaname || $metaname !~ /\S/ || ! defined $metavalue || $metavalue !~ /\S/) {
    7471    return "";
     72    }
     73   
     74    if ($has_custom_sort && defined (&customsorttools::format_metadata_for_sorting)) {
     75    return &customsorttools::format_metadata_for_sorting($metaname, $metavalue, $doc_obj, $casefold, $accentfold);
    7576    }
    7677
     
    8586    }
    8687    $lang = 'en' unless defined $lang;
    87    
     88
     89    if (defined $casefold && $casefold eq "true") {
     90    $metavalue = lc($metavalue);
     91    }
     92    if (defined $accentfold && $accentfold eq "true") {
     93    $metavalue = NFKD($metavalue);
     94    $metavalue =~ s/\p{NonspacingMark}//g;
     95    }
     96
    8897    # is this metadata likely to be a name?
    8998    my $function_name="format_string_name_$lang";
     
    117126    }
    118127   
    119     $$stringref = lc($$stringref);
    120128    $$stringref =~ s/&[^\;]+\;//g; # html entities
    121129    $$stringref =~ s/^\s*(the|a|an)\b//; # articles
     
    133141    }
    134142   
    135     $$stringref =~ tr/A-Z/a-z/;
    136143    $$stringref =~ s/&\S+;//g;
    137144
     
    163170    }
    164171
    165     $$stringref = lc($$stringref);
    166172    $$stringref =~ s/&[^\;]+\;//g; # html entities
    167173    $$stringref =~ s/^\s*(les?|la|une?)\b//; # articles
     
    179185    }
    180186
    181     $$stringref = lc($$stringref);
    182187    $$stringref =~ s/&[^\;]+\;//g; # html entities
    183188    $$stringref =~ s/^\s*(la|el)\b//; # articles
Note: See TracChangeset for help on using the changeset viewer.