Changeset 33898

Show
Ignore:
Timestamp:
04.02.2020 12:59:00 (2 weeks ago)
Author:
kjdon
Message:

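format_metadata_for_sorting now takes two additional args, casefold and accentfold. When casefold is set to "true" the metadata value is lowercased; when accentfold is set to "true" the value is decomposed with NFKD and its nonspacing marks (accents) are stripped.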

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/sorttools.pm

    r33476 r33898  
    3333 
    3434use strict; 
     35use Unicode::Normalize; 
    3536 
    3637my $has_custom_sort = 0; 
     
    6566# moved here from BasClas so import can share it 
    6667sub format_metadata_for_sorting { 
    67     my ($metaname, $metavalue, $doc_obj) = @_; 
    68  
    69     if ($has_custom_sort && defined (&customsorttools::format_metadata_for_sorting)) { 
    70     return &customsorttools::format_metadata_for_sorting($metaname, $metavalue, $doc_obj); 
    71     } 
     68    my ($metaname, $metavalue, $doc_obj, $casefold, $accentfold) = @_; 
    7269     
    7370    if (!defined $metaname || $metaname !~ /\S/ || ! defined $metavalue || $metavalue !~ /\S/) { 
    7471    return ""; 
     72    } 
     73     
     74    if ($has_custom_sort && defined (&customsorttools::format_metadata_for_sorting)) { 
     75    return &customsorttools::format_metadata_for_sorting($metaname, $metavalue, $doc_obj, $casefold, $accentfold); 
    7576    } 
    7677 
     
    8586    } 
    8687    $lang = 'en' unless defined $lang; 
    87      
     88 
     89    if (defined $casefold && $casefold eq "true") { 
     90    $metavalue = lc($metavalue); 
     91    } 
     92    if (defined $accentfold && $accentfold eq "true") { 
     93    $metavalue = NFKD($metavalue); 
     94    $metavalue =~ s/\p{NonspacingMark}//g; 
     95    } 
     96 
    8897    # is this metadata likely to be a name? 
    8998    my $function_name="format_string_name_$lang"; 
     
    117126    } 
    118127     
    119     $$stringref = lc($$stringref); 
    120128    $$stringref =~ s/&[^\;]+\;//g; # html entities 
    121129    $$stringref =~ s/^\s*(the|a|an)\b//; # articles 
     
    133141    } 
    134142     
    135     $$stringref =~ tr/A-Z/a-z/; 
    136143    $$stringref =~ s/&\S+;//g; 
    137144 
     
    163170    } 
    164171 
    165     $$stringref = lc($$stringref); 
    166172    $$stringref =~ s/&[^\;]+\;//g; # html entities 
    167173    $$stringref =~ s/^\s*(les?|la|une?)\b//; # articles 
     
    179185    } 
    180186 
    181     $$stringref = lc($$stringref); 
    182187    $$stringref =~ s/&[^\;]+\;//g; # html entities 
    183188    $$stringref =~ s/^\s*(la|el)\b//; # articles