Changeset 9581


Ignore:
Timestamp:
2005-04-07T14:02:48+12:00 (19 years ago)
Author:
jrm21
Message:

couple of extra languages for formatting

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/sorttools.pm

    r9576 r9581  
    6161}
    6262
    63 # format an english string for sorting
    64 # i.e. convert to lowercase, remove the, a or an
    65 # from beginning of string etc.
     63### language-specific sorting functions (called by format_metadata_for_sorting)
     64
     65## format_string_$lang() converts to lowercase (where appropriate), and
     66# removes punctuation, articles from the start of string, etc
     67## format_string_name_$lang() converts to lowercase, puts the surname first,
     68# removes punctuation, etc
     69
    6670sub format_string_en {
    6771    my $stringref = shift;
    6872
    69     $$stringref =~ tr/A-Z/a-z/;
    70     $$stringref =~ s/&[^\;]+\;//g;
    71     $$stringref =~ s/[^a-z0-9 ]//g;
    72     $$stringref =~ s/^\s*(the|a|an)\b//;
     73    $$stringref = lc($$stringref);
     74    $$stringref =~ s/&[^\;]+\;//g; # html entities
     75    $$stringref =~ s/[^[:alnum:]]//g;
     76    $$stringref =~ s/^\s*(the|a|an)\b//; # articles
    7377    $$stringref =~ s/\s+/ /g;
    7478    $$stringref =~ s/^\s+//;
     
    7680}
    7781
    78 # format an english name for sorting
    79 # i.e. convert to lowercase, put surname before
    80 # first names etc.   
    8182sub format_string_name_en {
    8283    my ($stringref) = @_;
     
    8687
    8788    my $comma_format = ($$stringref =~ m/^.+,.+$/);
    88 
    89     $$stringref =~ s/[^a-z0-9 ]//g;
     89    $$stringref =~ s/[[:punct:]]//g;
    9090    $$stringref =~ s/\s+/ /g;
    9191    $$stringref =~ s/^\s+//;
     
    103103    $$stringref = $surname . " " . $$stringref;
    104104    }
     105    print "name: $$stringref\n";
    105106}
     107
     108
     109sub format_string_fr {
     110    my $stringref = shift;
     111
     112    $$stringref = lc($$stringref);
     113    $$stringref =~ s/&[^\;]+\;//g; # html entities
     114    $$stringref =~ s/[^[:alpha:]]//g;
     115    $$stringref =~ s/^\s*(les?|la|une?)\b//; # articles
     116    $$stringref =~ s/\s+/ /g;
     117    $$stringref =~ s/^\s+//;
     118    $$stringref =~ s/\s+$//;
     119}
     120
     121sub format_string_es {
     122    my $stringref = shift;
     123
     124    $$stringref = lc($$stringref);
     125    $$stringref =~ s/&[^\;]+\;//g; # html entities
     126    $$stringref =~ s/[^[:alpha:]]//g;
     127    $$stringref =~ s/^\s*(la|el)\b//; # articles
     128    $$stringref =~ s/\s+/ /g;
     129    $$stringref =~ s/^\s+//;
     130    $$stringref =~ s/\s+$//;
     131}
     132
     133### end of language-specific functions
    106134
    107135# takes arguments of day, month, year and converts to
Note: See TracChangeset for help on using the changeset viewer.