########################################################################### # # sorttools.pm -- # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### # various subroutines to format strings # for sorting package sorttools; # format an english string for sorting # i.e. convert to lowercase, remove the, a or an # from beginning of string etc. sub format_string_english { my ($stringref) = @_; $$stringref =~ tr/A-Z/a-z/; $$stringref =~ s/&\S+;//g; $$stringref =~ s/[^a-z0-9 ]//g; $$stringref =~ s/^\s*(the|a|an)\b//; $$stringref =~ s/^\s+//; } # format an english name for sorting # i.e. convert to lowercase, put surname before # first names etc. sub format_string_name_english { my ($stringref) = @_; $$stringref =~ tr/A-Z/a-z/; $$stringref =~ s/&\S+;//g; $$stringref =~ s/[^a-z0-9 ]//g; $$stringref =~ s/\s+/ /g; $$stringref =~ s/^\s+//; my @names = split / /, $$stringref; my $surname = pop @names; while (scalar @names && $surname =~ /^(jnr|snr)$/i) { $surname = pop @names; } $$stringref = $surname . " " . $$stringref; } # takes arguments of day, month, year and converts to # date of form yyyymmdd. month may be full (e.g. "January"), # abbreviated (e.g. "Jan"), or a number (1-12). Years like "86" # will be assumed to be "1986". sub format_date { my ($day, $month, $year) = @_; my %months = ('january' => '01', 'jan' => '01', 'february' => '02', 'feb' => '02', 'march' => '03', 'mar' => '03', 'april' => '04', 'apr' => '04', 'may' => '05', 'june' => '06', 'jun' => '06', 'july' => '07', 'jul' => '07', 'august' => '08', 'aug' => '08', 'september' => '09', 'sep' => '09', 'october' => '10', 'oct' => '10', 'november' => '11', 'nov' => '11', 'december' => '12', 'dec' => '12'); $month =~ tr/A-Z/a-z/; if ($day < 1) { print STDERR "sorttools::format_date WARNING day $day out of range\n"; $day = "01"; } elsif ($day > 31) { print STDERR "sorttools::format_date WARNING day $day out of range\n"; $day = "31"; } $day = "0$day" if (length($day) == 1); if ($month =~ /^\d\d?$/) { if ($month < 1) { print STDERR "sorttools::format_date WARNING month $month out of range\n"; $month = "01"; } elsif ($month > 12) { print STDERR "sorttools::format_date WARNING month $month out of range\n"; $month = "12"; } if ($month =~ /^\d$/) { $month = "0" . $month; } } elsif (!defined $months{$month}) { print STDERR "sorttools::format_date WARNING month $month out of range\n"; $month = "01"; } else { $month = $months{$month}; } if ($year !~ /^\d\d\d\d$/) { if ($year !~ /^\d\d$/) { print STDERR "sorttools::format_date WARNING year $year assumed to be 19$year\n"; $year = "19" . $year; } else { print STDERR "sorttools::format_date WARNING year $year out of range - reset to 1900\n"; $year = "1900"; } } return "$year$month$day"; } 1;