###########################################################################
#
# DateList.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# classifier plugin for sorting by date

# no options - always sorts by 'Date' metadata
# date is assumed to be in the form yyyymmdd

# at present dates are split by year - this should change

package DateList;

use strict;
# NOTE(review): 'outhandle' is printed to in classify() and may be stored
# as a handle *name* (plain string) rather than a glob - that cannot be
# confirmed from this file, so symbolic references stay enabled.
no strict 'refs';

use BasClas;
use sorttools;

sub BEGIN {
    @DateList::ISA = ('BasClas');
}

# Print a short description of this classifier to STDERR.
sub print_usage {
    print STDERR "
  usage: classify DateList

  Classifier plugin for sorting by date.
  No options - always sorts by 'Date' metadata
  Date is assumed to be in the form yyyymmdd
  At present dates are split by year - this should change
  Any errors are Dana's problem.
";
}

# Constructor.  All arguments are handed on to the BasClas constructor;
# the object additionally gets an empty 'list' hash (document OID => date).
sub new {
    my $class = shift (@_);
    my $self = BasClas->new ($class, @_);

    $self->{'list'} = {};

    return bless $self, $class;
}

# Forget every document recorded so far.
sub init {
    my $self = shift (@_);

    $self->{'list'} = {};
}

# Record the 'Date' metadata of the top section of $doc_obj under the
# document's OID.  Documents whose Date is undefined or contains no digit
# are left out of the classification.
sub classify {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $doc_OID = $doc_obj->get_OID();
    my $date = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Date');

    # if this document doesn't contain Date element we won't
    # include it in this classification
    return unless defined $date && $date =~ /\d/;

    if (defined $self->{'list'}->{$doc_OID}) {
        # the later date silently replaces the earlier one after the warning
        my $outhandle = $self->{'outhandle'};
        print $outhandle "WARNING: DateList::classify called multiple times for $doc_OID\n";
    }
    $self->{'list'}->{$doc_OID} = $date;
}

# Return the classification structure for all recorded documents:
# OIDs are ordered by their date string and then handed to splitlist().
sub get_classify_info {
    my $self = shift (@_);

    my $dates = $self->{'list'};

    # String comparison is the right order for yyyymmdd dates.  Ties are
    # broken on the OID so the result does not depend on hash ordering.
    my @classlist = sort {
        $dates->{$a} cmp $dates->{$b} or $a cmp $b
    } keys %$dates;

    return $self->splitlist (\@classlist);
}

# Build one (still empty) node of the classification structure.
#   $title     - value stored under 'Title'
#   $childtype - value stored under 'childtype'
#   $thistype  - optional; stored under 'thistype' only when it contains
#                at least one word character
# Returns a hash reference with an empty 'contains' list.
sub get_entry {
    my $self = shift (@_);
    my ($title, $childtype, $thistype) = @_;

    # organise into classification structure
    my %classifyinfo = ('childtype' => $childtype,
                        'Title'     => $title,
                        'contains'  => []);
    $classifyinfo{'thistype'} = $thistype
        if defined $thistype && $thistype =~ /\w/;

    return \%classifyinfo;
}

# splitlist takes an ordered list of classifications (@$classlistref) and
# splits it up into sub-sections by date.
#
# With 20 or fewer documents the result is a single flat "DateList" node.
# Otherwise the top node is an "HList" whose children are "DateList" nodes,
# one per year or year range as produced by compactlist().
sub splitlist {
    my $self = shift (@_);
    my ($classlistref) = @_;

    # don't need to do any splitting if there are 20 or fewer classifications
    my $is_small = (scalar (@$classlistref) <= 20);

    # top level
    my $childtype = $is_small ? "DateList" : "HList";
    my $classifyinfo = $self->get_entry ("Date", $childtype, "Invisible");

    if ($is_small) {
        foreach my $subOID (@$classlistref) {
            push (@{$classifyinfo->{'contains'}}, {'OID' => $subOID});
        }
        return $classifyinfo;
    }

    # first split up the list into separate year classifications; a date
    # that does not start with four digits is used whole as its own key
    my $classhash = {};
    foreach my $classification (@$classlistref) {
        my $date = $self->{'list'}->{$classification};
        $date =~ s/^(\d\d\d\d).*$/$1/;
        $classhash->{$date} = [] unless defined $classhash->{$date};
        push (@{$classhash->{$date}}, $classification);
    }

    # merge sparsely populated years into ranges
    $classhash = $self->compactlist ($classhash);

    foreach my $subclass (sort keys %$classhash) {
        my $tempclassify = $self->get_entry ($subclass, "DateList");
        foreach my $subsubOID (@{$classhash->{$subclass}}) {
            push (@{$tempclassify->{'contains'}}, {'OID' => $subsubOID});
        }
        push (@{$classifyinfo->{'contains'}}, $tempclassify);
    }

    return $classifyinfo;
}

# Label for a group running from sub-section $first to sub-section $last:
# "$first" when both are the same, "$first-$last" otherwise.
sub _range_label {
    my ($first, $last) = @_;

    return $first if $first eq $last;
    return "$first-$last";
}

# compactlist merges neighbouring sub-sections (years) of $classhashref
# (sub-section name => reference to list of OIDs) into ranges so that
# pages are neither nearly empty nor overly long.
#
# Returns a reference to a new hash (label => reference to list of OIDs)
# where a label is either a single sub-section name or "first-last".
# An empty input gives an empty result.
sub compactlist {
    my $self = shift (@_);
    my ($classhashref) = @_;

    my $compactedhash = {};
    my @currentOIDs = ();      # documents of the group being built
    my $currentfirst = "";     # first sub-section of that group
    my $currentlast = "";      # last sub-section of that group
    my $lastkey = "";          # label most recently stored in the result

    # minimum and maximum documents to be displayed per page.
    # the actual maximum will be max + (min-1).
    # the smallest sub-section is a single year at present
    # so in this case there may be many times max documents
    # displayed on a page.
    my $min = 10;
    my $max = 30;

    foreach my $subsection (sort keys %$classhashref) {
        my $sectionOIDs = $classhashref->{$subsection};

        $currentfirst = $subsection if $currentfirst eq "";

        # keep growing the current group while it is still too small or
        # while this sub-section fits without exceeding the maximum
        if ((scalar (@currentOIDs) < $min) ||
            ((scalar (@currentOIDs) + scalar (@$sectionOIDs)) <= $max)) {
            push (@currentOIDs, @$sectionOIDs);
            $currentlast = $subsection;
            next;
        }

        # the current group is complete - store it
        $lastkey = _range_label ($currentfirst, $currentlast);
        $compactedhash->{$lastkey} = [@currentOIDs];

        if (scalar (@$sectionOIDs) >= $max) {
            # a sub-section this big gets a page of its own
            $compactedhash->{$subsection} = [@$sectionOIDs];
            @currentOIDs = ();
            $currentfirst = "";
            $lastkey = $subsection;
        } else {
            # this sub-section starts the next group
            @currentOIDs = @$sectionOIDs;
            $currentfirst = $subsection;
            $currentlast = $subsection;
        }
    }

    # Nothing pending (empty input, or the last sub-section was stored on
    # its own above).  Relabelling the previous entry here would use a
    # stale $currentlast and produce labels such as "1991-1990".
    return $compactedhash unless @currentOIDs;

    # add final OIDs to last sub-classification if there aren't many,
    # otherwise add final sub-classification
    if (scalar (@currentOIDs) < $min && $lastkey ne "") {
        my ($newkey) = $lastkey =~ /^(\d\d\d\d)/;
        # previous label does not start with a year: keep it whole
        $newkey = $lastkey unless defined $newkey;

        my $merged = [@{$compactedhash->{$lastkey}}, @currentOIDs];
        delete $compactedhash->{$lastkey};
        $compactedhash->{"$newkey-$currentlast"} = $merged;
    } else {
        # either a full-sized group, or a small one with no earlier entry
        # to merge into
        $compactedhash->{_range_label ($currentfirst, $currentlast)} = [@currentOIDs];
    }

    return $compactedhash;
}

1;