########################################################################### # # AZList.pm -- # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### # classifier plugin for sorting alphabetically # options are: # metadata=Metaname -- all documents with Metaname metadata # will be included in list, list will be sorted # by this element. # title=Title -- (optional) the title field for this classification. # if not included title field will be Metaname. package AZList; use sorttools; sub new { my ($class, @options) = @_; my ($metaname, $title); foreach $option (@options) { if ($option =~ /^metadata=(.*)$/i) { $metaname = $1; } elsif ($option =~ /^title=(.*)$/i) { $title = $1; } } die "AZList used with no metadata name to classify by\n" unless defined $metaname; $title = $metaname unless defined $title; return bless { 'list'=>{}, 'metaname' => $metaname, 'title' => $title }, $class; } sub init { my $self = shift (@_); $self->{'list'} = {}; } sub classify { my $self = shift (@_); my ($doc_obj) = @_; my $doc_OID = $doc_obj->get_OID(); my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'metaname'}); # if this document doesn't contain the metadata element we're # sorting by we won't include it in this classification if (defined $metavalue && $metavalue ne "") { if ($self->{'metaname'} eq 'Creator') { &sorttools::format_string_name_english (\$metavalue); } else { &sorttools::format_string_english (\$metavalue); } if (defined $self->{'list'}->{$doc_OID}) { print STDERR "WARNING: AZList::classify called multiple times for $doc_OID\n"; } $self->{'list'}->{$doc_OID} = $metavalue; } } sub alpha_numeric_cmp { my ($self,$a,$b) = @_; my $title_a = $self->{'list'}->{$a}; my $title_b = $self->{'list'}->{$b}; if ($title_a =~ m/^(\d+(\.\d+)?)/) { my $val_a = $1; if ($title_b =~ m/^(\d+(\.\d+)?)/) { my $val_b = $1; if ($val_a != $val_b) { return ($val_a <=> $val_b); } } } return ($title_a cmp $title_b); } sub get_classify_info { my $self = shift (@_); my @classlist = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}}; return $self->splitlist (\@classlist); } sub get_entry { my $self = shift (@_); my ($title, $childtype, $thistype) = @_; # organise into classification structure my %classifyinfo = ('childtype'=>$childtype, 'Title'=>$title, 'contains'=>[]); $classifyinfo{'thistype'} = $thistype if defined $thistype && $thistype =~ /\w/; return \%classifyinfo; } # splitlist takes an ordered list of classifications (@$classlistref) and splits it # up into alphabetical sub-sections. sub splitlist { my $self = shift (@_); my ($classlistref) = @_; my $classhash = {}; # top level my $childtype = "HList"; if (scalar (@$classlistref) <= 39) {$childtype = "VList";} my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible"); # don't need to do any splitting if there are less than 39 (max + min -1) classifications if ((scalar @$classlistref) <= 39) { foreach $subOID (@$classlistref) { push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID}); } return $classifyinfo; } # first split up the list into separate A-Z and 0-9 classifications foreach $classification (@$classlistref) { my $title = $self->{'list'}->{$classification}; $title =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g; # remove any unwanted stuff $title =~ s/^(.).*$/$1/; $title =~ tr/[a-z]/[A-Z]/; if ($title =~ /^[0-9]$/) {$title = '0-9';} elsif ($title !~ /^[A-Z]$/) { print STDERR "AZList: WARNING $classification has badly formatted title ($title)\n"; } $classhash->{$title} = [] unless defined $classhash->{$title}; push (@{$classhash->{$title}}, $classification); } $classhash = $self->compactlist ($classhash); my @tmparr = (); foreach $subsection (sort keys (%$classhash)) { push (@tmparr, $subsection); } # if there's a 0-9 section it will have been sorted to the beginning # but we want it at the end if ($tmparr[0] eq '0-9') { shift @tmparr; push (@tmparr, '0-9'); } foreach $subclass (@tmparr) { my $tempclassify = $self->get_entry($subclass, "VList"); foreach $subsubOID (@{$classhash->{$subclass}}) { push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID}); } push (@{$classifyinfo->{'contains'}}, $tempclassify); } return $classifyinfo; } sub compactlist { my $self = shift (@_); my ($classhashref) = @_; my $compactedhash = {}; my @currentOIDs = (); my $currentfirstletter = ""; my $currentlastletter = ""; my $lastkey = ""; # minimum and maximum documents to be displayed per page. # the actual maximum will be max + (min-1). # the smallest sub-section is a single letter at present # so in this case there may be many times max documents # displayed on a page. my $min = 10; my $max = 30; foreach $subsection (sort keys %$classhashref) { if ($subsection eq '0-9') { @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}}; next; } $currentfirstletter = $subsection if $currentfirstletter eq ""; if ((scalar (@currentOIDs) < $min) || ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) { push (@currentOIDs, @{$classhashref->{$subsection}}); $currentlastletter = $subsection; } else { if ($currentfirstletter eq $currentlastletter) { @{$compactedhash->{$currentfirstletter}} = @currentOIDs; $lastkey = $currentfirstletter; } else { @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs; $lastkey = "$currentfirstletter-$currentlastletter"; } if (scalar (@{$classhashref->{$subsection}}) >= $max) { $compactedhash->{$subsection} = $classhashref->{$subsection}; @currentOIDs = (); $currentfirstletter = ""; $lastkey = $subsection; } else { @currentOIDs = @{$classhashref->{$subsection}}; $currentfirstletter = $subsection; $currentlastletter = $subsection; } } } # add final OIDs to last sub-classification if there aren't many otherwise # add final sub-classification if (scalar (@currentOIDs) < $min) { my ($newkey) = $lastkey =~ /^(.)/; @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs); delete $compactedhash->{$lastkey}; @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs; } else { if ($currentfirstletter eq $currentlastletter) { @{$compactedhash->{$currentfirstletter}} = @currentOIDs; } else { @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs; } } return $compactedhash; } 1;