source: trunk/gsdl/perllib/classify/AZList.pm@ 837

Last change on this file since 837 was 837, checked in by davidb, 24 years ago

added alpha_numeric search

  • Property svn:keywords set to Author Date Id Revision
File size: 7.7 KB
Line 
1###########################################################################
2#
3# AZList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27# options are:
28# metadata=Metaname -- all documents with Metaname metadata
29# will be included in list, list will be sorted
30# by this element.
31# title=Title -- (optional) the title field for this classification.
32# if not included title field will be Metaname.
33
34package AZList;
35
36use sorttools;
37
38sub new {
39 my ($class, @options) = @_;
40
41 my ($metaname, $title);
42
43 foreach $option (@options) {
44 if ($option =~ /^metadata=(.*)$/i) {
45 $metaname = $1;
46 } elsif ($option =~ /^title=(.*)$/i) {
47 $title = $1;
48 }
49 }
50
51 die "AZList used with no metadata name to classify by\n"
52 unless defined $metaname;
53 $title = $metaname unless defined $title;
54
55 return bless {
56 'list'=>{},
57 'metaname' => $metaname,
58 'title' => $title
59 }, $class;
60}
61
62sub init {
63 my $self = shift (@_);
64
65 $self->{'list'} = {};
66}
67
68sub classify {
69 my $self = shift (@_);
70 my ($doc_obj) = @_;
71
72 my $doc_OID = $doc_obj->get_OID();
73 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
74 $self->{'metaname'});
75
76 # if this document doesn't contain the metadata element we're
77 # sorting by we won't include it in this classification
78 if (defined $metavalue && $metavalue ne "") {
79 if ($self->{'metaname'} eq 'Creator') {
80 &sorttools::format_string_name_english (\$metavalue);
81 } else {
82 &sorttools::format_string_english (\$metavalue);
83 }
84 if (defined $self->{'list'}->{$doc_OID}) {
85 print STDERR "WARNING: AZList::classify called multiple times for $doc_OID\n";
86 }
87 $self->{'list'}->{$doc_OID} = $metavalue;
88 }
89}
90
91sub alpha_numeric_cmp
92{
93 my ($self,$a,$b) = @_;
94
95 my $title_a = $self->{'list'}->{$a};
96 my $title_b = $self->{'list'}->{$b};
97
98 if ($title_a =~ m/^(\d+(\.\d+)?)/)
99 {
100 my $val_a = $1;
101 if ($title_b =~ m/^(\d+(\.\d+)?)/)
102 {
103 my $val_b = $1;
104 if ($val_a != $val_b)
105 {
106 return ($val_a <=> $val_b);
107 }
108 }
109 }
110
111 return ($title_a cmp $title_b);
112}
113
114sub get_classify_info {
115 my $self = shift (@_);
116
117 my @classlist
118 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
119
120 return $self->splitlist (\@classlist);
121}
122
123sub get_entry {
124 my $self = shift (@_);
125 my ($title, $childtype, $thistype) = @_;
126
127 # organise into classification structure
128 my %classifyinfo = ('childtype'=>$childtype,
129 'Title'=>$title,
130 'contains'=>[]);
131 $classifyinfo{'thistype'} = $thistype
132 if defined $thistype && $thistype =~ /\w/;
133
134 return \%classifyinfo;
135}
136
137# splitlist takes an ordered list of classifications (@$classlistref) and splits it
138# up into alphabetical sub-sections.
139sub splitlist {
140 my $self = shift (@_);
141 my ($classlistref) = @_;
142 my $classhash = {};
143
144 # top level
145 my $childtype = "HList";
146 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
147 my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
148
149 # don't need to do any splitting if there are less than 39 (max + min -1) classifications
150 if ((scalar @$classlistref) <= 39) {
151 foreach $subOID (@$classlistref) {
152 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
153 }
154 return $classifyinfo;
155 }
156
157 # first split up the list into separate A-Z and 0-9 classifications
158 foreach $classification (@$classlistref) {
159 my $title = $self->{'list'}->{$classification};
160
161 $title =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g; # remove any unwanted stuff
162 $title =~ s/^(.).*$/$1/;
163 $title =~ tr/[a-z]/[A-Z]/;
164 if ($title =~ /^[0-9]$/) {$title = '0-9';}
165 elsif ($title !~ /^[A-Z]$/) {
166 print STDERR "AZList: WARNING $classification has badly formatted title ($title)\n";
167 }
168 $classhash->{$title} = [] unless defined $classhash->{$title};
169 push (@{$classhash->{$title}}, $classification);
170 }
171 $classhash = $self->compactlist ($classhash);
172
173 my @tmparr = ();
174 foreach $subsection (sort keys (%$classhash)) {
175 push (@tmparr, $subsection);
176 }
177
178 # if there's a 0-9 section it will have been sorted to the beginning
179 # but we want it at the end
180 if ($tmparr[0] eq '0-9') {
181 shift @tmparr;
182 push (@tmparr, '0-9');
183 }
184
185 foreach $subclass (@tmparr) {
186 my $tempclassify = $self->get_entry($subclass, "VList");
187 foreach $subsubOID (@{$classhash->{$subclass}}) {
188 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
189 }
190 push (@{$classifyinfo->{'contains'}}, $tempclassify);
191 }
192
193 return $classifyinfo;
194}
195
196sub compactlist {
197 my $self = shift (@_);
198 my ($classhashref) = @_;
199 my $compactedhash = {};
200 my @currentOIDs = ();
201 my $currentfirstletter = "";
202 my $currentlastletter = "";
203 my $lastkey = "";
204
205 # minimum and maximum documents to be displayed per page.
206 # the actual maximum will be max + (min-1).
207 # the smallest sub-section is a single letter at present
208 # so in this case there may be many times max documents
209 # displayed on a page.
210 my $min = 10;
211 my $max = 30;
212
213 foreach $subsection (sort keys %$classhashref) {
214 if ($subsection eq '0-9') {
215 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
216 next;
217 }
218 $currentfirstletter = $subsection if $currentfirstletter eq "";
219 if ((scalar (@currentOIDs) < $min) ||
220 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
221 push (@currentOIDs, @{$classhashref->{$subsection}});
222 $currentlastletter = $subsection;
223 } else {
224
225 if ($currentfirstletter eq $currentlastletter) {
226 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
227 $lastkey = $currentfirstletter;
228 } else {
229 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
230 $lastkey = "$currentfirstletter-$currentlastletter";
231 }
232 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
233 $compactedhash->{$subsection} = $classhashref->{$subsection};
234 @currentOIDs = ();
235 $currentfirstletter = "";
236 $lastkey = $subsection;
237 } else {
238 @currentOIDs = @{$classhashref->{$subsection}};
239 $currentfirstletter = $subsection;
240 $currentlastletter = $subsection;
241 }
242 }
243 }
244
245 # add final OIDs to last sub-classification if there aren't many otherwise
246 # add final sub-classification
247 if (scalar (@currentOIDs) < $min) {
248 my ($newkey) = $lastkey =~ /^(.)/;
249 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
250 delete $compactedhash->{$lastkey};
251 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
252 } else {
253 if ($currentfirstletter eq $currentlastletter) {
254 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
255 } else {
256 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
257 }
258 }
259
260 return $compactedhash;
261}
262
2631;
Note: See TracBrowser for help on using the repository browser.