source: trunk/gsdl/perllib/classify/AZList.pm@ 2022

Last change on this file since 2022 was 2022, checked in by sjboddie, 23 years ago

Caught some of the classifiers up with the documentation (finally). The
old "title" option has been replaced with the "buttonname" option.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.1 KB
Line 
1###########################################################################
2#
3# AZList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27
28package AZList;
29
30use BasClas;
31use sorttools;
32use iso639;
33
34sub BEGIN {
35 @ISA = ('BasClas');
36}
37
38sub print_usage {
39 print STDERR "
40 usage: classify AZList [options]
41 options:
42
43 -metadata X (required) Metadata field used for classification.
44 List will be sorted by this element.
45
46 -buttonname X (optional) Button name for this classification.
47 defaults to metadata name.
48";
49}
50
51sub new {
52 my $class = shift (@_);
53 my $self = new BasClas($class, @_);
54
55 my ($metaname, $title);
56
57 if (!parsargv::parse(\@_,
58 q^metadata/.*/^, \$metaname,
59 q^buttonname/.*/^, \$title,
60 "allow_extra_options")) {
61
62 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
63 &print_usage();
64 die "\n";
65 }
66
67 if (!defined $metaname) {
68 &print_usage;
69 print STDERR "AZList used with no metadata name\n";
70 die "\n";
71 }
72
73 $title = $metaname unless ($title);
74
75 $self->{'list'} = {};
76 $self->{'metaname'} = $metaname;
77 $self->{'title'} = $title;
78
79 return bless $self, $class;
80}
81
82sub init {
83 my $self = shift (@_);
84
85 $self->{'list'} = {};
86}
87
88sub classify {
89 my $self = shift (@_);
90 my ($doc_obj) = @_;
91
92 my $doc_OID = $doc_obj->get_OID();
93 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
94 $self->{'metaname'});
95
96 # if this document doesn't contain the metadata element we're
97 # sorting by we won't include it in this classification
98 if (defined $metavalue && $metavalue ne "") {
99 if ($self->{'metaname'} eq 'Language') {
100 $metavalue = $iso639::fromiso639{$metavalue};
101 } elsif ($self->{'metaname'} eq 'Creator') {
102 &sorttools::format_string_name_english (\$metavalue);
103 } else {
104 &sorttools::format_string_english (\$metavalue);
105 }
106 if (defined $self->{'list'}->{$doc_OID}) {
107 my $outhandle = $self->{'outhandle'};
108 print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
109 }
110 $self->{'list'}->{$doc_OID} = $metavalue;
111 }
112}
113
114sub alpha_numeric_cmp
115{
116 my ($self,$a,$b) = @_;
117
118 my $title_a = $self->{'list'}->{$a};
119 my $title_b = $self->{'list'}->{$b};
120
121 if ($title_a =~ m/^(\d+(\.\d+)?)/)
122 {
123 my $val_a = $1;
124 if ($title_b =~ m/^(\d+(\.\d+)?)/)
125 {
126 my $val_b = $1;
127 if ($val_a != $val_b)
128 {
129 return ($val_a <=> $val_b);
130 }
131 }
132 }
133
134 return ($title_a cmp $title_b);
135}
136
137sub get_classify_info {
138 my $self = shift (@_);
139
140 my @classlist
141 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
142
143 return $self->splitlist (\@classlist);
144}
145
146sub get_entry {
147 my $self = shift (@_);
148 my ($title, $childtype, $thistype) = @_;
149
150 # organise into classification structure
151 my %classifyinfo = ('childtype'=>$childtype,
152 'Title'=>$title,
153 'contains'=>[]);
154 $classifyinfo{'thistype'} = $thistype
155 if defined $thistype && $thistype =~ /\w/;
156
157 return \%classifyinfo;
158}
159
160# splitlist takes an ordered list of classifications (@$classlistref) and splits it
161# up into alphabetical sub-sections.
162sub splitlist {
163 my $self = shift (@_);
164 my ($classlistref) = @_;
165 my $classhash = {};
166
167 # top level
168 my $childtype = "HList";
169 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
170 my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
171
172 # don't need to do any splitting if there are less than 39 (max + min -1) classifications
173 if ((scalar @$classlistref) <= 39) {
174 foreach $subOID (@$classlistref) {
175 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
176 }
177 return $classifyinfo;
178 }
179
180 # first split up the list into separate A-Z and 0-9 classifications
181 foreach $classification (@$classlistref) {
182 my $title = $self->{'list'}->{$classification};
183
184 $title =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g; # remove any unwanted stuff
185 $title =~ s/^(.).*$/$1/;
186 $title =~ tr/[a-z]/[A-Z]/;
187 if ($title =~ /^[0-9]$/) {$title = '0-9';}
188 elsif ($title !~ /^[A-Z]$/) {
189 my $outhandle = $self->{'outhandle'};
190 print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
191 }
192 $classhash->{$title} = [] unless defined $classhash->{$title};
193 push (@{$classhash->{$title}}, $classification);
194 }
195 $classhash = $self->compactlist ($classhash);
196
197 my @tmparr = ();
198 foreach $subsection (sort keys (%$classhash)) {
199 push (@tmparr, $subsection);
200 }
201
202 # if there's a 0-9 section it will have been sorted to the beginning
203 # but we want it at the end
204 if ($tmparr[0] eq '0-9') {
205 shift @tmparr;
206 push (@tmparr, '0-9');
207 }
208
209 foreach $subclass (@tmparr) {
210 my $tempclassify = $self->get_entry($subclass, "VList");
211 foreach $subsubOID (@{$classhash->{$subclass}}) {
212 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
213 }
214 push (@{$classifyinfo->{'contains'}}, $tempclassify);
215 }
216
217 return $classifyinfo;
218}
219
220sub compactlist {
221 my $self = shift (@_);
222 my ($classhashref) = @_;
223 my $compactedhash = {};
224 my @currentOIDs = ();
225 my $currentfirstletter = "";
226 my $currentlastletter = "";
227 my $lastkey = "";
228
229 # minimum and maximum documents to be displayed per page.
230 # the actual maximum will be max + (min-1).
231 # the smallest sub-section is a single letter at present
232 # so in this case there may be many times max documents
233 # displayed on a page.
234 my $min = 10;
235 my $max = 30;
236
237 foreach $subsection (sort keys %$classhashref) {
238 if ($subsection eq '0-9') {
239 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
240 next;
241 }
242 $currentfirstletter = $subsection if $currentfirstletter eq "";
243 if ((scalar (@currentOIDs) < $min) ||
244 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
245 push (@currentOIDs, @{$classhashref->{$subsection}});
246 $currentlastletter = $subsection;
247 } else {
248
249 if ($currentfirstletter eq $currentlastletter) {
250 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
251 $lastkey = $currentfirstletter;
252 } else {
253 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
254 $lastkey = "$currentfirstletter-$currentlastletter";
255 }
256 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
257 $compactedhash->{$subsection} = $classhashref->{$subsection};
258 @currentOIDs = ();
259 $currentfirstletter = "";
260 $lastkey = $subsection;
261 } else {
262 @currentOIDs = @{$classhashref->{$subsection}};
263 $currentfirstletter = $subsection;
264 $currentlastletter = $subsection;
265 }
266 }
267 }
268
269 # add final OIDs to last sub-classification if there aren't many otherwise
270 # add final sub-classification
271 if (scalar (@currentOIDs) < $min) {
272 my ($newkey) = $lastkey =~ /^(.)/;
273 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
274 delete $compactedhash->{$lastkey};
275 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
276 } else {
277 if ($currentfirstletter eq $currentlastletter) {
278 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
279 } else {
280 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
281 }
282 }
283
284 return $compactedhash;
285}
286
2871;
Note: See TracBrowser for help on using the repository browser.