source: trunk/gsdl/perllib/classify/AZList.pm@ 2000

Last change on this file since 2000 was 1839, checked in by paynter, 23 years ago

Updated classifiers to use the parsearg library instead of ad-hoc
"x=y" style parsing. (Backwards compatability maintained through
a quick hack to the load_classifier function in classfy.pm.)

  • Property svn:keywords set to Author Date Id Revision
File size: 8.3 KB
Line 
1###########################################################################
2#
3# AZList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27# options are:
28# metadata=Metaname -- all documents with Metaname metadata
29# will be included in list, list will be sorted
30# by this element.
31# title=Title -- (optional) the title field for this classification.
32# if not included title field will be Metaname.
33
34package AZList;
35
36use BasClas;
37use sorttools;
38
39sub BEGIN {
40 @ISA = ('BasClas');
41}
42
43sub print_usage {
44 print STDERR "
45 usage: classify AZList [options]
46 options:
47
48 -metadata X (erquired) Metadata field used for classification.
49 List will be sorted by this element.
50
51 -title X (optional) Title field for this classification.
52 defaults to metadata name.
53";
54}
55
56sub new {
57 my $class = shift (@_);
58 my $self = new BasClas($class, @_);
59
60 my ($metaname, $title);
61
62 if (!parsargv::parse(\@_,
63 q^metadata/.*/^, \$metaname,
64 q^title/.*/^, \$title,
65 "allow_extra_options")) {
66
67 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
68 &print_usage();
69 die "\n";
70 }
71
72 if (!defined $metaname) {
73 &print_usage;
74 print STDERR "AZList used with no metadata name\n";
75 die "\n";
76 }
77
78 $title = $metaname unless ($title);
79
80 $self->{'list'} = {};
81 $self->{'metaname'} = $metaname;
82 $self->{'title'} = $title;
83
84 return bless $self, $class;
85}
86
87sub init {
88 my $self = shift (@_);
89
90 $self->{'list'} = {};
91}
92
93sub classify {
94 my $self = shift (@_);
95 my ($doc_obj) = @_;
96
97 my $doc_OID = $doc_obj->get_OID();
98 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
99 $self->{'metaname'});
100
101 # if this document doesn't contain the metadata element we're
102 # sorting by we won't include it in this classification
103 if (defined $metavalue && $metavalue ne "") {
104 if ($self->{'metaname'} eq 'Creator') {
105 &sorttools::format_string_name_english (\$metavalue);
106 } else {
107 &sorttools::format_string_english (\$metavalue);
108 }
109 if (defined $self->{'list'}->{$doc_OID}) {
110 my $outhandle = $self->{'outhandle'};
111 print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
112 }
113 $self->{'list'}->{$doc_OID} = $metavalue;
114 }
115}
116
117sub alpha_numeric_cmp
118{
119 my ($self,$a,$b) = @_;
120
121 my $title_a = $self->{'list'}->{$a};
122 my $title_b = $self->{'list'}->{$b};
123
124 if ($title_a =~ m/^(\d+(\.\d+)?)/)
125 {
126 my $val_a = $1;
127 if ($title_b =~ m/^(\d+(\.\d+)?)/)
128 {
129 my $val_b = $1;
130 if ($val_a != $val_b)
131 {
132 return ($val_a <=> $val_b);
133 }
134 }
135 }
136
137 return ($title_a cmp $title_b);
138}
139
140sub get_classify_info {
141 my $self = shift (@_);
142
143 my @classlist
144 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
145
146 return $self->splitlist (\@classlist);
147}
148
149sub get_entry {
150 my $self = shift (@_);
151 my ($title, $childtype, $thistype) = @_;
152
153 # organise into classification structure
154 my %classifyinfo = ('childtype'=>$childtype,
155 'Title'=>$title,
156 'contains'=>[]);
157 $classifyinfo{'thistype'} = $thistype
158 if defined $thistype && $thistype =~ /\w/;
159
160 return \%classifyinfo;
161}
162
163# splitlist takes an ordered list of classifications (@$classlistref) and splits it
164# up into alphabetical sub-sections.
165sub splitlist {
166 my $self = shift (@_);
167 my ($classlistref) = @_;
168 my $classhash = {};
169
170 # top level
171 my $childtype = "HList";
172 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
173 my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
174
175 # don't need to do any splitting if there are less than 39 (max + min -1) classifications
176 if ((scalar @$classlistref) <= 39) {
177 foreach $subOID (@$classlistref) {
178 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
179 }
180 return $classifyinfo;
181 }
182
183 # first split up the list into separate A-Z and 0-9 classifications
184 foreach $classification (@$classlistref) {
185 my $title = $self->{'list'}->{$classification};
186
187 $title =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g; # remove any unwanted stuff
188 $title =~ s/^(.).*$/$1/;
189 $title =~ tr/[a-z]/[A-Z]/;
190 if ($title =~ /^[0-9]$/) {$title = '0-9';}
191 elsif ($title !~ /^[A-Z]$/) {
192 my $outhandle = $self->{'outhandle'};
193 print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
194 }
195 $classhash->{$title} = [] unless defined $classhash->{$title};
196 push (@{$classhash->{$title}}, $classification);
197 }
198 $classhash = $self->compactlist ($classhash);
199
200 my @tmparr = ();
201 foreach $subsection (sort keys (%$classhash)) {
202 push (@tmparr, $subsection);
203 }
204
205 # if there's a 0-9 section it will have been sorted to the beginning
206 # but we want it at the end
207 if ($tmparr[0] eq '0-9') {
208 shift @tmparr;
209 push (@tmparr, '0-9');
210 }
211
212 foreach $subclass (@tmparr) {
213 my $tempclassify = $self->get_entry($subclass, "VList");
214 foreach $subsubOID (@{$classhash->{$subclass}}) {
215 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
216 }
217 push (@{$classifyinfo->{'contains'}}, $tempclassify);
218 }
219
220 return $classifyinfo;
221}
222
223sub compactlist {
224 my $self = shift (@_);
225 my ($classhashref) = @_;
226 my $compactedhash = {};
227 my @currentOIDs = ();
228 my $currentfirstletter = "";
229 my $currentlastletter = "";
230 my $lastkey = "";
231
232 # minimum and maximum documents to be displayed per page.
233 # the actual maximum will be max + (min-1).
234 # the smallest sub-section is a single letter at present
235 # so in this case there may be many times max documents
236 # displayed on a page.
237 my $min = 10;
238 my $max = 30;
239
240 foreach $subsection (sort keys %$classhashref) {
241 if ($subsection eq '0-9') {
242 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
243 next;
244 }
245 $currentfirstletter = $subsection if $currentfirstletter eq "";
246 if ((scalar (@currentOIDs) < $min) ||
247 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
248 push (@currentOIDs, @{$classhashref->{$subsection}});
249 $currentlastletter = $subsection;
250 } else {
251
252 if ($currentfirstletter eq $currentlastletter) {
253 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
254 $lastkey = $currentfirstletter;
255 } else {
256 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
257 $lastkey = "$currentfirstletter-$currentlastletter";
258 }
259 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
260 $compactedhash->{$subsection} = $classhashref->{$subsection};
261 @currentOIDs = ();
262 $currentfirstletter = "";
263 $lastkey = $subsection;
264 } else {
265 @currentOIDs = @{$classhashref->{$subsection}};
266 $currentfirstletter = $subsection;
267 $currentlastletter = $subsection;
268 }
269 }
270 }
271
272 # add final OIDs to last sub-classification if there aren't many otherwise
273 # add final sub-classification
274 if (scalar (@currentOIDs) < $min) {
275 my ($newkey) = $lastkey =~ /^(.)/;
276 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
277 delete $compactedhash->{$lastkey};
278 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
279 } else {
280 if ($currentfirstletter eq $currentlastletter) {
281 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
282 } else {
283 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
284 }
285 }
286
287 return $compactedhash;
288}
289
2901;
Note: See TracBrowser for help on using the repository browser.