source: trunk/gsdl/perllib/classify/AZList.pm@ 3109

Last change on this file since 3109 was 3109, checked in by jrm21, 22 years ago

When getting first char for classification, s/(.).*$/$1/g isn't good enough
as problems occur if the title is eg "Line 1\nLine 2" - we get a
"first letter" of "LLine 2" !!! Do m/
(.)/ ; $title=$1 instead, so we
are absolutely sure that the classification is only one character long.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.5 KB
Line 
1###########################################################################
2#
3# AZList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27
28package AZList;
29
30use BasClas;
31use sorttools;
32use iso639;
33
34sub BEGIN {
35 @ISA = ('BasClas');
36}
37
38sub print_usage {
39 print STDERR "
40 usage: classify AZList [options]
41 options:
42
43 -metadata X (required) Metadata field used for classification.
44 List will be sorted by this element.
45
46 -buttonname X (optional) Button name for this classification.
47 defaults to metadata name.
48
49 -removeprefix regex (optional) A prefix to ignore in the Metadata values
50 for the field when sorting.
51";
52}
53
54sub new {
55 my $class = shift (@_);
56 my $self = new BasClas($class, @_);
57
58 my ($metaname, $title, $removeprefix);
59
60 if (!parsargv::parse(\@_,
61 q^metadata/.*/^, \$metaname,
62 q^buttonname/.*/^, \$title,
63 q^removeprefix/.*/^, \$removeprefix,
64 "allow_extra_options")) {
65
66 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
67 &print_usage();
68 die "\n";
69 }
70
71 if (!defined $metaname) {
72 &print_usage;
73 print STDERR "AZList used with no metadata name\n";
74 die "\n";
75 }
76
77 $title = $metaname unless ($title);
78
79 $self->{'list'} = {};
80 $self->{'metaname'} = $metaname;
81 $self->{'title'} = $title;
82 if (defined($removeprefix) && $removeprefix) {
83 $self->{'removeprefix'} = $removeprefix;
84 }
85
86 return bless $self, $class;
87}
88
89sub init {
90 my $self = shift (@_);
91
92 $self->{'list'} = {};
93}
94
95sub classify {
96 my $self = shift (@_);
97 my ($doc_obj) = @_;
98
99 my $doc_OID = $doc_obj->get_OID();
100 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
101 $self->{'metaname'});
102
103 # if this document doesn't contain the metadata element we're
104 # sorting by we won't include it in this classification
105 if (defined $metavalue && $metavalue ne "") {
106 if ($self->{'removeprefix'}) {
107 $metavalue =~ s/^$self->{'removeprefix'}//;
108 }
109
110 if ($self->{'metaname'} eq 'Language') {
111 $metavalue = $iso639::fromiso639{$metavalue};
112 } elsif ($self->{'metaname'} eq 'Creator') {
113 &sorttools::format_string_name_english (\$metavalue);
114 } else {
115 &sorttools::format_string_english (\$metavalue);
116 }
117 if (defined $self->{'list'}->{$doc_OID}) {
118 my $outhandle = $self->{'outhandle'};
119 print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
120 }
121 $self->{'list'}->{$doc_OID} = $metavalue;
122 }
123}
124
125sub alpha_numeric_cmp
126{
127 my ($self,$a,$b) = @_;
128
129 my $title_a = $self->{'list'}->{$a};
130 my $title_b = $self->{'list'}->{$b};
131
132 if ($title_a =~ m/^(\d+(\.\d+)?)/)
133 {
134 my $val_a = $1;
135 if ($title_b =~ m/^(\d+(\.\d+)?)/)
136 {
137 my $val_b = $1;
138 if ($val_a != $val_b)
139 {
140 return ($val_a <=> $val_b);
141 }
142 }
143 }
144
145 return ($title_a cmp $title_b);
146}
147
148sub get_classify_info {
149 my $self = shift (@_);
150
151 my @classlist
152 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
153
154 return $self->splitlist (\@classlist);
155}
156
157sub get_entry {
158 my $self = shift (@_);
159 my ($title, $childtype, $thistype) = @_;
160
161 # organise into classification structure
162 my %classifyinfo = ('childtype'=>$childtype,
163 'Title'=>$title,
164 'contains'=>[]);
165 $classifyinfo{'thistype'} = $thistype
166 if defined $thistype && $thistype =~ /\w/;
167
168 return \%classifyinfo;
169}
170
171# splitlist takes an ordered list of classifications (@$classlistref) and splits it
172# up into alphabetical sub-sections.
173sub splitlist {
174 my $self = shift (@_);
175 my ($classlistref) = @_;
176 my $classhash = {};
177
178 # top level
179 my $childtype = "HList";
180 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
181 my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
182
183 # don't need to do any splitting if there are less than 39 (max + min -1) classifications
184 if ((scalar @$classlistref) <= 39) {
185 foreach $subOID (@$classlistref) {
186 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
187 }
188 return $classifyinfo;
189 }
190
191 # first split up the list into separate A-Z and 0-9 classifications
192 foreach $classification (@$classlistref) {
193 my $title = $self->{'list'}->{$classification};
194
195 $title =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g; # remove any unwanted stuff
196 # only need first char for classification
197 $title =~ m/^(.)/; $title=$1;
198 $title =~ tr/[a-z]/[A-Z]/;
199 if ($title =~ /^[0-9]$/) {$title = '0-9';}
200 elsif ($title !~ /^[A-Z]$/) {
201 my $outhandle = $self->{'outhandle'};
202 print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
203 }
204 $classhash->{$title} = [] unless defined $classhash->{$title};
205 push (@{$classhash->{$title}}, $classification);
206 }
207 $classhash = $self->compactlist ($classhash);
208
209 my @tmparr = ();
210 foreach $subsection (sort keys (%$classhash)) {
211 push (@tmparr, $subsection);
212 }
213
214 # if there's a 0-9 section it will have been sorted to the beginning
215 # but we want it at the end
216 if ($tmparr[0] eq '0-9') {
217 shift @tmparr;
218 push (@tmparr, '0-9');
219 }
220
221 foreach $subclass (@tmparr) {
222 my $tempclassify = $self->get_entry($subclass, "VList");
223 foreach $subsubOID (@{$classhash->{$subclass}}) {
224 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
225 }
226 push (@{$classifyinfo->{'contains'}}, $tempclassify);
227 }
228
229 return $classifyinfo;
230}
231
232sub compactlist {
233 my $self = shift (@_);
234 my ($classhashref) = @_;
235 my $compactedhash = {};
236 my @currentOIDs = ();
237 my $currentfirstletter = "";
238 my $currentlastletter = "";
239 my $lastkey = "";
240
241 # minimum and maximum documents to be displayed per page.
242 # the actual maximum will be max + (min-1).
243 # the smallest sub-section is a single letter at present
244 # so in this case there may be many times max documents
245 # displayed on a page.
246 my $min = 10;
247 my $max = 30;
248
249 foreach $subsection (sort keys %$classhashref) {
250 if ($subsection eq '0-9') {
251 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
252 next;
253 }
254 $currentfirstletter = $subsection if $currentfirstletter eq "";
255 if ((scalar (@currentOIDs) < $min) ||
256 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
257 push (@currentOIDs, @{$classhashref->{$subsection}});
258 $currentlastletter = $subsection;
259 } else {
260
261 if ($currentfirstletter eq $currentlastletter) {
262 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
263 $lastkey = $currentfirstletter;
264 } else {
265 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
266 $lastkey = "$currentfirstletter-$currentlastletter";
267 }
268 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
269 $compactedhash->{$subsection} = $classhashref->{$subsection};
270 @currentOIDs = ();
271 $currentfirstletter = "";
272 $lastkey = $subsection;
273 } else {
274 @currentOIDs = @{$classhashref->{$subsection}};
275 $currentfirstletter = $subsection;
276 $currentlastletter = $subsection;
277 }
278 }
279 }
280
281 # add final OIDs to last sub-classification if there aren't many otherwise
282 # add final sub-classification
283 if (scalar (@currentOIDs) < $min) {
284 my ($newkey) = $lastkey =~ /^(.)/;
285 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
286 delete $compactedhash->{$lastkey};
287 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
288 } else {
289 if ($currentfirstletter eq $currentlastletter) {
290 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
291 } else {
292 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
293 }
294 }
295
296 return $compactedhash;
297}
298
2991;
Note: See TracBrowser for help on using the repository browser.