source: trunk/gsdl/perllib/classify/AZList.pm@ 5646

Last change on this file since 5646 was 5646, checked in by mdewsnip, 21 years ago

Added fix to prevent bad "W-P" type hlist entries, many thanks to Don Gourley.

  • Property svn:keywords set to Author Date Id Revision
File size: 10.2 KB
Line 
1###########################################################################
2#
3# AZList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27
28package AZList;
29
30use BasClas;
31use sorttools;
32use iso639;
33
34sub BEGIN {
35 @ISA = ('BasClas');
36}
37
38my $arguments =
39 [ { 'name' => "metadata",
40 'desc' => "{AZList.metadata}",
41 'type' => "metadata",
42 'reqd' => "yes" } ,
43 { 'name' => "buttonname",
44 'desc' => "{AZList.buttonname}",
45 'type' => "string",
46 'deft' => "Metadata element specified with -metadata",
47 'reqd' => "no" },
48 { 'name' => "removeprefix",
49 'desc' => "{AZList.removeprefix}",
50 'type' => "string",
51 'deft' => "",
52 'reqd' => "no" } ,
53 { 'name' => "removesuffix",
54 'desc' => "{AZList.removesuffix}",
55 'type' => "string",
56 'deft' => "",
57 'reqd' => "no" }
58 ];
59
60my $options = { 'name' => "AZList",
61 'desc' => "{AZList.desc}",
62 'inherits' => "Yes",
63 'args' => $arguments };
64
65# sub print_usage {
66# print STDERR "
67# usage: classify AZList [options]
68# options:
69
70# -metadata X (required) Metadata field used for classification.
71# List will be sorted by this element.
72
73# -buttonname X (optional) Button name for this classification.
74# defaults to metadata name.
75
76# -removeprefix regex (optional) A prefix to ignore in the Metadata values
77# for the field when sorting.
78# ";
79# }
80
81sub new {
82 my $class = shift (@_);
83 my $self = new BasClas($class, @_);
84
85 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
86 my $option_list = $self->{'option_list'};
87 push( @{$option_list}, $options );
88
89 my ($metaname, $title, $removeprefix, $removesuffix);
90
91 if (!parsargv::parse(\@_,
92 q^metadata/.*/^, \$metaname,
93 q^buttonname/.*/^, \$title,
94 q^removeprefix/.*/^, \$removeprefix,
95 q^removesuffix/.*/^, \$removesuffix,
96 "allow_extra_options")) {
97
98 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
99 $self->print_txt_usage(""); # Use default resource bundle
100 die "\n";
101 }
102
103 if (!defined $metaname) {
104 $self->print_txt_usage(""); # Use default resource bundle
105 print STDERR "AZList used with no metadata name\n";
106 die "\n";
107 }
108
109 $title = $metaname unless ($title);
110
111 $self->{'list'} = {};
112 $self->{'metaname'} = $metaname;
113 $self->{'title'} = $title;
114 if (defined($removeprefix) && $removeprefix) {
115 $removeprefix =~ s/^\^//; # don't need a leading ^
116 $self->{'removeprefix'} = $removeprefix;
117 }
118 if (defined($removesuffix) && $removesuffix) {
119 $removesuffix =~ s/\$$//; # don't need a trailing $
120 $self->{'removesuffix'} = $removesuffix;
121 }
122
123 return bless $self, $class;
124}
125
126sub init {
127 my $self = shift (@_);
128
129 $self->{'list'} = {};
130}
131
132sub classify {
133 my $self = shift (@_);
134 my ($doc_obj) = @_;
135
136 my $doc_OID = $doc_obj->get_OID();
137 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
138 $self->{'metaname'});
139
140 # if this document doesn't contain the metadata element we're
141 # sorting by we won't include it in this classification
142 if (defined $metavalue && $metavalue ne "") {
143 if (defined($self->{'removeprefix'}) &&
144 length($self->{'removeprefix'})) {
145 $metavalue =~ s/^$self->{'removeprefix'}//;
146 }
147 if (defined($self->{'removesuffix'}) &&
148 length($self->{'removesuffix'})) {
149 $metavalue =~ s/$self->{'removesuffix'}$//;
150 }
151
152 if ($self->{'metaname'} eq 'Language') {
153 $metavalue = $iso639::fromiso639{$metavalue};
154 } elsif ($self->{'metaname'} eq 'Creator') {
155 &sorttools::format_string_name_english (\$metavalue);
156 } else {
157 &sorttools::format_string_english (\$metavalue);
158 }
159 if (defined $self->{'list'}->{$doc_OID}) {
160 my $outhandle = $self->{'outhandle'};
161 print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
162 }
163 if ($metavalue) {
164 $self->{'list'}->{$doc_OID} = $metavalue;
165 } else {
166 my $outhandle = $self->{'outhandle'};
167 print $outhandle "WARNING: AZList: $doc_OID metadata is empty - not classifying\n";
168 }
169 }
170}
171
172sub alpha_numeric_cmp
173{
174 my ($self,$a,$b) = @_;
175
176 my $title_a = $self->{'list'}->{$a};
177 my $title_b = $self->{'list'}->{$b};
178
179 if ($title_a =~ m/^(\d+(\.\d+)?)/)
180 {
181 my $val_a = $1;
182 if ($title_b =~ m/^(\d+(\.\d+)?)/)
183 {
184 my $val_b = $1;
185 if ($val_a != $val_b)
186 {
187 return ($val_a <=> $val_b);
188 }
189 }
190 }
191
192 return ($title_a cmp $title_b);
193}
194
195sub get_classify_info {
196 my $self = shift (@_);
197
198 my @classlist
199 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
200
201 return $self->splitlist (\@classlist);
202}
203
204sub get_entry {
205 my $self = shift (@_);
206 my ($title, $childtype, $thistype) = @_;
207
208 # organise into classification structure
209 my %classifyinfo = ('childtype'=>$childtype,
210 'Title'=>$title,
211 'contains'=>[]);
212 $classifyinfo{'thistype'} = $thistype
213 if defined $thistype && $thistype =~ /\w/;
214
215 return \%classifyinfo;
216}
217
218# splitlist takes an ordered list of classifications (@$classlistref) and splits it
219# up into alphabetical sub-sections.
220sub splitlist {
221 my $self = shift (@_);
222 my ($classlistref) = @_;
223 my $classhash = {};
224
225 # top level
226 my $childtype = "HList";
227 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
228 my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
229
230 # don't need to do any splitting if there are less than 39 (max + min -1) classifications
231 if ((scalar @$classlistref) <= 39) {
232 foreach $subOID (@$classlistref) {
233 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
234 }
235 return $classifyinfo;
236 }
237
238 # first split up the list into separate A-Z and 0-9 classifications
239 foreach $classification (@$classlistref) {
240 my $title = $self->{'list'}->{$classification};
241
242 $title =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g; # remove any unwanted stuff
243 # only need first char for classification
244 $title =~ m/^(.)/; $title=$1;
245 $title =~ tr/[a-z]/[A-Z]/;
246 if ($title =~ /^[0-9]$/) {$title = '0-9';}
247 elsif ($title !~ /^[A-Z]$/) {
248 my $outhandle = $self->{'outhandle'};
249 print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
250 }
251 $classhash->{$title} = [] unless defined $classhash->{$title};
252 push (@{$classhash->{$title}}, $classification);
253 }
254 $classhash = $self->compactlist ($classhash);
255
256 my @tmparr = ();
257 foreach $subsection (sort keys (%$classhash)) {
258 push (@tmparr, $subsection);
259 }
260
261 # if there's a 0-9 section it will have been sorted to the beginning
262 # but we want it at the end
263 if ($tmparr[0] eq '0-9') {
264 shift @tmparr;
265 push (@tmparr, '0-9');
266 }
267
268 foreach $subclass (@tmparr) {
269 my $tempclassify = $self->get_entry($subclass, "VList");
270 foreach $subsubOID (@{$classhash->{$subclass}}) {
271 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
272 }
273 push (@{$classifyinfo->{'contains'}}, $tempclassify);
274 }
275
276 return $classifyinfo;
277}
278
279sub compactlist {
280 my $self = shift (@_);
281 my ($classhashref) = @_;
282 my $compactedhash = {};
283 my @currentOIDs = ();
284 my $currentfirstletter = "";
285 my $currentlastletter = "";
286 my $lastkey = "";
287
288 # minimum and maximum documents to be displayed per page.
289 # the actual maximum will be max + (min-1).
290 # the smallest sub-section is a single letter at present
291 # so in this case there may be many times max documents
292 # displayed on a page.
293 my $min = 10;
294 my $max = 30;
295
296 foreach $subsection (sort keys %$classhashref) {
297 if ($subsection eq '0-9') {
298 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
299 next;
300 }
301 $currentfirstletter = $subsection if $currentfirstletter eq "";
302 if ((scalar (@currentOIDs) < $min) ||
303 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
304 push (@currentOIDs, @{$classhashref->{$subsection}});
305 $currentlastletter = $subsection;
306 } else {
307
308 if ($currentfirstletter eq $currentlastletter) {
309 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
310 $lastkey = $currentfirstletter;
311 } else {
312 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
313 $lastkey = "$currentfirstletter-$currentlastletter";
314 }
315 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
316 $compactedhash->{$subsection} = $classhashref->{$subsection};
317 @currentOIDs = ();
318 $currentfirstletter = "";
319 $lastkey = $subsection;
320 } else {
321 @currentOIDs = @{$classhashref->{$subsection}};
322 $currentfirstletter = $subsection;
323 $currentlastletter = $subsection;
324 }
325 }
326 }
327
328 # add final OIDs to last sub-classification if there aren't many otherwise
329 # add final sub-classification
330 # BUG FIX: don't add anything if there are no currentOIDs (thanks to Don Gourley)
331 if (@currentOIDS) {
332 if (scalar (@currentOIDs) < $min) {
333 my ($newkey) = $lastkey =~ /^(.)/;
334 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
335 delete $compactedhash->{$lastkey};
336 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
337 }
338 else {
339 if ($currentfirstletter eq $currentlastletter) {
340 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
341 }
342 else {
343 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
344 }
345 }
346 }
347
348 return $compactedhash;
349}
350
3511;
Note: See TracBrowser for help on using the repository browser.