source: trunk/gsdl/perllib/classify/AZList.pm@ 3719

Last change on this file since 3719 was 3719, checked in by sjboddie, 21 years ago

Prevent dodgy format_string_english() functions from destroying any
non-english text that might come by.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.8 KB
Line 
1###########################################################################
2#
3# AZList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27
28package AZList;
29
30use BasClas;
31use sorttools;
32use iso639;
33
34sub BEGIN {
35 @ISA = ('BasClas');
36}
37
38my $arguments = [ { 'name' => "metadata",
39 'desc' => "Metadata field used for classification. List will be sorted by this element.",
40 'type' => "metadata",
41 'reqd' => "yes" } ,
42 { 'name' => "buttonname",
43 'desc' => "Button name for this classification. Defaults to metadata name.",
44 'type' => "string",
45 'reqd' => "no" } ,
46 { 'name' => "removeprefix",
47 'desc' => "A prefix to ignore in the Metadata values for the field when sorting.",
48 'type' => "string",
49 'reqd' => "no" } ];
50
51my $options = { 'name' => "AZList",
52 'desc' => "Classifier plugin for sorting alphabetically",
53 'inherits' => "Yes",
54 'args' => $arguments };
55
56sub print_usage {
57 print STDERR "
58 usage: classify AZList [options]
59 options:
60
61 -metadata X (required) Metadata field used for classification.
62 List will be sorted by this element.
63
64 -buttonname X (optional) Button name for this classification.
65 defaults to metadata name.
66
67 -removeprefix regex (optional) A prefix to ignore in the Metadata values
68 for the field when sorting.
69";
70}
71
72sub new {
73 my $class = shift (@_);
74 my $self = new BasClas($class, @_);
75
76 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
77 my $option_list = $self->{'option_list'};
78 push( @{$option_list}, $options );
79
80 my ($metaname, $title, $removeprefix);
81
82 if (!parsargv::parse(\@_,
83 q^metadata/.*/^, \$metaname,
84 q^buttonname/.*/^, \$title,
85 q^removeprefix/.*/^, \$removeprefix,
86 "allow_extra_options")) {
87
88 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
89 &print_usage();
90 die "\n";
91 }
92
93 if (!defined $metaname) {
94 &print_usage;
95 print STDERR "AZList used with no metadata name\n";
96 die "\n";
97 }
98
99 $title = $metaname unless ($title);
100
101 $self->{'list'} = {};
102 $self->{'metaname'} = $metaname;
103 $self->{'title'} = $title;
104 if (defined($removeprefix) && $removeprefix) {
105 $removeprefix =~ s/^\^//; # don't need a leading ^
106 $self->{'removeprefix'} = $removeprefix;
107 }
108
109 return bless $self, $class;
110}
111
112sub init {
113 my $self = shift (@_);
114
115 $self->{'list'} = {};
116}
117
118sub classify {
119 my $self = shift (@_);
120 my ($doc_obj) = @_;
121
122 my $doc_OID = $doc_obj->get_OID();
123 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
124 $self->{'metaname'});
125
126 my $lang = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Language');
127
128 # if this document doesn't contain the metadata element we're
129 # sorting by we won't include it in this classification
130 if (defined $metavalue && $metavalue ne "") {
131 if (defined($self->{'removeprefix'}) &&
132 length($self->{'removeprefix'})) {
133 $metavalue =~ s/^$self->{'removeprefix'}//;
134 }
135
136 if ($self->{'metaname'} eq 'Language') {
137 $metavalue = $iso639::fromiso639{$metavalue};
138 } elsif ($self->{'metaname'} eq 'Creator') {
139 if ($lang eq 'en') {
140 &sorttools::format_string_name_english (\$metavalue);
141 }
142 } else {
143 if ($lang eq 'en') {
144 &sorttools::format_string_english (\$metavalue);
145 }
146 }
147 if (defined $self->{'list'}->{$doc_OID}) {
148 my $outhandle = $self->{'outhandle'};
149 print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
150 }
151 if ($metavalue) {
152 $self->{'list'}->{$doc_OID} = $metavalue;
153 } else {
154 my $outhandle = $self->{'outhandle'};
155 print $outhandle "WARNING: AZList: $doc_OID metadata is empty - not classifying\n";
156 }
157 }
158}
159
160sub alpha_numeric_cmp
161{
162 my ($self,$a,$b) = @_;
163
164 my $title_a = $self->{'list'}->{$a};
165 my $title_b = $self->{'list'}->{$b};
166
167 if ($title_a =~ m/^(\d+(\.\d+)?)/)
168 {
169 my $val_a = $1;
170 if ($title_b =~ m/^(\d+(\.\d+)?)/)
171 {
172 my $val_b = $1;
173 if ($val_a != $val_b)
174 {
175 return ($val_a <=> $val_b);
176 }
177 }
178 }
179
180 return ($title_a cmp $title_b);
181}
182
183sub get_classify_info {
184 my $self = shift (@_);
185
186 my @classlist
187 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
188
189 return $self->splitlist (\@classlist);
190}
191
192sub get_entry {
193 my $self = shift (@_);
194 my ($title, $childtype, $thistype) = @_;
195
196 # organise into classification structure
197 my %classifyinfo = ('childtype'=>$childtype,
198 'Title'=>$title,
199 'contains'=>[]);
200 $classifyinfo{'thistype'} = $thistype
201 if defined $thistype && $thistype =~ /\w/;
202
203 return \%classifyinfo;
204}
205
206# splitlist takes an ordered list of classifications (@$classlistref) and splits it
207# up into alphabetical sub-sections.
208sub splitlist {
209 my $self = shift (@_);
210 my ($classlistref) = @_;
211 my $classhash = {};
212
213 # top level
214 my $childtype = "HList";
215 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
216 my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
217
218 # don't need to do any splitting if there are less than 39 (max + min -1) classifications
219 if ((scalar @$classlistref) <= 39) {
220 foreach $subOID (@$classlistref) {
221 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
222 }
223 return $classifyinfo;
224 }
225
226 # first split up the list into separate A-Z and 0-9 classifications
227 foreach $classification (@$classlistref) {
228 my $title = $self->{'list'}->{$classification};
229
230 $title =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g; # remove any unwanted stuff
231 # only need first char for classification
232 $title =~ m/^(.)/; $title=$1;
233 $title =~ tr/[a-z]/[A-Z]/;
234 if ($title =~ /^[0-9]$/) {$title = '0-9';}
235 elsif ($title !~ /^[A-Z]$/) {
236 my $outhandle = $self->{'outhandle'};
237 print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
238 }
239 $classhash->{$title} = [] unless defined $classhash->{$title};
240 push (@{$classhash->{$title}}, $classification);
241 }
242 $classhash = $self->compactlist ($classhash);
243
244 my @tmparr = ();
245 foreach $subsection (sort keys (%$classhash)) {
246 push (@tmparr, $subsection);
247 }
248
249 # if there's a 0-9 section it will have been sorted to the beginning
250 # but we want it at the end
251 if ($tmparr[0] eq '0-9') {
252 shift @tmparr;
253 push (@tmparr, '0-9');
254 }
255
256 foreach $subclass (@tmparr) {
257 my $tempclassify = $self->get_entry($subclass, "VList");
258 foreach $subsubOID (@{$classhash->{$subclass}}) {
259 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
260 }
261 push (@{$classifyinfo->{'contains'}}, $tempclassify);
262 }
263
264 return $classifyinfo;
265}
266
267sub compactlist {
268 my $self = shift (@_);
269 my ($classhashref) = @_;
270 my $compactedhash = {};
271 my @currentOIDs = ();
272 my $currentfirstletter = "";
273 my $currentlastletter = "";
274 my $lastkey = "";
275
276 # minimum and maximum documents to be displayed per page.
277 # the actual maximum will be max + (min-1).
278 # the smallest sub-section is a single letter at present
279 # so in this case there may be many times max documents
280 # displayed on a page.
281 my $min = 10;
282 my $max = 30;
283
284 foreach $subsection (sort keys %$classhashref) {
285 if ($subsection eq '0-9') {
286 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
287 next;
288 }
289 $currentfirstletter = $subsection if $currentfirstletter eq "";
290 if ((scalar (@currentOIDs) < $min) ||
291 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
292 push (@currentOIDs, @{$classhashref->{$subsection}});
293 $currentlastletter = $subsection;
294 } else {
295
296 if ($currentfirstletter eq $currentlastletter) {
297 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
298 $lastkey = $currentfirstletter;
299 } else {
300 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
301 $lastkey = "$currentfirstletter-$currentlastletter";
302 }
303 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
304 $compactedhash->{$subsection} = $classhashref->{$subsection};
305 @currentOIDs = ();
306 $currentfirstletter = "";
307 $lastkey = $subsection;
308 } else {
309 @currentOIDs = @{$classhashref->{$subsection}};
310 $currentfirstletter = $subsection;
311 $currentlastletter = $subsection;
312 }
313 }
314 }
315
316 # add final OIDs to last sub-classification if there aren't many otherwise
317 # add final sub-classification
318 if (scalar (@currentOIDs) < $min) {
319 my ($newkey) = $lastkey =~ /^(.)/;
320 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
321 delete $compactedhash->{$lastkey};
322 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
323 } else {
324 if ($currentfirstletter eq $currentlastletter) {
325 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
326 } else {
327 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
328 }
329 }
330
331 return $compactedhash;
332}
333
3341;
Note: See TracBrowser for help on using the repository browser.