source: trunk/gsdl/perllib/classify/AZList.pm@ 3727

Last change on this file since 3727 was 3727, checked in by sjboddie, 21 years ago

Reverted back last change to AZList.pm. Attempting to make it work better
with non-ascii characters wasn't real smart when it's only classifying
documents based on ascii alpha-numeric characters anyway.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.6 KB
Line 
1###########################################################################
2#
3# AZList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27
28package AZList;
29
30use BasClas;
31use sorttools;
32use iso639;
33
34sub BEGIN {
35 @ISA = ('BasClas');
36}
37
38my $arguments = [ { 'name' => "metadata",
39 'desc' => "Metadata field used for classification. List will be sorted by this element.",
40 'type' => "metadata",
41 'reqd' => "yes" } ,
42 { 'name' => "buttonname",
43 'desc' => "Button name for this classification. Defaults to metadata name.",
44 'type' => "string",
45 'reqd' => "no" } ,
46 { 'name' => "removeprefix",
47 'desc' => "A prefix to ignore in the Metadata values for the field when sorting.",
48 'type' => "string",
49 'reqd' => "no" } ];
50
51my $options = { 'name' => "AZList",
52 'desc' => "Classifier plugin for sorting alphabetically",
53 'inherits' => "Yes",
54 'args' => $arguments };
55
56sub print_usage {
57 print STDERR "
58 usage: classify AZList [options]
59 options:
60
61 -metadata X (required) Metadata field used for classification.
62 List will be sorted by this element.
63
64 -buttonname X (optional) Button name for this classification.
65 defaults to metadata name.
66
67 -removeprefix regex (optional) A prefix to ignore in the Metadata values
68 for the field when sorting.
69";
70}
71
72sub new {
73 my $class = shift (@_);
74 my $self = new BasClas($class, @_);
75
76 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
77 my $option_list = $self->{'option_list'};
78 push( @{$option_list}, $options );
79
80 my ($metaname, $title, $removeprefix);
81
82 if (!parsargv::parse(\@_,
83 q^metadata/.*/^, \$metaname,
84 q^buttonname/.*/^, \$title,
85 q^removeprefix/.*/^, \$removeprefix,
86 "allow_extra_options")) {
87
88 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
89 &print_usage();
90 die "\n";
91 }
92
93 if (!defined $metaname) {
94 &print_usage;
95 print STDERR "AZList used with no metadata name\n";
96 die "\n";
97 }
98
99 $title = $metaname unless ($title);
100
101 $self->{'list'} = {};
102 $self->{'metaname'} = $metaname;
103 $self->{'title'} = $title;
104 if (defined($removeprefix) && $removeprefix) {
105 $removeprefix =~ s/^\^//; # don't need a leading ^
106 $self->{'removeprefix'} = $removeprefix;
107 }
108
109 return bless $self, $class;
110}
111
112sub init {
113 my $self = shift (@_);
114
115 $self->{'list'} = {};
116}
117
118sub classify {
119 my $self = shift (@_);
120 my ($doc_obj) = @_;
121
122 my $doc_OID = $doc_obj->get_OID();
123 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
124 $self->{'metaname'});
125
126 # if this document doesn't contain the metadata element we're
127 # sorting by we won't include it in this classification
128 if (defined $metavalue && $metavalue ne "") {
129 if (defined($self->{'removeprefix'}) &&
130 length($self->{'removeprefix'})) {
131 $metavalue =~ s/^$self->{'removeprefix'}//;
132 }
133
134 if ($self->{'metaname'} eq 'Language') {
135 $metavalue = $iso639::fromiso639{$metavalue};
136 } elsif ($self->{'metaname'} eq 'Creator') {
137 &sorttools::format_string_name_english (\$metavalue);
138 } else {
139 &sorttools::format_string_english (\$metavalue);
140 }
141 if (defined $self->{'list'}->{$doc_OID}) {
142 my $outhandle = $self->{'outhandle'};
143 print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
144 }
145 if ($metavalue) {
146 $self->{'list'}->{$doc_OID} = $metavalue;
147 } else {
148 my $outhandle = $self->{'outhandle'};
149 print $outhandle "WARNING: AZList: $doc_OID metadata is empty - not classifying\n";
150 }
151 }
152}
153
154sub alpha_numeric_cmp
155{
156 my ($self,$a,$b) = @_;
157
158 my $title_a = $self->{'list'}->{$a};
159 my $title_b = $self->{'list'}->{$b};
160
161 if ($title_a =~ m/^(\d+(\.\d+)?)/)
162 {
163 my $val_a = $1;
164 if ($title_b =~ m/^(\d+(\.\d+)?)/)
165 {
166 my $val_b = $1;
167 if ($val_a != $val_b)
168 {
169 return ($val_a <=> $val_b);
170 }
171 }
172 }
173
174 return ($title_a cmp $title_b);
175}
176
177sub get_classify_info {
178 my $self = shift (@_);
179
180 my @classlist
181 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
182
183 return $self->splitlist (\@classlist);
184}
185
186sub get_entry {
187 my $self = shift (@_);
188 my ($title, $childtype, $thistype) = @_;
189
190 # organise into classification structure
191 my %classifyinfo = ('childtype'=>$childtype,
192 'Title'=>$title,
193 'contains'=>[]);
194 $classifyinfo{'thistype'} = $thistype
195 if defined $thistype && $thistype =~ /\w/;
196
197 return \%classifyinfo;
198}
199
200# splitlist takes an ordered list of classifications (@$classlistref) and splits it
201# up into alphabetical sub-sections.
202sub splitlist {
203 my $self = shift (@_);
204 my ($classlistref) = @_;
205 my $classhash = {};
206
207 # top level
208 my $childtype = "HList";
209 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
210 my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
211
212 # don't need to do any splitting if there are less than 39 (max + min -1) classifications
213 if ((scalar @$classlistref) <= 39) {
214 foreach $subOID (@$classlistref) {
215 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
216 }
217 return $classifyinfo;
218 }
219
220 # first split up the list into separate A-Z and 0-9 classifications
221 foreach $classification (@$classlistref) {
222 my $title = $self->{'list'}->{$classification};
223
224 $title =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g; # remove any unwanted stuff
225 # only need first char for classification
226 $title =~ m/^(.)/; $title=$1;
227 $title =~ tr/[a-z]/[A-Z]/;
228 if ($title =~ /^[0-9]$/) {$title = '0-9';}
229 elsif ($title !~ /^[A-Z]$/) {
230 my $outhandle = $self->{'outhandle'};
231 print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
232 }
233 $classhash->{$title} = [] unless defined $classhash->{$title};
234 push (@{$classhash->{$title}}, $classification);
235 }
236 $classhash = $self->compactlist ($classhash);
237
238 my @tmparr = ();
239 foreach $subsection (sort keys (%$classhash)) {
240 push (@tmparr, $subsection);
241 }
242
243 # if there's a 0-9 section it will have been sorted to the beginning
244 # but we want it at the end
245 if ($tmparr[0] eq '0-9') {
246 shift @tmparr;
247 push (@tmparr, '0-9');
248 }
249
250 foreach $subclass (@tmparr) {
251 my $tempclassify = $self->get_entry($subclass, "VList");
252 foreach $subsubOID (@{$classhash->{$subclass}}) {
253 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
254 }
255 push (@{$classifyinfo->{'contains'}}, $tempclassify);
256 }
257
258 return $classifyinfo;
259}
260
261sub compactlist {
262 my $self = shift (@_);
263 my ($classhashref) = @_;
264 my $compactedhash = {};
265 my @currentOIDs = ();
266 my $currentfirstletter = "";
267 my $currentlastletter = "";
268 my $lastkey = "";
269
270 # minimum and maximum documents to be displayed per page.
271 # the actual maximum will be max + (min-1).
272 # the smallest sub-section is a single letter at present
273 # so in this case there may be many times max documents
274 # displayed on a page.
275 my $min = 10;
276 my $max = 30;
277
278 foreach $subsection (sort keys %$classhashref) {
279 if ($subsection eq '0-9') {
280 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
281 next;
282 }
283 $currentfirstletter = $subsection if $currentfirstletter eq "";
284 if ((scalar (@currentOIDs) < $min) ||
285 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
286 push (@currentOIDs, @{$classhashref->{$subsection}});
287 $currentlastletter = $subsection;
288 } else {
289
290 if ($currentfirstletter eq $currentlastletter) {
291 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
292 $lastkey = $currentfirstletter;
293 } else {
294 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
295 $lastkey = "$currentfirstletter-$currentlastletter";
296 }
297 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
298 $compactedhash->{$subsection} = $classhashref->{$subsection};
299 @currentOIDs = ();
300 $currentfirstletter = "";
301 $lastkey = $subsection;
302 } else {
303 @currentOIDs = @{$classhashref->{$subsection}};
304 $currentfirstletter = $subsection;
305 $currentlastletter = $subsection;
306 }
307 }
308 }
309
310 # add final OIDs to last sub-classification if there aren't many otherwise
311 # add final sub-classification
312 if (scalar (@currentOIDs) < $min) {
313 my ($newkey) = $lastkey =~ /^(.)/;
314 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
315 delete $compactedhash->{$lastkey};
316 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
317 } else {
318 if ($currentfirstletter eq $currentlastletter) {
319 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
320 } else {
321 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
322 }
323 }
324
325 return $compactedhash;
326}
327
3281;
Note: See TracBrowser for help on using the repository browser.