source: trunk/gsdl/perllib/classify/AZList.pm@ 2001

Last change on this file since 2001 was 2001, checked in by sjboddie, 23 years ago

Added a hack that mysteriously converts iso639 language codes appearing
in formatstrings to the English name of the language (i.e. Language
metadata is now treated as a special case much like Date is).

  • Property svn:keywords set to Author Date Id Revision
File size: 8.4 KB
Line 
1###########################################################################
2#
3# AZList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27# options are:
28# metadata=Metaname -- all documents with Metaname metadata
29# will be included in list, list will be sorted
30# by this element.
31# title=Title -- (optional) the title field for this classification.
32# if not included title field will be Metaname.
33
34package AZList;
35
36use BasClas;
37use sorttools;
38use iso639;
39
40sub BEGIN {
41 @ISA = ('BasClas');
42}
43
44sub print_usage {
45 print STDERR "
46 usage: classify AZList [options]
47 options:
48
49 -metadata X (erquired) Metadata field used for classification.
50 List will be sorted by this element.
51
52 -title X (optional) Title field for this classification.
53 defaults to metadata name.
54";
55}
56
57sub new {
58 my $class = shift (@_);
59 my $self = new BasClas($class, @_);
60
61 my ($metaname, $title);
62
63 if (!parsargv::parse(\@_,
64 q^metadata/.*/^, \$metaname,
65 q^title/.*/^, \$title,
66 "allow_extra_options")) {
67
68 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
69 &print_usage();
70 die "\n";
71 }
72
73 if (!defined $metaname) {
74 &print_usage;
75 print STDERR "AZList used with no metadata name\n";
76 die "\n";
77 }
78
79 $title = $metaname unless ($title);
80
81 $self->{'list'} = {};
82 $self->{'metaname'} = $metaname;
83 $self->{'title'} = $title;
84
85 return bless $self, $class;
86}
87
88sub init {
89 my $self = shift (@_);
90
91 $self->{'list'} = {};
92}
93
94sub classify {
95 my $self = shift (@_);
96 my ($doc_obj) = @_;
97
98 my $doc_OID = $doc_obj->get_OID();
99 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
100 $self->{'metaname'});
101
102 # if this document doesn't contain the metadata element we're
103 # sorting by we won't include it in this classification
104 if (defined $metavalue && $metavalue ne "") {
105 if ($self->{'metaname'} eq 'Language') {
106 $metavalue = $iso639::fromiso639{$metavalue};
107 } elsif ($self->{'metaname'} eq 'Creator') {
108 &sorttools::format_string_name_english (\$metavalue);
109 } else {
110 &sorttools::format_string_english (\$metavalue);
111 }
112 if (defined $self->{'list'}->{$doc_OID}) {
113 my $outhandle = $self->{'outhandle'};
114 print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
115 }
116 $self->{'list'}->{$doc_OID} = $metavalue;
117 }
118}
119
120sub alpha_numeric_cmp
121{
122 my ($self,$a,$b) = @_;
123
124 my $title_a = $self->{'list'}->{$a};
125 my $title_b = $self->{'list'}->{$b};
126
127 if ($title_a =~ m/^(\d+(\.\d+)?)/)
128 {
129 my $val_a = $1;
130 if ($title_b =~ m/^(\d+(\.\d+)?)/)
131 {
132 my $val_b = $1;
133 if ($val_a != $val_b)
134 {
135 return ($val_a <=> $val_b);
136 }
137 }
138 }
139
140 return ($title_a cmp $title_b);
141}
142
143sub get_classify_info {
144 my $self = shift (@_);
145
146 my @classlist
147 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
148
149 return $self->splitlist (\@classlist);
150}
151
152sub get_entry {
153 my $self = shift (@_);
154 my ($title, $childtype, $thistype) = @_;
155
156 # organise into classification structure
157 my %classifyinfo = ('childtype'=>$childtype,
158 'Title'=>$title,
159 'contains'=>[]);
160 $classifyinfo{'thistype'} = $thistype
161 if defined $thistype && $thistype =~ /\w/;
162
163 return \%classifyinfo;
164}
165
166# splitlist takes an ordered list of classifications (@$classlistref) and splits it
167# up into alphabetical sub-sections.
168sub splitlist {
169 my $self = shift (@_);
170 my ($classlistref) = @_;
171 my $classhash = {};
172
173 # top level
174 my $childtype = "HList";
175 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
176 my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
177
178 # don't need to do any splitting if there are less than 39 (max + min -1) classifications
179 if ((scalar @$classlistref) <= 39) {
180 foreach $subOID (@$classlistref) {
181 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
182 }
183 return $classifyinfo;
184 }
185
186 # first split up the list into separate A-Z and 0-9 classifications
187 foreach $classification (@$classlistref) {
188 my $title = $self->{'list'}->{$classification};
189
190 $title =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g; # remove any unwanted stuff
191 $title =~ s/^(.).*$/$1/;
192 $title =~ tr/[a-z]/[A-Z]/;
193 if ($title =~ /^[0-9]$/) {$title = '0-9';}
194 elsif ($title !~ /^[A-Z]$/) {
195 my $outhandle = $self->{'outhandle'};
196 print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
197 }
198 $classhash->{$title} = [] unless defined $classhash->{$title};
199 push (@{$classhash->{$title}}, $classification);
200 }
201 $classhash = $self->compactlist ($classhash);
202
203 my @tmparr = ();
204 foreach $subsection (sort keys (%$classhash)) {
205 push (@tmparr, $subsection);
206 }
207
208 # if there's a 0-9 section it will have been sorted to the beginning
209 # but we want it at the end
210 if ($tmparr[0] eq '0-9') {
211 shift @tmparr;
212 push (@tmparr, '0-9');
213 }
214
215 foreach $subclass (@tmparr) {
216 my $tempclassify = $self->get_entry($subclass, "VList");
217 foreach $subsubOID (@{$classhash->{$subclass}}) {
218 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
219 }
220 push (@{$classifyinfo->{'contains'}}, $tempclassify);
221 }
222
223 return $classifyinfo;
224}
225
226sub compactlist {
227 my $self = shift (@_);
228 my ($classhashref) = @_;
229 my $compactedhash = {};
230 my @currentOIDs = ();
231 my $currentfirstletter = "";
232 my $currentlastletter = "";
233 my $lastkey = "";
234
235 # minimum and maximum documents to be displayed per page.
236 # the actual maximum will be max + (min-1).
237 # the smallest sub-section is a single letter at present
238 # so in this case there may be many times max documents
239 # displayed on a page.
240 my $min = 10;
241 my $max = 30;
242
243 foreach $subsection (sort keys %$classhashref) {
244 if ($subsection eq '0-9') {
245 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
246 next;
247 }
248 $currentfirstletter = $subsection if $currentfirstletter eq "";
249 if ((scalar (@currentOIDs) < $min) ||
250 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
251 push (@currentOIDs, @{$classhashref->{$subsection}});
252 $currentlastletter = $subsection;
253 } else {
254
255 if ($currentfirstletter eq $currentlastletter) {
256 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
257 $lastkey = $currentfirstletter;
258 } else {
259 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
260 $lastkey = "$currentfirstletter-$currentlastletter";
261 }
262 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
263 $compactedhash->{$subsection} = $classhashref->{$subsection};
264 @currentOIDs = ();
265 $currentfirstletter = "";
266 $lastkey = $subsection;
267 } else {
268 @currentOIDs = @{$classhashref->{$subsection}};
269 $currentfirstletter = $subsection;
270 $currentlastletter = $subsection;
271 }
272 }
273 }
274
275 # add final OIDs to last sub-classification if there aren't many otherwise
276 # add final sub-classification
277 if (scalar (@currentOIDs) < $min) {
278 my ($newkey) = $lastkey =~ /^(.)/;
279 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
280 delete $compactedhash->{$lastkey};
281 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
282 } else {
283 if ($currentfirstletter eq $currentlastletter) {
284 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
285 } else {
286 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
287 }
288 }
289
290 return $compactedhash;
291}
292
2931;
Note: See TracBrowser for help on using the repository browser.