source: trunk/gsdl/perllib/classify/AZList.pm@ 6111

Last change on this file since 6111 was 6111, checked in by jmt12, 20 years ago

Changed the description for the -metadata flag to foreshadow the coming enhancement. This commit also happens to include the prototype -ignore_arguments flag to AZList, that will never actually be used because of the aforementioned metadata enhancement.

  • Property svn:keywords set to Author Date Id Revision
File size: 10.2 KB
Line 
1###########################################################################
2#
3# AZList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27
28package AZList;
29
30use BasClas;
31use sorttools;
32use iso639;
33
34sub BEGIN {
35 @ISA = ('BasClas');
36}
37
38my $arguments =
39 [ { 'name' => "metadata",
40 'desc' => "{List.metadata}",
41 'type' => "metadata",
42 'reqd' => "yes" } ,
43 { 'name' => "buttonname",
44 'desc' => "{AZList.buttonname}",
45 'type' => "string",
46 'deft' => "Metadata element specified with -metadata",
47 'reqd' => "no" },
48 { 'name' => "removeprefix",
49 'desc' => "{AZList.removeprefix}",
50 'type' => "string",
51 'deft' => "",
52 'reqd' => "no" } ,
53 { 'name' => "removesuffix",
54 'desc' => "{AZList.removesuffix}",
55 'type' => "string",
56 'deft' => "",
57 'reqd' => "no" }
58 ];
59
60my $options = { 'name' => "AZList",
61 'desc' => "{AZList.desc}",
62 'inherits' => "Yes",
63 'args' => $arguments };
64
65# sub print_usage {
66# print STDERR "
67# usage: classify AZList [options]
68# options:
69
70# -metadata X (required) Metadata field used for classification.
71# List will be sorted by this element.
72
73# -buttonname X (optional) Button name for this classification.
74# defaults to metadata name.
75
76# -removeprefix regex (optional) A prefix to ignore in the Metadata values
77# for the field when sorting.
78# ";
79# }
80
81sub new {
82 my $class = shift (@_);
83 my $self = new BasClas($class, @_);
84
85 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
86 my $option_list = $self->{'option_list'};
87 push( @{$option_list}, $options );
88
89 my ($metaname, $title, $removeprefix, $removesuffix);
90
91 if (!parsargv::parse(\@_,
92 q^metadata/.*/^, \$metaname,
93 q^buttonname/.*/^, \$title,
94 q^removeprefix/.*/^, \$removeprefix,
95 q^removesuffix/.*/^, \$removesuffix,
96 "allow_extra_options")) {
97
98 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
99 $self->print_txt_usage(""); # Use default resource bundle
100 die "\n";
101 }
102
103 if (!defined $metaname) {
104 $self->print_txt_usage(""); # Use default resource bundle
105 print STDERR "AZList used with no metadata name\n";
106 die "\n";
107 }
108
109 $title = $metaname unless ($title);
110
111 $self->{'list'} = {};
112 $self->{'metaname'} = $metaname;
113 $self->{'title'} = $title;
114 if (defined($removeprefix) && $removeprefix) {
115 $removeprefix =~ s/^\^//; # don't need a leading ^
116 $self->{'removeprefix'} = $removeprefix;
117 }
118 if (defined($removesuffix) && $removesuffix) {
119 $removesuffix =~ s/\$$//; # don't need a trailing $
120 $self->{'removesuffix'} = $removesuffix;
121 }
122
123 return bless $self, $class;
124}
125
126sub init {
127 my $self = shift (@_);
128
129 $self->{'list'} = {};
130}
131
132sub classify {
133 my $self = shift (@_);
134 my ($doc_obj) = @_;
135
136 my $doc_OID = $doc_obj->get_OID();
137 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'metaname'}, $self->{'ignore_namespace'});
138
139 # if this document doesn't contain the metadata element we're
140 # sorting by we won't include it in this classification
141 if (defined $metavalue && $metavalue ne "") {
142 if (defined($self->{'removeprefix'}) &&
143 length($self->{'removeprefix'})) {
144 $metavalue =~ s/^$self->{'removeprefix'}//;
145 }
146 if (defined($self->{'removesuffix'}) &&
147 length($self->{'removesuffix'})) {
148 $metavalue =~ s/$self->{'removesuffix'}$//;
149 }
150
151 if ($self->{'metaname'} eq 'Language') {
152 $metavalue = $iso639::fromiso639{$metavalue};
153 } elsif ($self->{'metaname'} eq 'Creator') {
154 &sorttools::format_string_name_english (\$metavalue);
155 } else {
156 &sorttools::format_string_english (\$metavalue);
157 }
158 if (defined $self->{'list'}->{$doc_OID}) {
159 my $outhandle = $self->{'outhandle'};
160 print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
161 }
162 if ($metavalue) {
163 $self->{'list'}->{$doc_OID} = $metavalue;
164 } else {
165 my $outhandle = $self->{'outhandle'};
166 print $outhandle "WARNING: AZList: $doc_OID metadata is empty - not classifying\n";
167 }
168 }
169}
170
171sub alpha_numeric_cmp
172{
173 my ($self,$a,$b) = @_;
174
175 my $title_a = $self->{'list'}->{$a};
176 my $title_b = $self->{'list'}->{$b};
177
178 if ($title_a =~ m/^(\d+(\.\d+)?)/)
179 {
180 my $val_a = $1;
181 if ($title_b =~ m/^(\d+(\.\d+)?)/)
182 {
183 my $val_b = $1;
184 if ($val_a != $val_b)
185 {
186 return ($val_a <=> $val_b);
187 }
188 }
189 }
190
191 return ($title_a cmp $title_b);
192}
193
194sub get_classify_info {
195 my $self = shift (@_);
196
197 my @classlist
198 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
199
200 return $self->splitlist (\@classlist);
201}
202
203sub get_entry {
204 my $self = shift (@_);
205 my ($title, $childtype, $thistype) = @_;
206
207 # organise into classification structure
208 my %classifyinfo = ('childtype'=>$childtype,
209 'Title'=>$title,
210 'contains'=>[]);
211 $classifyinfo{'thistype'} = $thistype
212 if defined $thistype && $thistype =~ /\w/;
213
214 return \%classifyinfo;
215}
216
217# splitlist takes an ordered list of classifications (@$classlistref) and splits it
218# up into alphabetical sub-sections.
219sub splitlist {
220 my $self = shift (@_);
221 my ($classlistref) = @_;
222 my $classhash = {};
223
224 # top level
225 my $childtype = "HList";
226 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
227 my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
228
229 # don't need to do any splitting if there are less than 39 (max + min -1) classifications
230 if ((scalar @$classlistref) <= 39) {
231 foreach $subOID (@$classlistref) {
232 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
233 }
234 return $classifyinfo;
235 }
236
237 # first split up the list into separate A-Z and 0-9 classifications
238 foreach $classification (@$classlistref) {
239 my $title = $self->{'list'}->{$classification};
240
241 $title =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g; # remove any unwanted stuff
242 # only need first char for classification
243 $title =~ m/^(.)/; $title=$1;
244 $title =~ tr/[a-z]/[A-Z]/;
245 if ($title =~ /^[0-9]$/) {$title = '0-9';}
246 elsif ($title !~ /^[A-Z]$/) {
247 my $outhandle = $self->{'outhandle'};
248 print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
249 }
250 $classhash->{$title} = [] unless defined $classhash->{$title};
251 push (@{$classhash->{$title}}, $classification);
252 }
253 $classhash = $self->compactlist ($classhash);
254
255 my @tmparr = ();
256 foreach $subsection (sort keys (%$classhash)) {
257 push (@tmparr, $subsection);
258 }
259
260 # if there's a 0-9 section it will have been sorted to the beginning
261 # but we want it at the end
262 if ($tmparr[0] eq '0-9') {
263 shift @tmparr;
264 push (@tmparr, '0-9');
265 }
266
267 foreach $subclass (@tmparr) {
268 my $tempclassify = $self->get_entry($subclass, "VList");
269 foreach $subsubOID (@{$classhash->{$subclass}}) {
270 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
271 }
272 push (@{$classifyinfo->{'contains'}}, $tempclassify);
273 }
274
275 return $classifyinfo;
276}
277
278sub compactlist {
279 my $self = shift (@_);
280 my ($classhashref) = @_;
281 my $compactedhash = {};
282 my @currentOIDs = ();
283 my $currentfirstletter = "";
284 my $currentlastletter = "";
285 my $lastkey = "";
286
287 # minimum and maximum documents to be displayed per page.
288 # the actual maximum will be max + (min-1).
289 # the smallest sub-section is a single letter at present
290 # so in this case there may be many times max documents
291 # displayed on a page.
292 my $min = 10;
293 my $max = 30;
294
295 foreach $subsection (sort keys %$classhashref) {
296 if ($subsection eq '0-9') {
297 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
298 next;
299 }
300 $currentfirstletter = $subsection if $currentfirstletter eq "";
301 if ((scalar (@currentOIDs) < $min) ||
302 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
303 push (@currentOIDs, @{$classhashref->{$subsection}});
304 $currentlastletter = $subsection;
305 } else {
306
307 if ($currentfirstletter eq $currentlastletter) {
308 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
309 $lastkey = $currentfirstletter;
310 } else {
311 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
312 $lastkey = "$currentfirstletter-$currentlastletter";
313 }
314 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
315 $compactedhash->{$subsection} = $classhashref->{$subsection};
316 @currentOIDs = ();
317 $currentfirstletter = "";
318 $lastkey = $subsection;
319 } else {
320 @currentOIDs = @{$classhashref->{$subsection}};
321 $currentfirstletter = $subsection;
322 $currentlastletter = $subsection;
323 }
324 }
325 }
326
327 # add final OIDs to last sub-classification if there aren't many otherwise
328 # add final sub-classification
329 # BUG FIX: don't add anything if there are no currentOIDs (thanks to Don Gourley)
330 if (@currentOIDS) {
331 if (scalar (@currentOIDs) < $min) {
332 my ($newkey) = $lastkey =~ /^(.)/;
333 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
334 delete $compactedhash->{$lastkey};
335 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
336 }
337 else {
338 if ($currentfirstletter eq $currentlastletter) {
339 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
340 }
341 else {
342 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
343 }
344 }
345 }
346
347 return $compactedhash;
348}
349
3501;
Note: See TracBrowser for help on using the repository browser.