source: main/tags/2.30/gsdl/perllib/classify/AZList.pm@ 23841

Last change on this file since 23841 was 1483, checked in by sjboddie, 24 years ago

added -out option to classifiers

  • Property svn:keywords set to Author Date Id Revision
File size: 7.9 KB
Line 
1###########################################################################
2#
3# AZList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27# options are:
28# metadata=Metaname -- all documents with Metaname metadata
29# will be included in list, list will be sorted
30# by this element.
31# title=Title -- (optional) the title field for this classification.
32# if not included title field will be Metaname.
33
34package AZList;
35
36use BasClas;
37use sorttools;
38
39sub BEGIN {
40 @ISA = ('BasClas');
41}
42
43sub new {
44 my ($class, @options) = @_;
45 my $self = new BasClas ($class, @_);
46
47 my ($metaname, $title);
48
49 foreach $option (@options) {
50 if ($option =~ /^metadata=(.*)$/i) {
51 $metaname = $1;
52 } elsif ($option =~ /^title=(.*)$/i) {
53 $title = $1;
54 }
55 }
56
57 if (!defined $metaname) {
58 my $outhandle = $self->{'outhandle'};
59 print $outhandle "AZList used with no metadata name to classify by\n";
60 die "\n";
61 }
62
63 $title = $metaname unless defined $title;
64
65 $self->{'list'} = {};
66 $self->{'metaname'} = $metaname;
67 $self->{'title'} = $title;
68
69 return bless $self, $class;
70}
71
72sub init {
73 my $self = shift (@_);
74
75 $self->{'list'} = {};
76}
77
78sub classify {
79 my $self = shift (@_);
80 my ($doc_obj) = @_;
81
82 my $doc_OID = $doc_obj->get_OID();
83 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
84 $self->{'metaname'});
85
86 # if this document doesn't contain the metadata element we're
87 # sorting by we won't include it in this classification
88 if (defined $metavalue && $metavalue ne "") {
89 if ($self->{'metaname'} eq 'Creator') {
90 &sorttools::format_string_name_english (\$metavalue);
91 } else {
92 &sorttools::format_string_english (\$metavalue);
93 }
94 if (defined $self->{'list'}->{$doc_OID}) {
95 my $outhandle = $self->{'outhandle'};
96 print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
97 }
98 $self->{'list'}->{$doc_OID} = $metavalue;
99 }
100}
101
102sub alpha_numeric_cmp
103{
104 my ($self,$a,$b) = @_;
105
106 my $title_a = $self->{'list'}->{$a};
107 my $title_b = $self->{'list'}->{$b};
108
109 if ($title_a =~ m/^(\d+(\.\d+)?)/)
110 {
111 my $val_a = $1;
112 if ($title_b =~ m/^(\d+(\.\d+)?)/)
113 {
114 my $val_b = $1;
115 if ($val_a != $val_b)
116 {
117 return ($val_a <=> $val_b);
118 }
119 }
120 }
121
122 return ($title_a cmp $title_b);
123}
124
125sub get_classify_info {
126 my $self = shift (@_);
127
128 my @classlist
129 = sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
130
131 return $self->splitlist (\@classlist);
132}
133
134sub get_entry {
135 my $self = shift (@_);
136 my ($title, $childtype, $thistype) = @_;
137
138 # organise into classification structure
139 my %classifyinfo = ('childtype'=>$childtype,
140 'Title'=>$title,
141 'contains'=>[]);
142 $classifyinfo{'thistype'} = $thistype
143 if defined $thistype && $thistype =~ /\w/;
144
145 return \%classifyinfo;
146}
147
148# splitlist takes an ordered list of classifications (@$classlistref) and splits it
149# up into alphabetical sub-sections.
150sub splitlist {
151 my $self = shift (@_);
152 my ($classlistref) = @_;
153 my $classhash = {};
154
155 # top level
156 my $childtype = "HList";
157 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
158 my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
159
160 # don't need to do any splitting if there are less than 39 (max + min -1) classifications
161 if ((scalar @$classlistref) <= 39) {
162 foreach $subOID (@$classlistref) {
163 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
164 }
165 return $classifyinfo;
166 }
167
168 # first split up the list into separate A-Z and 0-9 classifications
169 foreach $classification (@$classlistref) {
170 my $title = $self->{'list'}->{$classification};
171
172 $title =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g; # remove any unwanted stuff
173 $title =~ s/^(.).*$/$1/;
174 $title =~ tr/[a-z]/[A-Z]/;
175 if ($title =~ /^[0-9]$/) {$title = '0-9';}
176 elsif ($title !~ /^[A-Z]$/) {
177 my $outhandle = $self->{'outhandle'};
178 print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
179 }
180 $classhash->{$title} = [] unless defined $classhash->{$title};
181 push (@{$classhash->{$title}}, $classification);
182 }
183 $classhash = $self->compactlist ($classhash);
184
185 my @tmparr = ();
186 foreach $subsection (sort keys (%$classhash)) {
187 push (@tmparr, $subsection);
188 }
189
190 # if there's a 0-9 section it will have been sorted to the beginning
191 # but we want it at the end
192 if ($tmparr[0] eq '0-9') {
193 shift @tmparr;
194 push (@tmparr, '0-9');
195 }
196
197 foreach $subclass (@tmparr) {
198 my $tempclassify = $self->get_entry($subclass, "VList");
199 foreach $subsubOID (@{$classhash->{$subclass}}) {
200 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
201 }
202 push (@{$classifyinfo->{'contains'}}, $tempclassify);
203 }
204
205 return $classifyinfo;
206}
207
208sub compactlist {
209 my $self = shift (@_);
210 my ($classhashref) = @_;
211 my $compactedhash = {};
212 my @currentOIDs = ();
213 my $currentfirstletter = "";
214 my $currentlastletter = "";
215 my $lastkey = "";
216
217 # minimum and maximum documents to be displayed per page.
218 # the actual maximum will be max + (min-1).
219 # the smallest sub-section is a single letter at present
220 # so in this case there may be many times max documents
221 # displayed on a page.
222 my $min = 10;
223 my $max = 30;
224
225 foreach $subsection (sort keys %$classhashref) {
226 if ($subsection eq '0-9') {
227 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
228 next;
229 }
230 $currentfirstletter = $subsection if $currentfirstletter eq "";
231 if ((scalar (@currentOIDs) < $min) ||
232 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
233 push (@currentOIDs, @{$classhashref->{$subsection}});
234 $currentlastletter = $subsection;
235 } else {
236
237 if ($currentfirstletter eq $currentlastletter) {
238 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
239 $lastkey = $currentfirstletter;
240 } else {
241 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
242 $lastkey = "$currentfirstletter-$currentlastletter";
243 }
244 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
245 $compactedhash->{$subsection} = $classhashref->{$subsection};
246 @currentOIDs = ();
247 $currentfirstletter = "";
248 $lastkey = $subsection;
249 } else {
250 @currentOIDs = @{$classhashref->{$subsection}};
251 $currentfirstletter = $subsection;
252 $currentlastletter = $subsection;
253 }
254 }
255 }
256
257 # add final OIDs to last sub-classification if there aren't many otherwise
258 # add final sub-classification
259 if (scalar (@currentOIDs) < $min) {
260 my ($newkey) = $lastkey =~ /^(.)/;
261 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
262 delete $compactedhash->{$lastkey};
263 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
264 } else {
265 if ($currentfirstletter eq $currentlastletter) {
266 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
267 } else {
268 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
269 }
270 }
271
272 return $compactedhash;
273}
274
2751;
Note: See TracBrowser for help on using the repository browser.