source: trunk/gsdl/perllib/classify/AZCompactList.pm@ 1250

Last change on this file since 1250 was 1250, checked in by sjboddie, 24 years ago

Tidied up the classfiers slightly, made them a little more object oriented
and removed large chunks of identical code that existed in several
different places. There's still lots to be done to them but I'd like to
wait for gsdl-3.0 (as one of the changes will effect the way options are
passed so all collect.cfg files will need to be updated).

  • Property svn:keywords set to Author Date Id Revision
File size: 9.4 KB
Line 
1###########################################################################
2#
3# AZCompactList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting alphabetically
27# options are:
28# metadata=Metaname -- all documents with Metaname metadata
29# will be included in list, list will be sorted
30# by this element.
31# title=Title -- (optional) the title field for this classification.
32# if not included title field will be Metaname.
33# mingroup=Num -- (optional) the smallest value that will cause
34# a group in the hierarchy to form.
35
36package AZCompactList;
37
38use AZList;
39use sorttools;
40
41sub BEGIN {
42 @ISA = ('AZList');
43}
44
45sub new {
46 my ($class, @options) = @_;
47
48 my ($metaname, $title);
49 my $mingroup = 2;
50
51 foreach $option (@options) {
52 if ($option =~ /^metadata=(.*)$/i) {
53 $metaname = $1;
54 } elsif ($option =~ /^title=(.*)$/i) {
55 $title = $1;
56 } elsif ($option =~ /^mingroup(size)?=(.*)$/i) {
57 $mingroup = $2;
58 }
59 }
60
61 die "AZCompactList used with no metadata name to classify by\n"
62 unless defined $metaname;
63 $title = $metaname unless defined $title;
64
65 return bless {
66 'list'=>{},
67 'listmetavalue'=>{},
68 'reclassify'=>{},
69 'reclassifylist'=>{},
70 'metaname' => $metaname,
71 'title' => $title,
72 'mingroup' => $mingroup
73 }, $class;
74}
75
76sub init
77{
78 my $self = shift (@_);
79
80 $self->{'list'} = {};
81 $self->{'listmetavalue'} = {};
82 $self->{'reclassify'} = {};
83 $self->{'reclassifylist'} = {};
84}
85
86$tmp = 0;
87
88sub classify
89{
90 my $self = shift (@_);
91 my ($doc_obj) = @_;
92
93 my $doc_OID = $doc_obj->get_OID();
94
95 my $thissection = $doc_obj->get_top_section();
96 my $metaname = $self->{'metaname'};
97
98 my $metavalue = $doc_obj->get_metadata_element($thissection,$metaname);
99 my $date = $doc_obj->get_metadata_element($thissection,"Date");
100
101 # if this document doesn't contain the metadata element we're
102 # sorting by we won't include it in this classification
103 if (defined $metavalue && $metavalue =~ /\w/)
104 {
105 my $formatted_metavalue = $metavalue;
106
107 if ($self->{'metaname'} eq 'Creator')
108 {
109 &sorttools::format_string_name_english (\$formatted_metavalue);
110 }
111 else
112 {
113 &sorttools::format_string_english (\$formatted_metavalue);
114 }
115 if (defined $self->{'list'}->{$doc_OID})
116 {
117 print STDERR "WARNING: AZCompactList::classify called multiple times for $doc_OID\n";
118 }
119
120 $self->{'list'}->{$doc_OID} = $formatted_metavalue;
121 $self->{'listmetavalue'}->{$doc_OID} = $metavalue;
122 $self->{'reclassify'}->{$doc_OID} = [$doc_obj,$date]
123 }
124}
125
126sub reinit
127{
128 my ($self,$classlist_ref) = @_;
129
130 my %mtfreq = ();
131 my @single_classlist = ();
132 my @multiple_classlist = ();
133
134 # find out how often each metavalue occurs
135 map { $mtfreq{$self->{'listmetavalue'}->{$_}}++; } @$classlist_ref;
136
137 # use this information to split the list: single metavalue/repeated value
138 map
139 {
140 my $metavalue = $self->{'listmetavalue'}->{$_};
141 print "meta value = $metavalue; count = $mtfreq{$metavalue}\n";
142
143 if ($mtfreq{$metavalue}>=$self->{'mingroup'})
144 {
145 push(@multiple_classlist,$_);
146 }
147 else
148 {
149 push(@single_classlist,$_);
150 $self->{'reclassifylist'}->{$_} = $metavalue;
151 }
152 } @$classlist_ref;
153
154
155 # Setup sub-classifiers for multiple list
156
157 $self->{'classifiers'} = {};
158
159 my $listname
160 = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify/List.pm");
161 if (-e $listname) { require $listname; }
162 else
163 {
164 die "TCCList ERROR - couldn't find classifier \"$listname\"\n";
165 }
166
167 my $metavalue;
168 foreach $metavalue (keys %mtfreq)
169 {
170 if ($mtfreq{$metavalue}>=$self->{'mingroup'})
171 {
172 my $listclassobj;
173 my $metaname = $self->{'metaname'};
174
175 eval ("\$listclassobj = new List(\"metadata=$metaname\", \"title=\$metavalue\", \"sort=Date\")");
176 die "$@" if $@;
177
178 $listclassobj->init();
179
180 if (defined $metavalue && $metavalue =~ /\w/)
181 {
182 my $formatted_node = $metavalue;
183 if ($self->{'metaname'} eq 'Creator')
184 {
185 &sorttools::format_string_name_english(\$formatted_node);
186 }
187 else
188 {
189 &sorttools::format_string_english(\$formatted_node);
190 }
191
192 $self->{'classifiers'}->{$metavalue}
193 = { 'classifyobj' => $listclassobj,
194 'formattednode' => $formatted_node };
195 }
196 }
197 }
198
199
200 return (\@single_classlist,\@multiple_classlist);
201}
202
203
204sub reclassify
205{
206 my ($self,$multiple_cl_ref) = @_;
207
208 my $metaname = $self->{'metaname'};
209
210 my $doc_OID;
211 foreach $doc_OID (@$multiple_cl_ref)
212 {
213 my $listclassobj;
214 my $metavalue = $self->{'listmetavalue'}->{$doc_OID};
215
216 # find metavalue in list of sub-classifiers
217 my $found = 0;
218 my $node_name;
219 foreach $node_name (keys %{$self->{'classifiers'}})
220 {
221 if ($metavalue =~ /^$node_name$/i)
222 {
223 my ($doc_obj,$date) = @{$self->{'reclassify'}->{$doc_OID}};
224
225 $self->{'classifiers'}->{$node_name}->{'classifyobj'}
226 ->classify($doc_obj, $date);
227
228 $found = 1;
229 last;
230 }
231 }
232
233 if (!$found)
234 {
235 print STDERR "Warning: AZCompactList::reclassify ";
236 print STDERR "could not find sub-node for $metavalue\n";
237 }
238 }
239}
240
241
242
243sub get_reclassify_info
244{
245 my $self = shift (@_);
246
247 my $node_name;
248 foreach $node_name (keys %{$self->{'classifiers'}})
249 {
250 my $classifyinfo
251 = $self->{'classifiers'}->{$node_name}->{'classifyobj'}
252 ->get_classify_info(1);
253 $self->{'classifiers'}->{$node_name}->{'classifyinfo'}
254 = $classifyinfo;
255 $self->{'reclassifylist'}->{"CLASSIFY.$node_name"}
256 = $self->{'classifiers'}->{$node_name}->{'formattednode'};
257 }
258}
259
260
261sub get_classify_info {
262 my $self = shift (@_);
263
264 my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
265
266 my ($single_cl_ref,$multiple_cl_ref) = $self->reinit(\@classlist);
267 $self->reclassify($multiple_cl_ref);
268 $self->get_reclassify_info();
269
270 my @reclassified_classlist
271 = sort {$self->{'reclassifylist'}->{$a} cmp $self->{'reclassifylist'}->{$b};} keys %{$self->{'reclassifylist'}};
272
273 return $self->splitlist (\@reclassified_classlist);
274}
275
276# splitlist takes an ordered list of classifications (@$classlistref) and splits it
277# up into alphabetical sub-sections.
278sub splitlist {
279 my $self = shift (@_);
280 my ($classlistref) = @_;
281 my $classhash = {};
282
283 # top level
284 my $childtype = "HList";
285 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
286 my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
287
288 # don't need to do any splitting if there are less than 39 (max + min-1) classifications
289 if ((scalar @$classlistref) <= 39) {
290 foreach $subOID (@$classlistref) {
291 if ($subOID =~ /^CLASSIFY\.(.*)$/
292 && defined $self->{'classifiers'}->{$1})
293 {
294 push (@{$classifyinfo->{'contains'}},
295 $self->{'classifiers'}->{$1}->{'classifyinfo'});
296 }
297 else
298 {
299 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
300 }
301 }
302 return $classifyinfo;
303 }
304
305 # first split up the list into separate A-Z and 0-9 classifications
306 foreach $classification (@$classlistref) {
307 my $title = $self->{'reclassifylist'}->{$classification};
308 $title =~ s/&(.){2,4};//g; # remove any HTML special chars
309 $title =~ s/^(\'|\`|\"|\:|\()//g; # remove any opening punctutation
310 $title =~ s/^(.).*$/$1/;
311 $title =~ tr/[a-z]/[A-Z]/;
312
313 if ($title =~ /^[0-9]$/) {$title = '0-9';}
314 elsif ($title !~ /^[A-Z]$/) {
315 print STDERR "AZCompactList: WARNING $classification has badly formatted title ($title)\n";
316 }
317 $classhash->{$title} = [] unless defined $classhash->{$title};
318 push (@{$classhash->{$title}}, $classification);
319 }
320 $classhash = $self->compactlist ($classhash);
321
322 my @tmparr = ();
323 foreach $subsection (sort keys (%$classhash)) {
324 push (@tmparr, $subsection);
325 }
326
327 # if there's a 0-9 section it will have been sorted to the beginning
328 # but we want it at the end
329 if ($tmparr[0] eq '0-9') {
330 shift @tmparr;
331 push (@tmparr, '0-9');
332 }
333
334 foreach $subclass (@tmparr)
335 {
336 my $tempclassify = $self->get_entry ($subclass, "VList");
337 foreach $subsubOID (@{$classhash->{$subclass}})
338 {
339 if ($subsubOID =~ /^CLASSIFY\.(.*)$/
340 && defined $self->{'classifiers'}->{$1})
341 {
342 push (@{$tempclassify->{'contains'}},
343 $self->{'classifiers'}->{$1}->{'classifyinfo'});
344 }
345 else
346 {
347 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
348 }
349 }
350 push (@{$classifyinfo->{'contains'}}, $tempclassify);
351 }
352
353 return $classifyinfo;
354}
355
3561;
Note: See TracBrowser for help on using the repository browser.