Context Navigation

source: main/tags/2.30/gsdl/perllib/classify/AZList.pm@ 23841

Last change on this file since 23841 was 1483, checked in by sjboddie, 24 years ago
added -out option to classifiers
Property svn:keywords set to `Author Date Id Revision`
File size: 7.9 KB

Line
1	###########################################################################
2	#
3	# AZList.pm --
4	# A component of the Greenstone digital library software
5	# from the New Zealand Digital Library Project at the
6	# University of Waikato, New Zealand.
7	#
8	# Copyright (C) 1999 New Zealand Digital Library Project
9	#
10	# This program is free software; you can redistribute it and/or modify
11	# it under the terms of the GNU General Public License as published by
12	# the Free Software Foundation; either version 2 of the License, or
13	# (at your option) any later version.
14	#
15	# This program is distributed in the hope that it will be useful,
16	# but WITHOUT ANY WARRANTY; without even the implied warranty of
17	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	# GNU General Public License for more details.
19	#
20	# You should have received a copy of the GNU General Public License
21	# along with this program; if not, write to the Free Software
22	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	#
24	###########################################################################
25
26	# classifier plugin for sorting alphabetically
27	# options are:
28	# metadata=Metaname -- all documents with Metaname metadata
29	# will be included in list, list will be sorted
30	# by this element.
31	# title=Title -- (optional) the title field for this classification.
32	# if not included title field will be Metaname.
33
34	package AZList;
35
36	use BasClas;
37	use sorttools;
38
39	sub BEGIN {
40	@ISA = ('BasClas');
41	}
42
43	sub new {
44	my ($class, @options) = @_;
45	my $self = new BasClas ($class, @_);
46
47	my ($metaname, $title);
48
49	foreach $option (@options) {
50	if ($option =~ /^metadata=(.*)$/i) {
51	$metaname = $1;
52	} elsif ($option =~ /^title=(.*)$/i) {
53	$title = $1;
54	}
55	}
56
57	if (!defined $metaname) {
58	my $outhandle = $self->{'outhandle'};
59	print $outhandle "AZList used with no metadata name to classify by\n";
60	die "\n";
61	}
62
63	$title = $metaname unless defined $title;
64
65	$self->{'list'} = {};
66	$self->{'metaname'} = $metaname;
67	$self->{'title'} = $title;
68
69	return bless $self, $class;
70	}
71
72	sub init {
73	my $self = shift (@_);
74
75	$self->{'list'} = {};
76	}
77
78	sub classify {
79	my $self = shift (@_);
80	my ($doc_obj) = @_;
81
82	my $doc_OID = $doc_obj->get_OID();
83	my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
84	$self->{'metaname'});
85
86	# if this document doesn't contain the metadata element we're
87	# sorting by we won't include it in this classification
88	if (defined $metavalue && $metavalue ne "") {
89	if ($self->{'metaname'} eq 'Creator') {
90	&sorttools::format_string_name_english (\$metavalue);
91	} else {
92	&sorttools::format_string_english (\$metavalue);
93	}
94	if (defined $self->{'list'}->{$doc_OID}) {
95	my $outhandle = $self->{'outhandle'};
96	print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
97	}
98	$self->{'list'}->{$doc_OID} = $metavalue;
99	}
100	}
101
102	sub alpha_numeric_cmp
103	{
104	my ($self,$a,$b) = @_;
105
106	my $title_a = $self->{'list'}->{$a};
107	my $title_b = $self->{'list'}->{$b};
108
109	if ($title_a =~ m/^(\d+(\.\d+)?)/)
110	{
111	my $val_a = $1;
112	if ($title_b =~ m/^(\d+(\.\d+)?)/)
113	{
114	my $val_b = $1;
115	if ($val_a != $val_b)
116	{
117	return ($val_a <=> $val_b);
118	}
119	}
120	}
121
122	return ($title_a cmp $title_b);
123	}
124
125	sub get_classify_info {
126	my $self = shift (@_);
127
128	my @classlist
129	= sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
130
131	return $self->splitlist (\@classlist);
132	}
133
134	sub get_entry {
135	my $self = shift (@_);
136	my ($title, $childtype, $thistype) = @_;
137
138	# organise into classification structure
139	my %classifyinfo = ('childtype'=>$childtype,
140	'Title'=>$title,
141	'contains'=>[]);
142	$classifyinfo{'thistype'} = $thistype
143	if defined $thistype && $thistype =~ /\w/;
144
145	return \%classifyinfo;
146	}
147
148	# splitlist takes an ordered list of classifications (@$classlistref) and splits it
149	# up into alphabetical sub-sections.
150	sub splitlist {
151	my $self = shift (@_);
152	my ($classlistref) = @_;
153	my $classhash = {};
154
155	# top level
156	my $childtype = "HList";
157	if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
158	my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
159
160	# don't need to do any splitting if there are less than 39 (max + min -1) classifications
161	if ((scalar @$classlistref) <= 39) {
162	foreach $subOID (@$classlistref) {
163	push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
164	}
165	return $classifyinfo;
166	}
167
168	# first split up the list into separate A-Z and 0-9 classifications
169	foreach $classification (@$classlistref) {
170	my $title = $self->{'list'}->{$classification};
171
172	$title =~ s/^(&.{1,6};\|<[^>]>\|[^a-zA-Z0-9])//g; # remove any unwanted stuff
173	$title =~ s/^(.).*$/$1/;
174	$title =~ tr/[a-z]/[A-Z]/;
175	if ($title =~ /^[0-9]$/) {$title = '0-9';}
176	elsif ($title !~ /^[A-Z]$/) {
177	my $outhandle = $self->{'outhandle'};
178	print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
179	}
180	$classhash->{$title} = [] unless defined $classhash->{$title};
181	push (@{$classhash->{$title}}, $classification);
182	}
183	$classhash = $self->compactlist ($classhash);
184
185	my @tmparr = ();
186	foreach $subsection (sort keys (%$classhash)) {
187	push (@tmparr, $subsection);
188	}
189
190	# if there's a 0-9 section it will have been sorted to the beginning
191	# but we want it at the end
192	if ($tmparr[0] eq '0-9') {
193	shift @tmparr;
194	push (@tmparr, '0-9');
195	}
196
197	foreach $subclass (@tmparr) {
198	my $tempclassify = $self->get_entry($subclass, "VList");
199	foreach $subsubOID (@{$classhash->{$subclass}}) {
200	push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
201	}
202	push (@{$classifyinfo->{'contains'}}, $tempclassify);
203	}
204
205	return $classifyinfo;
206	}
207
208	sub compactlist {
209	my $self = shift (@_);
210	my ($classhashref) = @_;
211	my $compactedhash = {};
212	my @currentOIDs = ();
213	my $currentfirstletter = "";
214	my $currentlastletter = "";
215	my $lastkey = "";
216
217	# minimum and maximum documents to be displayed per page.
218	# the actual maximum will be max + (min-1).
219	# the smallest sub-section is a single letter at present
220	# so in this case there may be many times max documents
221	# displayed on a page.
222	my $min = 10;
223	my $max = 30;
224
225	foreach $subsection (sort keys %$classhashref) {
226	if ($subsection eq '0-9') {
227	@{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
228	next;
229	}
230	$currentfirstletter = $subsection if $currentfirstletter eq "";
231	if ((scalar (@currentOIDs) < $min) \|\|
232	((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
233	push (@currentOIDs, @{$classhashref->{$subsection}});
234	$currentlastletter = $subsection;
235	} else {
236
237	if ($currentfirstletter eq $currentlastletter) {
238	@{$compactedhash->{$currentfirstletter}} = @currentOIDs;
239	$lastkey = $currentfirstletter;
240	} else {
241	@{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
242	$lastkey = "$currentfirstletter-$currentlastletter";
243	}
244	if (scalar (@{$classhashref->{$subsection}}) >= $max) {
245	$compactedhash->{$subsection} = $classhashref->{$subsection};
246	@currentOIDs = ();
247	$currentfirstletter = "";
248	$lastkey = $subsection;
249	} else {
250	@currentOIDs = @{$classhashref->{$subsection}};
251	$currentfirstletter = $subsection;
252	$currentlastletter = $subsection;
253	}
254	}
255	}
256
257	# add final OIDs to last sub-classification if there aren't many otherwise
258	# add final sub-classification
259	if (scalar (@currentOIDs) < $min) {
260	my ($newkey) = $lastkey =~ /^(.)/;
261	@currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
262	delete $compactedhash->{$lastkey};
263	@{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
264	} else {
265	if ($currentfirstletter eq $currentlastletter) {
266	@{$compactedhash->{$currentfirstletter}} = @currentOIDs;
267	} else {
268	@{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
269	}
270	}
271
272	return $compactedhash;
273	}
274
275	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: