Context Navigation

source: trunk/gsdl/perllib/classify/AZList.pm@ 3510

Last change on this file since 3510 was 3510, checked in by jrm21, 22 years ago
need to check that remove_prefix is defined before checking its length
Property svn:keywords set to `Author Date Id Revision`
File size: 8.8 KB

Line
1	###########################################################################
2	#
3	# AZList.pm --
4	# A component of the Greenstone digital library software
5	# from the New Zealand Digital Library Project at the
6	# University of Waikato, New Zealand.
7	#
8	# Copyright (C) 1999 New Zealand Digital Library Project
9	#
10	# This program is free software; you can redistribute it and/or modify
11	# it under the terms of the GNU General Public License as published by
12	# the Free Software Foundation; either version 2 of the License, or
13	# (at your option) any later version.
14	#
15	# This program is distributed in the hope that it will be useful,
16	# but WITHOUT ANY WARRANTY; without even the implied warranty of
17	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	# GNU General Public License for more details.
19	#
20	# You should have received a copy of the GNU General Public License
21	# along with this program; if not, write to the Free Software
22	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	#
24	###########################################################################
25
26	# classifier plugin for sorting alphabetically
27
28	package AZList;
29
30	use BasClas;
31	use sorttools;
32	use iso639;
33
# Runs at compile time: declare BasClas as the parent class of this package.
BEGIN {
    @ISA = qw(BasClas);
}
37
# Write the command-line usage summary for this classifier to STDERR.
# Takes no arguments.
sub print_usage {
    # The message starts with a blank line, hence the leading "\n".
    print STDERR "\n" . <<'END_OF_USAGE';
usage: classify AZList [options]
options:

-metadata X (required) Metadata field used for classification.
List will be sorted by this element.

-buttonname X (optional) Button name for this classification.
defaults to metadata name.

-removeprefix regex (optional) A prefix to ignore in the Metadata values
for the field when sorting.
END_OF_USAGE
}
53
# Constructor.
#
# Arguments: the class name followed by the classifier options
# (-metadata, -buttonname, -removeprefix; see print_usage).  Unknown
# options are tolerated because "allow_extra_options" is passed to the
# option parser.
#
# Dies (after printing the usage text to STDERR) when the options cannot
# be parsed or when no -metadata option was given.
#
# Returns the object built by the BasClas constructor, re-blessed into
# $class, with 'list', 'metaname', 'title' and (optionally)
# 'removeprefix' set.
sub new {
    my $class = shift (@_);
    my $self = BasClas->new($class, @_);

    my $metaname;
    my $title;
    my $removeprefix;

    my $parsed_ok = parsargv::parse(\@_,
                                    q^metadata/.*/^, \$metaname,
                                    q^buttonname/.*/^, \$title,
                                    q^removeprefix/.*/^, \$removeprefix,
                                    "allow_extra_options");
    if (!$parsed_ok) {
        print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
        print_usage();
        die "\n";
    }

    unless (defined $metaname) {
        print_usage();
        print STDERR "AZList used with no metadata name\n";
        die "\n";
    }

    # the button is labelled with the metadata name unless told otherwise
    if (!$title) {
        $title = $metaname;
    }

    $self->{'list'} = {};
    $self->{'metaname'} = $metaname;
    $self->{'title'} = $title;

    if (defined($removeprefix) && $removeprefix) {
        # classify() anchors the prefix itself, so a leading ^ is redundant
        $removeprefix =~ s/^\^//;
        $self->{'removeprefix'} = $removeprefix;
    }

    return bless $self, $class;
}
89
# Forget every document classified so far by replacing the OID => sort
# value table with a new, empty hash.  The new hash reference is the
# value of the final assignment and is therefore what the sub returns.
sub init {
    my ($self) = @_;

    my $empty_list = {};
    return $self->{'list'} = $empty_list;
}
95
# Record one document in this classifier.
#
# Argument: $doc_obj, an object providing get_OID(), get_top_section()
# and get_metadata_element(section, name).
#
# The value of the configured metadata field on the document's top
# section is read, the optional 'removeprefix' pattern is stripped from
# its start, and the result is normalised:
#   'Language' -> looked up in %iso639::fromiso639
#   'Creator'  -> sorttools::format_string_name_english
#   otherwise  -> sorttools::format_string_english
# The outcome is stored in $self->{'list'} under the document's OID.
#
# Documents without the metadata field are silently skipped.  A warning
# is written to $self->{'outhandle'} when the same OID is classified
# twice, or when nothing is left of the value after normalisation.
sub classify {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $doc_OID = $doc_obj->get_OID();
    my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
                                                    $self->{'metaname'});

    # if this document doesn't contain the metadata element we're
    # sorting by we won't include it in this classification
    if (defined $metavalue && $metavalue ne "") {
        my $removeprefix = $self->{'removeprefix'};
        if (defined($removeprefix) && length($removeprefix)) {
            # Group the user-supplied pattern so the ^ anchor applies to
            # all of it.  Without the group a prefix such as "The |A "
            # would anchor only "The " and strip "A " from anywhere in
            # the value.
            $metavalue =~ s/^(?:$removeprefix)//;
        }

        if ($self->{'metaname'} eq 'Language') {
            # yields undef for a code that is not in the table; that case
            # is reported by the "metadata is empty" warning below
            $metavalue = $iso639::fromiso639{$metavalue};
        } elsif ($self->{'metaname'} eq 'Creator') {
            &sorttools::format_string_name_english (\$metavalue);
        } else {
            &sorttools::format_string_english (\$metavalue);
        }

        if (defined $self->{'list'}->{$doc_OID}) {
            my $outhandle = $self->{'outhandle'};
            print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
        }

        # Test for "has any characters", not for truth: the value "0" is
        # a perfectly good sort key and must not be reported as empty.
        if (defined $metavalue && length($metavalue)) {
            $self->{'list'}->{$doc_OID} = $metavalue;
        } else {
            my $outhandle = $self->{'outhandle'};
            print $outhandle "WARNING: AZList: $doc_OID metadata is empty - not classifying\n";
        }
    }
}
131
# Sort comparator for two document OIDs, based on their stored sort
# values in $self->{'list'}.
#
# When both values begin with a number (digits, optionally followed by a
# decimal part) and those numbers differ, the numbers decide the order.
# In every other case the two values are compared as plain strings.
#
# Returns -1, 0 or 1 like cmp / <=>.
sub alpha_numeric_cmp
{
    my ($self, $oid_a, $oid_b) = @_;

    my $left  = $self->{'list'}->{$oid_a};
    my $right = $self->{'list'}->{$oid_b};

    # first capture group = the whole leading number, undef if none
    my ($left_num)  = $left  =~ m/^(\d+(\.\d+)?)/;
    my ($right_num) = $right =~ m/^(\d+(\.\d+)?)/;

    if (defined $left_num && defined $right_num && $left_num != $right_num)
    {
        return ($left_num <=> $right_num);
    }

    return ($left cmp $right);
}
154
# Build the classification structure for everything classified so far:
# order the document OIDs with alpha_numeric_cmp and hand the ordered
# list to splitlist, whose result is returned unchanged.
sub get_classify_info {
    my ($self) = @_;

    my $titles = $self->{'list'};
    my @sorted_OIDs = sort { $self->alpha_numeric_cmp($a, $b) } keys %$titles;

    return $self->splitlist (\@sorted_OIDs);
}
163
# Create one node of the classification structure.
#
# Arguments: $title, $childtype and an optional $thistype.
# Returns a reference to a hash holding 'Title', 'childtype' and an empty
# 'contains' list; 'thistype' is added only when $thistype is defined and
# contains at least one word character.
sub get_entry {
    my ($self, $title, $childtype, $thistype) = @_;

    # organise into classification structure
    my $entry = {
        'childtype' => $childtype,
        'Title'     => $title,
        'contains'  => [],
    };

    if (defined $thistype && $thistype =~ /\w/) {
        $entry->{'thistype'} = $thistype;
    }

    return $entry;
}
177
# splitlist takes an ordered list of classifications (@$classlistref) and
# splits it up into alphabetical sub-sections.
#
# Argument: a reference to the ordered list of document OIDs.
# Returns the top-level entry (see get_entry).  With 39 documents or
# fewer its 'contains' list holds the documents directly; otherwise it
# holds one "VList" entry per section returned by compactlist, in sorted
# order with the '0-9' section moved to the end.
#
# A warning is written to $self->{'outhandle'} for every document whose
# sort value does not start with a letter or digit.
sub splitlist {
    my $self = shift (@_);
    my ($classlistref) = @_;

    # don't need to do any splitting if there are 39 (max + min - 1)
    # classifications or fewer
    my $fits_on_one_page = (scalar (@$classlistref) <= 39);

    # top level
    my $childtype = $fits_on_one_page ? "VList" : "HList";
    my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");

    if ($fits_on_one_page) {
        foreach my $subOID (@$classlistref) {
            push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
        }
        return $classifyinfo;
    }

    # first split up the list into separate A-Z and 0-9 classifications
    my $classhash = {};
    foreach my $classification (@$classlistref) {
        my $fulltitle = $self->{'list'}->{$classification};
        $fulltitle = '' unless defined $fulltitle;

        # remove any unwanted stuff: one leading HTML entity, one
        # single-character tag such as <b>, or one other character that
        # is not a letter or digit
        my $stripped = $fulltitle;
        $stripped =~ s/^(&.{1,6};|<[^>]>|[^a-zA-Z0-9])//g;

        # only need first char for classification.  Capture it explicitly
        # rather than reading $1 after a match that may have failed; if
        # stripping left nothing, fall back to the unstripped value.
        my $source = ($stripped ne '') ? $stripped : $fulltitle;
        my ($title) = $source =~ m/^(.)/;
        $title = '' unless defined $title;
        $title =~ tr/a-z/A-Z/;

        if ($title =~ /^[0-9]$/) {
            $title = '0-9';
        }
        elsif ($title !~ /^[A-Z]$/) {
            my $outhandle = $self->{'outhandle'};
            print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
        }

        $classhash->{$title} = [] unless defined $classhash->{$title};
        push (@{$classhash->{$title}}, $classification);
    }
    $classhash = $self->compactlist ($classhash);

    # a 0-9 section normally sorts to the beginning but we want it at the
    # end; pull it out wherever it sorted to, so that a section starting
    # with punctuation cannot leave it stranded in the middle
    my @sections = grep { $_ ne '0-9' } sort keys (%$classhash);
    push (@sections, '0-9') if exists $classhash->{'0-9'};

    foreach my $subclass (@sections) {
        my $tempclassify = $self->get_entry($subclass, "VList");
        foreach my $subsubOID (@{$classhash->{$subclass}}) {
            push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
        }
        push (@{$classifyinfo->{'contains'}}, $tempclassify);
    }

    return $classifyinfo;
}
238
# Merge neighbouring single-letter sections into ranges such as "A-C" so
# that each resulting section holds a reasonable number of documents.
#
# Argument: a reference to a hash mapping a section name (a single
# character, or '0-9') to a reference to the list of OIDs in it.
# Returns a reference to a new hash of the same shape whose keys are
# single letters, "first-last" ranges, and '0-9' (always copied as is).
sub compactlist {
    my $self = shift (@_);
    my ($classhashref) = @_;

    # minimum and maximum documents to be displayed per page.
    # the actual maximum will be max + (min-1).
    # the smallest sub-section is a single letter at present
    # so in this case there may be many times max documents
    # displayed on a page.
    my $min = 10;
    my $max = 30;

    my $compactedhash = {};
    my @currentOIDs = ();         # documents collected for the open section
    my $currentfirstletter = "";  # first letter of the open section
    my $currentlastletter = "";   # last letter added to the open section
    my $lastkey = "";             # key of the most recently closed section

    # name of a section spanning the letters $first .. $last
    my $sectionname = sub {
        my ($first, $last) = @_;
        return ($first eq $last) ? $first : "$first-$last";
    };

    foreach my $subsection (sort keys %$classhashref) {
        my @sectionOIDs = @{$classhashref->{$subsection}};

        if ($subsection eq '0-9') {
            $compactedhash->{$subsection} = [ @sectionOIDs ];
            next;
        }

        $currentfirstletter = $subsection if $currentfirstletter eq "";

        # keep growing the open section while it is still too small, or
        # while adding this letter does not push it over the maximum
        if ((scalar (@currentOIDs) < $min) ||
            ((scalar (@currentOIDs) + scalar (@sectionOIDs)) <= $max)) {
            push (@currentOIDs, @sectionOIDs);
            $currentlastletter = $subsection;
            next;
        }

        # close the open section
        $lastkey = $sectionname->($currentfirstletter, $currentlastletter);
        $compactedhash->{$lastkey} = [ @currentOIDs ];

        if (scalar (@sectionOIDs) >= $max) {
            # this letter fills a page on its own
            $compactedhash->{$subsection} = [ @sectionOIDs ];
            @currentOIDs = ();
            $currentfirstletter = "";
            $lastkey = $subsection;
        } else {
            # this letter opens the next section
            @currentOIDs = @sectionOIDs;
            $currentfirstletter = $subsection;
            $currentlastletter = $subsection;
        }
    }

    if (scalar (@currentOIDs) == 0) {
        # nothing is left over, so there is nothing to add or merge;
        # renaming the last section here would give it a wrong label
    } elsif ((scalar (@currentOIDs) < $min) && ($lastkey ne "")) {
        # add final OIDs to last sub-classification if there aren't many
        my ($newkey) = $lastkey =~ /^(.)/;
        my @merged = (@{$compactedhash->{$lastkey}}, @currentOIDs);
        delete $compactedhash->{$lastkey};
        $compactedhash->{"$newkey-$currentlastletter"} = \@merged;
    } else {
        # otherwise add final sub-classification (also used when there is
        # no earlier section to merge a short remainder into)
        my $finalkey = $sectionname->($currentfirstletter, $currentlastletter);
        $compactedhash->{$finalkey} = [ @currentOIDs ];
    }

    return $compactedhash;
}
305
306	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: