Context Navigation

source: trunk/gsdl/perllib/classify/AZList.pm@ 3109

Last change on this file since 3109 was 3109, checked in by jrm21, 22 years ago
When getting first char for classification, s/^{(.).*$/$1/g isn't good enough as problems occur if the title is eg "Line 1\nLine 2" - we get a "first letter" of "LLine 2" !!! Do m/}(.)/ ; $title=$1 instead, so we are absolutely sure that the classification is only one character long.
Property svn:keywords set to `Author Date Id Revision`
File size: 8.5 KB

Line
1	###########################################################################
2	#
3	# AZList.pm --
4	# A component of the Greenstone digital library software
5	# from the New Zealand Digital Library Project at the
6	# University of Waikato, New Zealand.
7	#
8	# Copyright (C) 1999 New Zealand Digital Library Project
9	#
10	# This program is free software; you can redistribute it and/or modify
11	# it under the terms of the GNU General Public License as published by
12	# the Free Software Foundation; either version 2 of the License, or
13	# (at your option) any later version.
14	#
15	# This program is distributed in the hope that it will be useful,
16	# but WITHOUT ANY WARRANTY; without even the implied warranty of
17	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	# GNU General Public License for more details.
19	#
20	# You should have received a copy of the GNU General Public License
21	# along with this program; if not, write to the Free Software
22	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	#
24	###########################################################################
25
26	# classifier plugin for sorting alphabetically
27
28	package AZList;
29
30	use BasClas;
31	use sorttools;
32	use iso639;
33
34	sub BEGIN {
35	@ISA = ('BasClas');
36	}
37
38	sub print_usage {
39	print STDERR "
40	usage: classify AZList [options]
41	options:
42
43	-metadata X (required) Metadata field used for classification.
44	List will be sorted by this element.
45
46	-buttonname X (optional) Button name for this classification.
47	defaults to metadata name.
48
49	-removeprefix regex (optional) A prefix to ignore in the Metadata values
50	for the field when sorting.
51	";
52	}
53
54	sub new {
55	my $class = shift (@_);
56	my $self = new BasClas($class, @_);
57
58	my ($metaname, $title, $removeprefix);
59
60	if (!parsargv::parse(\@_,
61	q^metadata/.*/^, \$metaname,
62	q^buttonname/.*/^, \$title,
63	q^removeprefix/.*/^, \$removeprefix,
64	"allow_extra_options")) {
65
66	print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
67	&print_usage();
68	die "\n";
69	}
70
71	if (!defined $metaname) {
72	&print_usage;
73	print STDERR "AZList used with no metadata name\n";
74	die "\n";
75	}
76
77	$title = $metaname unless ($title);
78
79	$self->{'list'} = {};
80	$self->{'metaname'} = $metaname;
81	$self->{'title'} = $title;
82	if (defined($removeprefix) && $removeprefix) {
83	$self->{'removeprefix'} = $removeprefix;
84	}
85
86	return bless $self, $class;
87	}
88
89	sub init {
90	my $self = shift (@_);
91
92	$self->{'list'} = {};
93	}
94
95	sub classify {
96	my $self = shift (@_);
97	my ($doc_obj) = @_;
98
99	my $doc_OID = $doc_obj->get_OID();
100	my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
101	$self->{'metaname'});
102
103	# if this document doesn't contain the metadata element we're
104	# sorting by we won't include it in this classification
105	if (defined $metavalue && $metavalue ne "") {
106	if ($self->{'removeprefix'}) {
107	$metavalue =~ s/^$self->{'removeprefix'}//;
108	}
109
110	if ($self->{'metaname'} eq 'Language') {
111	$metavalue = $iso639::fromiso639{$metavalue};
112	} elsif ($self->{'metaname'} eq 'Creator') {
113	&sorttools::format_string_name_english (\$metavalue);
114	} else {
115	&sorttools::format_string_english (\$metavalue);
116	}
117	if (defined $self->{'list'}->{$doc_OID}) {
118	my $outhandle = $self->{'outhandle'};
119	print $outhandle "WARNING: AZList::classify called multiple times for $doc_OID\n";
120	}
121	$self->{'list'}->{$doc_OID} = $metavalue;
122	}
123	}
124
125	sub alpha_numeric_cmp
126	{
127	my ($self,$a,$b) = @_;
128
129	my $title_a = $self->{'list'}->{$a};
130	my $title_b = $self->{'list'}->{$b};
131
132	if ($title_a =~ m/^(\d+(\.\d+)?)/)
133	{
134	my $val_a = $1;
135	if ($title_b =~ m/^(\d+(\.\d+)?)/)
136	{
137	my $val_b = $1;
138	if ($val_a != $val_b)
139	{
140	return ($val_a <=> $val_b);
141	}
142	}
143	}
144
145	return ($title_a cmp $title_b);
146	}
147
148	sub get_classify_info {
149	my $self = shift (@_);
150
151	my @classlist
152	= sort { $self->alpha_numeric_cmp($a,$b) } keys %{$self->{'list'}};
153
154	return $self->splitlist (\@classlist);
155	}
156
157	sub get_entry {
158	my $self = shift (@_);
159	my ($title, $childtype, $thistype) = @_;
160
161	# organise into classification structure
162	my %classifyinfo = ('childtype'=>$childtype,
163	'Title'=>$title,
164	'contains'=>[]);
165	$classifyinfo{'thistype'} = $thistype
166	if defined $thistype && $thistype =~ /\w/;
167
168	return \%classifyinfo;
169	}
170
171	# splitlist takes an ordered list of classifications (@$classlistref) and splits it
172	# up into alphabetical sub-sections.
173	sub splitlist {
174	my $self = shift (@_);
175	my ($classlistref) = @_;
176	my $classhash = {};
177
178	# top level
179	my $childtype = "HList";
180	if (scalar (@$classlistref) <= 39) {$childtype = "VList";}
181	my $classifyinfo = $self->get_entry ($self->{'title'}, $childtype, "Invisible");
182
183	# don't need to do any splitting if there are less than 39 (max + min -1) classifications
184	if ((scalar @$classlistref) <= 39) {
185	foreach $subOID (@$classlistref) {
186	push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
187	}
188	return $classifyinfo;
189	}
190
191	# first split up the list into separate A-Z and 0-9 classifications
192	foreach $classification (@$classlistref) {
193	my $title = $self->{'list'}->{$classification};
194
195	$title =~ s/^(&.{1,6};\|<[^>]>\|[^a-zA-Z0-9])//g; # remove any unwanted stuff
196	# only need first char for classification
197	$title =~ m/^(.)/; $title=$1;
198	$title =~ tr/[a-z]/[A-Z]/;
199	if ($title =~ /^[0-9]$/) {$title = '0-9';}
200	elsif ($title !~ /^[A-Z]$/) {
201	my $outhandle = $self->{'outhandle'};
202	print $outhandle "AZList: WARNING $classification has badly formatted title ($title)\n";
203	}
204	$classhash->{$title} = [] unless defined $classhash->{$title};
205	push (@{$classhash->{$title}}, $classification);
206	}
207	$classhash = $self->compactlist ($classhash);
208
209	my @tmparr = ();
210	foreach $subsection (sort keys (%$classhash)) {
211	push (@tmparr, $subsection);
212	}
213
214	# if there's a 0-9 section it will have been sorted to the beginning
215	# but we want it at the end
216	if ($tmparr[0] eq '0-9') {
217	shift @tmparr;
218	push (@tmparr, '0-9');
219	}
220
221	foreach $subclass (@tmparr) {
222	my $tempclassify = $self->get_entry($subclass, "VList");
223	foreach $subsubOID (@{$classhash->{$subclass}}) {
224	push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
225	}
226	push (@{$classifyinfo->{'contains'}}, $tempclassify);
227	}
228
229	return $classifyinfo;
230	}
231
232	sub compactlist {
233	my $self = shift (@_);
234	my ($classhashref) = @_;
235	my $compactedhash = {};
236	my @currentOIDs = ();
237	my $currentfirstletter = "";
238	my $currentlastletter = "";
239	my $lastkey = "";
240
241	# minimum and maximum documents to be displayed per page.
242	# the actual maximum will be max + (min-1).
243	# the smallest sub-section is a single letter at present
244	# so in this case there may be many times max documents
245	# displayed on a page.
246	my $min = 10;
247	my $max = 30;
248
249	foreach $subsection (sort keys %$classhashref) {
250	if ($subsection eq '0-9') {
251	@{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
252	next;
253	}
254	$currentfirstletter = $subsection if $currentfirstletter eq "";
255	if ((scalar (@currentOIDs) < $min) \|\|
256	((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
257	push (@currentOIDs, @{$classhashref->{$subsection}});
258	$currentlastletter = $subsection;
259	} else {
260
261	if ($currentfirstletter eq $currentlastletter) {
262	@{$compactedhash->{$currentfirstletter}} = @currentOIDs;
263	$lastkey = $currentfirstletter;
264	} else {
265	@{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
266	$lastkey = "$currentfirstletter-$currentlastletter";
267	}
268	if (scalar (@{$classhashref->{$subsection}}) >= $max) {
269	$compactedhash->{$subsection} = $classhashref->{$subsection};
270	@currentOIDs = ();
271	$currentfirstletter = "";
272	$lastkey = $subsection;
273	} else {
274	@currentOIDs = @{$classhashref->{$subsection}};
275	$currentfirstletter = $subsection;
276	$currentlastletter = $subsection;
277	}
278	}
279	}
280
281	# add final OIDs to last sub-classification if there aren't many otherwise
282	# add final sub-classification
283	if (scalar (@currentOIDs) < $min) {
284	my ($newkey) = $lastkey =~ /^(.)/;
285	@currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
286	delete $compactedhash->{$lastkey};
287	@{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
288	} else {
289	if ($currentfirstletter eq $currentlastletter) {
290	@{$compactedhash->{$currentfirstletter}} = @currentOIDs;
291	} else {
292	@{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
293	}
294	}
295
296	return $compactedhash;
297	}
298
299	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: