Context Navigation

source: trunk/gsdl/perllib/classify/AZList.pm@ 316

Last change on this file since 316 was 316, checked in by sjboddie, 25 years ago
changed the way classifiers work
Property svn:keywords set to `Author Date Id Revision`
File size: 6.7 KB

Line
1	# classifier plugin for sorting alphabetically
2
3	package AZList;
4
5	sub new {
6	my ($class, @options) = @_;
7
8	if (!defined @options \|\| !scalar @options) {
9	die "AZList used with no metadata name to classify by\n";
10	}
11
12	return bless {
13	'list'=>{},
14	'metaname' => $options[0]
15	}, $class;
16	}
17
18	sub init {
19	my $self = shift (@_);
20
21	$self->{'list'} = {};
22	}
23
24	sub classify {
25	my $self = shift (@_);
26	my ($doc_obj) = @_;
27
28	my $doc_OID = $doc_obj->get_OID();
29	my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
30	$self->{'metaname'});
31
32	# if this document doesn't contain the metadata element we're
33	# sorting by we won't include it in this classification
34	if (defined $metavalue) {
35	if ($self->{'metaname'} eq 'Creator') {
36	$self->format_string_name_english (\$metavalue);
37	} else {
38	$self->format_string_english (\$metavalue);
39	}
40	if (defined $self->{'list'}->{$doc_OID}) {
41	print STDERR "WARNING: AZList::classify called multiple times for $doc_OID\n";
42	}
43	$self->{'list'}->{$doc_OID} = $metavalue;
44	}
45	}
46
47	sub get_classify_info {
48	my $self = shift (@_);
49
50	my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
51	my $subclasses = $self->splitlist (\@classlist);
52
53	my ($classifyinfo);
54
55	if (scalar @$subclasses) {
56	$classifyinfo = $self->get_entry ($self->{'metaname'}, "AZList");
57	foreach $subclass (@$subclasses) {
58	my $tempclassify = $self->get_entry($subclass->[0], "AZList");
59	foreach $subsubOID (@{$subclass->[2]}) {
60	push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
61	}
62	push (@{$classifyinfo->{'contains'}}, $tempclassify);
63	}
64
65	} else {
66	$classifyinfo = $self->get_entry($self->{'metaname'}, "AZList");
67	foreach $subOID (@classlist) {
68	push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
69	}
70	}
71
72	return $classifyinfo;
73	}
74
75	sub get_entry {
76	my $self = shift (@_);
77	my ($title, $classifytype) = @_;
78
79	# organise into classification structure
80	my %classifyinfo = ('classifytype'=>$classifytype,
81	'Title'=>$title,
82	'contains'=>[]);
83
84	return \%classifyinfo;
85	}
86
87	# splitlist takes an ordered list of classifications (@$classlistref) and splits it
88	# up into alphabetical sub-sections. returns a list reference each entry of which is
89	# in the form ["sub-section title", "sub-section number", [contents list]]
90	# note that the title of each classification in @$classlistref is obtained from
91	# $self->{'list'}->{$classlistref->[n]}
92	sub splitlist {
93	my $self = shift (@_);
94	my ($classlistref) = @_;
95	my $classhash = {};
96	my $rarr = [];
97
98	# don't need to do any splitting if there are less than 20 classifications
99	return [] if ((scalar @$classlistref) <= 20);
100
101	# first split up the list into separate A-Z and 0-9 classifications
102	foreach $classification (@$classlistref) {
103	my $title = $self->{'list'}->{$classification};
104	$title =~ s/^(.).*$/$1/;
105	$title =~ tr/[a-z]/[A-Z]/;
106	if ($title =~ /^[0-9]$/) {$title = '0-9';}
107	elsif ($title !~ /^[A-Z]$/) {
108	print STDERR "AZList: WARNING $classification has badly formatted title ($title)\n";
109	}
110	$classhash->{$title} = [] unless defined $classhash->{$title};
111	push (@{$classhash->{$title}}, $classification);
112	}
113
114	$classhash = $self->compactlist ($classhash);
115
116	my @tmparr = ();
117	foreach $subsection (sort keys (%$classhash)) {
118	push (@tmparr, $subsection);
119	}
120
121	# if there's a 0-9 section it will have been sorted to the beginning
122	# but we want it at the end
123	if ($tmparr[0] eq '0-9') {
124	shift @tmparr;
125	push (@tmparr, '0-9');
126	}
127
128	my $count = 1;
129	foreach $subsection (@tmparr) {
130	push (@$rarr, [$subsection, $count, $classhash->{$subsection}]);
131	$count ++;
132	}
133
134	return $rarr;
135	}
136
137	sub compactlist {
138	my $self = shift (@_);
139	my ($classhashref) = @_;
140	my $compactedhash = {};
141	my @currentOIDs = ();
142	my $currentfirstletter = "";
143	my $currentlastletter = "";
144	my $lastkey = "";
145
146	# minimum and maximum documents to be displayed per page.
147	# the actual maximum will be max + (min-1).
148	# the smallest sub-section is a single letter at present
149	# so in this case there may be many times max documents
150	# displayed on a page.
151	my $min = 10;
152	my $max = 30;
153
154	foreach $subsection (sort keys %$classhashref) {
155	if ($subsection eq '0-9') {
156	@{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
157	next;
158	}
159	$currentfirstletter = $subsection if $currentfirstletter eq "";
160	if ((scalar (@currentOIDs) < $min) \|\|
161	((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
162	push (@currentOIDs, @{$classhashref->{$subsection}});
163	$currentlastletter = $subsection;
164	} else {
165
166	if ($currentfirstletter eq $currentlastletter) {
167	@{$compactedhash->{$currentfirstletter}} = @currentOIDs;
168	$lastkey = $currentfirstletter;
169	} else {
170	@{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
171	$lastkey = "$currentfirstletter-$currentlastletter";
172	}
173	if (scalar (@{$classhashref->{$subsection}}) >= $max) {
174	$compactedhash->{$subsection} = $classhashref->{$subsection};
175	@currentOIDs = ();
176	$currentfirstletter = "";
177	} else {
178	@currentOIDs = @{$classhashref->{$subsection}};
179	$currentfirstletter = $subsection;
180	$currentlastletter = $subsection;
181	}
182	}
183	}
184
185	# add final OIDs to last sub-classification if there aren't many otherwise
186	# add final sub-classification
187	if (scalar (@currentOIDs) < $min) {
188	my ($newkey) = $lastkey =~ /^(.)/;
189	@currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
190	delete $compactedhash->{$lastkey};
191	@{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
192	} else {
193	if ($currentfirstletter eq $currentlastletter) {
194	@{$compactedhash->{$currentfirstletter}} = @currentOIDs;
195	} else {
196	@{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
197	}
198	}
199
200	return $compactedhash;
201	}
202
203	# format an english name for sorting
204	# i.e. convert to lowercase, put surname before
205	# first names etc.
206	sub format_string_name_english {
207	my $self = shift (@_);
208	my ($stringref) = @_;
209
210	$$stringref =~ tr/A-Z/a-z/;
211	$$stringref =~ s/[^a-z0-9 ]//g;
212	$$stringref =~ s/\s+/ /g;
213	$$stringref =~ s/^\s+//;
214	my @names = split / /, $$stringref;
215	my $surname = pop @names;
216	while (scalar @names && $surname =~ /^(jnr\|snr)$/i) {
217	$surname = pop @names;
218	}
219	$$stringref = $surname . " " . $$stringref;
220	}
221
222	# format an english string for sorting
223	# i.e. convert to lowercase, remove the, a or an
224	# from beginning of string etc.
225	sub format_string_english {
226	my $self = shift (@_);
227	my ($stringref) = @_;
228
229	$$stringref =~ tr/A-Z/a-z/;
230	$$stringref =~ s/[^a-z0-9 ]//g;
231	$$stringref =~ s/^\s*(the\|a\|an)\b//;
232	$$stringref =~ s/^\s+//;
233	}
234
235	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: