Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trunk/gsdl/perllib/classify/AZList.pm@ 256

Last change on this file since 256 was 256, checked in by sjboddie, 25 years ago
renamed AZClassify AZList
Property svn:keywords set to `Author Date Id Revision`
File size: 7.1 KB

Line
1	# classifier plugin for sorting alphabetically
2
3	package AZList;
4
5	sub new {
6	my ($class, @options) = @_;
7
8	if (!defined @options \|\| !scalar @options) {
9	die "AZList used with no metadata name to classify by\n";
10	}
11
12	return bless {
13	'OID'=>"NULL",
14	'list'=>{},
15	'metaname' => $options[0]
16	}, $class;
17	}
18
19	sub init {
20	my $self = shift (@_);
21
22	$self->{'list'} = {};
23	}
24
25	sub set_OID {
26	my $self = shift (@_);
27	my ($OID) = @_;
28
29	$self->{'OID'} = $OID;
30	}
31
32	sub get_OID {
33	my $self = shift (@_);
34	return $self->{'OID'};
35	}
36
37	sub classify {
38	my $self = shift (@_);
39	my ($doc_obj) = @_;
40
41	my $doc_OID = $doc_obj->get_OID();
42	my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
43	$self->{'metaname'});
44
45	# if this document doesn't contain the metadata element we're
46	# sorting by we won't include it in this classification
47
48	if (defined $metavalue) {
49	if ($self->{'metaname'} eq 'Creator') {
50	$self->format_string_name_english (\$metavalue);
51	} else {
52	$self->format_string_english (\$metavalue);
53	}
54	if (defined $self->{'list'}->{$doc_OID}) {
55	print STDERR "WARNING: AZList::classify called multiple times for $doc_OID\n";
56	}
57	$self->{'list'}->{$doc_OID} = $metavalue;
58	}
59	}
60
61	sub output_classify_info {
62	my $self = shift (@_);
63	my ($handle) = @_;
64
65	my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
66	my $subclasses = $self->splitlist (\@classlist);
67
68	if (scalar @$subclasses) {
69
70	my @contents = ();
71	map { push (@contents, "\".$_->[1]"); } @$subclasses;
72	# write out top level entry
73	$self->output_entry ($handle, $self->{'OID'}, $self->{'metaname'}, \@contents, "AZList");
74
75	# write out the entries for each sub-classification
76	map {$self->output_entry ($handle, "$self->{'OID'}.$_->[1]", $_->[0], $_->[2]);} @$subclasses;
77
78	} else {
79
80	# write out top level entry
81	$self->output_entry ($handle, $self->{'OID'}, $self->{'metaname'}, \@classlist, "AZList");
82	}
83	}
84
85	# splitlist takes an ordered list of classifications (@$classlistref) and splits it
86	# up into alphabetical sub-sections. returns a list reference each entry of which is
87	# in the form ["sub-section title", "sub-section number", [contents list]]
88	# note that the title of each classification in @$classlistref is obtained from
89	# $self->{'list'}->{$classlistref->[n]}
90	sub splitlist {
91	my $self = shift (@_);
92	my ($classlistref) = @_;
93	my $classhash = {};
94	my $rarr = [];
95
96	# don't need to do any splitting if there are less than 20 classifications
97	return [] if ((scalar @$classlistref) <= 20);
98
99	# first split up the list into separate A-Z and 0-9 classifications
100	foreach $classification (@$classlistref) {
101	my $title = $self->{'list'}->{$classification};
102	$title =~ s/^(.).*$/$1/;
103	$title =~ tr/[a-z]/[A-Z]/;
104	if ($title =~ /^[0-9]$/) {$title = '0-9';}
105	elsif ($title !~ /^[A-Z]$/) {
106	print STDERR "AZList: WARNING $classification has badly formatted title ($title)\n";
107	}
108	$classhash->{$title} = [] unless defined $classhash->{$title};
109	push (@{$classhash->{$title}}, $classification);
110	}
111
112	$classhash = $self->compactlist ($classhash);
113
114	my @tmparr = ();
115	foreach $subsection (sort keys (%$classhash)) {
116	push (@tmparr, $subsection);
117	}
118
119	# if there's a 0-9 section it will have been sorted to the beginning
120	# but we want it at the end
121	if ($tmparr[0] eq '0-9') {
122	shift @tmparr;
123	push (@tmparr, '0-9');
124	}
125
126	my $count = 1;
127	foreach $subsection (@tmparr) {
128	push (@$rarr, [$subsection, $count, $classhash->{$subsection}]);
129	$count ++;
130	}
131
132	return $rarr;
133	}
134
135	sub compactlist {
136	my $self = shift (@_);
137	my ($classhashref) = @_;
138	my $compactedhash = {};
139	my @currentOIDs = ();
140	my $currentfirstletter = "";
141	my $currentlastletter = "";
142	my $lastkey = "";
143
144	# minimum and maximum documents to be displayed per page.
145	# the actual maximum will be max + (min-1).
146	# the smallest sub-section is a single letter at present
147	# so in this case there may be many times max documents
148	# displayed on a page.
149	my $min = 10;
150	my $max = 30;
151
152	foreach $subsection (sort keys %$classhashref) {
153	if ($subsection eq '0-9') {
154	@{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
155	next;
156	}
157	$currentfirstletter = $subsection if $currentfirstletter eq "";
158	if ((scalar (@currentOIDs) < $min) \|\|
159	((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
160	push (@currentOIDs, @{$classhashref->{$subsection}});
161	$currentlastletter = $subsection;
162	} else {
163
164	if ($currentfirstletter eq $currentlastletter) {
165	@{$compactedhash->{$currentfirstletter}} = @currentOIDs;
166	$lastkey = $currentfirstletter;
167	} else {
168	@{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
169	$lastkey = "$currentfirstletter-$currentlastletter";
170	}
171	if (scalar (@{$classhashref->{$subsection}}) >= $max) {
172	$compactedhash->{$subsection} = $classhashref->{$subsection};
173	@currentOIDs = ();
174	$currentfirstletter = "";
175	} else {
176	@currentOIDs = @{$classhashref->{$subsection}};
177	$currentfirstletter = $subsection;
178	$currentlastletter = $subsection;
179	}
180	}
181	}
182
183	# add final OIDs to last sub-classification if there aren't many otherwise
184	# add final sub-classification
185	if (scalar (@currentOIDs) < $min) {
186	my ($newkey) = $lastkey =~ /^(.)/;
187	@currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
188	delete $compactedhash->{$lastkey};
189	@{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
190	} else {
191	if ($currentfirstletter eq $currentlastletter) {
192	@{$compactedhash->{$currentfirstletter}} = @currentOIDs;
193	} else {
194	@{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
195	}
196	}
197
198	return $compactedhash;
199	}
200
201	sub output_entry {
202	my $self = shift (@_);
203	my ($handle, $OID, $title, $contentsref, $classifytype) = @_;
204
205	print $handle "[$OID]\n";
206	print $handle "<doctype>classify\n";
207	print $handle "<hastxt>0\n";
208	print $handle "<Title>$title\n";
209	print $handle "<classifytype>$classifytype\n" if defined $classifytype;
210	if (scalar @$contentsref) {
211	print $handle "<contains>", join (";", @$contentsref), "\n";
212	}
213	print $handle '-' x 70, "\n";
214	}
215
216	# format an english name for sorting
217	# i.e. convert to lowercase, put surname before
218	# first names etc.
219	sub format_string_name_english {
220	my $self = shift (@_);
221	my ($stringref) = @_;
222
223	$$stringref =~ tr/A-Z/a-z/;
224	$$stringref =~ s/[^a-z0-9 ]//g;
225	$$stringref =~ s/\s+/ /g;
226	$$stringref =~ s/^\s+//;
227	my @names = split / /, $$stringref;
228	my $surname = pop @names;
229	while (scalar @names && $surname =~ /^(jnr\|snr)$/i) {
230	$surname = pop @names;
231	}
232	$$stringref = $surname . " " . $$stringref;
233	}
234
235	# format an english string for sorting
236	# i.e. convert to lowercase, remove the, a or an
237	# from beginning of string etc.
238	sub format_string_english {
239	my $self = shift (@_);
240	my ($stringref) = @_;
241
242	$$stringref =~ tr/A-Z/a-z/;
243	$$stringref =~ s/[^a-z0-9 ]//g;
244	$$stringref =~ s/^\s*(the\|a\|an)\b//;
245	$$stringref =~ s/^\s+//;
246	}
247
248	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: