Context Navigation

source: trunk/gsdl/perllib/classify/AZClassify.pm@ 244

Last change on this file since 244 was 244, checked in by sjboddie, 25 years ago
fixed a couple of bugs
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 6.8 KB

Line
1	# classifier plugin for sorting alphabetically
2
3	package AZClassify;
4
5	sub new {
6	my ($class, @options) = @_;
7
8	if (!defined @options \|\| !scalar @options) {
9	die "AZClassify used with no metadata name to classify by\n";
10	}
11
12	return bless {
13	'OID'=>"NULL",
14	'list'=>{},
15	'metaname' => $options[0]
16	}, $class;
17	}
18
19	sub init {
20	my $self = shift (@_);
21
22	$self->{'list'} = {};
23	}
24
25	sub set_OID {
26	my $self = shift (@_);
27	my ($OID) = @_;
28
29	$self->{'OID'} = $OID;
30	}
31
32	sub get_OID {
33	my $self = shift (@_);
34	return $self->{'OID'};
35	}
36
37	sub classify {
38	my $self = shift (@_);
39	my ($doc_obj) = @_;
40
41	my $doc_OID = $doc_obj->get_OID();
42	my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
43	$self->{'metaname'});
44
45	# if this document doesn't contain the metadata element we're
46	# sorting by we won't include it in this classification
47	if (defined $metavalue) {
48	if ($self->{'metaname'} eq 'Creator') {
49	$self->format_string_name_english (\$metavalue);
50	} else {
51	$self->format_string_english (\$metavalue);
52	}
53	if (defined $self->{'list'}->{$doc_OID}) {
54	print STDERR "WARNING: AZClassify::classify called multiple times for $doc_OID\n";
55	}
56	$self->{'list'}->{$doc_OID} = $metavalue;
57	}
58	}
59
60	sub output_classify_info {
61	my $self = shift (@_);
62	my ($handle) = @_;
63
64	my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
65	my $subclasses = $self->splitlist (\@classlist);
66
67	if (scalar @$subclasses) {
68
69	my @contents = ();
70	map { push (@contents, "\".$_->[1]"); } @$subclasses;
71	# write out top level entry
72	$self->output_entry ($handle, $self->{'OID'}, $self->{'metaname'}, \@contents, "List");
73
74	# write out the entries for each sub-classification
75	map {$self->output_entry ($handle, "$self->{'OID'}.$_->[1]", $_->[0], $_->[2]);} @$subclasses;
76
77	} else {
78
79	# write out top level entry
80	$self->output_entry ($handle, $self->{'OID'}, $self->{'metaname'}, \@classlist, "List");
81	}
82	}
83
84	# splitlist takes an ordered list of classifications (@$classlistref) and splits it
85	# up into alphabetical sub-sections. returns a list reference each entry of which is
86	# in the form ["sub-section title", "sub-section number", [contents list]]
87	# note that the title of each classification in @$classlistref is obtained from
88	# $self->{'list'}->{$classlistref->[n]}
89	sub splitlist {
90	my $self = shift (@_);
91	my ($classlistref) = @_;
92	my $classhash = {};
93	my $rarr = [];
94
95	# don't need to do any splitting if there are less than 20 classifications
96	return [] if ((scalar @$classlistref) <= 20);
97
98	# first split up the list into separate A-Z and 0-9 classifications
99	foreach $classification (@$classlistref) {
100	my $title = $self->{'list'}->{$classification};
101	$title =~ s/^(.).*$/$1/;
102	$title =~ tr/[a-z]/[A-Z]/;
103	if ($title =~ /^[0-9]$/) {$title = '0-9';}
104	elsif ($title !~ /^[A-Z]$/) {
105	print STDERR "AZClassify: WARNING $classification has badly formatted title ($title)\n";
106	}
107	$classhash->{$title} = [] unless defined $classhash->{$title};
108	push (@{$classhash->{$title}}, $classification);
109	}
110
111	$classhash = $self->compactlist ($classhash);
112
113	my $count = 1;
114	foreach $subsection (sort keys (%$classhash)) {
115	push (@$rarr, [$subsection, $count, $classhash->{$subsection}]);
116	$count ++;
117	}
118	return $rarr;
119	}
120
121	sub compactlist {
122	my $self = shift (@_);
123	my ($classhashref) = @_;
124	my $compactedhash = {};
125	my @currentOIDs = ();
126	my $currentfirstletter = "";
127	my $currentlastletter = "";
128	my $lastkey = "";
129
130	# minimum and maximum documents to be displayed per page.
131	# the actual maximum will be max + (min-1).
132	# the smallest sub-section is a single letter at present
133	# so in this case there may be many times max documents
134	# displayed on a page.
135	my $min = 10;
136	my $max = 30;
137
138	foreach $subsection (sort keys %$classhashref) {
139	if ($subsection eq '0-9') {
140	@{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
141	next;
142	}
143	$currentfirstletter = $subsection if $currentfirstletter eq "";
144	if ((scalar (@currentOIDs) < $min) \|\|
145	((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
146	push (@currentOIDs, @{$classhashref->{$subsection}});
147	$currentlastletter = $subsection;
148	} else {
149
150	if ($currentfirstletter eq $currentlastletter) {
151	@{$compactedhash->{$currentfirstletter}} = @currentOIDs;
152	$lastkey = $currentfirstletter;
153	} else {
154	@{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
155	$lastkey = "$currentfirstletter-$currentlastletter";
156	}
157	if (scalar (@{$classhashref->{$subsection}}) >= $max) {
158	$compactedhash->{$subsection} = $classhashref->{$subsection};
159	@currentOIDs = ();
160	$currentfirstletter = "";
161	} else {
162	@currentOIDs = @{$classhashref->{$subsection}};
163	$currentfirstletter = $subsection;
164	$currentlastletter = $subsection;
165	}
166	}
167	}
168
169	# add final OIDs to last sub-classification if there aren't many otherwise
170	# add final sub-classification
171	if (scalar (@currentOIDs) < $min) {
172	my ($newkey) = $lastkey =~ /^(.)/;
173	@currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
174	delete $compactedhash->{$lastkey};
175	@{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
176	} else {
177	if ($currentfirstletter eq $currentlastletter) {
178	@{$compactedhash->{$currentfirstletter}} = @currentOIDs;
179	} else {
180	@{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
181	}
182	}
183
184	return $compactedhash;
185	}
186
187	sub output_entry {
188	my $self = shift (@_);
189	my ($handle, $OID, $title, $contentsref, $classifytype) = @_;
190
191	print $handle "[$OID]\n";
192	print $handle "<doctype>classify\n";
193	print $handle "<hastxt>0\n";
194	print $handle "<Title>$title\n";
195	print $handle "<classifytype>$classifytype\n" if defined $classifytype;
196	if (scalar @$contentsref) {
197	print $handle "<contains>", join (";", @$contentsref), "\n";
198	}
199	print $handle '-' x 70, "\n";
200	}
201
202	# format an english name for sorting
203	# i.e. convert to lowercase, put surname before
204	# first names etc.
205	sub format_string_name_english {
206	my $self = shift (@_);
207	my ($stringref) = @_;
208
209	$$stringref =~ tr/A-Z/a-z/;
210	$$stringref =~ s/[^a-z0-9 ]//g;
211	$$stringref =~ s/\s+/ /g;
212	$$stringref =~ s/^\s+//;
213	my @names = split / /, $$stringref;
214	my $surname = pop @names;
215	while (scalar @names && $surname =~ /^(jnr\|snr)$/i) {
216	$surname = pop @names;
217	}
218	$$stringref = $surname . " " . $$stringref;
219	}
220
221	# format an english string for sorting
222	# i.e. convert to lowercase, remove the, a or an
223	# from beginning of string etc.
224	sub format_string_english {
225	my $self = shift (@_);
226	my ($stringref) = @_;
227
228	$$stringref =~ tr/A-Z/a-z/;
229	$$stringref =~ s/[^a-z0-9 ]//g;
230	$$stringref =~ s/^\s*(the\|a\|an)\b//;
231	$$stringref =~ s/^\s+//;
232	}
233
234	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: