source: trunk/gsdl/perllib/classify/AZList.pm@ 316

Last change on this file since 316 was 316, checked in by sjboddie, 25 years ago
  • changed the way classifiers work
  • Property svn:keywords set to Author Date Id Revision
File size: 6.7 KB
Line 
1# classifier plugin for sorting alphabetically
2
3package AZList;
4
5sub new {
6 my ($class, @options) = @_;
7
8 if (!defined @options || !scalar @options) {
9 die "AZList used with no metadata name to classify by\n";
10 }
11
12 return bless {
13 'list'=>{},
14 'metaname' => $options[0]
15 }, $class;
16}
17
18sub init {
19 my $self = shift (@_);
20
21 $self->{'list'} = {};
22}
23
24sub classify {
25 my $self = shift (@_);
26 my ($doc_obj) = @_;
27
28 my $doc_OID = $doc_obj->get_OID();
29 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
30 $self->{'metaname'});
31
32 # if this document doesn't contain the metadata element we're
33 # sorting by we won't include it in this classification
34 if (defined $metavalue) {
35 if ($self->{'metaname'} eq 'Creator') {
36 $self->format_string_name_english (\$metavalue);
37 } else {
38 $self->format_string_english (\$metavalue);
39 }
40 if (defined $self->{'list'}->{$doc_OID}) {
41 print STDERR "WARNING: AZList::classify called multiple times for $doc_OID\n";
42 }
43 $self->{'list'}->{$doc_OID} = $metavalue;
44 }
45}
46
47sub get_classify_info {
48 my $self = shift (@_);
49
50 my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
51 my $subclasses = $self->splitlist (\@classlist);
52
53 my ($classifyinfo);
54
55 if (scalar @$subclasses) {
56 $classifyinfo = $self->get_entry ($self->{'metaname'}, "AZList");
57 foreach $subclass (@$subclasses) {
58 my $tempclassify = $self->get_entry($subclass->[0], "AZList");
59 foreach $subsubOID (@{$subclass->[2]}) {
60 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
61 }
62 push (@{$classifyinfo->{'contains'}}, $tempclassify);
63 }
64
65 } else {
66 $classifyinfo = $self->get_entry($self->{'metaname'}, "AZList");
67 foreach $subOID (@classlist) {
68 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
69 }
70 }
71
72 return $classifyinfo;
73}
74
75sub get_entry {
76 my $self = shift (@_);
77 my ($title, $classifytype) = @_;
78
79 # organise into classification structure
80 my %classifyinfo = ('classifytype'=>$classifytype,
81 'Title'=>$title,
82 'contains'=>[]);
83
84 return \%classifyinfo;
85}
86
87# splitlist takes an ordered list of classifications (@$classlistref) and splits it
88# up into alphabetical sub-sections. returns a list reference each entry of which is
89# in the form ["sub-section title", "sub-section number", [contents list]]
90# note that the title of each classification in @$classlistref is obtained from
91# $self->{'list'}->{$classlistref->[n]}
92sub splitlist {
93 my $self = shift (@_);
94 my ($classlistref) = @_;
95 my $classhash = {};
96 my $rarr = [];
97
98 # don't need to do any splitting if there are less than 20 classifications
99 return [] if ((scalar @$classlistref) <= 20);
100
101 # first split up the list into separate A-Z and 0-9 classifications
102 foreach $classification (@$classlistref) {
103 my $title = $self->{'list'}->{$classification};
104 $title =~ s/^(.).*$/$1/;
105 $title =~ tr/[a-z]/[A-Z]/;
106 if ($title =~ /^[0-9]$/) {$title = '0-9';}
107 elsif ($title !~ /^[A-Z]$/) {
108 print STDERR "AZList: WARNING $classification has badly formatted title ($title)\n";
109 }
110 $classhash->{$title} = [] unless defined $classhash->{$title};
111 push (@{$classhash->{$title}}, $classification);
112 }
113
114 $classhash = $self->compactlist ($classhash);
115
116 my @tmparr = ();
117 foreach $subsection (sort keys (%$classhash)) {
118 push (@tmparr, $subsection);
119 }
120
121 # if there's a 0-9 section it will have been sorted to the beginning
122 # but we want it at the end
123 if ($tmparr[0] eq '0-9') {
124 shift @tmparr;
125 push (@tmparr, '0-9');
126 }
127
128 my $count = 1;
129 foreach $subsection (@tmparr) {
130 push (@$rarr, [$subsection, $count, $classhash->{$subsection}]);
131 $count ++;
132 }
133
134 return $rarr;
135}
136
137sub compactlist {
138 my $self = shift (@_);
139 my ($classhashref) = @_;
140 my $compactedhash = {};
141 my @currentOIDs = ();
142 my $currentfirstletter = "";
143 my $currentlastletter = "";
144 my $lastkey = "";
145
146 # minimum and maximum documents to be displayed per page.
147 # the actual maximum will be max + (min-1).
148 # the smallest sub-section is a single letter at present
149 # so in this case there may be many times max documents
150 # displayed on a page.
151 my $min = 10;
152 my $max = 30;
153
154 foreach $subsection (sort keys %$classhashref) {
155 if ($subsection eq '0-9') {
156 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
157 next;
158 }
159 $currentfirstletter = $subsection if $currentfirstletter eq "";
160 if ((scalar (@currentOIDs) < $min) ||
161 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
162 push (@currentOIDs, @{$classhashref->{$subsection}});
163 $currentlastletter = $subsection;
164 } else {
165
166 if ($currentfirstletter eq $currentlastletter) {
167 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
168 $lastkey = $currentfirstletter;
169 } else {
170 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
171 $lastkey = "$currentfirstletter-$currentlastletter";
172 }
173 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
174 $compactedhash->{$subsection} = $classhashref->{$subsection};
175 @currentOIDs = ();
176 $currentfirstletter = "";
177 } else {
178 @currentOIDs = @{$classhashref->{$subsection}};
179 $currentfirstletter = $subsection;
180 $currentlastletter = $subsection;
181 }
182 }
183 }
184
185 # add final OIDs to last sub-classification if there aren't many otherwise
186 # add final sub-classification
187 if (scalar (@currentOIDs) < $min) {
188 my ($newkey) = $lastkey =~ /^(.)/;
189 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
190 delete $compactedhash->{$lastkey};
191 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
192 } else {
193 if ($currentfirstletter eq $currentlastletter) {
194 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
195 } else {
196 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
197 }
198 }
199
200 return $compactedhash;
201}
202
203# format an english name for sorting
204# i.e. convert to lowercase, put surname before
205# first names etc.
206sub format_string_name_english {
207 my $self = shift (@_);
208 my ($stringref) = @_;
209
210 $$stringref =~ tr/A-Z/a-z/;
211 $$stringref =~ s/[^a-z0-9 ]//g;
212 $$stringref =~ s/\s+/ /g;
213 $$stringref =~ s/^\s+//;
214 my @names = split / /, $$stringref;
215 my $surname = pop @names;
216 while (scalar @names && $surname =~ /^(jnr|snr)$/i) {
217 $surname = pop @names;
218 }
219 $$stringref = $surname . " " . $$stringref;
220}
221
222# format an english string for sorting
223# i.e. convert to lowercase, remove the, a or an
224# from beginning of string etc.
225sub format_string_english {
226 my $self = shift (@_);
227 my ($stringref) = @_;
228
229 $$stringref =~ tr/A-Z/a-z/;
230 $$stringref =~ s/[^a-z0-9 ]//g;
231 $$stringref =~ s/^\s*(the|a|an)\b//;
232 $$stringref =~ s/^\s+//;
233}
234
2351;
Note: See TracBrowser for help on using the repository browser.