source: trunk/gsdl/perllib/classify/AZList.pm@ 408

Last change on this file since 408 was 385, checked in by sjboddie, 25 years ago

moved string formatting for sorting into it's own module

  • Property svn:keywords set to Author Date Id Revision
File size: 5.9 KB
Line 
1# classifier plugin for sorting alphabetically
2
3package AZList;
4
5use sorttools;
6
7sub new {
8 my ($class, @options) = @_;
9
10 if (!defined @options || !scalar @options) {
11 die "AZList used with no metadata name to classify by\n";
12 }
13
14 return bless {
15 'list'=>{},
16 'metaname' => $options[0]
17 }, $class;
18}
19
20sub init {
21 my $self = shift (@_);
22
23 $self->{'list'} = {};
24}
25
26sub classify {
27 my $self = shift (@_);
28 my ($doc_obj) = @_;
29
30 my $doc_OID = $doc_obj->get_OID();
31 my $metavalue = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
32 $self->{'metaname'});
33
34 # if this document doesn't contain the metadata element we're
35 # sorting by we won't include it in this classification
36 if (defined $metavalue && $metavalue =~ /\w/) {
37 if ($self->{'metaname'} eq 'Creator') {
38 &sorttools::format_string_name_english (\$metavalue);
39 } else {
40 &sorttools::format_string_english (\$metavalue);
41 }
42 if (defined $self->{'list'}->{$doc_OID}) {
43 print STDERR "WARNING: AZList::classify called multiple times for $doc_OID\n";
44 }
45 $self->{'list'}->{$doc_OID} = $metavalue;
46 }
47}
48
49sub get_classify_info {
50 my $self = shift (@_);
51
52 my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
53 my $subclasses = $self->splitlist (\@classlist);
54
55 my ($classifyinfo);
56
57 if (scalar @$subclasses) {
58 $classifyinfo = $self->get_entry ($self->{'metaname'}, "AZList");
59 foreach $subclass (@$subclasses) {
60 my $tempclassify = $self->get_entry($subclass->[0], "AZList");
61 foreach $subsubOID (@{$subclass->[2]}) {
62 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
63 }
64 push (@{$classifyinfo->{'contains'}}, $tempclassify);
65 }
66
67 } else {
68 $classifyinfo = $self->get_entry($self->{'metaname'}, "AZList");
69 foreach $subOID (@classlist) {
70 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
71 }
72 }
73
74 return $classifyinfo;
75}
76
77sub get_entry {
78 my $self = shift (@_);
79 my ($title, $classifytype) = @_;
80
81 # organise into classification structure
82 my %classifyinfo = ('classifytype'=>$classifytype,
83 'Title'=>$title,
84 'contains'=>[]);
85
86 return \%classifyinfo;
87}
88
89# splitlist takes an ordered list of classifications (@$classlistref) and splits it
90# up into alphabetical sub-sections. returns a list reference each entry of which is
91# in the form ["sub-section title", "sub-section number", [contents list]]
92# note that the title of each classification in @$classlistref is obtained from
93# $self->{'list'}->{$classlistref->[n]}
94sub splitlist {
95 my $self = shift (@_);
96 my ($classlistref) = @_;
97 my $classhash = {};
98 my $rarr = [];
99
100 # don't need to do any splitting if there are less than 20 classifications
101 return [] if ((scalar @$classlistref) <= 20);
102
103 # first split up the list into separate A-Z and 0-9 classifications
104 foreach $classification (@$classlistref) {
105 my $title = $self->{'list'}->{$classification};
106 $title =~ s/^(.).*$/$1/;
107 $title =~ tr/[a-z]/[A-Z]/;
108 if ($title =~ /^[0-9]$/) {$title = '0-9';}
109 elsif ($title !~ /^[A-Z]$/) {
110 print STDERR "AZList: WARNING $classification has badly formatted title ($title)\n";
111 }
112 $classhash->{$title} = [] unless defined $classhash->{$title};
113 push (@{$classhash->{$title}}, $classification);
114 }
115
116 $classhash = $self->compactlist ($classhash);
117
118 my @tmparr = ();
119 foreach $subsection (sort keys (%$classhash)) {
120 push (@tmparr, $subsection);
121 }
122
123 # if there's a 0-9 section it will have been sorted to the beginning
124 # but we want it at the end
125 if ($tmparr[0] eq '0-9') {
126 shift @tmparr;
127 push (@tmparr, '0-9');
128 }
129
130 my $count = 1;
131 foreach $subsection (@tmparr) {
132 push (@$rarr, [$subsection, $count, $classhash->{$subsection}]);
133 $count ++;
134 }
135
136 return $rarr;
137}
138
139sub compactlist {
140 my $self = shift (@_);
141 my ($classhashref) = @_;
142 my $compactedhash = {};
143 my @currentOIDs = ();
144 my $currentfirstletter = "";
145 my $currentlastletter = "";
146 my $lastkey = "";
147
148 # minimum and maximum documents to be displayed per page.
149 # the actual maximum will be max + (min-1).
150 # the smallest sub-section is a single letter at present
151 # so in this case there may be many times max documents
152 # displayed on a page.
153 my $min = 10;
154 my $max = 30;
155
156 foreach $subsection (sort keys %$classhashref) {
157 if ($subsection eq '0-9') {
158 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
159 next;
160 }
161 $currentfirstletter = $subsection if $currentfirstletter eq "";
162 if ((scalar (@currentOIDs) < $min) ||
163 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
164 push (@currentOIDs, @{$classhashref->{$subsection}});
165 $currentlastletter = $subsection;
166 } else {
167
168 if ($currentfirstletter eq $currentlastletter) {
169 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
170 $lastkey = $currentfirstletter;
171 } else {
172 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
173 $lastkey = "$currentfirstletter-$currentlastletter";
174 }
175 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
176 $compactedhash->{$subsection} = $classhashref->{$subsection};
177 @currentOIDs = ();
178 $currentfirstletter = "";
179 } else {
180 @currentOIDs = @{$classhashref->{$subsection}};
181 $currentfirstletter = $subsection;
182 $currentlastletter = $subsection;
183 }
184 }
185 }
186
187 # add final OIDs to last sub-classification if there aren't many otherwise
188 # add final sub-classification
189 if (scalar (@currentOIDs) < $min) {
190 my ($newkey) = $lastkey =~ /^(.)/;
191 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
192 delete $compactedhash->{$lastkey};
193 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
194 } else {
195 if ($currentfirstletter eq $currentlastletter) {
196 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
197 } else {
198 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
199 }
200 }
201
202 return $compactedhash;
203}
204
2051;
Note: See TracBrowser for help on using the repository browser.