source: trunk/gsdl/perllib/classify/AZSectionList.pm@ 408

Last change on this file since 408 was 391, checked in by sjboddie, 25 years ago

new section level AZList classification

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.4 KB
Line 
1# classifier plugin for sorting sections alphabetically
2
3# this is very similar to AZList except it sorts by
4# section level metadata (excluding the top level)
5# instead of just top level metadata
6
7# the only change is to the classify() subroutine which
8# must now iterate through each section, adding each
9# to the classification
10
11package AZSectionList;
12
13use sorttools;
14
15sub new {
16 my ($class, @options) = @_;
17
18 if (!defined @options || !scalar @options) {
19 die "AZList used with no metadata name to classify by\n";
20 }
21
22 return bless {
23 'list'=>{},
24 'metaname' => $options[0]
25 }, $class;
26}
27
28sub init {
29 my $self = shift (@_);
30
31 $self->{'list'} = {};
32}
33
34sub classify {
35 my $self = shift (@_);
36 my ($doc_obj) = @_;
37
38 my $doc_OID = $doc_obj->get_OID();
39 my $thissection = $doc_obj->get_next_section ($doc_obj->get_top_section());
40
41 while (defined $thissection) {
42
43 my $metavalue = $doc_obj->get_metadata_element ($thissection, $self->{'metaname'});
44
45 # if this document doesn't contain the metadata element we're
46 # sorting by we won't include it in this classification
47 if (defined $metavalue && $metavalue =~ /\w/) {
48 if ($self->{'metaname'} eq 'Creator') {
49 &sorttools::format_string_name_english (\$metavalue);
50 } else {
51 &sorttools::format_string_english (\$metavalue);
52 }
53 if (defined $self->{'list'}->{"$doc_OID.$thissection"}) {
54 print STDERR "WARNING: AZList::classify called multiple times for $doc_OID\n";
55 }
56 $self->{'list'}->{"$doc_OID.$thissection"} = $metavalue;
57 }
58 $thissection = $doc_obj->get_next_section ($thissection);
59 }
60}
61
62sub get_classify_info {
63 my $self = shift (@_);
64
65 my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
66 my $subclasses = $self->splitlist (\@classlist);
67
68 my ($classifyinfo);
69
70 if (scalar @$subclasses) {
71 $classifyinfo = $self->get_entry ($self->{'metaname'}, "AZList");
72 foreach $subclass (@$subclasses) {
73 my $tempclassify = $self->get_entry($subclass->[0], "AZList");
74 foreach $subsubOID (@{$subclass->[2]}) {
75 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
76 }
77 push (@{$classifyinfo->{'contains'}}, $tempclassify);
78 }
79
80 } else {
81 $classifyinfo = $self->get_entry($self->{'metaname'}, "AZList");
82 foreach $subOID (@classlist) {
83 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
84 }
85 }
86
87 return $classifyinfo;
88}
89
90sub get_entry {
91 my $self = shift (@_);
92 my ($title, $classifytype) = @_;
93
94 # organise into classification structure
95 my %classifyinfo = ('classifytype'=>$classifytype,
96 'Title'=>$title,
97 'contains'=>[]);
98
99 return \%classifyinfo;
100}
101
102# splitlist takes an ordered list of classifications (@$classlistref) and splits it
103# up into alphabetical sub-sections. returns a list reference each entry of which is
104# in the form ["sub-section title", "sub-section number", [contents list]]
105# note that the title of each classification in @$classlistref is obtained from
106# $self->{'list'}->{$classlistref->[n]}
107sub splitlist {
108 my $self = shift (@_);
109 my ($classlistref) = @_;
110 my $classhash = {};
111 my $rarr = [];
112
113 # don't need to do any splitting if there are less than 20 classifications
114 return [] if ((scalar @$classlistref) <= 20);
115
116 # first split up the list into separate A-Z and 0-9 classifications
117 foreach $classification (@$classlistref) {
118 my $title = $self->{'list'}->{$classification};
119 $title =~ s/^(.).*$/$1/;
120 $title =~ tr/[a-z]/[A-Z]/;
121 if ($title =~ /^[0-9]$/) {$title = '0-9';}
122 elsif ($title !~ /^[A-Z]$/) {
123 print STDERR "AZList: WARNING $classification has badly formatted title ($title)\n";
124 }
125 $classhash->{$title} = [] unless defined $classhash->{$title};
126 push (@{$classhash->{$title}}, $classification);
127 }
128
129 $classhash = $self->compactlist ($classhash);
130
131 my @tmparr = ();
132 foreach $subsection (sort keys (%$classhash)) {
133 push (@tmparr, $subsection);
134 }
135
136 # if there's a 0-9 section it will have been sorted to the beginning
137 # but we want it at the end
138 if ($tmparr[0] eq '0-9') {
139 shift @tmparr;
140 push (@tmparr, '0-9');
141 }
142
143 my $count = 1;
144 foreach $subsection (@tmparr) {
145 push (@$rarr, [$subsection, $count, $classhash->{$subsection}]);
146 $count ++;
147 }
148
149 return $rarr;
150}
151
152sub compactlist {
153 my $self = shift (@_);
154 my ($classhashref) = @_;
155 my $compactedhash = {};
156 my @currentOIDs = ();
157 my $currentfirstletter = "";
158 my $currentlastletter = "";
159 my $lastkey = "";
160
161 # minimum and maximum documents to be displayed per page.
162 # the actual maximum will be max + (min-1).
163 # the smallest sub-section is a single letter at present
164 # so in this case there may be many times max documents
165 # displayed on a page.
166 my $min = 10;
167 my $max = 30;
168
169 foreach $subsection (sort keys %$classhashref) {
170 if ($subsection eq '0-9') {
171 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
172 next;
173 }
174 $currentfirstletter = $subsection if $currentfirstletter eq "";
175 if ((scalar (@currentOIDs) < $min) ||
176 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
177 push (@currentOIDs, @{$classhashref->{$subsection}});
178 $currentlastletter = $subsection;
179 } else {
180
181 if ($currentfirstletter eq $currentlastletter) {
182 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
183 $lastkey = $currentfirstletter;
184 } else {
185 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
186 $lastkey = "$currentfirstletter-$currentlastletter";
187 }
188 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
189 $compactedhash->{$subsection} = $classhashref->{$subsection};
190 @currentOIDs = ();
191 $currentfirstletter = "";
192 } else {
193 @currentOIDs = @{$classhashref->{$subsection}};
194 $currentfirstletter = $subsection;
195 $currentlastletter = $subsection;
196 }
197 }
198 }
199
200 # add final OIDs to last sub-classification if there aren't many otherwise
201 # add final sub-classification
202 if (scalar (@currentOIDs) < $min) {
203 my ($newkey) = $lastkey =~ /^(.)/;
204 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
205 delete $compactedhash->{$lastkey};
206 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
207 } else {
208 if ($currentfirstletter eq $currentlastletter) {
209 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
210 } else {
211 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
212 }
213 }
214
215 return $compactedhash;
216}
217
2181;
Note: See TracBrowser for help on using the repository browser.