source: trunk/gsdl/perllib/classify/AZSectionList.pm@ 641

Last change on this file since 641 was 618, checked in by sjboddie, 25 years ago

removed a couple of instances where =~ /\w/ was used to test for an empty
string. this doesn't work for chinese as most characters don't match \w

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.2 KB
Line 
1###########################################################################
2#
3# AZSectionList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting sections alphabetically
27
28# this is very similar to AZList except it sorts by
29# section level metadata (excluding the top level)
30# instead of just top level metadata
31
32# options are:
33# metadata=Metadata
34
35# the only change is to the classify() subroutine which
36# must now iterate through each section, adding each
37# to the classification
38
39package AZSectionList;
40
41use sorttools;
42
43sub new {
44 my ($class, @options) = @_;
45
46 my ($metaname);
47 foreach $option (@options) {
48 if ($option =~ /^metadata=(.*)$/i) {
49 $metaname = $1;
50 }
51 }
52
53 if (!defined $metaname) {
54 die "AZSectionList used with no metadata name to classify by\n";
55 }
56
57 return bless {
58 'list'=>{},
59 'metaname' => $metaname
60 }, $class;
61}
62
63sub init {
64 my $self = shift (@_);
65
66 $self->{'list'} = {};
67}
68
69sub classify {
70 my $self = shift (@_);
71 my ($doc_obj) = @_;
72
73 my $doc_OID = $doc_obj->get_OID();
74 my $thissection = $doc_obj->get_next_section ($doc_obj->get_top_section());
75
76 while (defined $thissection) {
77 $self->classify_section ($thissection, $doc_obj);
78 $thissection = $doc_obj->get_next_section ($thissection);
79 }
80}
81
82sub classify_section {
83 my $self = shift (@_);
84 my ($section, $doc_obj) = @_;
85
86 my $doc_OID = $doc_obj->get_OID();
87 my $metavalue = $doc_obj->get_metadata_element ($section, $self->{'metaname'});
88
89 # if this section doesn't contain the metadata element we're
90 # sorting by we won't include it in this classification
91 if (defined $metavalue && $metavalue ne "") {
92 if ($self->{'metaname'} eq 'Creator') {
93 &sorttools::format_string_name_english (\$metavalue);
94 } else {
95 &sorttools::format_string_english (\$metavalue);
96 }
97 if (defined $self->{'list'}->{"$doc_OID.$section"}) {
98 print STDERR "WARNING: AZSectionList::classify called multiple times for $doc_OID.$section\n";
99 }
100 $self->{'list'}->{"$doc_OID.$section"} = $metavalue;
101 }
102}
103
104sub get_classify_info {
105 my $self = shift (@_);
106
107 my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
108
109 return $self->splitlist (\@classlist);
110}
111
112sub get_entry {
113 my $self = shift (@_);
114 my ($title, $classifytype) = @_;
115
116 # organise into classification structure
117 my %classifyinfo = ('classifytype'=>$classifytype,
118 'Title'=>$title,
119 'contains'=>[]);
120
121 return \%classifyinfo;
122}
123
124# splitlist takes an ordered list of classifications (@$classlistref) and splits it
125# up into alphabetical sub-sections.
126sub splitlist {
127 my $self = shift (@_);
128 my ($classlistref) = @_;
129 my $classhash = {};
130
131 # top level
132 my $classifyinfo = $self->get_entry ($self->{'metaname'}, "AZList");
133
134 # don't need to do any splitting if there are less than 20 classifications
135 if ((scalar @$classlistref) <= 20) {
136 foreach $subOID (@$classlistref) {
137 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
138 }
139 return $classifyinfo;
140 }
141
142 # first split up the list into separate A-Z and 0-9 classifications
143 foreach $classification (@$classlistref) {
144 my $title = $self->{'list'}->{$classification};
145 $title =~ s/^(.).*$/$1/;
146 $title =~ tr/[a-z]/[A-Z]/;
147 if ($title =~ /^[0-9]$/) {$title = '0-9';}
148 elsif ($title !~ /^[A-Z]$/) {
149 print STDERR "AZSectionList: WARNING $classification has badly formatted title ($title)\n";
150 }
151 $classhash->{$title} = [] unless defined $classhash->{$title};
152 push (@{$classhash->{$title}}, $classification);
153 }
154 $classhash = $self->compactlist ($classhash);
155
156 my @tmparr = ();
157 foreach $subsection (sort keys (%$classhash)) {
158 push (@tmparr, $subsection);
159 }
160
161 # if there's a 0-9 section it will have been sorted to the beginning
162 # but we want it at the end
163 if ($tmparr[0] eq '0-9') {
164 shift @tmparr;
165 push (@tmparr, '0-9');
166 }
167
168 foreach $subclass (@tmparr) {
169 my $tempclassify = $self->get_entry($subclass, "AZList");
170 foreach $subsubOID (@{$classhash->{$subclass}}) {
171 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
172 }
173 push (@{$classifyinfo->{'contains'}}, $tempclassify);
174 }
175
176 return $classifyinfo;
177}
178
179sub compactlist {
180 my $self = shift (@_);
181 my ($classhashref) = @_;
182 my $compactedhash = {};
183 my @currentOIDs = ();
184 my $currentfirstletter = "";
185 my $currentlastletter = "";
186 my $lastkey = "";
187
188 # minimum and maximum documents to be displayed per page.
189 # the actual maximum will be max + (min-1).
190 # the smallest sub-section is a single letter at present
191 # so in this case there may be many times max documents
192 # displayed on a page.
193 my $min = 10;
194 my $max = 30;
195
196 foreach $subsection (sort keys %$classhashref) {
197 if ($subsection eq '0-9') {
198 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};
199 next;
200 }
201 $currentfirstletter = $subsection if $currentfirstletter eq "";
202 if ((scalar (@currentOIDs) < $min) ||
203 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
204 push (@currentOIDs, @{$classhashref->{$subsection}});
205 $currentlastletter = $subsection;
206 } else {
207
208 if ($currentfirstletter eq $currentlastletter) {
209 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
210 $lastkey = $currentfirstletter;
211 } else {
212 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
213 $lastkey = "$currentfirstletter-$currentlastletter";
214 }
215 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
216 $compactedhash->{$subsection} = $classhashref->{$subsection};
217 @currentOIDs = ();
218 $currentfirstletter = "";
219 } else {
220 @currentOIDs = @{$classhashref->{$subsection}};
221 $currentfirstletter = $subsection;
222 $currentlastletter = $subsection;
223 }
224 }
225 }
226
227 # add final OIDs to last sub-classification if there aren't many otherwise
228 # add final sub-classification
229 if (scalar (@currentOIDs) < $min) {
230 my ($newkey) = $lastkey =~ /^(.)/;
231 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
232 delete $compactedhash->{$lastkey};
233 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
234 } else {
235 if ($currentfirstletter eq $currentlastletter) {
236 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
237 } else {
238 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
239 }
240 }
241
242 return $compactedhash;
243}
244
2451;
Note: See TracBrowser for help on using the repository browser.