source: trunk/gsdl/perllib/classify/DateList.pm@ 10199

Last change on this file since 10199 was 10199, checked in by kjdon, 19 years ago

dateList now calls format_metadata_for_sorting on its sort meta, so that leading articles are removed

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.8 KB
Line 
1###########################################################################
2#
3# DateList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting by date
27
28# date is assumed to be in the form yyyymmdd
29
30# at present dates are split by year - this should change
31# jrm21 - added option "bymonth", which splits by year and month.
32
33# 23/09/03 Added some more options -kjdon.
34# these include:
35# -nogroup, which makes each year (or year+month) an individual entry in
36# the horizontal list and prevents compaction
37# -metadata, use a different metadata for the date (instead of Date), still expects yyyymmdd format. this affects display cos greenstone displays Date metadata as dd month yyyy, whereas any other date metadata is displayed as yyyymmdd - this needs fixing
38# -sort specifies an additional metadata to use in sorting, will take affect when two docs have the same date.
39
40package DateList;
41
42use BasClas;
43use sorttools;
44
45sub BEGIN {
46 @ISA = ('BasClas');
47}
48
49my $arguments =
50 [ { 'name' => "metadata",
51 'desc' => "{DateList.metadata}",
52 'type' => "metadata",
53 'deft' => "Date",
54 'reqd' => "no" } ,
55 { 'name' => "sort",
56 'desc' => "{DateList.sort}",
57 'type' => "metadata",
58 'reqd' => "no" } ,
59 { 'name' => "reverse_sort",
60 'desc' => "{DateList.reverse_sort}",
61 'type' => "flag",
62 'reqd' => "no" },
63 { 'name' => "bymonth",
64 'desc' => "{DateList.bymonth}",
65 'type' => "flag",
66 'reqd' => "no" },
67 { 'name' => "nogroup",
68 'desc' => "{DateList.nogroup}",
69 'type' => "flag",
70 'reqd' => "no" }
71 ];
72
73my $options = { 'name' => "DateList",
74 'desc' => "{DateList.desc}",
75 'abstract' => "no",
76 'inherits' => "yes",
77 'args' => $arguments };
78
79
80sub new {
81 my $class = shift (@_);
82 my $self = new BasClas($class, @_);
83
84 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
85 my $option_list = $self->{'option_list'};
86 push( @{$option_list}, $options );
87
88 if ($self->{'info_only'}) {
89 # created from classinfo.pl - don't need to parse the arguments
90 return bless $self, $class;
91 }
92
93 $self->{'list'} = {};
94
95 my ($datemeta, $sortmeta);
96 $self->{'nogroup'}=0;
97 if (!parsargv::parse(\@_,
98 q^bymonth^, \$self->{'bymonth'},
99 q^nogroup^, \$self->{'nogroup'},
100 q^metadata/.*/^, \$datemeta,
101 q^sort/.*/^, \$sortmeta,
102 q^reverse_sort^, \$self->{'reverse_sort'},
103 "allow_extra_options")) {
104 $self->print_txt_usage(""); # Use default resource bundle
105 die "\n";
106 }
107
108 if (!defined $datemeta || $datemeta eq "") {
109 $datemeta = "Date";
110 }
111 $self->{'datemeta'} = $datemeta;
112
113 if (defined $sortmeta && $sortmeta ne "") {
114 $self->{'sortmeta'} = $sortmeta;
115 }
116 return bless $self, $class;
117}
118
119sub init {
120 my $self = shift (@_);
121
122 $self->{'list'} = {};
123}
124
125sub classify {
126 my $self = shift (@_);
127 my ($doc_obj) = @_;
128
129 my $doc_OID = $doc_obj->get_OID();
130 my $date = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'datemeta'});
131
132 my $sort_other = "";
133 if (defined $self->{'sortmeta'} && $self->{'sortmeta'} ne "") {
134 $sort_other = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'sortmeta'});
135 $sort_other = &sorttools::format_metadata_for_sorting($self->{'sortmeta'}, $sort_other, $doc_obj);
136 }
137 # if this document doesn't contain Date element we won't
138 # include it in this classification
139 if (defined $date && $date =~ /\d/) {
140 if (defined $self->{'list'}->{$doc_OID}) {
141 my $outhandle = $self->{'outhandle'};
142 print $outhandle "WARNING: DateList::classify called multiple times for $doc_OID\n";
143 }
144
145
146 $self->{'list'}->{$doc_OID} = "$date$sort_other";
147 }
148}
149
150
151sub get_classify_info {
152 my $self = shift (@_);
153
154 my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
155 if ($self->{'reverse_sort'}) {
156 @classlist = reverse @classlist;
157 }
158
159 return $self->splitlist (\@classlist);
160}
161
162
163sub get_entry {
164 my $self = shift (@_);
165 my ($title, $childtype, $thistype) = @_;
166
167 # organise into classification structure
168 my %classifyinfo = ('childtype'=>$childtype,
169 'Title'=>$title,
170 'contains'=>[],
171 'mdtype'=>$self->{'datemeta'});
172 $classifyinfo{'thistype'} = $thistype
173 if defined $thistype && $thistype =~ /\w/;
174
175 return \%classifyinfo;
176}
177
178# splitlist takes an ordered list of classifications (@$classlistref) and
179# splits it up into sub-sections by date
180sub splitlist {
181 my $self = shift (@_);
182 my ($classlistref) = @_;
183 my $classhash = {};
184
185 # top level
186 my $childtype = "HList";
187
188 if (scalar (@$classlistref) <= 39 &&
189 !$self->{'nogroup'}) {$childtype = "DateList";}
190 my $classifyinfo = $self->get_entry ("Date", $childtype, "Invisible");
191 # don't need to do any splitting if there are less than 39 (max + min -1)
192 # classifications, unless nogroup is specified
193 if ((scalar @$classlistref) <= 39 && !$self->{'nogroup'}) {
194 foreach $subOID (@$classlistref) {
195 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
196 }
197 return $classifyinfo;
198 }
199
200
201 if ($self->{'bymonth'}) {
202 # first split up the list into separate year+month classifications
203
204 if (!$self->{'nogroup'}) { # hlist of year+month pairs
205 # single level of classifications
206 foreach $classification (@$classlistref) {
207 my $date = $self->{'list'}->{$classification};
208 $date =~ s/^(\d\d\d\d)(\d\d).*$/$1&nbsp;_textmonth$2_/;
209 # sanity check if month is zero
210 if ($date =~ /00_$/) {
211 $date =~ s/^(\d\d\d\d).*$/$1/g;
212 }
213 $classhash->{$date} = [] unless defined $classhash->{$date};
214 push (@{$classhash->{$date}}, $classification);
215 }
216
217 } else { # don't group - individual years and months
218 foreach $classification (@$classlistref) {
219 my $date = $self->{'list'}->{$classification};
220 $date =~ s/^(\d\d\d\d)(\d\d).*$/$1&nbsp;_textmonth$2_/;
221 my ($year, $month)=($1,$2);
222 # sanity check if month is zero
223 if ($date =~ /00_$/) {
224 $date =~ s/^(\d\d\d\d).*$/$1/g;
225 }
226 # create subclass if it doesn't already exist
227 $classhash->{$year} = () unless defined $classhash->{$year};
228 $classhash->{$year}->{$month} = []
229 unless defined $classhash->{$year}->{$month};
230 push (@{$classhash->{$year}->{$month}}, $classification);
231 }
232 # create hlist of years containing hlists of months
233
234 foreach my $subclass (sort {$a <=> $b} keys %$classhash) {
235 my $yearclassify = $self->get_entry($subclass, "HList");
236 foreach my $subsubclass (sort {$a <=> $b}
237 (keys %{$classhash->{$subclass}})) {
238 my $monthname=$subsubclass;
239 if ($monthname >= 1 && $monthname <= 12) {
240 $monthname="_textmonth" . $monthname . "_";
241 }
242 my $monthclassify=$self->get_entry($monthname, "DateList");
243 push (@{$yearclassify->{'contains'}}, $monthclassify);
244
245 foreach $subsubOID
246 (@{$classhash->{$subclass}->{$subsubclass}}) {
247 push (@{$monthclassify->{'contains'}},
248 {'OID'=>$subsubOID});
249 }
250 }
251 push (@{$classifyinfo->{'contains'}}, $yearclassify);
252 }
253 return $classifyinfo;
254 } # nogroup
255 } else {
256 # not by month
257 # first split up the list into separate year classifications
258 foreach $classification (@$classlistref) {
259 my $date = $self->{'list'}->{$classification};
260 $date =~ s/^(\d\d\d\d).*$/$1/;
261 $classhash->{$date} = [] unless defined $classhash->{$date};
262 push (@{$classhash->{$date}}, $classification);
263 }
264 }
265
266 # only compact the list if nogroup not specified
267 if (!$self->{'nogroup'}) {
268 $classhash = $self->compactlist ($classhash);
269 }
270 foreach $subclass (sort keys %$classhash) {
271 my $tempclassify = $self->get_entry($subclass, "DateList");
272 foreach $subsubOID (@{$classhash->{$subclass}}) {
273 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
274 }
275 push (@{$classifyinfo->{'contains'}}, $tempclassify);
276 }
277
278 return $classifyinfo;
279}
280
281sub compactlist {
282 my $self = shift (@_);
283 my ($classhashref) = @_;
284 my $compactedhash = {};
285 my @currentOIDs = ();
286 my $currentfirstdate = "";
287 my $currentlastdate = "";
288 my $lastkey = "";
289
290 # minimum and maximum documents to be displayed per page.
291 # the actual maximum will be max + (min-1).
292 # the smallest sub-section is a single letter at present
293 # so in this case there may be many times max documents
294 # displayed on a page.
295 my $min = 10;
296 my $max = 30;
297 foreach my $subsection (sort keys %$classhashref) {
298 $currentfirstdate = $subsection if $currentfirstdate eq "";
299 if ((scalar (@currentOIDs) < $min) ||
300 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
301 push (@currentOIDs, @{$classhashref->{$subsection}});
302 $currentlastdate = $subsection;
303 } else {
304
305 if ($currentfirstdate eq $currentlastdate) {
306 @{$compactedhash->{$currentfirstdate}} = @currentOIDs;
307 $lastkey = $currentfirstdate;
308 } else {
309 @{$compactedhash->{"$currentfirstdate-$currentlastdate"}} = @currentOIDs;
310 $lastkey = "$currentfirstdate-$currentlastdate";
311 }
312 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
313 $compactedhash->{$subsection} = $classhashref->{$subsection};
314 @currentOIDs = ();
315 $currentfirstdate = "";
316 $lastkey = $subsection;
317 } else {
318 @currentOIDs = @{$classhashref->{$subsection}};
319 $currentfirstdate = $subsection;
320 $currentlastdate = $subsection;
321 }
322 }
323 }
324
325 # add final OIDs to last sub-classification if there aren't many otherwise
326 # add final sub-classification
327 if ((scalar (@currentOIDs) < $min) && (scalar (@currentOIDs) > 0)) {
328 # want every thing in previous up to the dash
329 my ($newkey) = $lastkey =~ /^([^\-]+)/;
330 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
331 delete $compactedhash->{$lastkey};
332 @{$compactedhash->{"$newkey-$currentlastdate"}} = @currentOIDs;
333 } else {
334 if ($currentfirstdate eq $currentlastdate) {
335 @{$compactedhash->{$currentfirstdate}} = @currentOIDs;
336 } else {
337 @{$compactedhash->{"$currentfirstdate-$currentlastdate"}} = @currentOIDs;
338 }
339 }
340
341 return $compactedhash;
342}
343
3441;
Note: See TracBrowser for help on using the repository browser.