source: main/trunk/greenstone2/perllib/classify/DateList.pm@ 34109

Last change on this file since 34109 was 34109, checked in by kjdon, 4 years ago

tidied this up a bit. Now we leave in _textmonth00_ if the month is undefined. in gs2 this is resolved to empty string, in gs3 I have added 'undefined' string. when classifying the doc, sanity check the date rather than doing it later on, in multiple places in the code. if we are classifying bymonth and the month is absent or invalid, then set it to 00. no longer allows dates that don't at least start with yyyy.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.7 KB
Line 
1###########################################################################
2#
3# DateList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting by date
27
28# date is assumed to be in the form yyyymmdd
29
30# options
31# -bymonth: splits by month as well as year
32# -nogroup, which makes each year (or year+month) an individual entry in
33# the horizontal list and prevents compaction
34# -metadata, use a different metadata for the date (instead of Date), still expects yyyymmdd format. this affects display cos greenstone displays Date metadata as dd month yyyy, whereas any other date metadata is displayed as yyyymmdd - this needs fixing
35# -sort specifies an additional metadata to use in sorting, will take affect when two docs have the same date.
36
37package DateList;
38
39use BaseClassifier;
40use sorttools;
41
42use strict;
43no strict 'refs'; # allow filehandles to be variables and viceversa
44
45sub BEGIN {
46 @DateList::ISA = ('BaseClassifier');
47}
48
49my $arguments =
50 [ { 'name' => "metadata",
51 'desc' => "{DateList.metadata}",
52 'type' => "metadata",
53 'deft' => "Date",
54 'reqd' => "yes" } ,
55 { 'name' => "sort",
56 'desc' => "{DateList.sort}",
57 'type' => "metadata",
58 'reqd' => "no" } ,
59 { 'name' => "reverse_sort",
60 'desc' => "{DateList.reverse_sort}",
61 'type' => "flag",
62 'reqd' => "no" },
63 { 'name' => "bymonth",
64 'desc' => "{DateList.bymonth}",
65 'type' => "flag",
66 'reqd' => "no" },
67 { 'name' => "nogroup",
68 'desc' => "{DateList.nogroup}",
69 'type' => "flag",
70 'reqd' => "no" },
71 { 'name' => "no_special_formatting",
72 'desc' => "{DateList.no_special_formatting}",
73 'type' => "flag",
74 'reqd' => "no" }
75
76 ];
77
78my $options = { 'name' => "DateList",
79 'desc' => "{DateList.desc}",
80 'abstract' => "no",
81 'inherits' => "yes",
82 'args' => $arguments };
83
84
85sub new {
86 my ($class) = shift (@_);
87 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
88 push(@$classifierslist, $class);
89
90 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
91 push(@{$hashArgOptLists->{"OptList"}},$options);
92
93 my $self = new BaseClassifier($classifierslist, $inputargs, $hashArgOptLists);
94
95 if ($self->{'info_only'}) {
96 # don't worry about any options etc
97 return bless $self, $class;
98 }
99
100 # Manually set $self parameters.
101 $self->{'list'} = {};
102
103 if (!defined $self->{"metadata"} || $self->{"metadata"} eq "") {
104 $self->{'metadata'} = "Date";
105 }
106 # remove any ex.s
107 $self->{'metadata'} = $self->strip_ex_from_metadata($self->{'metadata'});
108 $self->{'sort'} = $self->strip_ex_from_metadata($self->{'sort'});
109
110 # now can have comma separated list of Dates - we just use the first value found (for now)
111 my @meta_list = split(/,/, $self->{"metadata"});
112 $self->{'meta_list'} = \@meta_list;
113
114 $self->{'buttonname'} = $self->generate_title_from_metadata($self->{'metadata'}) unless ($self->{'buttonname'});
115
116 $self->{'childtype'} = "DateList";
117 if ($self->{'no_special_formatting'}) {
118 $self->{'childtype'} = "VList";
119 }
120
121 return bless $self, $class;
122}
123
124sub init {
125 my $self = shift (@_);
126
127 $self->{'list'} = {};
128}
129
130sub classify {
131 my $self = shift (@_);
132 my ($doc_obj) = @_;
133
134 my $doc_OID = $doc_obj->get_OID();
135 my $outhandle = $self->{'outhandle'};
136 my $verbosity = $self->{'verbosity'};
137 # find the first available metadata
138 my $date;
139 foreach my $m (@{$self->{'meta_list'}}) {
140 $date = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $m);
141 last if defined $date;
142 }
143
144 if (!defined $date || $date eq "") {
145 # if this document doesn't contain Date element we won't
146 # include it in this classification
147 print $outhandle "DateList: $doc_OID has no date, not including it\n" if $verbosity >=2;
148 return;
149 }
150
151 # sanity check date
152 if ($date !~ /^\d\d\d\d.*/) {
153 print $outhandle "DateList: $doc_OID date: '$date' malformed: expected it to start with yyyy; not classifying\n" if $verbosity >=2;
154 return;
155 }
156 if ($self->{'bymonth'}) {
157 # check that we have valid month - if not, set it to 00 == undefined
158 if ($date !~ /^\d\d\d\d-?\d\d/) {
159 print $outhandle "DateList $doc_OID date: '$date' has no month (expecting yyyymm... or yyyy-mm...), setting date to yyyy-00\n" if $verbosity >=2;
160 $date =~ s/^(\d\d\d\d).*$/$1-00/;
161 } else {
162 my ($year, $month) = $date =~ /^(\d\d\d\d)-?(\d\d)/;
163 if ($month > 12) {
164 print $outhandle "DateList $doc_OID date: '$date' has invalid month, setting date to $year-00\n" if $verbosity >=2;
165 $date = "$year-00";
166 }
167 }
168 }
169
170 my $sort_other = "";
171 if (defined $self->{'sort'} && $self->{'sort'} ne "") {
172 $sort_other = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'sort'});
173 $sort_other = &sorttools::format_metadata_for_sorting($self->{'sort'}, $sort_other, $doc_obj, $self->{'casefold_metadata_for_sorting'}, $self->{'accentfold_metadata_for_sorting'}) unless $self->{'no_metadata_formatting'};
174 }
175
176 if (defined $self->{'list'}->{$doc_OID}) {
177
178 print $outhandle "WARNING: DateList::classify called multiple times for $doc_OID, overwriting previous stored date value ($self->{'list'}->{$doc_OID}) with $date$sort_other \n";
179 }
180
181 $self->{'list'}->{$doc_OID} = "$date$sort_other";
182
183}
184
185
186sub get_classify_info {
187 my $self = shift (@_);
188
189 my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
190
191 if ($self->{'reverse_sort'}) {
192 @classlist = reverse @classlist;
193 }
194
195
196 return $self->splitlist (\@classlist);
197}
198
199
200sub get_entry {
201 my $self = shift (@_);
202 my ($title, $childtype, $thistype) = @_;
203
204 # organise into classification structure
205 my %classifyinfo = ('childtype'=>$childtype,
206 'Title'=>$title,
207 'contains'=>[],
208 'mdtype'=>$self->{'metadata'});
209 $classifyinfo{'thistype'} = $thistype
210 if defined $thistype && $thistype =~ /\w/;
211
212 return \%classifyinfo;
213}
214
215# splitlist takes an ordered list of classifications (@$classlistref) and
216# splits it up into sub-sections by date
217sub splitlist {
218 my $self = shift (@_);
219 my ($classlistref) = @_;
220 my $classhash = {};
221
222 # top level
223 my $childtype = "HList";
224
225 if (scalar (@$classlistref) <= 39 &&
226 !$self->{'nogroup'}) {$childtype = $self->{'childtype'};}
227
228 my $classifyinfo = $self->get_entry ($self->{'buttonname'}, $childtype, "Invisible");
229 # don't need to do any splitting if there are less than 39 (max + min -1)
230 # classifications, unless nogroup is specified
231 if ((scalar @$classlistref) <= 39 && !$self->{'nogroup'}) {
232 foreach my $subOID (@$classlistref) {
233 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
234 }
235 return $classifyinfo;
236 }
237
238
239
240 if ($self->{'bymonth'}) {
241 # first split up the list into separate year+month classifications
242
243 if (!$self->{'nogroup'}) { # hlist of year+month pairs
244 # single level of classifications
245 foreach my $classification (@$classlistref) {
246 my $date = $self->{'list'}->{$classification};
247 $date =~ s/^(\d\d\d\d)-?(\d\d).*$/$1&nbsp;_textmonth$2_/;
248 $classhash->{$date} = [] unless defined $classhash->{$date};
249 push (@{$classhash->{$date}}, $classification);
250 }
251
252 } else { # -nogroup - individual years and months
253 foreach my $classification (@$classlistref) {
254 my $date = $self->{'list'}->{$classification};
255 $date =~ s/^(\d\d\d\d)-?(\d\d).*$/$1&nbsp;_textmonth$2_/;
256 my ($year, $month)=($1,$2);
257 # create subclass if it doesn't already exist
258 $classhash->{$year} = () unless defined $classhash->{$year};
259
260 $classhash->{$year}->{$month} = []
261 unless defined $classhash->{$year}->{$month};
262 push (@{$classhash->{$year}->{$month}}, $classification);
263
264 }
265 # create hlist of years containing hlists of months
266
267 my @subclasslist = sort {$a <=> $b} (keys %$classhash);
268 if ($self->{'reverse_sort'}) {
269 @subclasslist = reverse @subclasslist;
270 }
271
272 foreach my $subclass (@subclasslist) {
273 my $yearclassify = $self->get_entry($subclass, "HList");
274 my @subsubclasslist = sort {$a <=> $b} (keys %{$classhash->{$subclass}});
275 if ($self->{'reverse_sort'}) {
276 @subsubclasslist = reverse @subsubclasslist;
277 }
278
279 foreach my $subsubclass (@subsubclasslist) {
280 my $monthname=$subsubclass;
281 $monthname="_textmonth" . $monthname . "_";
282 my $monthclassify=$self->get_entry($monthname, $self->{'childtype'});
283 push (@{$yearclassify->{'contains'}}, $monthclassify);
284
285 foreach my $subsubOID
286 (@{$classhash->{$subclass}->{$subsubclass}}) {
287 push (@{$monthclassify->{'contains'}},
288 {'OID'=>$subsubOID});
289 }
290 }
291 push (@{$classifyinfo->{'contains'}}, $yearclassify);
292 }
293
294 return $classifyinfo;
295 } # nogroup
296 } else {
297 # not by month
298 # first split up the list into separate year classifications
299 foreach my $classification (@$classlistref) {
300 my $date = $self->{'list'}->{$classification};
301 $date =~ s/^(\d\d\d\d).*$/$1/;
302 $classhash->{$date} = [] unless defined $classhash->{$date};
303 push (@{$classhash->{$date}}, $classification);
304 }
305
306 }
307
308 # only compact the list if nogroup not specified
309 if (!$self->{'nogroup'}) {
310 $classhash = $self->compactlist ($classhash);
311 }
312 my @subclasslist = sort keys %$classhash;
313 if ($self->{'reverse_sort'}) {
314 @subclasslist = reverse @subclasslist;
315 }
316 foreach my $subclass (@subclasslist) {
317 my $tempclassify = $self->get_entry($subclass, $self->{'childtype'});
318 foreach my $subsubOID (@{$classhash->{$subclass}}) {
319 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
320 }
321 push (@{$classifyinfo->{'contains'}}, $tempclassify);
322 }
323
324 return $classifyinfo;
325}
326
327sub compactlist {
328 my $self = shift (@_);
329 my ($classhashref) = @_;
330 my $compactedhash = {};
331 my @currentOIDs = ();
332 my $currentfirstdate = "";
333 my $currentlastdate = "";
334 my $lastkey = "";
335
336 # minimum and maximum documents to be displayed per page.
337 # the actual maximum will be max + (min-1).
338 # the smallest sub-section is a single letter at present
339 # so in this case there may be many times max documents
340 # displayed on a page.
341 my $min = 10;
342 my $max = 30;
343 my @subsectionlist = sort keys %$classhashref;
344 if ($self->{'reverse_sort'}) {
345 @subsectionlist = reverse @subsectionlist;
346 }
347 foreach my $subsection (@subsectionlist) {
348 $currentfirstdate = $subsection if $currentfirstdate eq "";
349 if ((scalar (@currentOIDs) < $min) ||
350 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
351 push (@currentOIDs, @{$classhashref->{$subsection}});
352 $currentlastdate = $subsection;
353 } else {
354 if ($currentfirstdate eq $currentlastdate) {
355 @{$compactedhash->{$currentfirstdate}} = @currentOIDs;
356 $lastkey = $currentfirstdate;
357 } else {
358 @{$compactedhash->{"$currentfirstdate-$currentlastdate"}} = @currentOIDs;
359 $lastkey = "$currentfirstdate-$currentlastdate";
360 }
361 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
362 $compactedhash->{$subsection} = $classhashref->{$subsection};
363 @currentOIDs = ();
364 $currentfirstdate = "";
365 $lastkey = $subsection;
366 } else {
367 @currentOIDs = @{$classhashref->{$subsection}};
368 $currentfirstdate = $subsection;
369 $currentlastdate = $subsection;
370 }
371 }
372 }
373
374 # add final OIDs to last sub-classification if there aren't many otherwise
375 # add final sub-classification
376 if (scalar (@currentOIDs) > 0) {
377 if ((scalar (@currentOIDs) < $min)) {
378
379 # want every thing in previous up to the dash
380 my ($newkey) = $lastkey =~ /^([^\-]+)/;
381 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
382 delete $compactedhash->{$lastkey};
383 @{$compactedhash->{"$newkey-$currentlastdate"}} = @currentOIDs;
384 } else {
385 if ($currentfirstdate eq $currentlastdate) {
386 @{$compactedhash->{$currentfirstdate}} = @currentOIDs;
387 } else {
388 @{$compactedhash->{"$currentfirstdate-$currentlastdate"}} = @currentOIDs;
389 }
390 }
391 }
392
393 return $compactedhash;
394}
395
3961;
Note: See TracBrowser for help on using the repository browser.