1 | ###########################################################################
|
---|
2 | #
|
---|
3 | # DateList.pm --
|
---|
4 | # A component of the Greenstone digital library software
|
---|
5 | # from the New Zealand Digital Library Project at the
|
---|
6 | # University of Waikato, New Zealand.
|
---|
7 | #
|
---|
8 | # Copyright (C) 1999 New Zealand Digital Library Project
|
---|
9 | #
|
---|
10 | # This program is free software; you can redistribute it and/or modify
|
---|
11 | # it under the terms of the GNU General Public License as published by
|
---|
12 | # the Free Software Foundation; either version 2 of the License, or
|
---|
13 | # (at your option) any later version.
|
---|
14 | #
|
---|
15 | # This program is distributed in the hope that it will be useful,
|
---|
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
18 | # GNU General Public License for more details.
|
---|
19 | #
|
---|
20 | # You should have received a copy of the GNU General Public License
|
---|
21 | # along with this program; if not, write to the Free Software
|
---|
22 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
23 | #
|
---|
24 | ###########################################################################
|
---|
25 |
|
---|
26 | # classifier plugin for sorting by date
|
---|
27 |
|
---|
28 | # date is assumed to be in the form yyyymmdd
|
---|
29 |
|
---|
30 | # options
|
---|
31 | # -bymonth: splits by month as well as year
|
---|
32 | # -nogroup, which makes each year (or year+month) an individual entry in
|
---|
33 | # the horizontal list and prevents compaction
|
---|
34 | # -metadata, use a different metadata for the date (instead of Date), still expects yyyymmdd format. this affects display cos greenstone displays Date metadata as dd month yyyy, whereas any other date metadata is displayed as yyyymmdd - this needs fixing
|
---|
35 | # -sort specifies an additional metadata to use in sorting, will take affect when two docs have the same date.
|
---|
36 | # -reverse_sort - sort in reverse order
|
---|
37 | # -no_special_formatting - makes the list a VList instead of a DateList - don't display Months down the side of the list
|
---|
38 | # -valid_date_regex - what constitutes a valid date? deft is \d\d\d\d. eg for Heritage, want to customise this to allow dates like 198?
|
---|
39 | # -allow_invalid_dates - do we include in the classifier documents with invalid dates? deft = no.
|
---|
40 | # -invalid_date_partition_name - if docs with invalid dates are included, they get put into one bucket. this gives the name of that bucket. eg "no date"
|
---|
41 | package DateList;
|
---|
42 |
|
---|
43 | use BaseClassifier;
|
---|
44 | use sorttools;
|
---|
45 |
|
---|
46 | use strict;
|
---|
47 | no strict 'refs'; # allow filehandles to be variables and viceversa
|
---|
48 |
|
---|
49 | sub BEGIN {
|
---|
50 | @DateList::ISA = ('BaseClassifier');
|
---|
51 | }
|
---|
52 |
|
---|
53 | my $arguments =
|
---|
54 | [ { 'name' => "metadata",
|
---|
55 | 'desc' => "{DateList.metadata}",
|
---|
56 | 'type' => "metadata",
|
---|
57 | 'deft' => "Date",
|
---|
58 | 'reqd' => "yes" } ,
|
---|
59 | { 'name' => "sort",
|
---|
60 | 'desc' => "{DateList.sort}",
|
---|
61 | 'type' => "metadata",
|
---|
62 | 'reqd' => "no" } ,
|
---|
63 | { 'name' => "reverse_sort",
|
---|
64 | 'desc' => "{DateList.reverse_sort}",
|
---|
65 | 'type' => "flag",
|
---|
66 | 'reqd' => "no" },
|
---|
67 | { 'name' => "bymonth",
|
---|
68 | 'desc' => "{DateList.bymonth}",
|
---|
69 | 'type' => "flag",
|
---|
70 | 'reqd' => "no" },
|
---|
71 | { 'name' => "nogroup",
|
---|
72 | 'desc' => "{DateList.nogroup}",
|
---|
73 | 'type' => "flag",
|
---|
74 | 'reqd' => "no" },
|
---|
75 | { 'name' => "no_special_formatting",
|
---|
76 | 'desc' => "{DateList.no_special_formatting}",
|
---|
77 | 'type' => "flag",
|
---|
78 | 'reqd' => "no" },
|
---|
79 | { 'name' => "valid_date_regex",
|
---|
80 | 'desc' => "{DateList.valid_date_regex}",
|
---|
81 | 'type' => "regexp",
|
---|
82 | 'deft' => "\\d\\d\\d\\d",
|
---|
83 | 'reqd' => "no" },
|
---|
84 | { 'name' => "allow_invalid_dates",
|
---|
85 | 'desc' => "{DateList.allow_invalid_dates}",
|
---|
86 | 'type' => "flag",
|
---|
87 | 'reqd' => "no" },
|
---|
88 | { 'name' => "invalid_date_partition_name",
|
---|
89 | 'desc' => "{DateList.invalid_date_partition_name}",
|
---|
90 | 'type' => "string",
|
---|
91 | 'deft' => "No Date" }
|
---|
92 | ];
|
---|
93 |
|
---|
94 | my $options = { 'name' => "DateList",
|
---|
95 | 'desc' => "{DateList.desc}",
|
---|
96 | 'abstract' => "no",
|
---|
97 | 'inherits' => "yes",
|
---|
98 | 'args' => $arguments };
|
---|
99 |
|
---|
100 |
|
---|
101 | sub new {
|
---|
102 | my ($class) = shift (@_);
|
---|
103 | my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
|
---|
104 | push(@$classifierslist, $class);
|
---|
105 |
|
---|
106 | push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
|
---|
107 | push(@{$hashArgOptLists->{"OptList"}},$options);
|
---|
108 |
|
---|
109 | my $self = new BaseClassifier($classifierslist, $inputargs, $hashArgOptLists);
|
---|
110 |
|
---|
111 | if ($self->{'info_only'}) {
|
---|
112 | # don't worry about any options etc
|
---|
113 | return bless $self, $class;
|
---|
114 | }
|
---|
115 |
|
---|
116 | # Manually set $self parameters.
|
---|
117 | $self->{'list'} = {};
|
---|
118 |
|
---|
119 | if (!defined $self->{"metadata"} || $self->{"metadata"} eq "") {
|
---|
120 | $self->{'metadata'} = "Date";
|
---|
121 | }
|
---|
122 | # remove any ex.s
|
---|
123 | $self->{'metadata'} = $self->strip_ex_from_metadata($self->{'metadata'});
|
---|
124 | $self->{'sort'} = $self->strip_ex_from_metadata($self->{'sort'});
|
---|
125 |
|
---|
126 | # now can have comma separated list of Dates - we just use the first value found (for now)
|
---|
127 | my @meta_list = split(/,/, $self->{"metadata"});
|
---|
128 | $self->{'meta_list'} = \@meta_list;
|
---|
129 |
|
---|
130 | $self->{'buttonname'} = $self->generate_title_from_metadata($self->{'metadata'}) unless ($self->{'buttonname'});
|
---|
131 |
|
---|
132 | $self->{'childtype'} = "DateList";
|
---|
133 | if ($self->{'no_special_formatting'}) {
|
---|
134 | $self->{'childtype'} = "VList";
|
---|
135 | }
|
---|
136 |
|
---|
137 | return bless $self, $class;
|
---|
138 | }
|
---|
139 |
|
---|
140 | sub init {
|
---|
141 | my $self = shift (@_);
|
---|
142 |
|
---|
143 | $self->{'list'} = {};
|
---|
144 | }
|
---|
145 |
|
---|
146 | sub classify {
|
---|
147 | my $self = shift (@_);
|
---|
148 | my ($doc_obj) = @_;
|
---|
149 |
|
---|
150 | my $doc_OID = $doc_obj->get_OID();
|
---|
151 | my $outhandle = $self->{'outhandle'};
|
---|
152 | my $verbosity = $self->{'verbosity'};
|
---|
153 | # find the first available metadata
|
---|
154 | my $date;
|
---|
155 | my $invalid = 0;
|
---|
156 | my $validre = $self->{'valid_date_regex'};
|
---|
157 |
|
---|
158 | foreach my $m (@{$self->{'meta_list'}}) {
|
---|
159 | $date = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $m);
|
---|
160 | last if defined $date;
|
---|
161 | }
|
---|
162 |
|
---|
163 | if (!defined $date || $date eq "") {
|
---|
164 | if (!$self->{'allow_invalid_dates'}) {
|
---|
165 | # if this document doesn't contain Date element we won't
|
---|
166 | # include it in this classification
|
---|
167 | print $outhandle "DateList: $doc_OID has no date, not including it\n" if $verbosity >=2;
|
---|
168 | return;
|
---|
169 | } else {
|
---|
170 | $invalid = 1;
|
---|
171 | $date = "INVALID";
|
---|
172 | }
|
---|
173 | }
|
---|
174 |
|
---|
175 | # sanity check date
|
---|
176 | #if ($date !~ /^\d\d\d\d.*/) {
|
---|
177 | if ($date !~ /^$validre.*/) {
|
---|
178 | if (!$self->{'allow_invalid_dates'}) {
|
---|
179 | print $outhandle "DateList: $doc_OID date: '$date' malformed: expected it to start with $validre; not classifying\n" if $verbosity >=2;
|
---|
180 | return;
|
---|
181 | }
|
---|
182 | else {
|
---|
183 | $invalid = 1;
|
---|
184 | $date = "INVALID";
|
---|
185 | }
|
---|
186 | }
|
---|
187 | if ($self->{'bymonth'} && !$invalid) {
|
---|
188 | # check that we have valid month - if not, set it to 00 == undefined
|
---|
189 | if ($date !~ /^$validre-?\d\d/) {
|
---|
190 | print $outhandle "DateList $doc_OID date: '$date' has no month (expecting yyyymm... or yyyy-mm...), setting date to yyyy-00\n" if $verbosity >=2;
|
---|
191 | $date =~ s/^($validre).*$/$1-00/;
|
---|
192 | } else {
|
---|
193 | my ($year, $month) = $date =~ /^($validre)-?(\d\d)/;
|
---|
194 | if ($month > 12) {
|
---|
195 | print $outhandle "DateList $doc_OID date: '$date' has invalid month, setting date to $year-00\n" if $verbosity >=2;
|
---|
196 | $date = "$year-00";
|
---|
197 | }
|
---|
198 | }
|
---|
199 | }
|
---|
200 |
|
---|
201 | my $sort_other = "";
|
---|
202 | if (defined $self->{'sort'} && $self->{'sort'} ne "") {
|
---|
203 | $sort_other = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'sort'});
|
---|
204 | $sort_other = &sorttools::format_metadata_for_sorting($self->{'sort'}, $sort_other, $doc_obj, $self->{'casefold_metadata_for_sorting'}, $self->{'accentfold_metadata_for_sorting'}) unless $self->{'no_metadata_formatting'};
|
---|
205 | }
|
---|
206 |
|
---|
207 | if (defined $self->{'list'}->{$doc_OID}) {
|
---|
208 |
|
---|
209 | print $outhandle "WARNING: DateList::classify called multiple times for $doc_OID, overwriting previous stored date value ($self->{'list'}->{$doc_OID}) with $date$sort_other \n";
|
---|
210 | }
|
---|
211 |
|
---|
212 | $self->{'list'}->{$doc_OID} = "$date$sort_other";
|
---|
213 |
|
---|
214 | }
|
---|
215 |
|
---|
216 |
|
---|
217 | sub get_classify_info {
|
---|
218 | my $self = shift (@_);
|
---|
219 |
|
---|
220 | my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
|
---|
221 |
|
---|
222 | if ($self->{'reverse_sort'}) {
|
---|
223 | @classlist = reverse @classlist;
|
---|
224 | }
|
---|
225 |
|
---|
226 |
|
---|
227 | return $self->splitlist (\@classlist);
|
---|
228 | }
|
---|
229 |
|
---|
230 |
|
---|
231 | sub get_entry {
|
---|
232 | my $self = shift (@_);
|
---|
233 | my ($title, $childtype, $thistype) = @_;
|
---|
234 |
|
---|
235 | # organise into classification structure
|
---|
236 | my %classifyinfo = ('childtype'=>$childtype,
|
---|
237 | 'Title'=>$title,
|
---|
238 | 'contains'=>[],
|
---|
239 | 'mdtype'=>$self->{'metadata'});
|
---|
240 | $classifyinfo{'thistype'} = $thistype
|
---|
241 | if defined $thistype && $thistype =~ /\w/;
|
---|
242 |
|
---|
243 | return \%classifyinfo;
|
---|
244 | }
|
---|
245 |
|
---|
246 | # splitlist takes an ordered list of classifications (@$classlistref) and
|
---|
247 | # splits it up into sub-sections by date
|
---|
248 | sub splitlist {
|
---|
249 | my $self = shift (@_);
|
---|
250 | my ($classlistref) = @_;
|
---|
251 | my $classhash = {};
|
---|
252 |
|
---|
253 | # top level
|
---|
254 | my $childtype = "HList";
|
---|
255 |
|
---|
256 | if (scalar (@$classlistref) <= 39 &&
|
---|
257 | !$self->{'nogroup'}) {$childtype = $self->{'childtype'};}
|
---|
258 |
|
---|
259 | my $classifyinfo = $self->get_entry ($self->{'buttonname'}, $childtype, "Invisible");
|
---|
260 | # don't need to do any splitting if there are less than 39 (max + min -1)
|
---|
261 | # classifications, unless nogroup is specified
|
---|
262 | if ((scalar @$classlistref) <= 39 && !$self->{'nogroup'}) {
|
---|
263 | foreach my $subOID (@$classlistref) {
|
---|
264 | push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
|
---|
265 | }
|
---|
266 | return $classifyinfo;
|
---|
267 | }
|
---|
268 |
|
---|
269 | my $validre = $self->{'valid_date_regex'};
|
---|
270 | my $invalid_bucket = $self->{'invalid_date_partition_name'};
|
---|
271 |
|
---|
272 | if ($self->{'bymonth'}) {
|
---|
273 | # first split up the list into separate year+month classifications
|
---|
274 |
|
---|
275 | if (!$self->{'nogroup'}) { # hlist of year+month pairs
|
---|
276 | # single level of classifications
|
---|
277 | foreach my $classification (@$classlistref) {
|
---|
278 | my $date = $self->{'list'}->{$classification};
|
---|
279 | $date =~ s/^(\d\d\d\d)-?(\d\d).*$/$1 _textmonth$2_/;
|
---|
280 | $classhash->{$date} = [] unless defined $classhash->{$date};
|
---|
281 | push (@{$classhash->{$date}}, $classification);
|
---|
282 | }
|
---|
283 |
|
---|
284 | } else { # -nogroup - individual years and months
|
---|
285 | foreach my $classification (@$classlistref) {
|
---|
286 | my $date = $self->{'list'}->{$classification};
|
---|
287 | $date =~ s/^(\d\d\d\d)-?(\d\d).*$/$1 _textmonth$2_/;
|
---|
288 | my ($year, $month)=($1,$2);
|
---|
289 | # create subclass if it doesn't already exist
|
---|
290 | $classhash->{$year} = () unless defined $classhash->{$year};
|
---|
291 |
|
---|
292 | $classhash->{$year}->{$month} = []
|
---|
293 | unless defined $classhash->{$year}->{$month};
|
---|
294 | push (@{$classhash->{$year}->{$month}}, $classification);
|
---|
295 |
|
---|
296 | }
|
---|
297 | # create hlist of years containing hlists of months
|
---|
298 |
|
---|
299 | my @subclasslist = sort {$a <=> $b} (keys %$classhash);
|
---|
300 | if ($self->{'reverse_sort'}) {
|
---|
301 | @subclasslist = reverse @subclasslist;
|
---|
302 | }
|
---|
303 |
|
---|
304 | foreach my $subclass (@subclasslist) {
|
---|
305 | my $yearclassify = $self->get_entry($subclass, "HList");
|
---|
306 | my @subsubclasslist = sort {$a <=> $b} (keys %{$classhash->{$subclass}});
|
---|
307 | if ($self->{'reverse_sort'}) {
|
---|
308 | @subsubclasslist = reverse @subsubclasslist;
|
---|
309 | }
|
---|
310 |
|
---|
311 | foreach my $subsubclass (@subsubclasslist) {
|
---|
312 | my $monthname=$subsubclass;
|
---|
313 | $monthname="_textmonth" . $monthname . "_";
|
---|
314 | my $monthclassify=$self->get_entry($monthname, $self->{'childtype'});
|
---|
315 | push (@{$yearclassify->{'contains'}}, $monthclassify);
|
---|
316 |
|
---|
317 | foreach my $subsubOID
|
---|
318 | (@{$classhash->{$subclass}->{$subsubclass}}) {
|
---|
319 | push (@{$monthclassify->{'contains'}},
|
---|
320 | {'OID'=>$subsubOID});
|
---|
321 | }
|
---|
322 | }
|
---|
323 | push (@{$classifyinfo->{'contains'}}, $yearclassify);
|
---|
324 | }
|
---|
325 |
|
---|
326 | return $classifyinfo;
|
---|
327 | } # nogroup
|
---|
328 | } else {
|
---|
329 | # not by month
|
---|
330 | # first split up the list into separate year classifications
|
---|
331 | foreach my $classification (@$classlistref) {
|
---|
332 | my $date = $self->{'list'}->{$classification};
|
---|
333 | if ($date =~ /^INVALID/) {
|
---|
334 | $date = $invalid_bucket;
|
---|
335 | } else {
|
---|
336 | $date =~ s/^($validre).*$/$1/;
|
---|
337 | }
|
---|
338 | $classhash->{$date} = [] unless defined $classhash->{$date};
|
---|
339 | push (@{$classhash->{$date}}, $classification);
|
---|
340 | }
|
---|
341 |
|
---|
342 | }
|
---|
343 |
|
---|
344 | # only compact the list if nogroup not specified
|
---|
345 | if (!$self->{'nogroup'}) {
|
---|
346 | print STDERR "compacting list\n";
|
---|
347 | $classhash = $self->compactlist ($classhash);
|
---|
348 | }
|
---|
349 | my @subclasslist = sort keys %$classhash;
|
---|
350 | if ($self->{'reverse_sort'}) {
|
---|
351 | @subclasslist = reverse @subclasslist;
|
---|
352 | }
|
---|
353 | foreach my $subclass (@subclasslist) {
|
---|
354 | my $tempclassify = $self->get_entry($subclass, $self->{'childtype'});
|
---|
355 | foreach my $subsubOID (@{$classhash->{$subclass}}) {
|
---|
356 | push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
|
---|
357 | }
|
---|
358 | push (@{$classifyinfo->{'contains'}}, $tempclassify);
|
---|
359 | }
|
---|
360 |
|
---|
361 | return $classifyinfo;
|
---|
362 | }
|
---|
363 |
|
---|
364 | sub compactlist {
|
---|
365 | my $self = shift (@_);
|
---|
366 | my ($classhashref) = @_;
|
---|
367 | my $compactedhash = {};
|
---|
368 | my @currentOIDs = ();
|
---|
369 | my $currentfirstdate = "";
|
---|
370 | my $currentlastdate = "";
|
---|
371 | my $lastkey = "";
|
---|
372 |
|
---|
373 | # minimum and maximum documents to be displayed per page.
|
---|
374 | # the actual maximum will be max + (min-1).
|
---|
375 | # the smallest sub-section is a single letter at present
|
---|
376 | # so in this case there may be many times max documents
|
---|
377 | # displayed on a page.
|
---|
378 | my $min = 10;
|
---|
379 | my $max = 30;
|
---|
380 | my @subsectionlist = sort keys %$classhashref;
|
---|
381 | if ($self->{'reverse_sort'}) {
|
---|
382 | @subsectionlist = reverse @subsectionlist;
|
---|
383 | }
|
---|
384 | foreach my $subsection (@subsectionlist) {
|
---|
385 | if ($subsection eq $self->{'invalid_date_partition_name'}) {
|
---|
386 | # leave this one as is
|
---|
387 | $compactedhash->{$subsection} = $classhashref->{$subsection};
|
---|
388 | next;
|
---|
389 | }
|
---|
390 | # print STDERR "in sub $subsection\n";
|
---|
391 | $currentfirstdate = $subsection if $currentfirstdate eq "";
|
---|
392 | if ((scalar (@currentOIDs) < $min) ||
|
---|
393 | ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
|
---|
394 | push (@currentOIDs, @{$classhashref->{$subsection}});
|
---|
395 | $currentlastdate = $subsection;
|
---|
396 | } else {
|
---|
397 | if ($currentfirstdate eq $currentlastdate) {
|
---|
398 | @{$compactedhash->{$currentfirstdate}} = @currentOIDs;
|
---|
399 | $lastkey = $currentfirstdate;
|
---|
400 | } else {
|
---|
401 | @{$compactedhash->{"$currentfirstdate-$currentlastdate"}} = @currentOIDs;
|
---|
402 | $lastkey = "$currentfirstdate-$currentlastdate";
|
---|
403 | }
|
---|
404 | if (scalar (@{$classhashref->{$subsection}}) >= $max) {
|
---|
405 | $compactedhash->{$subsection} = $classhashref->{$subsection};
|
---|
406 | @currentOIDs = ();
|
---|
407 | $currentfirstdate = "";
|
---|
408 | $lastkey = $subsection;
|
---|
409 | } else {
|
---|
410 | @currentOIDs = @{$classhashref->{$subsection}};
|
---|
411 | $currentfirstdate = $subsection;
|
---|
412 | $currentlastdate = $subsection;
|
---|
413 | }
|
---|
414 | }
|
---|
415 | }
|
---|
416 |
|
---|
417 | # add final OIDs to last sub-classification if there aren't many otherwise
|
---|
418 | # add final sub-classification
|
---|
419 | if (scalar (@currentOIDs) > 0) {
|
---|
420 | if ((scalar (@currentOIDs) < $min)) {
|
---|
421 |
|
---|
422 | # want every thing in previous up to the dash
|
---|
423 | my ($newkey) = $lastkey =~ /^([^\-]+)/;
|
---|
424 | @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
|
---|
425 | delete $compactedhash->{$lastkey};
|
---|
426 | @{$compactedhash->{"$newkey-$currentlastdate"}} = @currentOIDs;
|
---|
427 | } else {
|
---|
428 | if ($currentfirstdate eq $currentlastdate) {
|
---|
429 | @{$compactedhash->{$currentfirstdate}} = @currentOIDs;
|
---|
430 | } else {
|
---|
431 | @{$compactedhash->{"$currentfirstdate-$currentlastdate"}} = @currentOIDs;
|
---|
432 | }
|
---|
433 | }
|
---|
434 | }
|
---|
435 |
|
---|
436 | return $compactedhash;
|
---|
437 | }
|
---|
438 |
|
---|
439 | 1;
|
---|