source: main/trunk/greenstone2/perllib/classify/DateList.pm@ 23116

Last change on this file since 23116 was 23116, checked in by kjdon, 11 years ago

for incremental build, classifiers are not really done incrementally. Previously, we reconstructed all the docs from the database, and classified them, then processed any new/edited/deleted docs, updating the classifier as necessary. Now, we process all new/updated docs, then reconstruct the docs from the database, but only classify those not changed/deleted. This means that we are only ever adding docs to a classifier, never updating or deleting. I have removed edit_mode and all code handling deleting stuff from the classifier.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.9 KB
Line 
1###########################################################################
2#
3# DateList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting by date
27
28# date is assumed to be in the form yyyymmdd
29
30# at present dates are split by year - this should change
31# jrm21 - added option "bymonth", which splits by year and month.
32
33# 23/09/03 Added some more options -kjdon.
34# these include:
35# -nogroup, which makes each year (or year+month) an individual entry in
36# the horizontal list and prevents compaction
37# -metadata, use a different metadata element for the date (instead of Date); the value is still expected in yyyymmdd format. This affects display because Greenstone displays Date metadata as dd month yyyy, whereas any other date metadata is displayed as yyyymmdd - this needs fixing
38# -sort specifies an additional metadata element to use in sorting; it takes effect when two docs have the same date.
39
40package DateList;
41
42use BaseClassifier;
43use sorttools;
44
45use strict;
46no strict 'refs'; # allow filehandles to be variables and viceversa
47
# Set up inheritance at compile time: DateList derives from BaseClassifier.
BEGIN {
    @DateList::ISA = qw(BaseClassifier);
}
51
# Option table for this classifier.  Each entry describes one option by
# name, description string, type and whether it is required; 'deft' (where
# present) is the default value.  new() appends these entries to the shared
# "ArgList" it is handed.
# NOTE(review): the "{DateList.xxx}" description strings look like keys into
# a string resource table rather than literal text - confirm.
my $arguments = [
    {
        'name' => 'metadata',
        'desc' => '{DateList.metadata}',
        'type' => 'metadata',
        'deft' => 'Date',
        'reqd' => 'yes',
    },
    {
        'name' => 'sort',
        'desc' => '{DateList.sort}',
        'type' => 'metadata',
        'reqd' => 'no',
    },
    {
        'name' => 'reverse_sort',
        'desc' => '{DateList.reverse_sort}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'bymonth',
        'desc' => '{DateList.bymonth}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'nogroup',
        'desc' => '{DateList.nogroup}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'no_special_formatting',
        'desc' => '{DateList.no_special_formatting}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];

# Classifier-level description record; new() appends it to the shared
# "OptList".  'args' points at the option table above.
my $options = {
    'name'     => 'DateList',
    'desc'     => '{DateList.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
86
87
# Constructor.
#
# Arguments (after the class name):
#   $classifierslist  - array ref; this class name is appended to it
#   $inputargs        - passed through untouched to BaseClassifier->new
#   $hashArgOptLists  - hash ref; this classifier's option table is appended
#                       to its "ArgList" and its description to its "OptList"
#
# Returns the object built by BaseClassifier->new, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($classifierslist, $inputargs, $hashArgOptLists) = @_;
    push(@$classifierslist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Direct method call rather than the indirect-object form
    # ("new BaseClassifier(...)"), which Perl can mis-parse.
    my $self = BaseClassifier->new($classifierslist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    # Manually set $self parameters.
    # 'list' maps document OID -> sort key (date followed by sort metadata).
    $self->{'list'} = {};

    if (!defined $self->{"metadata"} || $self->{"metadata"} eq "") {
        $self->{'metadata'} = "Date";
    }
    # remove any ex.s
    $self->{'metadata'} = $self->strip_ex_from_metadata($self->{'metadata'});
    $self->{'sort'} = $self->strip_ex_from_metadata($self->{'sort'});

    # 'metadata' may be a comma separated list of metadata names; classify()
    # walks this list in order and uses the first one a document provides.
    my @meta_list = split(/,/, $self->{"metadata"});
    $self->{'meta_list'} = \@meta_list;

    # Only generate a button name when none has been set already.
    $self->{'buttonname'} = $self->generate_title_from_metadata($self->{'metadata'}) unless ($self->{'buttonname'});

    # Node type used for the lists that directly hold documents.
    $self->{'childtype'} = "DateList";
    if ($self->{'no_special_formatting'}) {
        $self->{'childtype'} = "VList";
    }

    return bless $self, $class;
}
126
# Reset the classifier's working state: discard every document recorded so
# far by replacing the OID -> sort-key table with a fresh empty hash.
sub init {
    my ($self) = @_;

    $self->{'list'} = {};
}
132
# Record one document in this classifier.
#
# Arguments:
#   $doc_obj - document object; must provide get_OID(), get_top_section()
#              and get_metadata_element($section, $name)
#
# The document's date is the first non-empty value found among the metadata
# names in $self->{'meta_list'} (read from the top section).  Documents with
# no usable date are left out of the classification.  The value stored in
# $self->{'list'}->{OID} is the date immediately followed by the (optional)
# secondary sort value, so that a plain string sort orders by date first.
sub classify {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $doc_OID = $doc_obj->get_OID();
    my $top_section = $doc_obj->get_top_section();

    # find the first available metadata; an empty value counts as missing so
    # that later names in the list still get a chance
    my $date;
    foreach my $m (@{$self->{'meta_list'}}) {
        $date = $doc_obj->get_metadata_element($top_section, $m);
        last if defined $date && $date ne "";
    }

    if (!defined $date || $date eq "") {
        # if this document doesn't contain Date element we won't
        # include it in this classification
        return;
    }

    my $sort_other = "";
    if (defined $self->{'sort'} && $self->{'sort'} ne "") {
        $sort_other = $doc_obj->get_metadata_element($top_section, $self->{'sort'});
        $sort_other = sorttools::format_metadata_for_sorting($self->{'sort'}, $sort_other, $doc_obj) unless $self->{'no_metadata_formatting'};
        # the document may simply not have the sort metadata
        $sort_other = "" unless defined $sort_other;
    }

    if (defined $self->{'list'}->{$doc_OID}) {
        my $outhandle = $self->{'outhandle'};
        print $outhandle "WARNING: DateList::classify called multiple times for $doc_OID\n";
    }

    $self->{'list'}->{$doc_OID} = "$date$sort_other";

}
168
169
# Build the classification structure for everything recorded so far.
# Document OIDs are ordered by their stored sort key (plain string
# comparison), flipped when 'reverse_sort' is set, and then handed to
# splitlist(), whose result is returned unchanged.
sub get_classify_info {
    my ($self) = @_;

    my $sortkey_of = $self->{'list'};
    my @ordered_oids = sort { $sortkey_of->{$a} cmp $sortkey_of->{$b} } keys %$sortkey_of;

    @ordered_oids = reverse @ordered_oids if $self->{'reverse_sort'};

    return $self->splitlist(\@ordered_oids);
}
182
183
# Create one (initially empty) node of the classification structure.
#
# Arguments:
#   $title     - stored under 'Title'
#   $childtype - stored under 'childtype'
#   $thistype  - optional; stored under 'thistype' only when it is defined
#                and contains at least one word character
#
# Returns a hash ref that also carries an empty 'contains' array and
# 'mdtype' set to this classifier's 'metadata' value.
sub get_entry {
    my ($self, $title, $childtype, $thistype) = @_;

    my $entry = {
        'childtype' => $childtype,
        'Title'     => $title,
        'contains'  => [],
        'mdtype'    => $self->{'metadata'},
    };

    if (defined $thistype && $thistype =~ /\w/) {
        $entry->{'thistype'} = $thistype;
    }

    return $entry;
}
198
# Split a stored sort key (date followed by optional sort text) into
# (year, month).  Accepts "yyyymm..." and "yyyy-mm...".  A key with only a
# leading four digit year gets month "00" (the value this classifier already
# treats as "month unknown"); a key with no leading year at all is returned
# whole as the "year", again with month "00", so the document is still listed.
sub _datelist_year_month {
    my ($sortkey) = @_;

    return ($1, $2)   if $sortkey =~ /^(\d\d\d\d)-?(\d\d)/;
    return ($1, "00") if $sortkey =~ /^(\d\d\d\d)/;
    return ($sortkey, "00");
}

# Append one {'OID' => ...} leaf per document to an entry's 'contains' list.
sub _datelist_add_leaves {
    my ($entry, $oidlistref) = @_;

    foreach my $oid (@$oidlistref) {
        push (@{$entry->{'contains'}}, {'OID' => $oid});
    }
}

# splitlist takes an ordered list of classifications (@$classlistref) and
# splits it up into sub-sections by date
#
# Arguments:
#   $classlistref - array ref of document OIDs, already in display order
#
# Returns the top-level classification entry (see get_entry):
#   * 39 documents or fewer and 'nogroup' not set: a single flat list;
#   * 'bymonth' with 'nogroup': an HList of years, each holding an HList of
#     months, each holding that month's documents;
#   * otherwise: one bucket per year (or per year+month when 'bymonth' is
#     set), run through compactlist() unless 'nogroup' is set.
sub splitlist {
    my $self = shift (@_);
    my ($classlistref) = @_;
    my $classhash = {};

    my $nogroup = $self->{'nogroup'};

    # don't need to do any splitting if there are no more than 39
    # (max + min - 1) classifications, unless nogroup is specified
    my $fits_on_one_page = (scalar (@$classlistref) <= 39 && !$nogroup);

    # top level
    my $childtype = $fits_on_one_page ? $self->{'childtype'} : "HList";
    my $classifyinfo = $self->get_entry ($self->{'buttonname'}, $childtype, "Invisible");

    if ($fits_on_one_page) {
        _datelist_add_leaves ($classifyinfo, $classlistref);
        return $classifyinfo;
    }

    if ($self->{'bymonth'} && $nogroup) {
        # don't group - individual years and months.
        # Parse year/month with an explicit match check: reading $1/$2 after
        # a substitution that did not match would file the document under an
        # undefined or stale year/month.
        foreach my $classification (@$classlistref) {
            my ($year, $month) = _datelist_year_month ($self->{'list'}->{$classification});
            push (@{$classhash->{$year}->{$month}}, $classification);
        }

        # create hlist of years containing hlists of months
        my @years = sort {$a <=> $b} (keys %$classhash);
        @years = reverse @years if $self->{'reverse_sort'};

        foreach my $year (@years) {
            my $yearclassify = $self->get_entry ($year, "HList");

            my @months = sort {$a <=> $b} (keys %{$classhash->{$year}});
            @months = reverse @months if $self->{'reverse_sort'};

            foreach my $month (@months) {
                # months outside 1..12 (e.g. "00") are shown as they are
                my $monthname = $month;
                if ($monthname >= 1 && $monthname <= 12) {
                    $monthname = "_textmonth" . $monthname . "_";
                }
                my $monthclassify = $self->get_entry ($monthname, $self->{'childtype'});
                push (@{$yearclassify->{'contains'}}, $monthclassify);
                _datelist_add_leaves ($monthclassify, $classhash->{$year}->{$month});
            }
            push (@{$classifyinfo->{'contains'}}, $yearclassify);
        }

        return $classifyinfo;
    }

    # single level of buckets, keyed by the label that will be displayed
    foreach my $classification (@$classlistref) {
        my $date = $self->{'list'}->{$classification};
        if ($self->{'bymonth'}) {
            # year+month label; fall back to just the year when the month is
            # zero or missing, as the by-year branch below does
            my ($year, $month) = _datelist_year_month ($date);
            $date = ($month eq "00") ? $year : "$year&nbsp;_textmonth${month}_";
        } else {
            # not by month - keep just the leading year
            $date =~ s/^(\d\d\d\d).*$/$1/;
        }
        push (@{$classhash->{$date}}, $classification);
    }

    # only compact the list if nogroup not specified
    if (!$nogroup) {
        $classhash = $self->compactlist ($classhash);
    }

    # NOTE(review): bucket order is only reversed for 'nogroup'; compacted
    # buckets are always emitted in ascending key order even when
    # 'reverse_sort' is set.  Kept as found - confirm whether intended.
    my @subclasses = sort keys %$classhash;
    @subclasses = reverse @subclasses if ($self->{'reverse_sort'} && $nogroup);

    foreach my $subclass (@subclasses) {
        my $tempclassify = $self->get_entry ($subclass, $self->{'childtype'});
        _datelist_add_leaves ($tempclassify, $classhash->{$subclass});
        push (@{$classifyinfo->{'contains'}}, $tempclassify);
    }

    return $classifyinfo;
}
348
# Merge neighbouring date buckets so that each resulting bucket holds a
# reasonable number of documents.
#
# Arguments:
#   $classhashref - hash ref mapping a bucket label to an array ref of OIDs
#
# Returns a new hash ref.  Buckets are visited in sorted label order; a merged
# bucket is labelled "first-last" from the labels it spans, a bucket left on
# its own keeps its label.
sub compactlist {
    my $self = shift (@_);
    my ($classhashref) = @_;
    my $compactedhash = {};
    my @currentOIDs = ();
    my $currentfirstdate = "";
    my $currentlastdate = "";
    my $lastkey = "";   # label of the bucket most recently written out

    # minimum and maximum documents to be displayed per page.
    # the actual maximum will be max + (min-1).
    # the smallest sub-section is a single input bucket, so a page may
    # still hold many times max documents when one bucket is that large.
    my $min = 10;
    my $max = 30;

    foreach my $subsection (sort keys %$classhashref) {
        $currentfirstdate = $subsection if $currentfirstdate eq "";
        if ((scalar (@currentOIDs) < $min) ||
            ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
            # still room (or not yet enough): keep accumulating
            push (@currentOIDs, @{$classhashref->{$subsection}});
            $currentlastdate = $subsection;
        } else {
            # write out what has been accumulated so far ...
            if ($currentfirstdate eq $currentlastdate) {
                @{$compactedhash->{$currentfirstdate}} = @currentOIDs;
                $lastkey = $currentfirstdate;
            } else {
                @{$compactedhash->{"$currentfirstdate-$currentlastdate"}} = @currentOIDs;
                $lastkey = "$currentfirstdate-$currentlastdate";
            }
            # ... then either give a big bucket a page of its own, or start
            # accumulating again from this bucket
            if (scalar (@{$classhashref->{$subsection}}) >= $max) {
                $compactedhash->{$subsection} = $classhashref->{$subsection};
                @currentOIDs = ();
                $currentfirstdate = "";
                $lastkey = $subsection;
            } else {
                @currentOIDs = @{$classhashref->{$subsection}};
                $currentfirstdate = $subsection;
                $currentlastdate = $subsection;
            }
        }
    }

    # add final OIDs to last sub-classification if there aren't many otherwise
    # add final sub-classification
    if (scalar (@currentOIDs) > 0) {
        # Only merge backwards when a previous bucket was actually written;
        # otherwise there is nothing to merge into and the result would get
        # a malformed label such as "-2001".
        if ((scalar (@currentOIDs) < $min) && exists $compactedhash->{$lastkey}) {

            # want every thing in previous up to the dash
            my ($newkey) = $lastkey =~ /^([^\-]+)/;
            @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
            delete $compactedhash->{$lastkey};
            @{$compactedhash->{"$newkey-$currentlastdate"}} = @currentOIDs;
        } else {
            if ($currentfirstdate eq $currentlastdate) {
                @{$compactedhash->{$currentfirstdate}} = @currentOIDs;
            } else {
                @{$compactedhash->{"$currentfirstdate-$currentlastdate"}} = @currentOIDs;
            }
        }
    }

    return $compactedhash;
}
413
4141;
Note: See TracBrowser for help on using the repository browser.