source: trunk/gsdl/perllib/classify/DateList.pm@ 10218

Last change on this file since 10218 was 10218, checked in by kjdon, 19 years ago

Jeffrey's new parsing modifications, committed approx 6 July, 15.16

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.4 KB
Line 
1###########################################################################
2#
3# DateList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting by date
27
28# date is assumed to be in the form yyyymmdd
29
30# at present dates are split by year - this should change
31# jrm21 - added option "bymonth", which splits by year and month.
32
33# 23/09/03 Added some more options -kjdon.
34# these include:
35# -nogroup, which makes each year (or year+month) an individual entry in
36# the horizontal list and prevents compaction
37# -metadata, use a different metadata for the date (instead of Date), still expects yyyymmdd format. this affects display because greenstone displays Date metadata as dd month yyyy, whereas any other date metadata is displayed as yyyymmdd - this needs fixing
38# -sort specifies an additional metadata to use in sorting, will take effect when two docs have the same date.
39
40package DateList;
41
42use BasClas;
43use sorttools;
44
# Set up inheritance at compile time: this package's @ISA holds the
# single parent class name 'BasClas'.
sub BEGIN {
    @ISA = qw(BasClas);
}
48
# Descriptions of the options this classifier accepts.  Each entry gives
# the option name, a resource key for its description, its type, whether
# it is required and (for "metadata" only) a default value.  The list is
# appended to the caller's "ArgList" in new().
my $arguments = [
    {
        'name' => 'metadata',
        'desc' => '{DateList.metadata}',
        'type' => 'metadata',
        'deft' => 'Date',
        'reqd' => 'no',
    },
    {
        'name' => 'sort',
        'desc' => '{DateList.sort}',
        'type' => 'metadata',
        'reqd' => 'no',
    },
    {
        'name' => 'reverse_sort',
        'desc' => '{DateList.reverse_sort}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'bymonth',
        'desc' => '{DateList.bymonth}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'nogroup',
        'desc' => '{DateList.nogroup}',
        'type' => 'flag',
        'reqd' => 'no',
    },
];
72
# Top-level description of this classifier; appended to the caller's
# "OptList" in new().  'args' refers to the $arguments list above.
my $options = {
    'name'     => 'DateList',
    'desc'     => '{DateList.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
78
79
# Constructor.
#
# Arguments (after the class name):
#   $classifierslist - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the BasClas constructor
#   $hashArgOptLists - hash ref; this classifier's argument and option
#                      descriptions are appended to its "ArgList" and
#                      "OptList" entries
#
# Returns the object built by BasClas, re-blessed into $class, with an
# empty 'list' and with 'metadata' defaulting to "Date".
sub new {
    my $class = shift;
    my ($classifierslist, $inputargs, $hashArgOptLists) = @_;

    push @{$classifierslist}, $class;

    if (defined $arguments) {
        push @{$hashArgOptLists->{"ArgList"}}, @{$arguments};
    }
    if (defined $options) {
        push @{$hashArgOptLists->{"OptList"}}, $options;
    }

    my $self;
    if (defined $hashArgOptLists) {
        $self = BasClas->new($classifierslist, $inputargs, $hashArgOptLists);
    }
    else {
        $self = BasClas->new($classifierslist, $inputargs);
    }

    # Manually set $self parameters.
    $self->{'list'} = {};

    # Fall back to the Date metadata when none (or an empty one) was given.
    unless (defined $self->{"metadata"} && $self->{"metadata"} ne "") {
        $self->{'metadata'} = "Date";
    }

    return bless $self, $class;
}
99
# Reset the classifier: discard every document recorded so far by
# replacing 'list' with a fresh empty hash.
sub init {
    my ($self) = @_;

    $self->{'list'} = {};
}
105
# Record one document for later classification.
#
# Reads the configured date metadata from the document's top section and,
# when a -sort metadata is configured, that metadata too (formatted by
# sorttools::format_metadata_for_sorting).  The document is stored in
# $self->{'list'} under its OID with the value "<date><sort value>", which
# is the string get_classify_info() later sorts on.
#
# Documents whose date metadata is undefined or contains no digit are
# left out of this classification.
sub classify {
    my ($self, $doc_obj) = @_;

    my $oid  = $doc_obj->get_OID();
    my $date = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
                                               $self->{'metadata'});

    # secondary sort key, used when two documents share the same date
    my $secondary = "";
    my $sort_meta = $self->{'sort'};
    if (defined $sort_meta && $sort_meta ne "") {
        $secondary = $doc_obj->get_metadata_element ($doc_obj->get_top_section(),
                                                     $self->{'sort'});
        $secondary = &sorttools::format_metadata_for_sorting($self->{'sort'},
                                                             $secondary, $doc_obj);
    }

    # if this document doesn't contain a usable date element we won't
    # include it in this classification
    if (defined $date && $date =~ /\d/) {
        if (defined $self->{'list'}->{$oid}) {
            my $outhandle = $self->{'outhandle'};
            print $outhandle "WARNING: DateList::classify called multiple times for $oid\n";
        }

        $self->{'list'}->{$oid} = "$date$secondary";
    }
}
129
130
# Build the classification structure for every document recorded by
# classify().  The OIDs are ordered by a string comparison of their stored
# "<date><sort value>" keys (reversed when reverse_sort is set) and the
# ordered list is handed to splitlist(), whose result is returned.
sub get_classify_info {
    my ($self) = @_;

    my $sortkey_of = $self->{'list'};
    my @ordered = sort { $sortkey_of->{$a} cmp $sortkey_of->{$b} }
                  keys %{$sortkey_of};

    @ordered = reverse @ordered if $self->{'reverse_sort'};

    return $self->splitlist (\@ordered);
}
141
142
# Create one (initially empty) node of the classification structure.
#
# Arguments:
#   $title     - stored as the node's 'Title'
#   $childtype - stored as the node's 'childtype'
#   $thistype  - optional; stored as 'thistype' only when it is defined
#                and contains at least one word character
#
# Returns a hash ref that also carries an empty 'contains' list and an
# 'mdtype' set to the metadata name this classifier works on.
sub get_entry {
    my ($self, $title, $childtype, $thistype) = @_;

    # organise into classification structure
    my $entry = {
        'childtype' => $childtype,
        'Title'     => $title,
        'contains'  => [],
        'mdtype'    => $self->{'metadata'},
    };

    if (defined $thistype && $thistype =~ /\w/) {
        $entry->{'thistype'} = $thistype;
    }

    return $entry;
}
157
# splitlist takes an ordered list of classifications (@$classlistref) and
# splits it up into sub-sections by date.
#
# Arguments:
#   $classlistref - array ref of document OIDs, already in display order;
#                   each OID's sort string is looked up in $self->{'list'}
#
# Returns the top-level node (as built by get_entry) titled "Date":
#   * 39 documents or fewer and no -nogroup: a flat DateList of all OIDs;
#   * -bymonth with -nogroup: an HList of years, each an HList of months,
#     each month a DateList of OIDs;
#   * otherwise: an HList of year (or year+month) groups, merged into
#     ranges by compactlist unless -nogroup is set.
sub splitlist {
    my $self = shift (@_);
    my ($classlistref) = @_;
    my $classhash = {};

    # don't need to do any splitting if there are 39 (max + min - 1) or
    # fewer classifications, unless nogroup is specified
    my $flat = (scalar (@$classlistref) <= 39 && !$self->{'nogroup'});

    # top level
    my $childtype = $flat ? "DateList" : "HList";
    my $classifyinfo = $self->get_entry ("Date", $childtype, "Invisible");

    if ($flat) {
        foreach my $subOID (@$classlistref) {
            push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
        }
        return $classifyinfo;
    }

    if ($self->{'bymonth'}) {
        # first split up the list into separate year+month classifications

        if (!$self->{'nogroup'}) { # hlist of year+month pairs
            # single level of classifications
            foreach my $classification (@$classlistref) {
                my $date = $self->{'list'}->{$classification};
                $date =~ s/^(\d\d\d\d)(\d\d).*$/$1&nbsp;_textmonth$2_/;
                # sanity check if month is zero
                if ($date =~ /00_$/) {
                    $date =~ s/^(\d\d\d\d).*$/$1/g;
                }
                $classhash->{$date} = [] unless defined $classhash->{$date};
                push (@{$classhash->{$date}}, $classification);
            }

        } else { # don't group - individual years and months
            foreach my $classification (@$classlistref) {
                my $date = $self->{'list'}->{$classification};

                # Take year and month from a match made in this statement
                # only.  Reading $1/$2 after a substitution that did not
                # match (e.g. a year-only date) would pick up stale
                # captures and file the document under the wrong year.
                my ($year, $month);
                if ($date =~ /^(\d\d\d\d)(\d\d)/) {
                    ($year, $month) = ($1, $2);
                } elsif ($date =~ /^(\d\d\d\d)/) {
                    # year only: use the "month unknown" value 00
                    ($year, $month) = ($1, "00");
                } else {
                    # no recognisable year: keep the raw value as its own
                    # group, as the year-only branch below does
                    ($year, $month) = ($date, "00");
                }

                # create subclass if it doesn't already exist
                $classhash->{$year} = {} unless defined $classhash->{$year};
                $classhash->{$year}->{$month} = []
                    unless defined $classhash->{$year}->{$month};
                push (@{$classhash->{$year}->{$month}}, $classification);
            }

            # create hlist of years containing hlists of months
            foreach my $subclass (sort {$a <=> $b} keys %$classhash) {
                my $yearclassify = $self->get_entry($subclass, "HList");
                foreach my $subsubclass (sort {$a <=> $b}
                                         (keys %{$classhash->{$subclass}})) {
                    # months 01..12 become _textmonthNN_ macros; anything
                    # else (e.g. 00) is shown as is
                    my $monthname=$subsubclass;
                    if ($monthname >= 1 && $monthname <= 12) {
                        $monthname="_textmonth" . $monthname . "_";
                    }
                    my $monthclassify=$self->get_entry($monthname, "DateList");
                    push (@{$yearclassify->{'contains'}}, $monthclassify);

                    foreach my $subsubOID
                        (@{$classhash->{$subclass}->{$subsubclass}}) {
                        push (@{$monthclassify->{'contains'}},
                              {'OID'=>$subsubOID});
                    }
                }
                push (@{$classifyinfo->{'contains'}}, $yearclassify);
            }
            return $classifyinfo;
        } # nogroup
    } else {
        # not by month
        # first split up the list into separate year classifications
        foreach my $classification (@$classlistref) {
            my $date = $self->{'list'}->{$classification};
            $date =~ s/^(\d\d\d\d).*$/$1/;
            $classhash->{$date} = [] unless defined $classhash->{$date};
            push (@{$classhash->{$date}}, $classification);
        }
    }

    # only compact the list if nogroup not specified
    if (!$self->{'nogroup'}) {
        $classhash = $self->compactlist ($classhash);
    }
    foreach my $subclass (sort keys %$classhash) {
        my $tempclassify = $self->get_entry($subclass, "DateList");
        foreach my $subsubOID (@{$classhash->{$subclass}}) {
            push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
        }
        push (@{$classifyinfo->{'contains'}}, $tempclassify);
    }

    return $classifyinfo;
}
260
# compactlist merges adjacent date sub-sections so that each resulting
# group holds a reasonable number of documents.
#
# Arguments:
#   $classhashref - hash ref mapping a date key (e.g. a year) to an array
#                   ref of OIDs
#
# Returns a new hash ref mapping either a single date key or a range
# "first-last" to an array ref of OIDs.  Keys are processed in string
# sort order.  An empty input gives an empty result, and no empty groups
# are ever created.
sub compactlist {
    my $self = shift (@_);
    my ($classhashref) = @_;
    my $compactedhash = {};
    my @currentOIDs = ();
    my $currentfirstdate = "";
    my $currentlastdate = "";
    my $lastkey = "";

    # minimum and maximum documents to be displayed per page.
    # the actual maximum will be max + (min-1).
    # the smallest sub-section is a single date key, so a key that holds
    # many documents on its own may still exceed max on one page.
    my $min = 10;
    my $max = 30;
    foreach my $subsection (sort keys %$classhashref) {
        $currentfirstdate = $subsection if $currentfirstdate eq "";
        if ((scalar (@currentOIDs) < $min) ||
            ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
            # still room in the current group
            push (@currentOIDs, @{$classhashref->{$subsection}});
            $currentlastdate = $subsection;
        } else {
            # current group is full: emit it under a single-date or
            # range key
            $lastkey = ($currentfirstdate eq $currentlastdate)
                ? $currentfirstdate
                : "$currentfirstdate-$currentlastdate";
            @{$compactedhash->{$lastkey}} = @currentOIDs;

            if (scalar (@{$classhashref->{$subsection}}) >= $max) {
                # this sub-section is big enough to stand on its own
                $compactedhash->{$subsection} = $classhashref->{$subsection};
                @currentOIDs = ();
                $currentfirstdate = "";
                $lastkey = $subsection;
            } else {
                # start a new group with this sub-section
                @currentOIDs = @{$classhashref->{$subsection}};
                $currentfirstdate = $subsection;
                $currentlastdate = $subsection;
            }
        }
    }

    # Nothing pending (empty input, or the last sub-section was emitted on
    # its own above): adding a group here would create an empty entry
    # under a stale key such as "-1998".
    return $compactedhash if scalar (@currentOIDs) == 0;

    # add final OIDs to last sub-classification if there aren't many,
    # otherwise add final sub-classification
    if ((scalar (@currentOIDs) < $min) && ($lastkey ne "")) {
        # want every thing in previous up to the dash
        my ($newkey) = $lastkey =~ /^([^\-]+)/;
        @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
        delete $compactedhash->{$lastkey};
        @{$compactedhash->{"$newkey-$currentlastdate"}} = @currentOIDs;
    } else {
        # either a full-sized final group, or a small one with no earlier
        # group to merge into
        if ($currentfirstdate eq $currentlastdate) {
            @{$compactedhash->{$currentfirstdate}} = @currentOIDs;
        } else {
            @{$compactedhash->{"$currentfirstdate-$currentlastdate"}} = @currentOIDs;
        }
    }

    return $compactedhash;
}
323
3241;
Note: See TracBrowser for help on using the repository browser.