source: trunk/gsdl/perllib/classify/DateList.pm@ 2685

Last change on this file since 2685 was 2685, checked in by jrm21, 23 years ago

Improved regex for when the last category is too small, and we need to
work out the name of the previous one. (regexp now works with both year
and year_month).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.1 KB
Line 
1###########################################################################
2#
3# DateList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting by date
27
28# always sorts by 'Date' metadata
29
30# date is assumed to be in the form yyyymmdd
31
32# at present dates are split by year - this should change
33# jrm21 - added option "bymonth", which splits by year and month.
34
35package DateList;
36
37use BasClas;
38use sorttools;
39
# Set up inheritance at compile time: DateList derives from BasClas.
# @ISA is declared with 'our' so the assignment is explicit about writing
# the package variable and remains legal under 'use strict'.
BEGIN {
    our @ISA = ('BasClas');
}
43
# Write the classifier's usage summary to STDERR.  Takes no arguments.
sub print_usage {
    print STDERR "
 usage: classify DateList [options]
 options:
 -bymonth [or bymonth=1] Classify by year and month

 Classifier plugin for sorting by date.
 Always sorts by 'Date' metadata.
 Date is assumed to be in the form yyyymmdd (all digits).
 By default dates are split by year - this should change.

 Any errors are Dana's problem.
";
}
58
# Constructor.
#   $class - name of the class to bless the new classifier into
#   @_     - remaining classifier options; they are passed on to the BasClas
#            constructor and then handed to parsargv::parse, which is asked
#            to fill in $self->{'bymonth'}
# Returns the classifier object blessed into $class.  If parsargv::parse
# returns false the usage text is printed and the constructor dies.
sub new {
    my $class = shift (@_);

    # direct method call rather than the indirect-object form
    # 'new BasClas(...)'; the argument list is the same
    my $self = BasClas->new($class, @_);

    # OID => date map, filled in by classify()
    $self->{'list'} = {};

    if (!parsargv::parse(\@_,
                         q^bymonth^, \$self->{'bymonth'},
                         "allow_extra_options")) {
        print_usage();
        die "\n";
    }

    return bless $self, $class;
}
72
# Reset the classifier: replace the OID => date map with a fresh empty hash,
# discarding anything recorded by earlier classify() calls.
sub init {
    my ($self) = @_;

    $self->{'list'} = {};
}
78
# Record one document's date for later classification.
#   $doc_obj - document object; must provide get_OID(), get_top_section()
#              and get_metadata_element($section, $name)
# The 'Date' metadata of the document's top section is stored in
# $self->{'list'} under the document's OID.  Documents whose Date is
# undefined or contains no digit are not recorded.  If the OID was already
# recorded, a warning is printed and the stored date is overwritten.
sub classify {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $doc_OID = $doc_obj->get_OID();
    my $date = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Date');

    # if this document doesn't contain Date element we won't
    # include it in this classification
    if (defined $date && $date =~ /\d/) {
        if (defined $self->{'list'}->{$doc_OID}) {
            # printing to an undefined handle is fatal, so fall back to
            # STDERR when no output handle has been set on the classifier
            my $outhandle = defined $self->{'outhandle'} ? $self->{'outhandle'} : \*STDERR;
            print {$outhandle} "WARNING: DateList::classify called multiple times for $doc_OID\n";
        }
        $self->{'list'}->{$doc_OID} = $date;
    }
}
96
97
# Build the classification structure for every document recorded so far.
# The recorded OIDs are ordered by their date strings (string comparison)
# and the ordered list is handed to splitlist(), whose result is returned.
sub get_classify_info {
    my $self = shift (@_);
    my $dates = $self->{'list'};

    # Documents with the same date would otherwise come out in hash order,
    # which Perl does not guarantee to be stable; break ties on the OID so
    # the classification is the same on every build.
    my @classlist = sort {
        $dates->{$a} cmp $dates->{$b} || $a cmp $b
    } keys %$dates;

    return $self->splitlist (\@classlist);
}
105
106
# Create one node of the classification structure.
#   $title     - stored under 'Title'
#   $childtype - stored under 'childtype'
#   $thistype  - optional; stored under 'thistype' only when it is defined
#                and contains at least one word character
# Returns a reference to a new hash whose 'contains' entry is a fresh,
# empty array for the caller to fill.
sub get_entry {
    my $self = shift (@_);
    my ($title, $childtype, $thistype) = @_;

    # organise into classification structure
    my $classifyinfo = {
        'childtype' => $childtype,
        'Title'     => $title,
        'contains'  => [],
    };

    if (defined $thistype && $thistype =~ /\w/) {
        $classifyinfo->{'thistype'} = $thistype;
    }

    return $classifyinfo;
}
120
# splitlist takes an ordered list of classifications (@$classlistref) and
# splits it up into sub-sections by date.
#   $classlistref - reference to the date-ordered list of document OIDs
# Returns the top-level "Date" entry (see get_entry).  With 39 or fewer
# documents the entry is a flat DateList holding every OID.  Otherwise it is
# an HList with one DateList child per bucket returned by compactlist(),
# the children being ordered by sorting their titles as strings.
sub splitlist {
    my $self = shift (@_);
    my ($classlistref) = @_;
    my $classhash = {};

    # lists of up to this many documents are shown on one page without
    # splitting: compactlist's max + min - 1 (30 + 10 - 1)
    my $flat_limit = 39;
    my $is_flat = (scalar (@$classlistref) <= $flat_limit);

    # top level
    my $childtype = $is_flat ? "DateList" : "HList";
    my $classifyinfo = $self->get_entry ("Date", $childtype, "Invisible");

    # don't need to do any splitting for a short list
    if ($is_flat) {
        foreach my $subOID (@$classlistref) {
            push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
        }
        return $classifyinfo;
    }

    # first split up the list into separate year (or, with the bymonth
    # option, year+month) classifications
    foreach my $classification (@$classlistref) {
        my $date = $self->{'list'}->{$classification};
        if ($self->{'bymonth'}) {
            $date =~ s/^(\d\d\d\d)(\d\d).*$/$1&nbsp;_textmonth$2_/;
            # sanity check if month is zero: fall back to the year alone
            if ($date =~ /00_$/) {
                $date =~ s/^(\d\d\d\d).*$/$1/g;
            }
        } else {
            $date =~ s/^(\d\d\d\d).*$/$1/;
        }
        $classhash->{$date} = [] unless defined $classhash->{$date};
        push (@{$classhash->{$date}}, $classification);
    }

    # merge small neighbouring buckets into ranges
    $classhash = $self->compactlist ($classhash);

    foreach my $subclass (sort keys %$classhash) {
        my $tempclassify = $self->get_entry($subclass, "DateList");
        foreach my $subsubOID (@{$classhash->{$subclass}}) {
            push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
        }
        push (@{$classifyinfo->{'contains'}}, $tempclassify);
    }

    return $classifyinfo;
}
175
# compactlist merges neighbouring date buckets so that pages are neither
# too short nor too long.
#   $classhashref - reference to a hash mapping a date key to a reference
#                   to the array of OIDs with that date
# Returns a reference to a new hash.  Its keys are either a single input
# key or a range "firstkey-lastkey"; its values are references to the
# merged OID arrays.  Input keys are processed in string-sorted order.
# An empty input gives an empty result, and no empty bucket is ever added.
sub compactlist {
    my $self = shift (@_);
    my ($classhashref) = @_;
    my $compactedhash = {};
    my @currentOIDs = ();
    my $currentfirstdate = "";
    my $currentlastdate = "";
    my $lastkey = "";

    # minimum and maximum documents to be displayed per page.
    # the actual maximum will be max + (min-1).
    # the smallest sub-section is a single date key, which is never
    # split, so a key with many documents can still give a page that is
    # many times max long.
    my $min = 10;
    my $max = 30;

    foreach my $subsection (sort keys %$classhashref) {
        my $incoming = $classhashref->{$subsection};
        $currentfirstdate = $subsection if $currentfirstdate eq "";

        # keep growing the pending bucket while it is too small, or while
        # this sub-section still fits under the maximum
        if ((scalar (@currentOIDs) < $min) ||
            ((scalar (@currentOIDs) + scalar (@$incoming)) <= $max)) {
            push (@currentOIDs, @$incoming);
            $currentlastdate = $subsection;
            next;
        }

        # the pending bucket is full: store it under its date or date range
        $lastkey = ($currentfirstdate eq $currentlastdate)
            ? $currentfirstdate
            : "$currentfirstdate-$currentlastdate";
        @{$compactedhash->{$lastkey}} = @currentOIDs;

        if (scalar (@$incoming) >= $max) {
            # this sub-section fills a page by itself; nothing is pending
            $compactedhash->{$subsection} = $incoming;
            @currentOIDs = ();
            $currentfirstdate = "";
            $currentlastdate = "";
            $lastkey = $subsection;
        } else {
            # start a new pending bucket with this sub-section
            @currentOIDs = @$incoming;
            $currentfirstdate = $subsection;
            $currentlastdate = $subsection;
        }
    }

    # nothing pending (empty input, or the last sub-section was stored on
    # its own): adding a bucket here would create an empty classification
    # under a stale or empty key
    return $compactedhash unless scalar (@currentOIDs);

    # add final OIDs to last sub-classification if there aren't many otherwise
    # add final sub-classification
    if ((scalar (@currentOIDs) < $min) && ($lastkey ne "")) {
        # want every thing in previous up to the dash
        my ($newkey) = $lastkey =~ /^([^\-]+)/;
        @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
        delete $compactedhash->{$lastkey};
        @{$compactedhash->{"$newkey-$currentlastdate"}} = @currentOIDs;
    } elsif ($currentfirstdate eq $currentlastdate) {
        @{$compactedhash->{$currentfirstdate}} = @currentOIDs;
    } else {
        @{$compactedhash->{"$currentfirstdate-$currentlastdate"}} = @currentOIDs;
    }

    return $compactedhash;
}
238
2391;
Note: See TracBrowser for help on using the repository browser.