source: trunk/gsdl/perllib/classify/DateList.pm@ 2916

Last change on this file since 2916 was 2916, checked in by jrm21, 22 years ago

Tidied up the usage output.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.1 KB
Line 
1###########################################################################
2#
3# DateList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting by date
27
28# always sorts by 'Date' metadata
29
30# date is assumed to be in the form yyyymmdd
31
32# at present dates are split by year - this should change
33# jrm21 - added option "bymonth", which splits by year and month.
34
35package DateList;
36
37use BasClas;
38use sorttools;
39
# Set up inheritance at compile time: this classifier derives from BasClas.
BEGIN {
    @ISA = qw(BasClas);
}
43
# Write the classifier's usage/help text to STDERR.
# The text contains nothing to interpolate, so a literal heredoc is used.
sub print_usage {
    print STDERR <<'END_USAGE';

 usage: classify DateList [options]
 options:
 -bymonth [or bymonth=1] Classify by year and month

 Classifier plugin for sorting by date, and assumes that 'Date' metadata
 exists. Date is assumed to be in the form yyyymmdd (all digits).
 By default dates are classified by year.

END_USAGE
}
56
# Constructor.
#
# Arguments: the class name followed by the classifier options; the same
# option list is handed both to the BasClas constructor and to
# parsargv::parse.
#
# Sets up an empty 'list' hash on the object and asks parsargv::parse to
# fill in $self->{'bymonth'} from the "bymonth" option
# (NOTE(review): the exact option syntax accepted is defined by parsargv,
# which is not visible here).  Unrecognised options are tolerated via
# "allow_extra_options".
#
# On a parse failure the usage text is printed and the constructor dies.
# Returns the object blessed into $class.
sub new {
    my $class = shift (@_);

    # Explicit arrow call rather than "new BasClas(...)": the indirect-object
    # form is parsed heuristically and this package defines its own new().
    my $self = BasClas->new($class, @_);

    $self->{'list'} = {};
    if (!parsargv::parse(\@_,
                         q^bymonth^, \$self->{'bymonth'},
                         "allow_extra_options")) {
        print_usage();
        die "\n";
    }
    return bless $self, $class;
}
70
# Reset the classifier: throw away everything collected so far by
# replacing the 'list' hash with a fresh empty one.
sub init {
    my ($self) = @_;

    $self->{'list'} = {};
}
76
# Record one document for later classification.
#
# Reads the 'Date' metadata of the document's top section.  Documents whose
# Date is missing or contains no digit are left out of the classification.
# If the same OID is seen twice a warning is written to the object's
# 'outhandle' and the later date replaces the earlier one.
sub classify {
    my ($self, $doc_obj) = @_;

    my $oid      = $doc_obj->get_OID();
    my $top      = $doc_obj->get_top_section();
    my $datemeta = $doc_obj->get_metadata_element ($top, 'Date');

    if (defined $datemeta && $datemeta =~ /\d/) {
        my $seen = $self->{'list'};
        if (defined $seen->{$oid}) {
            my $out = $self->{'outhandle'};
            print $out "WARNING: DateList::classify called multiple times for $oid\n";
        }
        $seen->{$oid} = $datemeta;
    }
}
94
95
# Build the classification structure for everything collected in 'list'.
# The OIDs are ordered by a string comparison of their recorded dates and
# the ordered list is then handed to splitlist.
sub get_classify_info {
    my ($self) = @_;

    my $date_of = $self->{'list'};
    my @ordered = sort { $date_of->{$a} cmp $date_of->{$b} } keys %$date_of;

    return $self->splitlist (\@ordered);
}
103
104
# Create one node of the classification structure.
#
#   $title     - stored under 'Title'
#   $childtype - stored under 'childtype'
#   $thistype  - optional; stored under 'thistype' only when it is defined
#                and contains at least one word character
#
# Returns a reference to a hash that also holds an empty 'contains' list.
sub get_entry {
    my ($self, $title, $childtype, $thistype) = @_;

    my $entry = {
        'childtype' => $childtype,
        'Title'     => $title,
        'contains'  => [],
    };

    if (defined $thistype && $thistype =~ /\w/) {
        $entry->{'thistype'} = $thistype;
    }

    return $entry;
}
118
# splitlist takes an ordered list of classifications (@$classlistref) and
# splits it up into sub-sections by date.
#
# The date of each classification is looked up in $self->{'list'}.  Dates
# are grouped by their first four digits (year), or, when
# $self->{'bymonth'} is true, by year and month; a month of "00" falls back
# to the year alone.  The groups are then passed through compactlist.
#
# Returns the top-level entry (Title "Date", thistype "Invisible").  With at
# most 39 classifications it directly contains {'OID'=>...} items and has
# childtype "DateList"; otherwise it has childtype "HList" and contains one
# "DateList" entry per compacted group, in sorted key order.
sub splitlist {
    my $self = shift (@_);
    my ($classlistref) = @_;

    # don't need to do any splitting if there are at most 39
    # (max + min - 1) classifications
    my $nosplit = (scalar (@$classlistref) <= 39);

    # top level
    my $childtype = $nosplit ? "DateList" : "HList";
    my $classifyinfo = $self->get_entry ("Date", $childtype, "Invisible");

    if ($nosplit) {
        foreach my $subOID (@$classlistref) {
            push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
        }
        return $classifyinfo;
    }

    # first split up the list into separate year (or year+month)
    # classifications
    my $classhash = {};
    foreach my $classification (@$classlistref) {
        my $date = $self->{'list'}->{$classification};
        if ($self->{'bymonth'}) {
            $date =~ s/^(\d\d\d\d)(\d\d).*$/$1&nbsp;_textmonth$2_/;
            # sanity check if month is zero
            if ($date =~ /00_$/) {
                $date =~ s/^(\d\d\d\d).*$/$1/;
            }
        } else {
            $date =~ s/^(\d\d\d\d).*$/$1/;
        }
        push (@{$classhash->{$date}}, $classification);
    }
    $classhash = $self->compactlist ($classhash);

    foreach my $subclass (sort keys %$classhash) {
        my $tempclassify = $self->get_entry($subclass, "DateList");
        foreach my $subsubOID (@{$classhash->{$subclass}}) {
            push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
        }
        push (@{$classifyinfo->{'contains'}}, $tempclassify);
    }

    return $classifyinfo;
}
173
# compactlist merges neighbouring sub-sections so that each resulting
# sub-section holds a reasonable number of documents.
#
#   $classhashref - hash ref mapping a sub-section key to an array ref of
#                   OIDs; keys are processed in sorted (string) order
#
# Returns a new hash ref.  A group built from a single key keeps that key;
# a group spanning several keys is named "<first key>-<last key>".
# An empty input gives an empty result, and no empty groups are created.
sub compactlist {
    my $self = shift (@_);
    my ($classhashref) = @_;
    my $compactedhash = {};
    my @currentOIDs = ();
    my $currentfirstdate = "";
    my $currentlastdate = "";
    my $lastkey = "";

    # minimum and maximum documents to be displayed per page.
    # the actual maximum will be max + (min-1).
    # the smallest sub-section is a single key of the input hash, so a key
    # that alone holds more than max documents still ends up on one page.
    my $min = 10;
    my $max = 30;

    foreach my $subsection (sort keys %$classhashref) {
        my $subsectionOIDs = $classhashref->{$subsection};
        $currentfirstdate = $subsection if $currentfirstdate eq "";

        # keep growing the current group while it is still too small, or
        # while adding this sub-section does not overflow it
        if ((scalar (@currentOIDs) < $min) ||
            ((scalar (@currentOIDs) + scalar (@$subsectionOIDs)) <= $max)) {
            push (@currentOIDs, @$subsectionOIDs);
            $currentlastdate = $subsection;
            next;
        }

        # the current group is complete - store it
        $lastkey = ($currentfirstdate eq $currentlastdate)
            ? $currentfirstdate
            : "$currentfirstdate-$currentlastdate";
        @{$compactedhash->{$lastkey}} = @currentOIDs;

        if (scalar (@$subsectionOIDs) >= $max) {
            # big enough to stand on its own; nothing is pending afterwards
            $compactedhash->{$subsection} = $subsectionOIDs;
            @currentOIDs = ();
            $currentfirstdate = "";
            $currentlastdate = "";
            $lastkey = $subsection;
        } else {
            # start a new group with this sub-section
            @currentOIDs = @$subsectionOIDs;
            $currentfirstdate = $subsection;
            $currentlastdate = $subsection;
        }
    }

    # nothing left over (empty input, or the last sub-section was stored on
    # its own) - do not create an empty group with a bogus name
    return $compactedhash unless scalar (@currentOIDs);

    # add final OIDs to last sub-classification if there aren't many
    # otherwise add final sub-classification
    if ((scalar (@currentOIDs) < $min) && ($lastkey ne "")) {
        # want every thing in previous up to the dash
        my ($newkey) = $lastkey =~ /^([^\-]+)/;
        $newkey = $lastkey unless defined $newkey;
        @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
        delete $compactedhash->{$lastkey};
        @{$compactedhash->{"$newkey-$currentlastdate"}} = @currentOIDs;
    } elsif ($currentfirstdate eq $currentlastdate) {
        @{$compactedhash->{$currentfirstdate}} = @currentOIDs;
    } else {
        @{$compactedhash->{"$currentfirstdate-$currentlastdate"}} = @currentOIDs;
    }

    return $compactedhash;
}
236
2371;
Note: See TracBrowser for help on using the repository browser.