source: trunk/gsdl/perllib/classify/DateList.pm@ 2018

Last change on this file since 2018 was 1839, checked in by paynter, 23 years ago

Updated classifiers to use the parsearg library instead of ad-hoc
"x=y" style parsing. (Backwards compatability maintained through
a quick hack to the load_classifier function in classfy.pm.)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.3 KB
Line 
1###########################################################################
2#
3# DateList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting by date
27
28# no options - always sorts by 'Date' metadata
29
30# date is assumed to be in the form yyyymmdd
31
32# at present dates are split by year - this should change
33
34package DateList;
35
36use BasClas;
37use sorttools;
38
39sub BEGIN {
40 @ISA = ('BasClas');
41}
42
43sub print_usage {
44 print STDERR "
45 usage: classify DateList
46
47Classifier plugin for sorting by date.
48No options - always sorts by 'Date' metadata
49Date is assumed to be in the form yyyymmdd
50At present dates are split by year - this should change
51
52Any errors are Dana's problem.
53";
54}
55
56sub new {
57 my $class = shift (@_);
58 my $self = new BasClas($class, @_);
59
60 $self->{'list'} = {};
61
62 return bless $self, $class;
63}
64
65sub init {
66 my $self = shift (@_);
67
68 $self->{'list'} = {};
69}
70
71sub classify {
72 my $self = shift (@_);
73 my ($doc_obj) = @_;
74
75 my $doc_OID = $doc_obj->get_OID();
76 my $date = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Date');
77
78 # if this document doesn't contain Date element we won't
79 # include it in this classification
80 if (defined $date && $date =~ /\d/) {
81 if (defined $self->{'list'}->{$doc_OID}) {
82 my $outhandle = $self->{'outhandle'};
83 print $outhandle "WARNING: DateList::classify called multiple times for $doc_OID\n";
84 }
85 $self->{'list'}->{$doc_OID} = $date;
86 }
87}
88
89
90sub get_classify_info {
91 my $self = shift (@_);
92
93 my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
94
95 return $self->splitlist (\@classlist);
96}
97
98
99sub get_entry {
100 my $self = shift (@_);
101 my ($title, $childtype, $thistype) = @_;
102
103 # organise into classification structure
104 my %classifyinfo = ('childtype'=>$childtype,
105 'Title'=>$title,
106 'contains'=>[]);
107 $classifyinfo{'thistype'} = $thistype
108 if defined $thistype && $thistype =~ /\w/;
109
110 return \%classifyinfo;
111}
112
113# splitlist takes an ordered list of classifications (@$classlistref) and splits it
114# up into sub-sections by date
115sub splitlist {
116 my $self = shift (@_);
117 my ($classlistref) = @_;
118 my $classhash = {};
119
120 # top level
121 my $childtype = "HList";
122 if (scalar (@$classlistref) <= 20) {$childtype = "DateList";}
123 my $classifyinfo = $self->get_entry ("Date", $childtype, "Invisible");
124
125 # don't need to do any splitting if there are less than 20 classifications
126 if ((scalar @$classlistref) <= 20) {
127 foreach $subOID (@$classlistref) {
128 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
129 }
130 return $classifyinfo;
131 }
132
133 # first split up the list into separate year classifications
134 foreach $classification (@$classlistref) {
135 my $date = $self->{'list'}->{$classification};
136 $date =~ s/^(\d\d\d\d).*$/$1/;
137 $classhash->{$date} = [] unless defined $classhash->{$date};
138 push (@{$classhash->{$date}}, $classification);
139 }
140
141 $classhash = $self->compactlist ($classhash);
142
143 foreach $subclass (sort keys %$classhash) {
144 my $tempclassify = $self->get_entry($subclass, "DateList");
145 foreach $subsubOID (@{$classhash->{$subclass}}) {
146 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
147 }
148 push (@{$classifyinfo->{'contains'}}, $tempclassify);
149 }
150
151 return $classifyinfo;
152}
153
154sub compactlist {
155 my $self = shift (@_);
156 my ($classhashref) = @_;
157 my $compactedhash = {};
158 my @currentOIDs = ();
159 my $currentfirstletter = "";
160 my $currentlastletter = "";
161 my $lastkey = "";
162
163 # minimum and maximum documents to be displayed per page.
164 # the actual maximum will be max + (min-1).
165 # the smallest sub-section is a single letter at present
166 # so in this case there may be many times max documents
167 # displayed on a page.
168 my $min = 10;
169 my $max = 30;
170
171 foreach $subsection (sort keys %$classhashref) {
172 $currentfirstletter = $subsection if $currentfirstletter eq "";
173 if ((scalar (@currentOIDs) < $min) ||
174 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
175 push (@currentOIDs, @{$classhashref->{$subsection}});
176 $currentlastletter = $subsection;
177 } else {
178
179 if ($currentfirstletter eq $currentlastletter) {
180 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
181 $lastkey = $currentfirstletter;
182 } else {
183 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
184 $lastkey = "$currentfirstletter-$currentlastletter";
185 }
186 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
187 $compactedhash->{$subsection} = $classhashref->{$subsection};
188 @currentOIDs = ();
189 $currentfirstletter = "";
190 $lastkey = $subsection;
191 } else {
192 @currentOIDs = @{$classhashref->{$subsection}};
193 $currentfirstletter = $subsection;
194 $currentlastletter = $subsection;
195 }
196 }
197 }
198
199 # add final OIDs to last sub-classification if there aren't many otherwise
200 # add final sub-classification
201 if (scalar (@currentOIDs) < $min) {
202 my ($newkey) = $lastkey =~ /^(\d\d\d\d)/;
203 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
204 delete $compactedhash->{$lastkey};
205 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
206 } else {
207 if ($currentfirstletter eq $currentlastletter) {
208 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
209 } else {
210 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
211 }
212 }
213
214 return $compactedhash;
215}
216
2171;
Note: See TracBrowser for help on using the repository browser.