source: trunk/gsdl/perllib/classify/DateList.pm@ 1483

Last change on this file since 1483 was 1483, checked in by sjboddie, 24 years ago

added -out option to classifiers

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.0 KB
Line 
1###########################################################################
2#
3# DateList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# classifier plugin for sorting by date
27
28# no options - always sorts by 'Date' metadata
29
30# date is assumed to be in the form yyyymmdd
31
32# at present dates are split by year - this should change
33
34package DateList;
35
36use BasClas;
37use sorttools;
38
39sub BEGIN {
40 @ISA = ('BasClas');
41}
42
43sub new {
44 my ($class, @options) = @_;
45 my $self = new BasClas ($class, @_);
46
47 $self->{'list'} = {};
48
49 return bless $self, $class;
50}
51
52sub init {
53 my $self = shift (@_);
54
55 $self->{'list'} = {};
56}
57
58sub classify {
59 my $self = shift (@_);
60 my ($doc_obj) = @_;
61
62 my $doc_OID = $doc_obj->get_OID();
63 my $date = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Date');
64
65 # if this document doesn't contain Date element we won't
66 # include it in this classification
67 if (defined $date && $date =~ /\d/) {
68 if (defined $self->{'list'}->{$doc_OID}) {
69 my $outhandle = $self->{'outhandle'};
70 print $outhandle "WARNING: DateList::classify called multiple times for $doc_OID\n";
71 }
72 $self->{'list'}->{$doc_OID} = $date;
73 }
74}
75
76
77sub get_classify_info {
78 my $self = shift (@_);
79
80 my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
81
82 return $self->splitlist (\@classlist);
83}
84
85
86sub get_entry {
87 my $self = shift (@_);
88 my ($title, $childtype, $thistype) = @_;
89
90 # organise into classification structure
91 my %classifyinfo = ('childtype'=>$childtype,
92 'Title'=>$title,
93 'contains'=>[]);
94 $classifyinfo{'thistype'} = $thistype
95 if defined $thistype && $thistype =~ /\w/;
96
97 return \%classifyinfo;
98}
99
100# splitlist takes an ordered list of classifications (@$classlistref) and splits it
101# up into sub-sections by date
102sub splitlist {
103 my $self = shift (@_);
104 my ($classlistref) = @_;
105 my $classhash = {};
106
107 # top level
108 my $childtype = "HList";
109 if (scalar (@$classlistref) <= 20) {$childtype = "DateList";}
110 my $classifyinfo = $self->get_entry ("Date", $childtype, "Invisible");
111
112 # don't need to do any splitting if there are less than 20 classifications
113 if ((scalar @$classlistref) <= 20) {
114 foreach $subOID (@$classlistref) {
115 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});
116 }
117 return $classifyinfo;
118 }
119
120 # first split up the list into separate year classifications
121 foreach $classification (@$classlistref) {
122 my $date = $self->{'list'}->{$classification};
123 $date =~ s/^(\d\d\d\d).*$/$1/;
124 $classhash->{$date} = [] unless defined $classhash->{$date};
125 push (@{$classhash->{$date}}, $classification);
126 }
127
128 $classhash = $self->compactlist ($classhash);
129
130 foreach $subclass (sort keys %$classhash) {
131 my $tempclassify = $self->get_entry($subclass, "DateList");
132 foreach $subsubOID (@{$classhash->{$subclass}}) {
133 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});
134 }
135 push (@{$classifyinfo->{'contains'}}, $tempclassify);
136 }
137
138 return $classifyinfo;
139}
140
141sub compactlist {
142 my $self = shift (@_);
143 my ($classhashref) = @_;
144 my $compactedhash = {};
145 my @currentOIDs = ();
146 my $currentfirstletter = "";
147 my $currentlastletter = "";
148 my $lastkey = "";
149
150 # minimum and maximum documents to be displayed per page.
151 # the actual maximum will be max + (min-1).
152 # the smallest sub-section is a single letter at present
153 # so in this case there may be many times max documents
154 # displayed on a page.
155 my $min = 10;
156 my $max = 30;
157
158 foreach $subsection (sort keys %$classhashref) {
159 $currentfirstletter = $subsection if $currentfirstletter eq "";
160 if ((scalar (@currentOIDs) < $min) ||
161 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {
162 push (@currentOIDs, @{$classhashref->{$subsection}});
163 $currentlastletter = $subsection;
164 } else {
165
166 if ($currentfirstletter eq $currentlastletter) {
167 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
168 $lastkey = $currentfirstletter;
169 } else {
170 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
171 $lastkey = "$currentfirstletter-$currentlastletter";
172 }
173 if (scalar (@{$classhashref->{$subsection}}) >= $max) {
174 $compactedhash->{$subsection} = $classhashref->{$subsection};
175 @currentOIDs = ();
176 $currentfirstletter = "";
177 $lastkey = $subsection;
178 } else {
179 @currentOIDs = @{$classhashref->{$subsection}};
180 $currentfirstletter = $subsection;
181 $currentlastletter = $subsection;
182 }
183 }
184 }
185
186 # add final OIDs to last sub-classification if there aren't many otherwise
187 # add final sub-classification
188 if (scalar (@currentOIDs) < $min) {
189 my ($newkey) = $lastkey =~ /^(\d\d\d\d)/;
190 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);
191 delete $compactedhash->{$lastkey};
192 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;
193 } else {
194 if ($currentfirstletter eq $currentlastletter) {
195 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;
196 } else {
197 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;
198 }
199 }
200
201 return $compactedhash;
202}
203
2041;
Note: See TracBrowser for help on using the repository browser.