source: main/tags/3.03/gsdl/perllib/classify/RecentDocumentsList.pm@ 21154

Last change on this file since 21154 was 13900, checked in by kjdon, 17 years ago

Renamed NewList to RecentDocumentsList, as that is more descriptive. Also modified the code quite a bit: it now allows yyyy and yyyymm dates, memberof handling is done properly, a sort arg has been added, and the names of the two existing args have been changed.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.0 KB
Line 
1###########################################################################
2#
3# RecentDocumentsList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# simple list classifier plugin
27# to see the options, run "perl -S classinfo.pl RecentDocumentsList"
28
29use BasClas;
30package RecentDocumentsList;
31
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34use sorttools;
35use Time::Local;
36
# Establish inheritance at compile time: this classifier derives from BasClas.
BEGIN {
    @RecentDocumentsList::ISA = qw(BasClas);
}
40
# Arguments specific to this classifier.  Each entry gives the argument
# name, a description key in braces (presumably resolved from the
# Greenstone strings resource -- confirm), its type, an optional default
# ('deft') and whether it is required.
my $arguments = [
    {
        'name' => 'include_docs_added_since',
        'desc' => '{RecentDocumentsList.include_docs_added_since}',
        'type' => 'string',
        'reqd' => 'no',
    },
    {
        'name' => 'include_most_recently_added',
        'desc' => '{RecentDocumentsList.include_most_recently_added}',
        'type' => 'int',
        'deft' => '20',
        'reqd' => 'no',
    },
    {
        'name' => 'sort',
        'desc' => '{RecentDocumentsList.sort}',
        'type' => 'metadata',
        'reqd' => 'no',
    },
];

# Description of the classifier itself; 'args' points at the argument
# table above.
my $options = {
    'name'     => 'RecentDocumentsList',
    'desc'     => '{RecentDocumentsList.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
62
63
# Constructor.
#
# Arguments (after the class name):
#   $classifierslist - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the BasClas constructor
#   $hashArgOptLists - hash ref; this classifier's argument and option
#                      descriptions are appended to its "ArgList" and
#                      "OptList" entries before BasClas parses the input
#
# Returns the object created by BasClas, re-blessed into $class.
#
# Dies (after printing {RecentDocumentsList.date_wrong_format} to the
# outhandle) if include_docs_added_since is set but is not a usable date.
sub new {
    my ($class) = shift (@_);

    my ($classifierslist, $inputargs, $hashArgOptLists) = @_;
    push(@$classifierslist, $class);

    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    my $self = BasClas->new($classifierslist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    # check the arguments
    if (!$self->{"buttonname"}) {
        $self->{"buttonname"} = 'RecentDocuments';
    }

    # A non-empty include_docs_added_since selects date mode (every document
    # newer than the cutoff is included); otherwise the classifier works in
    # "most recently added" mode.
    my $since = $self->{'include_docs_added_since'};
    if (defined $since && $since ne "") {
        $self->{'classify_by_date'} = 1;

        # accepts yyyy, yyyymm or yyyymmdd, with optional hyphens
        my ($year, $month, $day) = $since =~ /^(\d\d\d\d)-?(\d\d)?-?(\d\d)?$/;

        my $cutoff;
        if (defined $year) {
            if (!defined $month || $month < 1 || $month > 12) {
                # missing or unusable month: fall back to the start of the year
                $month = "01";
                $day = "01";
            } elsif (!defined $day || $day < 1 || $day > 31) {
                # missing or unusable day: fall back to the start of the month
                $day = "01";
            }
            # timelocal croaks on impossible dates such as 2007-02-31; trap
            # that so the user gets the normal wrong-format message instead
            # of a raw Time::Local error.
            $cutoff = eval { timelocal(0, 0, 0, $day, $month - 1, $year) };
        }

        if (!defined $cutoff) {
            &gsprintf($self->{'outhandle'}, "RecentDocumentsList::init {RecentDocumentsList.date_wrong_format}\n");
            die "\n";
        }

        $self->{'classification_date'} = $cutoff;
    } else {
        $self->{'classify_by_date'} = 0;
    }

    # treat an empty sort argument as "no sort requested"
    if (!defined $self->{'sort'} || $self->{'sort'} eq "") {
        $self->{'sort'} = undef;
    }

    # Further setup
    # OID -> date, or OID -> sort metadata in date mode with a sort field
    $self->{'list'} = {};
    # if we are getting top X docs, and sorting by meta, we need to store the
    # date and the metadata
    if (!$self->{'classify_by_date'} && $self->{'sort'}) {
        $self->{'meta_list'} = {};
    }
    return bless $self, $class;
}
118
# Initialisation hook.  This classifier has no per-run state to reset, so
# the invocant is simply handed back unchanged.
sub init {
    my ($self) = @_;

    return $self;
}
123
# Consider one document for inclusion in this classifier.
#
# Arguments:
#   $doc_obj - document object; must provide get_OID, get_top_section,
#              get_metadata_element and (in date mode) add_metadata
#
# A document with no "lastmodified" metadata on its top section is reported
# on the outhandle and skipped.
#
# Date mode (classify_by_date set): the document is recorded only if its
# lastmodified value is greater than the cutoff.  The value stored in 'list'
# is the sort metadata when a sort field is configured, otherwise the date,
# and "memberof" metadata naming this classifier is added to the document.
#
# Otherwise: every document is recorded with its date in 'list' (the cut to
# the most recent ones happens later), plus its sort metadata in 'meta_list'
# when a sort field is configured.
sub classify {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $doc_OID = $doc_obj->get_OID();
    my $top_section = $doc_obj->get_top_section();
    my $lastmodified = $doc_obj->get_metadata_element($top_section, "lastmodified");
    if (!defined $lastmodified || $lastmodified eq "") {
        # The block form is required: without it the handle would be treated
        # as the first item to print rather than as the destination.
        print { $self->{'outhandle'} } "RecentDocumentsList: $doc_OID has no lastmodified metadata, not classifying\n";
        return;
    }

    if ($self->{'classify_by_date'}) {
        # only documents modified after the cutoff date belong here
        if ($lastmodified > $self->{'classification_date'}) {
            my $sort_meta = $lastmodified;
            if (defined $self->{'sort'}) {
                $sort_meta = $doc_obj->get_metadata_element($top_section, $self->{'sort'});
            }
            $self->{'list'}->{$doc_OID} = $sort_meta;
            $doc_obj->add_metadata($top_section, "memberof", "CL".$self->get_number());
        }
    } else {
        # membership is decided later, so remember the date for every doc...
        $self->{'list'}->{$doc_OID} = $lastmodified;
        # ...and the sort metadata as well when a sort field is configured
        if (defined $self->{'sort'}) {
            $self->{'meta_list'}->{$doc_OID} =
                $doc_obj->get_metadata_element($top_section, $self->{'sort'});
        }
    }

}
156
157
# Build the classification structure for this classifier.
#
# Returns a hash ref with the keys 'thistype', 'childtype', 'Title',
# 'supportsmemberof' and 'contains'; 'contains' is an array ref of
# { 'OID' => ... } entries in display order.
#
# Date mode: every recorded document is included, ordered by
# date_or_metadata_sort.  Otherwise: the include_most_recently_added newest
# documents are included, re-ordered by the sort metadata when a sort field
# is configured.
sub get_classify_info {
    my $self = shift (@_);

    # organise into classification structure
    my %classifyinfo = ('thistype'=>'Invisible',
                        'childtype'=>'VList',
                        'Title'=>$self->{'buttonname'},
                        'contains'=>[]);

    # may or may not support memberof, depending on options set.
    # This must be a method call: a plain function call would not pass
    # $self, and the answer would always be "false".
    $classifyinfo{'supportsmemberof'} = $self->supports_memberof();

    # get either all documents (sorted by date), or the top X docs
    my @sorted_docs = sort { $self->date_or_metadata_sort($a, $b) } keys %{$self->{'list'}};
    my $numdocs = $self->{'include_most_recently_added'};
    if ($self->{'classify_by_date'}) {
        # just include all docs in the list
        $numdocs = scalar (@sorted_docs);
    } else {
        if ($numdocs > scalar (@sorted_docs)) {
            $numdocs = scalar (@sorted_docs);
        }
        if ($self->{'sort'}) {
            # keep only the newest documents, then order those by metadata
            @sorted_docs = @sorted_docs[0..$numdocs-1];
            @sorted_docs = sort { $self->external_meta_sort($a, $b) } @sorted_docs;
        }
    }

    # an empty range (0..-1) simply yields no entries
    foreach my $doc_OID (@sorted_docs[0..$numdocs-1]) {
        push (@{$classifyinfo{'contains'}}, {'OID' => $doc_OID});
    }

    return \%classifyinfo;
}
200
# Report whether this classifier supports memberof metadata, as the string
# "true" or "false".  Membership is only known while a document is being
# classified when the include_docs_added_since option is in use; in
# "most recently added" mode the final contents are not known until every
# document has been seen.
sub supports_memberof {
    my ($self) = @_;

    return $self->{'classify_by_date'} ? "true" : "false";
}
210
# Comparison routine for the OIDs recorded in 'list'.
#
# When a sort field is configured AND the classifier is in date mode, the
# stored values are sort metadata and are compared as strings, ascending.
# In every other case the stored values are dates and are compared
# numerically, newest first.
sub date_or_metadata_sort {
    my ($self, $first_oid, $second_oid) = @_;

    my $first_value  = $self->{'list'}->{$first_oid};
    my $second_value = $self->{'list'}->{$second_oid};

    if ($self->{'sort'} && $self->{'classify_by_date'}) {
        # meta sorting, use string cmp
        return ($first_value cmp $second_value);
    }

    # want reverse order (latest to earliest)
    return ($second_value <=> $first_value);
}
223
# Comparison routine that orders two OIDs by the sort metadata kept in
# 'meta_list', using an ascending string comparison.
sub external_meta_sort {
    my ($self, $first_oid, $second_oid) = @_;

    return ($self->{'meta_list'}->{$first_oid} cmp $self->{'meta_list'}->{$second_oid});
}
232
233
2341;
235
236
237
238
Note: See TracBrowser for help on using the repository browser.