source: gsdl/tags/3.04/perllib/classify/RecentDocumentsList.pm@ 20973

Last change on this file since 20973 was 18455, checked in by davidb, 15 years ago

Addition of 'edit_mode' parameter to classify(). This can be 'add', 'delete', or 'reindex' (we should think about renaming the last one to something more appropriate, e.g. 'update').

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.0 KB
Line 
1###########################################################################
2#
3# RecentDocumentsList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# simple list classifier plugin
27# to see the options, run "perl -S classinfo.pl RecentDocumentsList"
28
29package RecentDocumentsList;
30
31use BaseClassifier;
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34use sorttools;
35use Time::Local;
36
# Set up inheritance at compile time: this classifier derives from
# BaseClassifier.
BEGIN {
    @RecentDocumentsList::ISA = qw(BaseClassifier);
}
40
# Descriptors for the options this classifier accepts. The 'desc' values
# are brace-wrapped keys (presumably looked up in a string resource
# bundle -- confirm against gsprintf/classinfo.pl).
#
#   include_docs_added_since    - string, optional
#   include_most_recently_added - int, optional, default "20"
#   sort                        - metadata name, optional
my $arguments = [
    {
        'name' => "include_docs_added_since",
        'type' => "string",
        'reqd' => "no",
        'desc' => "{RecentDocumentsList.include_docs_added_since}",
    },
    {
        'name' => "include_most_recently_added",
        'type' => "int",
        'deft' => "20",
        'reqd' => "no",
        'desc' => "{RecentDocumentsList.include_most_recently_added}",
    },
    {
        'name' => "sort",
        'type' => "metadata",
        'reqd' => "no",
        'desc' => "{RecentDocumentsList.sort}",
    },
];

# Classifier-level option record; 'args' points at the descriptor list
# above (same array reference, not a copy).
my $options = {
    'name'     => "RecentDocumentsList",
    'desc'     => "{RecentDocumentsList.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
62
63
# Constructor.
#
# Arguments (after the class name):
#   $classifierslist - array ref; this class name is appended to it
#   $inputargs       - handed unchanged to the BaseClassifier constructor
#   $hashArgOptLists - hash ref; its "ArgList" array receives this
#                      classifier's argument descriptors and its "OptList"
#                      array receives this classifier's option record
#
# Returns the object produced by BaseClassifier->new, re-blessed into $class.
#
# Two modes are configured here:
#   * include_docs_added_since set (YYYY, YYYY-MM or YYYY-MM-DD, dashes
#     optional): 'classify_by_date' is 1 and 'classification_date' holds the
#     local-time epoch value of midnight on that date.
#   * otherwise: 'classify_by_date' is 0 and the most recently modified
#     documents are selected later, in get_classify_info.
# If both options are given, include_docs_added_since takes precedence.
#
# Dies (after reporting {RecentDocumentsList.date_wrong_format} on the
# object's 'outhandle') when the date does not match the expected pattern or
# cannot be converted to a time.
sub new {
    my ($class) = shift (@_);
    my ($classifierslist, $inputargs, $hashArgOptLists) = @_;

    push(@$classifierslist, $class);
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Direct method call rather than the indirect-object form
    # "new BaseClassifier(...)", whose parse depends on surrounding context.
    my $self = BaseClassifier->new($classifierslist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    # check the arguments
    if (!$self->{"buttonname"}) {
        $self->{"buttonname"} = 'RecentDocuments';
    }

    # we want either include_docs_added_since, or include_most_recently_added,
    # but not both; the date option wins when it is non-empty.
    my $since = $self->{'include_docs_added_since'};
    if (defined $since && $since ne "") {
        $self->{'classify_by_date'} = 1;

        my ($year, $month, $day) = $since =~ /^(\d\d\d\d)-?(\d\d)?-?(\d\d)?$/;
        if (!defined $year) {
            &gsprintf($self->{'outhandle'}, "RecentDocumentsList::init {RecentDocumentsList.date_wrong_format}\n");
            die "\n";
        }

        # A missing or out-of-range month resets the date to 1 January;
        # a missing or out-of-range day resets it to the 1st of the month.
        if (!defined $month || $month < 1 || $month > 12) {
            $month = "01";
            $day = "01";
        }
        elsif (!defined $day || $day < 1 || $day > 31) {
            $day = "01";
        }

        # timelocal croaks on impossible dates that pass the 1..31 check
        # (e.g. 30 February). Trap that and report it the same way as a
        # badly formatted date instead of letting the raw croak escape.
        my $cutoff = eval { timelocal(0, 0, 0, $day, $month - 1, $year) };
        if (!defined $cutoff) {
            &gsprintf($self->{'outhandle'}, "RecentDocumentsList::init {RecentDocumentsList.date_wrong_format}\n");
            die "\n";
        }
        $self->{'classification_date'} = $cutoff;
    }
    else {
        $self->{'classify_by_date'} = 0;
    }

    # Normalise an empty sort option to undef so later "defined" tests work.
    if (defined $self->{'sort'} && $self->{'sort'} eq "") {
        undef $self->{'sort'};
    }

    # Further setup
    $self->{'list'} = {};

    # if we are getting top X docs, and sorting by meta, we need to store the
    # date and the metadata
    if (!$self->{'classify_by_date'} && $self->{'sort'}) {
        $self->{'meta_list'} = {};
    }

    return bless $self, $class;
}
118
# Initialisation hook. This classifier has nothing to do here; all setup
# happens in new(). As the sub has no explicit return, the value of its
# last statement (the invocant) is what a caller would receive.
sub init {
    my $self = shift;
}
123
# Consider one document for inclusion in the classifier.
#
# Arguments:
#   $doc_obj   - document object; must provide get_OID, get_top_section,
#                get_metadata_element and add_metadata
#   $edit_mode - 'add', 'delete' or 'reindex'; only "delete" is treated
#                specially here
#
# Behaviour:
#   * Documents without top-level "lastmodified" metadata are reported on
#     the object's 'outhandle' and skipped.
#   * In delete mode the document is removed from 'list' via oid_hash_delete.
#   * In date mode ('classify_by_date'), a document whose lastmodified value
#     is numerically greater than 'classification_date' is recorded in
#     'list' (value: the sort metadata if a sort field is configured,
#     otherwise lastmodified) and tagged with "memberof" metadata.
#   * Otherwise every document is recorded in 'list' with its lastmodified
#     value (the cut to the top N happens in get_classify_info), and its
#     sort metadata, if configured, goes into 'meta_list'.
sub classify {
    my $self = shift (@_);
    my ($doc_obj, $edit_mode) = @_;

    my $doc_OID = $doc_obj->get_OID();
    my $top_section = $doc_obj->get_top_section();
    my $lastmodified = $doc_obj->get_metadata_element($top_section, "lastmodified");

    if (!defined $lastmodified || $lastmodified eq "") {
        # The handle is a hash element, so it must be wrapped in a block:
        # "print $self->{'outhandle'}, ..." would treat the handle as the
        # first item of the list and write to the default output instead.
        print { $self->{'outhandle'} } "RecentDocumentsList: $doc_OID has no lastmodified metadata, not classifying\n";
        return;
    }

    if (defined $edit_mode && $edit_mode eq "delete") {
        $self->oid_hash_delete($doc_OID, 'list');
        return;
    }

    # doc goes into classification if we are not classifying by date, or the
    # date is after the cutoff date.
    if ($self->{'classify_by_date'}) {
        if ($lastmodified > $self->{'classification_date'}) {
            my $sort_meta = $lastmodified;
            if (defined $self->{'sort'}) {
                $sort_meta = $doc_obj->get_metadata_element($top_section, $self->{'sort'});
            }
            $self->{'list'}->{$doc_OID} = $sort_meta;
            # Membership is known right now, so it can be recorded on the doc.
            $doc_obj->add_metadata($top_section, "memberof", "CL" . $self->get_number());
        }
    }
    else {
        # Keep the date for the "top N" cut, and the sort metadata (if any)
        # separately for the second sort pass.
        $self->{'list'}->{$doc_OID} = $lastmodified;
        if (defined $self->{'sort'}) {
            my $sort_meta = $doc_obj->get_metadata_element($top_section, $self->{'sort'});
            $self->{'meta_list'}->{$doc_OID} = $sort_meta;
        }
    }

}
161
162
# Build the classification structure from the documents collected by
# classify().
#
# Returns a hash ref with:
#   thistype         => 'Invisible'
#   childtype        => 'VList'
#   Title            => the configured 'buttonname'
#   supportsmemberof => "true"/"false", as reported by supports_memberof
#   contains         => array ref of { OID => ... } entries, in display order
#
# In date mode every collected document is listed, ordered by
# date_or_metadata_sort. Otherwise the list is ordered newest first, cut to
# 'include_most_recently_added' entries and, if a sort field is configured,
# re-ordered by that metadata via external_meta_sort.
sub get_classify_info {
    my $self = shift (@_);

    # organise into classification structure
    my %classifyinfo = ('thistype'  => 'Invisible',
                        'childtype' => 'VList',
                        'Title'     => $self->{'buttonname'},
                        'contains'  => []);

    # may or may not support memberof, depending on options set.
    # This must be a method call: invoked as a plain function the helper
    # receives no object and always answers "false".
    $classifyinfo{'supportsmemberof'} = $self->supports_memberof();

    # get either all documents (sorted by date), or the top X docs
    my @sorted_docs = sort { $self->date_or_metadata_sort($a, $b) } keys %{$self->{'list'}};
    my $numdocs = $self->{'include_most_recently_added'};
    if ($self->{'classify_by_date'}) {
        # just include all docs in the list
        $numdocs = scalar (@sorted_docs);
    }
    else {
        if ($numdocs > scalar (@sorted_docs)) {
            $numdocs = scalar (@sorted_docs);
        }
        if ($self->{'sort'}) {
            # we need to sort further by metadata:
            # cut off the list first, then sort the survivors again
            @sorted_docs = @sorted_docs[0 .. $numdocs - 1];
            @sorted_docs = sort { $self->external_meta_sort($a, $b) } @sorted_docs;
        }
    }

    for (my $i = 0; $i < $numdocs; $i++) {
        push (@{$classifyinfo{'contains'}}, {'OID' => $sorted_docs[$i]});
    }

    return \%classifyinfo;
}
205
# Report whether documents can be tagged with "memberof" metadata.
# That is only possible with the include_docs_added_since option
# ('classify_by_date' set): otherwise it is not known, at the time a
# document is classified, whether it will end up in the classifier.
# Returns the string "true" or "false".
sub supports_memberof {
    my ($self) = @_;

    return $self->{'classify_by_date'} ? "true" : "false";
}
215
# Comparison routine for the entries of 'list', given two document OIDs.
#
# When a sort field is configured AND the classifier is in date mode, the
# values held in 'list' are sort metadata and are compared as strings in
# ascending order. In every other case the values are dates and are
# compared numerically in reverse (latest to earliest).
# Returns -1, 0 or 1, as a sort block expects.
sub date_or_metadata_sort {
    my ($self, $oid_x, $oid_y) = @_;

    my $value_x = $self->{'list'}->{$oid_x};
    my $value_y = $self->{'list'}->{$oid_y};

    my $by_metadata = ($self->{'sort'} && $self->{'classify_by_date'});

    # meta sorting uses string cmp; date sorting is newest first
    return $by_metadata ? ($value_x cmp $value_y)
                        : ($value_y <=> $value_x);
}
228
# Comparison routine for the second sort pass in top-N mode: orders two
# document OIDs by the sort metadata stored in 'meta_list', using string
# comparison (ascending). Returns -1, 0 or 1.
sub external_meta_sort {
    my ($self, $oid_x, $oid_y) = @_;

    return ($self->{'meta_list'}->{$oid_x} cmp $self->{'meta_list'}->{$oid_y});
}
237
238
2391;
240
241
242
243
Note: See TracBrowser for help on using the repository browser.