source: main/trunk/greenstone2/perllib/classify/RecentDocumentsList.pm@ 23116

Last change on this file since 23116 was 23116, checked in by kjdon, 14 years ago

for incremental build, classifiers are not really done incrementally. Previously, we reconstructed all the docs from the database, and classified them, then processed any new/edited/deleted docs, updating the classifier as necessary. Now, we process all new/updated docs, then reconstruct the docs from the database, but only classify those not changed/deleted. This means that we are only ever adding docs to a classifier, never updating or deleting. I have removed edit_mode and all code handling deleting stuff from the classifier.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.0 KB
Line 
1###########################################################################
2#
3# RecentDocumentsList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
# classifier plugin that lists recently modified documents: either the N most recent, or all those modified since a given date
27# to see the options, run "perl -S classinfo.pl RecentDocumentsList"
28
29package RecentDocumentsList;
30
31use BaseClassifier;
32use strict;
33no strict 'refs'; # allow filehandles to be variables and viceversa
34use sorttools;
35use Time::Local;
36
# Set up inheritance from BaseClassifier at compile time.
BEGIN {
    @RecentDocumentsList::ISA = qw(BaseClassifier);
}
40
# Arguments specific to this classifier. The 'desc' values are placeholder
# keys in curly braces, not literal descriptions.
#   include_docs_added_since    - date string; parsed in new() as
#                                 yyyy, yyyy-mm or yyyy-mm-dd (dashes optional)
#   include_most_recently_added - number of documents to list (default 20)
#   sort                        - optional metadata element to order the list by
my $arguments = [
    {
        'name' => "include_docs_added_since",
        'desc' => "{RecentDocumentsList.include_docs_added_since}",
        'type' => "string",
        'reqd' => "no",
    },
    {
        'name' => "include_most_recently_added",
        'desc' => "{RecentDocumentsList.include_most_recently_added}",
        'type' => "int",
        'deft' => "20",
        'reqd' => "no",
    },
    {
        'name' => "sort",
        'desc' => "{RecentDocumentsList.sort}",
        'type' => "metadata",
        'reqd' => "no",
    },
];

# Option block describing this classifier; new() pushes it onto the
# caller-supplied "OptList".
my $options = {
    'name'     => "RecentDocumentsList",
    'desc'     => "{RecentDocumentsList.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
62
63
# Constructor.
#
# Registers this class, its arguments and its options with the lists passed
# in, lets BaseClassifier build the object, then works out which mode the
# classifier runs in:
#   - classify_by_date = 1 when include_docs_added_since is set; the cutoff
#     is stored as an epoch value in classification_date
#   - classify_by_date = 0 otherwise (the include_most_recently_added case)
#
# Dies (after reporting via gsprintf) if include_docs_added_since does not
# match the expected date pattern.
sub new {
    my $class = shift;
    my ($classifierslist, $inputargs, $hashArgOptLists) = @_;

    push @{$classifierslist}, $class;
    push @{ $hashArgOptLists->{"ArgList"} }, @{$arguments};
    push @{ $hashArgOptLists->{"OptList"} }, $options;

    my $self = BaseClassifier->new($classifierslist, $inputargs, $hashArgOptLists);

    # when only information is wanted, don't worry about any options etc
    return bless $self, $class if $self->{'info_only'};

    # check the arguments
    $self->{"buttonname"} ||= 'RecentDocuments';

    # we want either include_docs_added_since, or include_most_recently_added,
    # but not both; a non-empty date selects the by-date mode.
    my $since = $self->{'include_docs_added_since'};
    if (defined $since && $since ne "") {
        $self->{'classify_by_date'} = 1;

        my ($year, $month, $day) = $since =~ /^(\d\d\d\d)-?(\d\d)?-?(\d\d)?$/;
        unless (defined $year) {
            &gsprintf($self->{'outhandle'}, "RecentDocumentsList::init {RecentDocumentsList.date_wrong_format}\n");
            die "\n";
        }

        # a missing or out-of-range month resets both month and day;
        # a missing or out-of-range day resets only the day
        my $month_ok = defined $month && $month >= 1 && $month <= 12;
        my $day_ok   = defined $day   && $day >= 1   && $day <= 31;
        if (!$month_ok) {
            ($month, $day) = ("01", "01");
        }
        elsif (!$day_ok) {
            $day = "01";
        }

        # midnight local time on the cutoff day (timelocal months are 0-based)
        $self->{'classification_date'} = timelocal(0, 0, 0, $day, $month - 1, $year);
    }
    else {
        $self->{'classify_by_date'} = 0;
    }

    # an empty sort option is treated as no sort option at all
    $self->{'sort'} = undef if $self->{'sort'} eq "";
    $self->{'sort'} = $self->strip_ex_from_metadata($self->{'sort'});

    # Further setup
    $self->{'list'} = {};

    # if we are getting top X docs, and sorting by meta, we need to store the
    # date and the metadata
    $self->{'meta_list'} = {} if !$self->{'classify_by_date'} && $self->{'sort'};

    return bless $self, $class;
}
120
# Initialisation hook. Nothing to do here: all per-instance state is set up
# in new(). The assignment below is the sub's last statement, so the
# invocant is what gets returned.
sub init {
    my $self = shift;
}
125
# Record one document for later inclusion in the classifier.
#
# Parameters:
#   $doc_obj - document object; must provide get_OID, get_top_section,
#              get_metadata_element and add_metadata.
#
# Documents without "lastmodified" metadata on their top section are
# reported on the output handle and skipped.
#
# By-date mode (classify_by_date true): the document is stored only if its
# lastmodified value is after classification_date. The stored value is the
# sort metadata when a sort element is configured, otherwise lastmodified.
# The document is also tagged with "memberof" metadata.
#
# Top-N mode: every document is stored with its lastmodified value; when a
# sort element is configured its value is kept separately in meta_list so
# the final selection can be re-ordered later.
sub classify {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $doc_OID = $doc_obj->get_OID();
    my $top_section = $doc_obj->get_top_section();
    my $lastmodified = $doc_obj->get_metadata_element($top_section, "lastmodified");
    if (!defined $lastmodified || $lastmodified eq "") {
        # The handle must be given in block form. Written as
        # "print $self->{'outhandle'}, ..." Perl treats the handle as the
        # first item to print and sends everything to the selected handle.
        print { $self->{'outhandle'} } "RecentDocumentsList: $doc_OID has no lastmodified metadata, not classifying\n";
        return;
    }

    # doc goes into classification if we are not classifying by date, or the
    # date is after the cutoff date.
    if ($self->{'classify_by_date'}) {
        if ($lastmodified > $self->{'classification_date'}) {
            my $sort_meta = $lastmodified;
            if (defined $self->{'sort'}) {
                $sort_meta = $doc_obj->get_metadata_element($top_section, $self->{'sort'});
            }
            $self->{'list'}->{$doc_OID} = $sort_meta;
            $doc_obj->add_metadata($top_section, "memberof", "CL".$self->get_number());
        }
    } else {
        # membership is decided later, so keep the date for every document,
        # and the sort metadata as well when sorting is requested
        $self->{'list'}->{$doc_OID} = $lastmodified;
        if (defined $self->{'sort'}) {
            my $sort_meta = $doc_obj->get_metadata_element($top_section, $self->{'sort'});
            $self->{'meta_list'}->{$doc_OID} = $sort_meta;
        }
    }

}
158
159
# Build the classification structure from the documents gathered by
# classify().
#
# Returns a hash reference with keys thistype, childtype, Title,
# supportsmemberof and contains; contains is a list of { OID => ... }
# entries in display order.
#
# By-date mode: every stored document is included, in the order given by
# date_or_metadata_sort. Top-N mode: the list is cut to
# include_most_recently_added entries and, if a sort element is configured,
# those entries are re-sorted by that metadata.
sub get_classify_info {
    my $self = shift (@_);

    # organise into classification structure
    my %classifyinfo = ('thistype'=>'Invisible',
                        'childtype'=>'VList',
                        'Title'=>$self->{'buttonname'},
                        'contains'=>[]);

    # may or may not support memberof, depending on options set.
    # This has to be a method call: as a plain function call the sub gets
    # no invocant and therefore always answers "false".
    $classifyinfo{'supportsmemberof'} = $self->supports_memberof();

    # get either all documents (sorted by date), or the top X docs
    my @sorted_docs = sort {$self->date_or_metadata_sort($a,$b)} keys %{$self->{'list'}};
    my $numdocs = $self->{'include_most_recently_added'};
    if ($self->{'classify_by_date'}) {
        # just include all docs in the list
        $numdocs = scalar (@sorted_docs);
    } else {
        if ($numdocs > scalar (@sorted_docs)) {
            $numdocs = scalar (@sorted_docs);
        }
        if ($self->{'sort'}) {
            # we need to sort further by metadata:
            # cut off the list first, then sort what is left
            @sorted_docs = @sorted_docs[0..$numdocs-1];
            @sorted_docs = sort {$self->external_meta_sort($a,$b)} @sorted_docs;
        }
    }

    # an empty range (numdocs of zero or less) simply adds nothing
    push @{$classifyinfo{'contains'}}, map { {'OID' => $_} } @sorted_docs[0..$numdocs-1];

    return \%classifyinfo;
}
202
# memberof can only be supported with the include_docs_added_since option
# (classify_by_date set). In the other mode it is not known, at the time a
# document is classified, whether it will end up in the classifier.
# Returns the string "true" or "false".
sub supports_memberof {
    my $self = shift;

    return $self->{'classify_by_date'} ? "true" : "false";
}
212
# Comparison routine for the first sort pass over the values held in
# $self->{'list'}.
#
# When both a sort element and classify_by_date are set, the stored values
# are compared as strings in ascending order. In every other case they are
# compared numerically in reverse order (latest to earliest).
#
# Parameters: the two document OIDs to compare.
# Returns: -1, 0 or 1 as for any sort comparator.
sub date_or_metadata_sort {
    my ($self, $oid_left, $oid_right) = @_;

    my $value_left  = $self->{'list'}->{$oid_left};
    my $value_right = $self->{'list'}->{$oid_right};

    # meta sorting, use string cmp
    if ($self->{'sort'} && $self->{'classify_by_date'}) {
        return ($value_left cmp $value_right);
    }

    # want reverse order (latest to earliest)
    return ($value_right <=> $value_left);
}
225
# Comparison routine for the second sort pass: orders two document OIDs by
# the values stored for them in $self->{'meta_list'}, using an ascending
# string comparison.
sub external_meta_sort {
    my ($self, $oid_left, $oid_right) = @_;

    return ($self->{'meta_list'}->{$oid_left} cmp $self->{'meta_list'}->{$oid_right});
}
234
235
2361;
237
238
239
240
Note: See TracBrowser for help on using the repository browser.