source: trunk/gsdl/perllib/classify/BasClas.pm@ 10620

Last change on this file since 10620 was 10620, checked in by kjdon, 19 years ago

now prints out some gli tags when bad args are encountered for plugins and classifiers ($gli not set here, so just printed them always). Also, for BasPlug, set up the encoding values outside of new(), otherwise the list gets added in multiple times

  • Property svn:keywords set to Author Date Id Revision
File size: 9.3 KB
Line 
1###########################################################################
2#
3# BasClas.pm -- base class for all classifiers
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2000 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package BasClas;
28
29# How a classifier works.
30#
31# For each classifier requested in the collect.cfg file, buildcol.pl creates
32# a new classifier object (a subclass of BasClas). Later, it passes each
33# document object to each classifier in turn for classification.
34#
35# Four primary functions are used:
36#
37# 1. "new" is called before the documents are processed to set up the
38# classifier.
39#
40# 2. "init" is called after buildcol.pl has created the indexes etc but
41# before the documents are classified in order that the classifier might
42# set any variables it requires, etc.
43#
44# 3. "classify" is called once for each document object. The classifier
45# "classifies" each document and updates its local data accordingly.
46#
47# 4. "get_classify_info" is called after every document has been
48# classified. It collates the information about the documents and
49# stores a reference to the classifier so that Greenstone can later
50# display it.
51
52# 09/05/02 Added usage datastructure - John Thompson
53# 28/11/03 Commented out verbosity argument - John Thompson
54
55use gsprintf;
56use printusage;
57use parse2;
58
59# suppress the annoying "subroutine redefined" warning that various
60# classifiers cause under perl 5.6
61$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
62
63use strict;
64no strict 'subs'; # allow barewords (eg STDERR) as function arguments
65no strict 'refs'; # allow filehandles to be variables and viceversa
66
67my $arguments =
68 [ { 'name' => "builddir",
69 'desc' => "{BasClas.builddir}",
70 'type' => "string",
71 'deft' => "" },
72 { 'name' => "outhandle",
73 'desc' => "{BasClas.outhandle}",
74 'type' => "string",
75 'deft' => "STDERR" },
76 { 'name' => "verbosity",
77 'desc' => "{BasClas.verbosity}",
78# 'type' => "enum",
79 'type' => "int",
80 'deft' => "2",
81 'reqd' => "no" },
82# { 'name' => "ignore_namespace",
83# 'desc' => "{BasClas.ignore_namespace}",
84# 'type' => "flag"}
85 ];
86
87my $options = { 'name' => "BasClas",
88 'desc' => "{BasClas.desc}",
89 'abstract' => "yes",
90 'inherits' => "no",
91 'args' => $arguments };
92
93
94sub gsprintf
95{
96 return &gsprintf::gsprintf(@_);
97}
98
99
100sub print_xml_usage
101{
102 my $self = shift(@_);
103
104 # XML output is always in UTF-8
105 &gsprintf::output_strings_in_UTF8;
106
107 &PrintUsage::print_xml_header();
108 $self->print_xml();
109}
110
111
112sub print_xml
113{
114 my $self = shift(@_);
115
116 my $optionlistref = $self->{'option_list'};
117 my @optionlist = @$optionlistref;
118 my $classifieroptions = shift(@$optionlistref);
119 return if (!defined($classifieroptions));
120
121 &gsprintf(STDERR, "<ClassInfo>\n");
122 &gsprintf(STDERR, " <Name>$classifieroptions->{'name'}</Name>\n");
123 my $desc = &gsprintf::lookup_string($classifieroptions->{'desc'});
124 $desc =~ s/</&amp;lt;/g; # doubly escaped
125 $desc =~ s/>/&amp;gt;/g;
126 &gsprintf(STDERR, " <Desc>$desc</Desc>\n");
127 &gsprintf(STDERR, " <Abstract>$classifieroptions->{'abstract'}</Abstract>\n");
128 &gsprintf(STDERR, " <Inherits>$classifieroptions->{'inherits'}</Inherits>\n");
129 &gsprintf(STDERR, " <Arguments>\n");
130 if (defined($classifieroptions->{'args'})) {
131 &PrintUsage::print_options_xml($classifieroptions->{'args'});
132 }
133
134 # Recurse up the classifier hierarchy
135 $self->print_xml();
136
137 &gsprintf(STDERR, " </Arguments>\n");
138 &gsprintf(STDERR, "</ClassInfo>\n");
139}
140
141
142sub print_txt_usage
143{
144 my $self = shift(@_);
145
146 # Print the usage message for a classifier (recursively)
147 my $descoffset = $self->determine_description_offset(0);
148 $self->print_classifier_usage($descoffset, 1);
149}
150
151
152sub determine_description_offset
153{
154 my $self = shift(@_);
155 my $maxoffset = shift(@_);
156
157 my $optionlistref = $self->{'option_list'};
158 my @optionlist = @$optionlistref;
159 my $classifieroptions = pop(@$optionlistref);
160 return $maxoffset if (!defined($classifieroptions));
161
162 # Find the length of the longest option string of this classifier
163 my $classifierargs = $classifieroptions->{'args'};
164 if (defined($classifierargs)) {
165 my $longest = &PrintUsage::find_longest_option_string($classifierargs);
166 if ($longest > $maxoffset) {
167 $maxoffset = $longest;
168 }
169 }
170
171 # Recurse up the classifier hierarchy
172 $maxoffset = $self->determine_description_offset($maxoffset);
173 $self->{'option_list'} = \@optionlist;
174 return $maxoffset;
175}
176
177
178sub print_classifier_usage
179{
180 my $self = shift(@_);
181 my $descoffset = shift(@_);
182 my $isleafclass = shift(@_);
183
184 my $optionlistref = $self->{'option_list'};
185 my @optionlist = @$optionlistref;
186 my $classifieroptions = shift(@$optionlistref);
187 return if (!defined($classifieroptions));
188
189 my $classifiername = $classifieroptions->{'name'};
190 my $classifierargs = $classifieroptions->{'args'};
191 my $classifierdesc = $classifieroptions->{'desc'};
192 # Produce the usage information using the data structure above
193 if ($isleafclass) {
194 if (defined($classifierdesc)) {
195 &gsprintf(STDERR, "$classifierdesc\n\n");
196 }
197 &gsprintf(STDERR, " {common.usage}: classify $classifiername [{common.options}]\n\n");
198
199 }
200
201 # Display the classifier options, if there are some
202 if (defined($classifierargs)) {
203 # Calculate the column offset of the option descriptions
204 my $optiondescoffset = $descoffset + 2; # 2 spaces between options & descriptions
205
206 if ($isleafclass) {
207 &gsprintf(STDERR, " {common.specific_options}:\n");
208 }
209 else {
210 &gsprintf(STDERR, " {common.general_options}:\n", $classifiername);
211 }
212
213 # Display the classifier options
214 &PrintUsage::print_options_txt($classifierargs, $optiondescoffset);
215 }
216
217 # Recurse up the classifier hierarchy
218 $self->print_classifier_usage($descoffset, 0);
219 $self->{'option_list'} = \@optionlist;
220}
221
222
223sub new {
224 my ($class) = shift (@_);
225 my ($classifierslist,$args,$hashArgOptLists) = @_;
226 push(@$classifierslist, $class);
227 my $classifier_name = (defined $classifierslist->[0]) ? $classifierslist->[0] : $class;
228
229 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
230 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
231
232
233 # Manually set $self parameters.
234 my $self = {};
235 $self->{'outhandle'} = STDERR;
236 $self->{'idnum'} = -1;
237 $self->{'option_list'} = $hashArgOptLists->{"OptList"};
238 $self->{"info_only"} = 0;
239
240 # Check if gsdlinfo is in the argument list or not - if it is, don't parse
241 # the args, just return the object.
242 foreach my $strArg (@{$args})
243 {
244 if($strArg eq "-gsdlinfo")
245 {
246 $self->{"info_only"} = 1;
247 return bless $self, $class;
248 }
249 }
250
251 # general options available to all classifiers
252 if(!parse2::parse($args,$hashArgOptLists->{"ArgList"},$self))
253 {
254 #print out the text usage of this classifier.
255 my $classTempClass = bless $self, $class;
256 print STDERR "<BadClassifier c=$classifier_name>\n";
257
258 &gsprintf(STDERR, "\n{BasClas.bad_general_option}\n", $classifier_name);
259 $classTempClass->print_txt_usage(""); # Use default resource bundle
260 die "\n";
261 }
262
263 delete $self->{"info_only"};
264 return bless $self, $class;
265}
266
267sub init {
268 my $self = shift (@_);
269
270 $self->{'supportsmemberof'} = &supports_memberof();
271}
272
273sub set_number {
274 my $self = shift (@_);
275 my ($id) = @_;
276 $self->{'idnum'} = $id;
277}
278
279sub get_number {
280 my $self = shift (@_);
281 return $self->{'idnum'};
282}
283
284sub classify {
285 my $self = shift (@_);
286 my ($doc_obj) = @_;
287
288 my $outhandle = $self->{'outhandle'};
289 &gsprintf($outhandle, "BasClass::classify {common.must_be_implemented}\n");
290}
291
292sub get_classify_info {
293 my $self = shift (@_);
294
295 my $outhandle = $self->{'outhandle'};
296 &gsprintf($outhandle, "BasClass::get_classify_info {common.must_be_implemented}\n");
297}
298
299sub supports_memberof {
300 my $self = shift(@_);
301
302 return "false";
303}
304
305# previously, if a buttonname wasn't specified, we just use the metadata value,
306# but with a list of metadata, we want to do something a bit nicer so that
307# eg -metadata dc.Title,Title will end up with Title as the buttonname
308
309# current algorithm - use the first element, but strip its namespace
310sub generate_title_from_metadata {
311
312 my $self = shift (@_);
313 my $metadata = shift (@_);
314
315 return "" unless defined $metadata && $metadata =~ /\S/;
316
317 my @metalist = split(/,/, $metadata);
318 my $firstmeta = $metalist[0];
319 if ($firstmeta =~ /\./) {
320 $firstmeta =~ s/^\w+\.//;
321 }
322 return $firstmeta;
323}
324
3251;
Note: See TracBrowser for help on using the repository browser.