source: trunk/gsdl/perllib/classify/BasClas.pm@ 5645

Last change on this file since 5645 was 5645, checked in by mdewsnip, 21 years ago

Moved classifier descriptions into the resource bundle (perllib/strings.rb).

  • Property svn:keywords set to Author Date Id Revision
File size: 7.9 KB
Line 
1###########################################################################
2#
3# BasClas.pm -- base class for all classifiers
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2000 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package BasClas;
28
29# How a classifier works.
30#
31# For each classifier requested in the collect.cfg file, buildcol.pl creates
32# a new classifier object (a subclass of BasClas). Later, it passes each
33# document object to each classifier in turn for classification.
34#
35# Four primary functions are used:
36#
37# 1. "new" is called before the documents are processed to set up the
38# classifier.
39#
40# 2. "init" is called after buildcol.pl has created the indexes etc but
41# before the documents are classified in order that the classifier might
42# set any variables it requires, etc.
43#
44# 3. "classify" is called once for each document object. The classifier
45# "classifies" each document and updates its local data accordingly.
46#
47# 4. "get_classify_info" is called after every document has been
48# classified. It collates the information about the documents and
49# stores a reference to the classifier so that Greenstone can later
50# display it.
51
52# 09/05/02 Added usage datastructure - John Thompson
53
54use parsargv;
55use gsprintf;
56use printusage;
57
58
59my $arguments =
60 [ { 'name' => "builddir",
61 'desc' => "{BasClas.builddir}",
62 'type' => "string",
63 'deft' => "" },
64 { 'name' => "outhandle",
65 'desc' => "{BasClas.outhandle}",
66 'type' => "string",
67 'deft' => "STDERR" },
68 { 'name' => "verbosity",
69 'desc' => "{BasClas.verbosity}",
70 'type' => "enum",
71 'deft' => "2",
72 'reqd' => "no" } ];
73
74my $options = { 'name' => "BasClas",
75 'desc' => "{BasClas.desc}",
76 'inherits' => "No",
77 'args' => $arguments };
78
79
80sub gsprintf
81{
82 return &gsprintf::gsprintf(@_);
83}
84
85
86sub print_xml_usage
87{
88 local $self = shift(@_);
89 local $language = shift(@_);
90
91 &PrintUsage::print_xml_header();
92 $self->print_xml($language);
93}
94
95
96sub print_xml
97{
98 local $self = shift(@_);
99 local $language = shift(@_);
100
101 local $optionlistref = $self->{'option_list'};
102 local @optionlist = @$optionlistref;
103 local $classifieroptions = pop(@$optionlistref);
104 return if (!defined($classifieroptions));
105
106 print STDERR "<ClassInfo>\n";
107 print STDERR " <Name>$classifieroptions->{'name'}</Name>\n";
108 print STDERR " <Desc>$classifieroptions->{'desc'}</Desc>\n";
109 print STDERR " <Inherits>$classifieroptions->{'inherits'}</Inherits>\n";
110 print STDERR " <Arguments>\n";
111 if (defined($classifieroptions->{'args'})) {
112 &PrintUsage::print_options_xml($language, $classifieroptions->{'args'});
113 }
114
115 # Recurse up the classifier hierarchy
116 $self->print_xml($language);
117
118 print STDERR " </Arguments>\n";
119 print STDERR "</ClassInfo>\n";
120}
121
122
123sub print_txt_usage
124{
125 local $self = shift(@_);
126 local $language = shift(@_);
127
128 # Print the usage message for a classifier (recursively)
129 local $descoffset = $self->determine_description_offset(0);
130 $self->print_classifier_usage($language, $descoffset, 1);
131}
132
133
134sub determine_description_offset
135{
136 local $self = shift(@_);
137 local $maxoffset = shift(@_);
138
139 local $optionlistref = $self->{'option_list'};
140 local @optionlist = @$optionlistref;
141 local $classifieroptions = pop(@$optionlistref);
142 return $maxoffset if (!defined($classifieroptions));
143
144 # Find the length of the longest option string of this classifier
145 local $classifierargs = $classifieroptions->{'args'};
146 if (defined($classifierargs)) {
147 local $longest = &PrintUsage::find_longest_option_string($classifierargs);
148 if ($longest > $maxoffset) {
149 $maxoffset = $longest;
150 }
151 }
152
153 # Recurse up the classifier hierarchy
154 $maxoffset = $self->determine_description_offset($maxoffset);
155 $self->{'option_list'} = \@optionlist;
156 return $maxoffset;
157}
158
159
160sub print_classifier_usage
161{
162 local $self = shift(@_);
163 local $language = shift(@_);
164 local $descoffset = shift(@_);
165 local $isleafclass = shift(@_);
166
167 local $optionlistref = $self->{'option_list'};
168 local @optionlist = @$optionlistref;
169 local $classifieroptions = pop(@$optionlistref);
170 return if (!defined($classifieroptions));
171
172 local $classifiername = $classifieroptions->{'name'};
173 local $classifierargs = $classifieroptions->{'args'};
174
175 # Produce the usage information using the data structure above
176 if ($isleafclass) {
177 &gsprintf(STDERR, " {common.usage}: classify $classifiername [{common.options}]\n\n");
178 # print STDERR " usage: classify $classifiername [options]\n\n";
179 }
180
181 # Display the classifier options, if there are some
182 if (defined($classifierargs)) {
183 # Calculate the column offset of the option descriptions
184 local $optiondescoffset = $descoffset + 2; # 2 spaces between options & descriptions
185
186 if ($isleafclass) {
187 &gsprintf(STDERR, " {common.specific_options}:\n");
188 # print STDERR " specific options:\n";
189 }
190 else {
191 &gsprintf(STDERR, " {common.general_options}:\n", $classifiername);
192 # print STDERR " general options (from $classifiername):\n";
193 }
194
195 # Display the classifier options
196 &PrintUsage::print_options_txt($language, $classifierargs, $optiondescoffset);
197 }
198
199 # Recurse up the classifier hierarchy
200 $self->print_classifier_usage($language, $descoffset, 0);
201 $self->{'option_list'} = \@optionlist;
202}
203
204
205# sub print_general_usage {
206# my ($plugin_name) = @_;
207# print STDERR "
208# -verbosity N Controls the quantity of output.
209# Defaults to verbosity of buildcol.pl, which is usually 2.
210
211# (Most general classifier options are set internally by buildcol.)
212
213# ";
214# }
215
216# sub print_usage {
217# print STDERR "
218# This classifier has no classifier-specific options
219#
220# ";
221# }
222
223sub new {
224 my $class = shift (@_);
225 my $name = shift (@_);
226
227 my $self = {};
228
229 $self->{'outhandle'} = STDERR;
230
231 $self->{'option_list'} = [ $options ];
232
233 # general options available to all classifiers
234 if (!parsargv::parse(\@_,
235 q^builddir/.*/^, \$self->{'builddir'},
236 q^outhandle/.*/STDERR^, \$self->{'outhandle'},
237 q^verbosity/\d/2^, \$self->{'verbosity'},
238 "allow_extra_options")) {
239
240 &gsprintf(STDERR, "\n{BasClas.bad_general_option}\n", $name);
241 # print STDERR "\nThe $name classifier uses an incorrect general option\n";
242 # print STDERR "(general options are those available to all classifiers).\n";
243 # print STDERR "Check your collect.cfg configuration file.\n";
244 $self->print_txt_usage(""); # Use default resource bundle
245 die "\n";
246 }
247
248 return bless $self, $class;
249}
250
251sub init {
252 my $self = shift (@_);
253}
254
255sub classify {
256 my $self = shift (@_);
257 my ($doc_obj) = @_;
258
259 my $outhandle = $self->{'outhandle'};
260 &gsprintf($outhandle, "BasClass::classify {common.must_be_implemented}\n");
261 # print $outhandle "BasClas::classify function must be implemented in sub-class\n";
262}
263
264sub get_classify_info {
265 my $self = shift (@_);
266
267 my $outhandle = $self->{'outhandle'};
268 &gsprintf($outhandle, "BasClass::get_classify_info {common.must_be_implemented}\n");
269 # print $outhandle "BasClas::get_classify_info function must be implemented in sub-class\n";
270}
271
2721;
Note: See TracBrowser for help on using the repository browser.