source: trunk/gsdl/perllib/classify/BasClas.pm@ 6932

Last change on this file since 6932 was 6932, checked in by kjdon, 20 years ago

changed the output slightly, and now outputs the classifier/plugin description

  • Property svn:keywords set to Author Date Id Revision
File size: 8.0 KB
Line 
1###########################################################################
2#
3# BasClas.pm -- base class for all classifiers
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2000 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package BasClas;
28
29# How a classifier works.
30#
31# For each classifier requested in the collect.cfg file, buildcol.pl creates
32# a new classifier object (a subclass of BasClas). Later, it passes each
33# document object to each classifier in turn for classification.
34#
35# Four primary functions are used:
36#
37# 1. "new" is called before the documents are processed to set up the
38# classifier.
39#
40# 2. "init" is called after buildcol.pl has created the indexes etc but
41# before the documents are classified in order that the classifier might
42# set any variables it requires, etc.
43#
44# 3. "classify" is called once for each document object. The classifier
45# "classifies" each document and updates its local data accordingly.
46#
47# 4. "get_classify_info" is called after every document has been
48# classified. It collates the information about the documents and
49# stores a reference to the classifier so that Greenstone can later
50# display it.
51
52# 09/05/02 Added usage datastructure - John Thompson
53# 28/11/03 Commented out verbosity argument - John Thompson
54
55use parsargv;
56use gsprintf;
57use printusage;
58
59
60my $arguments =
61 [ { 'name' => "builddir",
62 'desc' => "{BasClas.builddir}",
63 'type' => "string",
64 'deft' => "" },
65 { 'name' => "outhandle",
66 'desc' => "{BasClas.outhandle}",
67 'type' => "string",
68 'deft' => "STDERR" },
69# { 'name' => "verbosity",
70# 'desc' => "{BasClas.verbosity}",
71# 'type' => "enum",
72# 'deft' => "2",
73# 'reqd' => "no" } ];
74# { 'name' => "ignore_namespace",
75# 'desc' => "{BasClas.ignore_namespace}",
76# 'type' => "flag"}
77 ];
78
79my $options = { 'name' => "BasClas",
80 'desc' => "{BasClas.desc}",
81 'abstract' => "yes",
82 'inherits' => "no",
83 'args' => $arguments };
84
85
86sub gsprintf
87{
88 return &gsprintf::gsprintf(@_);
89}
90
91
92sub print_xml_usage
93{
94 local $self = shift(@_);
95
96 &PrintUsage::print_xml_header();
97 $self->print_xml();
98}
99
100
101sub print_xml
102{
103 local $self = shift(@_);
104
105 local $optionlistref = $self->{'option_list'};
106 local @optionlist = @$optionlistref;
107 local $classifieroptions = pop(@$optionlistref);
108 return if (!defined($classifieroptions));
109
110 print STDERR "<ClassInfo>\n";
111 print STDERR " <Name>$classifieroptions->{'name'}</Name>\n";
112 print STDERR " <Desc>$classifieroptions->{'desc'}</Desc>\n";
113 print STDERR " <Abstract>$classifieroptions->{'abstract'}</Abstract>\n";
114 print STDERR " <Inherits>$classifieroptions->{'inherits'}</Inherits>\n";
115 print STDERR " <Arguments>\n";
116 if (defined($classifieroptions->{'args'})) {
117 &PrintUsage::print_options_xml($classifieroptions->{'args'});
118 }
119
120 # Recurse up the classifier hierarchy
121 $self->print_xml();
122
123 print STDERR " </Arguments>\n";
124 print STDERR "</ClassInfo>\n";
125}
126
127
128sub print_txt_usage
129{
130 local $self = shift(@_);
131
132 # Print the usage message for a classifier (recursively)
133 local $descoffset = $self->determine_description_offset(0);
134 $self->print_classifier_usage($descoffset, 1);
135}
136
137
138sub determine_description_offset
139{
140 local $self = shift(@_);
141 local $maxoffset = shift(@_);
142
143 local $optionlistref = $self->{'option_list'};
144 local @optionlist = @$optionlistref;
145 local $classifieroptions = pop(@$optionlistref);
146 return $maxoffset if (!defined($classifieroptions));
147
148 # Find the length of the longest option string of this classifier
149 local $classifierargs = $classifieroptions->{'args'};
150 if (defined($classifierargs)) {
151 local $longest = &PrintUsage::find_longest_option_string($classifierargs);
152 if ($longest > $maxoffset) {
153 $maxoffset = $longest;
154 }
155 }
156
157 # Recurse up the classifier hierarchy
158 $maxoffset = $self->determine_description_offset($maxoffset);
159 $self->{'option_list'} = \@optionlist;
160 return $maxoffset;
161}
162
163
164sub print_classifier_usage
165{
166 local $self = shift(@_);
167 local $descoffset = shift(@_);
168 local $isleafclass = shift(@_);
169
170 local $optionlistref = $self->{'option_list'};
171 local @optionlist = @$optionlistref;
172 local $classifieroptions = pop(@$optionlistref);
173 return if (!defined($classifieroptions));
174
175 local $classifiername = $classifieroptions->{'name'};
176 local $classifierargs = $classifieroptions->{'args'};
177 local $classifierdesc = $classifieroptions->{'desc'};
178 # Produce the usage information using the data structure above
179 if ($isleafclass) {
180 if (defined($classifierdesc)) {
181 &gsprintf(STDERR, "$classifierdesc\n\n");
182 }
183 &gsprintf(STDERR, " {common.usage}: classify $classifiername [{common.options}]\n\n");
184
185 }
186
187 # Display the classifier options, if there are some
188 if (defined($classifierargs)) {
189 # Calculate the column offset of the option descriptions
190 local $optiondescoffset = $descoffset + 2; # 2 spaces between options & descriptions
191
192 if ($isleafclass) {
193 &gsprintf(STDERR, " {common.specific_options}:\n");
194 }
195 else {
196 &gsprintf(STDERR, " {common.general_options}:\n", $classifiername);
197 }
198
199 # Display the classifier options
200 &PrintUsage::print_options_txt($classifierargs, $optiondescoffset);
201 }
202
203 # Recurse up the classifier hierarchy
204 $self->print_classifier_usage($descoffset, 0);
205 $self->{'option_list'} = \@optionlist;
206}
207
208
209# sub print_general_usage {
210# my ($plugin_name) = @_;
211# print STDERR "
212# -verbosity N Controls the quantity of output.
213# Defaults to verbosity of buildcol.pl, which is usually 2.
214
215# (Most general classifier options are set internally by buildcol.)
216
217# ";
218# }
219
220# sub print_usage {
221# print STDERR "
222# This classifier has no classifier-specific options
223#
224# ";
225# }
226
227sub new {
228 my $class = shift (@_);
229 my $name = shift (@_);
230
231 my $self = {};
232
233 $self->{'outhandle'} = STDERR;
234
235 $self->{'option_list'} = [ $options ];
236
237 # general options available to all classifiers
238 if (!parsargv::parse(\@_,
239 q^builddir/.*/^, \$self->{'builddir'},
240 q^outhandle/.*/STDERR^, \$self->{'outhandle'},
241 q^verbosity/\d/2^, \$self->{'verbosity'},
242 q^ignore_namespace^, \$self->{'ignore_namespace'},
243 "allow_extra_options")) {
244
245 &gsprintf(STDERR, "\n{BasClas.bad_general_option}\n", $name);
246 # print STDERR "\nThe $name classifier uses an incorrect general option\n";
247 # print STDERR "(general options are those available to all classifiers).\n";
248 # print STDERR "Check your collect.cfg configuration file.\n";
249 $self->print_txt_usage(""); # Use default resource bundle
250 die "\n";
251 }
252
253 return bless $self, $class;
254}
255
256sub init {
257 my $self = shift (@_);
258}
259
260sub classify {
261 my $self = shift (@_);
262 my ($doc_obj) = @_;
263
264 my $outhandle = $self->{'outhandle'};
265 &gsprintf($outhandle, "BasClass::classify {common.must_be_implemented}\n");
266 # print $outhandle "BasClas::classify function must be implemented in sub-class\n";
267}
268
269sub get_classify_info {
270 my $self = shift (@_);
271
272 my $outhandle = $self->{'outhandle'};
273 &gsprintf($outhandle, "BasClass::get_classify_info {common.must_be_implemented}\n");
274 # print $outhandle "BasClas::get_classify_info function must be implemented in sub-class\n";
275}
276
2771;
Note: See TracBrowser for help on using the repository browser.