source: trunk/gsdl/perllib/classify/BasClas.pm@ 6111

Last change on this file since 6111 was 6111, checked in by jmt12, 20 years ago

Changed the description for the -metadata flag to foreshadow the coming enhancement. This commit also happens to include the prototype -ignore_arguments flag to AZList, that will never actually be used because of the aforementioned metadata enhancement.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.1 KB
Line 
1###########################################################################
2#
3# BasClas.pm -- base class for all classifiers
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2000 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package BasClas;
28
29# How a classifier works.
30#
31# For each classifier requested in the collect.cfg file, buildcol.pl creates
32# a new classifier object (a subclass of BasClas). Later, it passes each
33# document object to each classifier in turn for classification.
34#
35# Four primary functions are used:
36#
37# 1. "new" is called before the documents are processed to set up the
38# classifier.
39#
40# 2. "init" is called after buildcol.pl has created the indexes etc but
41# before the documents are classified in order that the classifier might
42# set any variables it requires, etc.
43#
44# 3. "classify" is called once for each document object. The classifier
45# "classifies" each document and updates its local data accordingly.
46#
47# 4. "get_classify_info" is called after every document has been
48# classified. It collates the information about the documents and
49# stores a reference to the classifier so that Greenstone can later
50# display it.
51
52# 09/05/02 Added usage datastructure - John Thompson
53# 28/11/03 Commented out verbosity argument - John Thompson
54
55use parsargv;
56use gsprintf;
57use printusage;
58
59
60my $arguments =
61 [ { 'name' => "builddir",
62 'desc' => "{BasClas.builddir}",
63 'type' => "string",
64 'deft' => "" },
65 { 'name' => "outhandle",
66 'desc' => "{BasClas.outhandle}",
67 'type' => "string",
68 'deft' => "STDERR" },
69# { 'name' => "verbosity",
70# 'desc' => "{BasClas.verbosity}",
71# 'type' => "enum",
72# 'deft' => "2",
73# 'reqd' => "no" } ];
74 { 'name' => "ignore_namespace",
75 'desc' => "{BasClas.ignore_namespace}",
76 'type' => "flag"} ];
77
78my $options = { 'name' => "BasClas",
79 'desc' => "{BasClas.desc}",
80 'inherits' => "No",
81 'args' => $arguments };
82
83
84sub gsprintf
85{
86 return &gsprintf::gsprintf(@_);
87}
88
89
90sub print_xml_usage
91{
92 local $self = shift(@_);
93 local $language = shift(@_);
94
95 &PrintUsage::print_xml_header();
96 $self->print_xml($language);
97}
98
99
100sub print_xml
101{
102 local $self = shift(@_);
103 local $language = shift(@_);
104
105 local $optionlistref = $self->{'option_list'};
106 local @optionlist = @$optionlistref;
107 local $classifieroptions = pop(@$optionlistref);
108 return if (!defined($classifieroptions));
109
110 print STDERR "<ClassInfo>\n";
111 print STDERR " <Name>$classifieroptions->{'name'}</Name>\n";
112 print STDERR " <Desc>$classifieroptions->{'desc'}</Desc>\n";
113 print STDERR " <Inherits>$classifieroptions->{'inherits'}</Inherits>\n";
114 print STDERR " <Arguments>\n";
115 if (defined($classifieroptions->{'args'})) {
116 &PrintUsage::print_options_xml($language, $classifieroptions->{'args'});
117 }
118
119 # Recurse up the classifier hierarchy
120 $self->print_xml($language);
121
122 print STDERR " </Arguments>\n";
123 print STDERR "</ClassInfo>\n";
124}
125
126
127sub print_txt_usage
128{
129 local $self = shift(@_);
130 local $language = shift(@_);
131
132 # Print the usage message for a classifier (recursively)
133 local $descoffset = $self->determine_description_offset(0);
134 $self->print_classifier_usage($language, $descoffset, 1);
135}
136
137
138sub determine_description_offset
139{
140 local $self = shift(@_);
141 local $maxoffset = shift(@_);
142
143 local $optionlistref = $self->{'option_list'};
144 local @optionlist = @$optionlistref;
145 local $classifieroptions = pop(@$optionlistref);
146 return $maxoffset if (!defined($classifieroptions));
147
148 # Find the length of the longest option string of this classifier
149 local $classifierargs = $classifieroptions->{'args'};
150 if (defined($classifierargs)) {
151 local $longest = &PrintUsage::find_longest_option_string($classifierargs);
152 if ($longest > $maxoffset) {
153 $maxoffset = $longest;
154 }
155 }
156
157 # Recurse up the classifier hierarchy
158 $maxoffset = $self->determine_description_offset($maxoffset);
159 $self->{'option_list'} = \@optionlist;
160 return $maxoffset;
161}
162
163
164sub print_classifier_usage
165{
166 local $self = shift(@_);
167 local $language = shift(@_);
168 local $descoffset = shift(@_);
169 local $isleafclass = shift(@_);
170
171 local $optionlistref = $self->{'option_list'};
172 local @optionlist = @$optionlistref;
173 local $classifieroptions = pop(@$optionlistref);
174 return if (!defined($classifieroptions));
175
176 local $classifiername = $classifieroptions->{'name'};
177 local $classifierargs = $classifieroptions->{'args'};
178
179 # Produce the usage information using the data structure above
180 if ($isleafclass) {
181 &gsprintf(STDERR, " {common.usage}: classify $classifiername [{common.options}]\n\n");
182 # print STDERR " usage: classify $classifiername [options]\n\n";
183 }
184
185 # Display the classifier options, if there are some
186 if (defined($classifierargs)) {
187 # Calculate the column offset of the option descriptions
188 local $optiondescoffset = $descoffset + 2; # 2 spaces between options & descriptions
189
190 if ($isleafclass) {
191 &gsprintf(STDERR, " {common.specific_options}:\n");
192 # print STDERR " specific options:\n";
193 }
194 else {
195 &gsprintf(STDERR, " {common.general_options}:\n", $classifiername);
196 # print STDERR " general options (from $classifiername):\n";
197 }
198
199 # Display the classifier options
200 &PrintUsage::print_options_txt($language, $classifierargs, $optiondescoffset);
201 }
202
203 # Recurse up the classifier hierarchy
204 $self->print_classifier_usage($language, $descoffset, 0);
205 $self->{'option_list'} = \@optionlist;
206}
207
208
209# sub print_general_usage {
210# my ($plugin_name) = @_;
211# print STDERR "
212# -verbosity N Controls the quantity of output.
213# Defaults to verbosity of buildcol.pl, which is usually 2.
214
215# (Most general classifier options are set internally by buildcol.)
216
217# ";
218# }
219
220# sub print_usage {
221# print STDERR "
222# This classifier has no classifier-specific options
223#
224# ";
225# }
226
227sub new {
228 my $class = shift (@_);
229 my $name = shift (@_);
230
231 my $self = {};
232
233 $self->{'outhandle'} = STDERR;
234
235 $self->{'option_list'} = [ $options ];
236
237 # general options available to all classifiers
238 if (!parsargv::parse(\@_,
239 q^builddir/.*/^, \$self->{'builddir'},
240 q^outhandle/.*/STDERR^, \$self->{'outhandle'},
241 q^verbosity/\d/2^, \$self->{'verbosity'},
242 q^ignore_namespace^, \$self->{'ignore_namespace'},
243 "allow_extra_options")) {
244
245 &gsprintf(STDERR, "\n{BasClas.bad_general_option}\n", $name);
246 # print STDERR "\nThe $name classifier uses an incorrect general option\n";
247 # print STDERR "(general options are those available to all classifiers).\n";
248 # print STDERR "Check your collect.cfg configuration file.\n";
249 $self->print_txt_usage(""); # Use default resource bundle
250 die "\n";
251 }
252
253 return bless $self, $class;
254}
255
256sub init {
257 my $self = shift (@_);
258}
259
260sub classify {
261 my $self = shift (@_);
262 my ($doc_obj) = @_;
263
264 my $outhandle = $self->{'outhandle'};
265 &gsprintf($outhandle, "BasClass::classify {common.must_be_implemented}\n");
266 # print $outhandle "BasClas::classify function must be implemented in sub-class\n";
267}
268
269sub get_classify_info {
270 my $self = shift (@_);
271
272 my $outhandle = $self->{'outhandle'};
273 &gsprintf($outhandle, "BasClass::get_classify_info {common.must_be_implemented}\n");
274 # print $outhandle "BasClas::get_classify_info function must be implemented in sub-class\n";
275}
276
2771;
Note: See TracBrowser for help on using the repository browser.