source: trunk/gsdl/perllib/classify/BasClas.pm@ 6925

Last change on this file since 6925 was 6925, checked in by mdewsnip, 20 years ago

Changed the way display in different languages is done. Instead of passing a language variable throughout the process, the desired resource bundle is explicitly loaded during the initialization of each program (buildcol.pl, classinfo.pl, exportcol.pl, import.pl mkcol.pl, pluginfo.pl).

  • Property svn:keywords set to Author Date Id Revision
File size: 7.9 KB
Line 
1###########################################################################
2#
3# BasClas.pm -- base class for all classifiers
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2000 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package BasClas;
28
29# How a classifier works.
30#
31# For each classifier requested in the collect.cfg file, buildcol.pl creates
32# a new classifier object (a subclass of BasClas). Later, it passes each
33# document object to each classifier in turn for classification.
34#
35# Four primary functions are used:
36#
37# 1. "new" is called before the documents are processed to set up the
38# classifier.
39#
40# 2. "init" is called after buildcol.pl has created the indexes etc but
41# before the documents are classified in order that the classifier might
42# set any variables it requires, etc.
43#
44# 3. "classify" is called once for each document object. The classifier
45# "classifies" each document and updates its local data accordingly.
46#
47# 4. "get_classify_info" is called after every document has been
48# classified. It collates the information about the documents and
49# stores a reference to the classifier so that Greenstone can later
50# display it.
51
52# 09/05/02 Added usage datastructure - John Thompson
53# 28/11/03 Commented out verbosity argument - John Thompson
54
55use parsargv;
56use gsprintf;
57use printusage;
58
59
60my $arguments =
61 [ { 'name' => "builddir",
62 'desc' => "{BasClas.builddir}",
63 'type' => "string",
64 'deft' => "" },
65 { 'name' => "outhandle",
66 'desc' => "{BasClas.outhandle}",
67 'type' => "string",
68 'deft' => "STDERR" },
69# { 'name' => "verbosity",
70# 'desc' => "{BasClas.verbosity}",
71# 'type' => "enum",
72# 'deft' => "2",
73# 'reqd' => "no" } ];
74# { 'name' => "ignore_namespace",
75# 'desc' => "{BasClas.ignore_namespace}",
76# 'type' => "flag"}
77 ];
78
79my $options = { 'name' => "BasClas",
80 'desc' => "{BasClas.desc}",
81 'abstract' => "yes",
82 'inherits' => "no",
83 'args' => $arguments };
84
85
86sub gsprintf
87{
88 return &gsprintf::gsprintf(@_);
89}
90
91
92sub print_xml_usage
93{
94 local $self = shift(@_);
95
96 &PrintUsage::print_xml_header();
97 $self->print_xml();
98}
99
100
101sub print_xml
102{
103 local $self = shift(@_);
104
105 local $optionlistref = $self->{'option_list'};
106 local @optionlist = @$optionlistref;
107 local $classifieroptions = pop(@$optionlistref);
108 return if (!defined($classifieroptions));
109
110 print STDERR "<ClassInfo>\n";
111 print STDERR " <Name>$classifieroptions->{'name'}</Name>\n";
112 print STDERR " <Desc>$classifieroptions->{'desc'}</Desc>\n";
113 print STDERR " <Abstract>$classifieroptions->{'abstract'}</Abstract>\n";
114 print STDERR " <Inherits>$classifieroptions->{'inherits'}</Inherits>\n";
115 print STDERR " <Arguments>\n";
116 if (defined($classifieroptions->{'args'})) {
117 &PrintUsage::print_options_xml($classifieroptions->{'args'});
118 }
119
120 # Recurse up the classifier hierarchy
121 $self->print_xml();
122
123 print STDERR " </Arguments>\n";
124 print STDERR "</ClassInfo>\n";
125}
126
127
128sub print_txt_usage
129{
130 local $self = shift(@_);
131
132 # Print the usage message for a classifier (recursively)
133 local $descoffset = $self->determine_description_offset(0);
134 $self->print_classifier_usage($descoffset, 1);
135}
136
137
138sub determine_description_offset
139{
140 local $self = shift(@_);
141 local $maxoffset = shift(@_);
142
143 local $optionlistref = $self->{'option_list'};
144 local @optionlist = @$optionlistref;
145 local $classifieroptions = pop(@$optionlistref);
146 return $maxoffset if (!defined($classifieroptions));
147
148 # Find the length of the longest option string of this classifier
149 local $classifierargs = $classifieroptions->{'args'};
150 if (defined($classifierargs)) {
151 local $longest = &PrintUsage::find_longest_option_string($classifierargs);
152 if ($longest > $maxoffset) {
153 $maxoffset = $longest;
154 }
155 }
156
157 # Recurse up the classifier hierarchy
158 $maxoffset = $self->determine_description_offset($maxoffset);
159 $self->{'option_list'} = \@optionlist;
160 return $maxoffset;
161}
162
163
164sub print_classifier_usage
165{
166 local $self = shift(@_);
167 local $descoffset = shift(@_);
168 local $isleafclass = shift(@_);
169
170 local $optionlistref = $self->{'option_list'};
171 local @optionlist = @$optionlistref;
172 local $classifieroptions = pop(@$optionlistref);
173 return if (!defined($classifieroptions));
174
175 local $classifiername = $classifieroptions->{'name'};
176 local $classifierargs = $classifieroptions->{'args'};
177
178 # Produce the usage information using the data structure above
179 if ($isleafclass) {
180 &gsprintf(STDERR, " {common.usage}: classify $classifiername [{common.options}]\n\n");
181 }
182
183 # Display the classifier options, if there are some
184 if (defined($classifierargs)) {
185 # Calculate the column offset of the option descriptions
186 local $optiondescoffset = $descoffset + 2; # 2 spaces between options & descriptions
187
188 if ($isleafclass) {
189 &gsprintf(STDERR, " {common.specific_options}:\n");
190 }
191 else {
192 &gsprintf(STDERR, " {common.general_options}:\n", $classifiername);
193 }
194
195 # Display the classifier options
196 &PrintUsage::print_options_txt($classifierargs, $optiondescoffset);
197 }
198
199 # Recurse up the classifier hierarchy
200 $self->print_classifier_usage($descoffset, 0);
201 $self->{'option_list'} = \@optionlist;
202}
203
204
205# sub print_general_usage {
206# my ($plugin_name) = @_;
207# print STDERR "
208# -verbosity N Controls the quantity of output.
209# Defaults to verbosity of buildcol.pl, which is usually 2.
210
211# (Most general classifier options are set internally by buildcol.)
212
213# ";
214# }
215
216# sub print_usage {
217# print STDERR "
218# This classifier has no classifier-specific options
219#
220# ";
221# }
222
223sub new {
224 my $class = shift (@_);
225 my $name = shift (@_);
226
227 my $self = {};
228
229 $self->{'outhandle'} = STDERR;
230
231 $self->{'option_list'} = [ $options ];
232
233 # general options available to all classifiers
234 if (!parsargv::parse(\@_,
235 q^builddir/.*/^, \$self->{'builddir'},
236 q^outhandle/.*/STDERR^, \$self->{'outhandle'},
237 q^verbosity/\d/2^, \$self->{'verbosity'},
238 q^ignore_namespace^, \$self->{'ignore_namespace'},
239 "allow_extra_options")) {
240
241 &gsprintf(STDERR, "\n{BasClas.bad_general_option}\n", $name);
242 # print STDERR "\nThe $name classifier uses an incorrect general option\n";
243 # print STDERR "(general options are those available to all classifiers).\n";
244 # print STDERR "Check your collect.cfg configuration file.\n";
245 $self->print_txt_usage(""); # Use default resource bundle
246 die "\n";
247 }
248
249 return bless $self, $class;
250}
251
252sub init {
253 my $self = shift (@_);
254}
255
256sub classify {
257 my $self = shift (@_);
258 my ($doc_obj) = @_;
259
260 my $outhandle = $self->{'outhandle'};
261 &gsprintf($outhandle, "BasClass::classify {common.must_be_implemented}\n");
262 # print $outhandle "BasClas::classify function must be implemented in sub-class\n";
263}
264
265sub get_classify_info {
266 my $self = shift (@_);
267
268 my $outhandle = $self->{'outhandle'};
269 &gsprintf($outhandle, "BasClass::get_classify_info {common.must_be_implemented}\n");
270 # print $outhandle "BasClas::get_classify_info function must be implemented in sub-class\n";
271}
272
2731;
Note: See TracBrowser for help on using the repository browser.