###########################################################################
#
# BasClas.pm -- base class for all classifiers
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 2000 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package BasClas;

# How a classifier works.
#
# For each classifier requested in the collect.cfg file, buildcol.pl creates
# a new classifier object (a subclass of BasClas).  Later, it passes each
# document object to each classifier in turn for classification.
#
# Four primary functions are used:
#
# 1. "new" is called before the documents are processed to set up the
#    classifier.
#
# 2. "init" is called after buildcol.pl has created the indexes etc but
#    before the documents are classified in order that the classifier might
#    set any variables it requires, etc.
#
# 3. "classify" is called once for each document object.  The classifier
#    "classifies" each document and updates its local data accordingly.
#
# 4. "get_classify_info" is called after every document has been
#    classified.  It collates the information about the documents and
#    stores a reference to the classifier so that Greenstone can later
#    display it.

use strict;
# The output handle is kept as a handle *name* (the string "STDERR" by
# default) and printed to symbolically, so symbolic references must stay
# enabled in this file.
no strict 'refs';

use parsargv;

# Print the options that are common to every classifier on STDERR.
#
# Arguments:
#   $plugin_name - name of the classifier; accepted for interface
#                  compatibility but not used in the message.
sub print_general_usage {
    my ($plugin_name) = @_;

    print STDERR " -verbosity N Controls the quantity of output. Defaults to verbosity of buildcol.pl, which is usually 2. (Most general classifier options are set internally by buildcol.) ";
}

# Print the classifier-specific options on STDERR.
# print_usage should be overridden for any sub-classes
sub print_usage {
    print STDERR " This classifier has no classifier-specific options ";
}

# Constructor.
#
# Arguments:
#   $class - package name to bless the new object into.
#   $name  - classifier name, used only in error messages.
#   @_     - remaining arguments: the option list handed to parsargv::parse.
#
# Returns a hash-based object blessed into $class with the keys
# 'builddir', 'outhandle' and 'verbosity' filled in by parsargv::parse.
#
# Dies (after printing a diagnostic and the general usage on STDERR) when
# parsargv::parse reports failure.
sub new {
    my $class = shift (@_);
    my $name  = shift (@_);

    my $self = {};

    # Stored as a handle name; see the "no strict 'refs'" note above.
    $self->{'outhandle'} = 'STDERR';

    # general options available to all classifiers
    # NOTE(review): the spec strings look like parsargv's
    # "name/pattern/default" form, and "allow_extra_options" presumably lets
    # classifier-specific options pass through -- confirm against parsargv.pm.
    my $parsed_ok = parsargv::parse(
        \@_,
        q^builddir/.*/^,         \$self->{'builddir'},
        q^outhandle/.*/STDERR^,  \$self->{'outhandle'},
        q^verbosity/\d/2^,       \$self->{'verbosity'},
        "allow_extra_options"
    );

    if (!$parsed_ok) {
        print STDERR "\nThe $name classifier uses an incorrect general option\n";
        print STDERR "(general options are those available to all classifiers).\n";
        print STDERR "Check your collect.cfg configuration file.\n";
        # The classifier name lives in $name; the previous code passed an
        # undeclared $plugin_name here.
        print_general_usage($name);
        die "\n";
    }

    return bless $self, $class;
}

# Hook called before documents are classified.  The base implementation
# does nothing beyond unpacking its invocant; sub-classes may override it.
sub init {
    my $self = shift (@_);
}

# Classify a single document object.  The base implementation only reports,
# on the object's output handle, that a sub-class must provide this method.
#
# Arguments:
#   $doc_obj - the document object to classify (unused here).
sub classify {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $outhandle = $self->{'outhandle'};
    print $outhandle "BasClas::classify function must be implemented in sub-class\n";
}

# Collate the classification results.  The base implementation only
# reports, on the object's output handle, that a sub-class must provide
# this method.
sub get_classify_info {
    my $self = shift (@_);

    my $outhandle = $self->{'outhandle'};
    # Name the method that is actually missing (the message used to say
    # "classify", copied from the sub above).
    print $outhandle "BasClas::get_classify_info function must be implemented in sub-class\n";
}

1;