1 | # functions to handle classifiers
|
---|
2 |
|
---|
3 | package classify;
|
---|
4 |
|
---|
5 | require util;
|
---|
6 |
|
---|
7 |
|
---|
# Counter used to build the next classification OID ("CL<n>"). It is
# package-wide state: init_classifiers reads it and increments it once
# per classifier.
our $next_classify_num = 0;
|
---|
9 |
|
---|
# load_classifier loads one classifier and returns a new instance of it.
#
# $classinfo is a reference to an array describing the classifier: the
# first element is the name of the classifier, the rest of the elements
# are classifier specific options. The options are handed to the
# classifier's constructor verbatim, and the caller's array is left
# unmodified.
#
# The classifier module is looked up as perllib/classify/<name>.pm, first
# under $ENV{'GSDLCOLLECTDIR'} and then under $ENV{'GSDLHOME'}.
#
# Returns undef if $classinfo contains no classifier name. Dies if the
# module cannot be found, fails to load, or if its constructor dies.
sub load_classifier {
    my ($classinfo) = @_;

    # work on a copy so the caller's array keeps its name and options
    my ($classname, @options) = @{ $classinfo || [] };

    # explicit undef (not a bare return) so list-context callers still
    # receive exactly one value, as they always have
    return undef unless defined $classname;

    # find the classifier; a collection specific one takes precedence
    # over the one shipped with the main installation
    my @relative_path = ("perllib/classify", "${classname}.pm");
    my $colclassname  = util::filename_cat($ENV{'GSDLCOLLECTDIR'}, @relative_path);
    my $mainclassname = util::filename_cat($ENV{'GSDLHOME'}, @relative_path);

    if (-e $colclassname) {
        require $colclassname;
    }
    elsif (-e $mainclassname) {
        require $mainclassname;
    }
    else {
        die "ERROR - couldn't find classifier \"$classname\"\n";
    }

    # Create the classify object with a direct method call. Building the
    # call as a string for eval would re-interpret every option as Perl
    # source, so options containing quotes, '$', '@' or backslashes would
    # be altered, fail to compile, or run as code.
    my $classobj = $classname->new(@options);

    return $classobj;
}
|
---|
39 |
|
---|
# init_classifiers resets all the classifiers and readies them to process
# the documents. Each classifier is given a classification OID of the
# form "CL<n>", where <n> is taken from the package counter
# $next_classify_num; the counter is incremented after every classifier
# and is not reset here.
#
# $classifiers is a reference to an array of classifier objects, each of
# which must provide init() and set_OID().
sub init_classifiers {
    my ($classifiers) = @_;

    # lexical loop variable: an undeclared one would be a package global
    # that the methods called below could see
    foreach my $classobj (@$classifiers) {
        $classobj->init();
        $classobj->set_OID("CL$next_classify_num");
        $next_classify_num++;
    }
}
|
---|
51 |
|
---|
# classify_doc lets each of the classifiers classify a document.
#
# $classifiers is a reference to an array of classifier objects;
# $doc_obj is passed unchanged to the classify() method of each of them,
# in array order.
sub classify_doc {
    my ($classifiers, $doc_obj) = @_;

    # lexical loop variable: an undeclared one would be a package global
    # that classify() could see
    foreach my $classobj (@$classifiers) {
        $classobj->classify($doc_obj);
    }
}
|
---|
60 |
|
---|
# output_classify_info outputs all the info needed for the classification
# to the gdbm.
#
# $classifiers is a reference to an array of classifier objects, each of
# which must provide output_classify_info($handle) and get_OID().
# $handle is the filehandle everything is printed to.
#
# Each classifier first writes its own information; afterwards a
# "[browse]" record is written whose <contains> line lists the
# classifier OIDs separated by ";". The <contains> line is left out when
# that list is empty.
sub output_classify_info {
    my ($classifiers, $handle) = @_;
    # (debugging aid) $handle = "main::STDOUT";

    # output each of the classifications, remembering the OIDs in order
    my @oids = ();
    foreach my $classobj (@$classifiers) {
        $classobj->output_classify_info($handle);
        # scalar context: the OID becomes part of a single string
        push @oids, scalar($classobj->get_OID());
    }
    my $contains_str = join(";", @oids);

    # output information about each of the classifications
    print {$handle} "[browse]\n";
    print {$handle} "<doctype>classify\n";
    print {$handle} "<hastxt>0\n";
    print {$handle} "<contains>$contains_str\n"
        unless $contains_str eq "";
    print {$handle} '-' x 70, "\n";
}
|
---|
86 |
|
---|
87 |
|
---|
88 | 1;
|
---|