source: trunk/gsdl/perllib/classify.pm@ 5682

Last change on this file since 5682 was 5682, checked in by mdewsnip, 21 years ago

Changed to use the gsprintf module, which makes using strings from the resource bundle much easier.

  • Property svn:keywords set to Author Date Id Revision
File size: 6.8 KB
Line 
1###########################################################################
2#
3# classify.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# functions to handle classifiers
27
28package classify;
29
30require util;
31use gsprintf;
32
33
34sub gsprintf
35{
36 return &gsprintf::gsprintf(@_);
37}
38
39
40$next_classify_num = 1;
41
42sub load_classifiers {
43 my ($classify_list, $build_dir, $outhandle) = @_;
44 my @classify_objects = ();
45
46 foreach $classifyoption (@$classify_list) {
47
48 # get the classifier name
49 my $classname = shift @$classifyoption;
50 next unless defined $classname;
51
52 # find the classifier
53 my $colclassname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/classify",
54 "${classname}.pm");
55 my $mainclassname = &util::filename_cat($ENV{'GSDLHOME'},"perllib/classify",
56 "${classname}.pm");
57
58 if (-e $colclassname) { require $colclassname; }
59 elsif (-e $mainclassname) { require $mainclassname; }
60 else { &gsprintf(STDERR, "{classify.could_not_find_classifier}\n", $classname) && die "\n";
61 # die "ERROR - couldn't find classifier \"$classname\"\n";
62 }
63
64 # create the classify object
65 my ($classobj);
66
67 # backwards compatability hack: if the classifier options are
68 # in "x=y" format, convert them to parsearg ("-x y") format.
69 my ($opt, $key, $value);
70 my @newoptions;
71 foreach $opt (@$classifyoption) {
72 if ($opt =~ /^(\w+)=(.*)$/) {
73 push @newoptions, "-$1", $2;
74 } else {
75 push @newoptions, $opt;
76 }
77 }
78 push @newoptions, "-builddir", "$build_dir" if ($build_dir);
79 push @newoptions, "-outhandle", "$outhandle" if ($outhandle);
80 push @newoptions, "-verbosity", "2";
81
82 map { $_ = "\"$_\""; } @newoptions;
83 my $options .= join (",", @newoptions);
84
85 eval ("\$classobj = new \$classname($options)");
86 die "$@" if $@;
87
88 # add this object to the list
89 push (@classify_objects, $classobj);
90 }
91
92 return \@classify_objects;
93}
94
95# init_classifiers resets all the classifiers and readys them to process
96# the documents.
97sub init_classifiers {
98 my ($classifiers) = @_;
99
100 foreach $classobj (@$classifiers) {
101 $classobj->init();
102 }
103}
104
105# classify_doc lets each of the classifiers classify a document
106sub classify_doc {
107 my ($classifiers, $doc_obj) = @_;
108
109 foreach $classobj (@$classifiers) {
110 $classobj->classify($doc_obj);
111 }
112}
113
114# output_classify_info outputs all the info needed for the classification
115# to the gdbm
116sub output_classify_info {
117 my ($classifiers, $handle, $allclassifications) = @_;
118# $handle = "main::STDOUT";
119
120 # create a classification containing all the info
121 my $classifyinfo = {'classifyOID'=>'browse',
122 'contains'=>[]};
123
124 # get each of the classifications
125 foreach $classobj (@$classifiers) {
126 my $tempinfo = $classobj->get_classify_info();
127 $tempinfo->{'classifyOID'} = "CL$next_classify_num";
128 $next_classify_num++;
129 push (@{$classifyinfo->{'contains'}}, $tempinfo);
130 }
131
132 &print_classify_info ($handle, $classifyinfo, "", $allclassifications);
133}
134
135sub print_classify_info {
136 my ($handle, $classifyinfo, $OID, $allclassifications) = @_;
137
138 $OID =~ s/^\.+//; # just for good luck
139
140 # book information is printed elsewhere
141 return if (defined ($classifyinfo->{'OID'}));
142
143 # don't want empty classifications
144 if ($allclassifications || &check_contents ($classifyinfo) > 0) {
145
146 $OID = $classifyinfo->{'classifyOID'} if defined ($classifyinfo->{'classifyOID'});
147
148 my $outputtext = "[$OID]\n";
149 $outputtext .= "<doctype>classify\n";
150 $outputtext .= "<hastxt>0\n";
151 $outputtext .= "<childtype>$classifyinfo->{'childtype'}\n"
152 if defined $classifyinfo->{'childtype'};
153 $outputtext .= "<Title>$classifyinfo->{'Title'}\n"
154 if defined $classifyinfo->{'Title'};
155 $outputtext .= "<numleafdocs>$classifyinfo->{'numleafdocs'}\n"
156 if defined $classifyinfo->{'numleafdocs'};
157 $outputtext .= "<thistype>$classifyinfo->{'thistype'}\n"
158 if defined $classifyinfo->{'thistype'};
159 $outputtext .= "<parameters>$classifyinfo->{'parameters'}\n"
160 if defined $classifyinfo->{'parameters'};
161
162 my $contains_text = "<contains>";
163 my $mdoffset_text = "<mdoffset>";
164
165 my $next_subOID = 1;
166 my $first = 1;
167 foreach $tempinfo (@{$classifyinfo->{'contains'}}) {
168 # empty contents were made undefined by clean_contents()
169 next unless defined $tempinfo;
170
171 $contains_text .= ";" unless $first;
172 $mdoffset_text .= ";" unless $first;
173 $first = 0;
174
175 if (defined ($tempinfo->{'classifyOID'})) {
176 $contains_text .= $tempinfo->{'classifyOID'};
177 &print_classify_info ($handle, $tempinfo, $tempinfo->{'classifyOID'},
178 $allclassifications);
179 } elsif (defined ($tempinfo->{'OID'})) {
180 $contains_text .= $tempinfo->{'OID'};
181 $mdoffset_text .= $tempinfo->{'offset'}
182 if (defined ($tempinfo->{'offset'}))
183 # note: we don't want to print the contents of the books
184 } else {
185 $contains_text .= "\".$next_subOID";
186 &print_classify_info ($handle, $tempinfo, "$OID.$next_subOID",
187 $allclassifications);
188 $next_subOID++;
189 }
190 }
191
192 $outputtext .= "$contains_text\n";
193 $outputtext .= "<mdtype>$classifyinfo->{'mdtype'}\n"
194 if defined $classifyinfo->{'mdtype'};
195 $outputtext .= "$mdoffset_text\n"
196 if ($mdoffset_text !~ m/^<mdoffset>;+$/);
197
198 $outputtext .= '-' x 70 . "\n";
199
200 print $handle $outputtext;
201 }
202}
203
204sub check_contents {
205 my ($classifyinfo) = @_;
206 my $num_leaf_docs = 0;
207 my $sub_num_leaf_docs = 0;
208
209 return $classifyinfo->{'numleafdocs'} if (defined $classifyinfo->{'numleafdocs'});
210
211 foreach $content (@{$classifyinfo->{'contains'}}) {
212 if (defined $content->{'OID'}) {
213 # found a book
214 $num_leaf_docs ++;
215 } elsif (($sub_num_leaf_docs = &check_contents ($content)) > 0) {
216 # there's a book somewhere below
217 $num_leaf_docs += $sub_num_leaf_docs;
218 } else {
219 # section contains no books so we want to remove
220 # it from its parents contents
221 $content = undef;
222 }
223 }
224
225 $classifyinfo->{'numleafdocs'} = $num_leaf_docs;
226 return $num_leaf_docs;
227}
228
2291;
Note: See TracBrowser for help on using the repository browser.