source: trunk/gsdl/packages/kea/kea-3.0/weka/classifiers/FilteredClassifier.java@ 8815

Last change on this file since 8815 was 8815, checked in by mdewsnip, 19 years ago

Kea 3.0, as downloaded from http://www.nzdl.org/kea but with CSTR_abstracts_test, CSTR_abstracts_train, Chinese_test, and Chinese_train directories removed.

  • Property svn:keywords set to Author Date Id Revision
File size: 10.4 KB
Line 
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
16
/*
 * FilteredClassifier.java
 * Copyright (C) 1999 Len Trigg
 *
 */
22
23package weka.classifiers;
24
25import java.util.Enumeration;
26import java.util.Vector;
27import weka.core.Instance;
28import weka.core.Instances;
29import weka.core.Option;
30import weka.core.OptionHandler;
31import weka.core.Utils;
32import weka.filters.Filter;
33import weka.core.Attribute;
34
35
36/**
37 * Class for running an arbitrary classifier on data that has been passed
38 * through an arbitrary filter.<p>
39 *
40 * Valid options from the command line are:<p>
41 *
42 * -B classifierstring <br>
43 * Classifierstring should contain the full class name of a classifier
44 * followed by options to the classifier.
45 * (required).<p>
46 *
47 * -F filterstring <br>
48 * Filterstring should contain the full class name of a filter
49 * followed by options to the filter.
50 * (required).<p>
51 *
52 * @author Len Trigg ([email protected])
53 * @version $Revision: 8815 $
54 */
55public class FilteredClassifier extends DistributionClassifier
56 implements OptionHandler {
57
58 /** The classifier */
59 protected Classifier m_Classifier = new weka.classifiers.NaiveBayesSimple();
60
61 /** The filter */
62 protected Filter m_Filter = new weka.filters.DiscretizeFilter();
63
64 /** The instance structure of the filtered instances */
65 protected Instances m_FilteredInstances;
66
67 /**
68 * Default constructor specifying NaiveBayesSimple as the classifier and
69 * DiscretizeFilter as the filter. Both of these are just placeholders
70 * for more useful selections.
71 */
72 public FilteredClassifier() {
73
74 this(new weka.classifiers.NaiveBayesSimple(),
75 new weka.filters.DiscretizeFilter());
76 }
77
78 /**
79 * Constructor that specifies the subclassifier and filter to use.
80 *
81 * @param classifier the Classifier to receive filtered instances.
82 * @param filter the Filter that will process instances before
83 * passing to the Classifier.
84 */
85 public FilteredClassifier(Classifier classifier, Filter filter) {
86
87 m_Classifier = classifier;
88 m_Filter = filter;
89 }
90
91 /**
92 * Returns an enumeration describing the available options
93 *
94 * @return an enumeration of all the available options
95 */
96 public Enumeration listOptions() {
97
98 Vector newVector = new Vector(2);
99
100 newVector.addElement(new Option(
101 "\tFull class name of classifier to use, followed\n"
102 + "\tby scheme options. (required)\n"
103 + "\teg: \"weka.classifiers.NaiveBayes -D\"",
104 "B", 1, "-B <classifier specification>"));
105 newVector.addElement(new Option(
106 "\tFull class name of filter to use, followed\n"
107 + "\tby filter options. (required)\n"
108 + "\teg: \"weka.filters.AttributeFilter -V -R 1,2\"",
109 "F", 1, "-F <filter specification>"));
110 return newVector.elements();
111 }
112
113 /**
114 * Parses a given list of options. Valid options are:<p>
115 *
116 * -B classifierstring <br>
117 * Classifierstring should contain the full class name of a classifier
118 * followed by options to the classifier.
119 * (required).<p>
120 *
121 * -F filterstring <br>
122 * Filterstring should contain the full class name of a filter
123 * followed by options to the filter.
124 * (required).<p>
125 *
126 * @param options the list of options as an array of strings
127 * @exception Exception if an option is not supported
128 */
129 public void setOptions(String[] options) throws Exception {
130
131 String classifierString = Utils.getOption('B', options);
132 if (classifierString.length() == 0) {
133 throw new Exception("A classifier must be specified"
134 + " with the -B option.");
135 }
136 String [] classifierSpec = Utils.splitOptions(classifierString);
137 if (classifierSpec.length == 0) {
138 throw new Exception("Invalid classifier specification string");
139 }
140 String classifierName = classifierSpec[0];
141 classifierSpec[0] = "";
142 setClassifier(Classifier.forName(classifierName, classifierSpec));
143
144 // Same for filter
145 String filterString = Utils.getOption('F', options);
146 if (filterString.length() == 0) {
147 throw new Exception("A filter must be specified"
148 + " with the -F option.");
149 }
150 String [] filterSpec = Utils.splitOptions(filterString);
151 if (filterSpec.length == 0) {
152 throw new Exception("Invalid filter specification string");
153 }
154 String filterName = filterSpec[0];
155 filterSpec[0] = "";
156 setFilter((Filter) Utils.forName(Filter.class, filterName, filterSpec));
157 }
158
159 /**
160 * Gets the current settings of the Classifier.
161 *
162 * @return an array of strings suitable for passing to setOptions
163 */
164 public String [] getOptions() {
165
166 String [] options = new String [4];
167 int current = 0;
168
169 options[current++] = "-B";
170 options[current++] = "" + getClassifierSpec();
171
172 // Same for filter
173 options[current++] = "-F";
174 options[current++] = "" + getFilterSpec();
175
176 while (current < options.length) {
177 options[current++] = "";
178 }
179 return options;
180 }
181
182 /**
183 * Sets the classifier
184 *
185 * @param classifier the classifier with all options set.
186 */
187 public void setClassifier(Classifier classifier) {
188
189 m_Classifier = classifier;
190 }
191
192 /**
193 * Gets the classifier used.
194 *
195 * @return the classifier
196 */
197 public Classifier getClassifier() {
198
199 return m_Classifier;
200 }
201
202 /**
203 * Gets the classifier specification string, which contains the class name of
204 * the classifier and any options to the classifier
205 *
206 * @return the classifier string.
207 */
208 protected String getClassifierSpec() {
209
210 Classifier c = getClassifier();
211 if (c instanceof OptionHandler) {
212 return c.getClass().getName() + " "
213 + Utils.joinOptions(((OptionHandler)c).getOptions());
214 }
215 return c.getClass().getName();
216 }
217
218 /**
219 * Sets the filter
220 *
221 * @param filter the filter with all options set.
222 */
223 public void setFilter(Filter filter) {
224
225 m_Filter = filter;
226 }
227
228 /**
229 * Gets the filter used.
230 *
231 * @return the filter
232 */
233 public Filter getFilter() {
234
235 return m_Filter;
236 }
237
238 /**
239 * Gets the filter specification string, which contains the class name of
240 * the filter and any options to the filter
241 *
242 * @return the filter string.
243 */
244 protected String getFilterSpec() {
245
246 Filter c = getFilter();
247 if (c instanceof OptionHandler) {
248 return c.getClass().getName() + " "
249 + Utils.joinOptions(((OptionHandler)c).getOptions());
250 }
251 return c.getClass().getName();
252 }
253
254 /**
255 * Build the classifier on the filtered data.
256 *
257 * @param data the training data
258 * @exception Exception if the classifier could not be built successfully
259 */
260 public void buildClassifier(Instances data) throws Exception {
261
262 if (m_Classifier == null) {
263 throw new Exception("No base classifiers have been set!");
264 }
265 /*
266 String fname = m_Filter.getClass().getName();
267 fname = fname.substring(fname.lastIndexOf('.') + 1);
268 util.Timer t = util.Timer.getTimer("FilteredClassifier::" + fname);
269 t.start();
270 */
271 m_Filter.setInputFormat(data);
272 data = Filter.useFilter(data, m_Filter);
273 //t.stop();
274 m_FilteredInstances = data.stringFreeStructure();
275 m_Classifier.buildClassifier(data);
276 }
277
278 /**
279 * Classifies a given instance after filtering.
280 *
281 * @param instance the instance to be classified
282 * @exception Exception if instance could not be classified
283 * successfully
284 */
285 public double [] distributionForInstance(Instance instance)
286 throws Exception {
287
288 /*
289 System.err.println("FilteredClassifier:: "
290 + m_Filter.getClass().getName()
291 + " in: " + instance);
292 */
293 if (m_Filter.numPendingOutput() > 0) {
294 throw new Exception("Filter output queue not empty!");
295 }
296 /*
297 String fname = m_Filter.getClass().getName();
298 fname = fname.substring(fname.lastIndexOf('.') + 1);
299 util.Timer t = util.Timer.getTimer("FilteredClassifier::" + fname);
300 t.start();
301 */
302 if (!m_Filter.input(instance)) {
303 throw new Exception("Filter didn't make the test instance"
304 + " immediately available!");
305 }
306 m_Filter.batchFinished();
307 Instance newInstance = m_Filter.output();
308 //t.stop();
309 /*
310 System.err.println("FilteredClassifier:: "
311 + m_Filter.getClass().getName()
312 + " out: " + newInstance);
313 */
314 if (m_Classifier instanceof DistributionClassifier) {
315 return ((DistributionClassifier)m_Classifier)
316 .distributionForInstance(newInstance);
317 }
318 double pred = m_Classifier.classifyInstance(newInstance);
319 double [] result = new double [m_FilteredInstances.numClasses()];
320 if (Instance.isMissingValue(pred)) {
321 return result;
322 }
323 switch (instance.classAttribute().type()) {
324 case Attribute.NOMINAL:
325 result[(int) pred] = 1.0;
326 break;
327 case Attribute.NUMERIC:
328 result[0] = pred;
329 break;
330 default:
331 throw new Exception("Unknown class type");
332 }
333 return result;
334 }
335
336 /**
337 * Output a representation of this classifier
338 */
339 public String toString() {
340
341 if (m_FilteredInstances == null) {
342 return "FilteredClassifier: No model built yet.";
343 }
344
345 String result = "FilteredClassifier using "
346 + getClassifierSpec()
347 + " on data filtered through "
348 + getFilterSpec()
349 + "\n\nFiltered Header\n"
350 + m_FilteredInstances.toString()
351 + "\n\nClassifier Model\n"
352 + m_Classifier.toString();
353 return result;
354 }
355
356
357 /**
358 * Main method for testing this class.
359 *
360 * @param argv should contain the following arguments:
361 * -t training file [-T test file] [-c class index]
362 */
363 public static void main(String [] argv) {
364
365 try {
366 System.out.println("Evaluation disabled!");
367 //System.out.println(Evaluation.evaluateModel(new FilteredClassifier(),
368 // argv));
369 } catch (Exception e) {
370 System.err.println(e.getMessage());
371 }
372 }
373
374}
Note: See TracBrowser for help on using the repository browser.