source: trunk/gsdl/packages/kea/kea-3.0/weka/filters/Filter.java@ 8815

Last change on this file since 8815 was 8815, checked in by mdewsnip, 19 years ago

Kea 3.0, as downloaded from http://www.nzdl.org/kea but with CSTR_abstracts_test, CSTR_abstracts_train, Chinese_test, and Chinese_train directories removed.

  • Property svn:keywords set to Author Date Id Revision
File size: 29.8 KB
Line 
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17
18/*
19 * Filter.java
20 * Copyright (C) 1999 Len Trigg
21 *
22 */
23
24
25package weka.filters;
26
27import java.io.BufferedReader;
28import java.io.FileOutputStream;
29import java.io.FileReader;
30import java.io.InputStreamReader;
31import java.io.PrintWriter;
32import java.io.Reader;
33import java.io.Serializable;
34import java.util.Enumeration;
35import weka.core.Attribute;
36import weka.core.Instance;
37import weka.core.Instances;
38import weka.core.Option;
39import weka.core.OptionHandler;
40import weka.core.Queue;
41import weka.core.Utils;
42
43/**
44 * An abstract class for instance filters: objects that take instances
45 * as input, carry out some transformation on the instance and then
46 * output the instance. The method implementations in this class
47 * assume that most of the work will be done in the methods overridden
48 * by subclasses.<p>
49 *
50 * A simple example of filter use. This example doesn't remove
51 * instances from the output queue until all instances have been
52 * input, so has higher memory consumption than an approach that
53 * uses output instances as they are made available:<p>
54 *
55 * <code> <pre>
56 * Filter filter = ..some type of filter..
57 * Instances data = ..some instances..
58 * for (int i = 0; i < data.numInstances(); i++) {
59 * filter.input(data.instance(i));
60 * }
61 * filter.batchFinished();
62 * Instances newData = filter.outputFormat();
63 * Instance processed;
64 * while ((processed = filter.output()) != null) {
65 * newData.add(processed);
66 * }
67 * ..do something with newData..
68 * </pre> </code>
69 *
70 * @author Len Trigg (trigg@cs.waikato.ac.nz)
71 * @version $Revision: 8815 $
72 */
73public abstract class Filter implements Serializable {
74
75 /*
76 * Filter refactoring TODO:
77 *
78 * - Update all filters to use getOutputFormat and setInputFormat
79 * instead of outputFormat, outputFormatPeek and inputFormat.
80 * - Update users of filters to use getOutputFormat and setInputFormat
81 * - remove outputFormat, outputFormatPeek and inputFormat
82 *
83 */
84
85 /** Debugging mode */
86 private boolean m_Debug = false;
87
88 /** The output format for instances */
89 private Instances m_OutputFormat = null;
90
91 /** The output instance queue */
92 private Queue m_OutputQueue = null;
93
94 /** Indices of string attributes in the output format */
95 private int [] m_OutputStringAtts = null;
96
97 /** Indices of string attributes in the input format */
98 private int [] m_InputStringAtts = null;
99
100 /** The input format for instances */
101 private Instances m_InputFormat = null;
102
103 /** Record whether the filter is at the start of a batch */
104 protected boolean m_NewBatch = true;
105
106 /**
107 * Sets the format of output instances. The derived class should use this
108 * method once it has determined the outputformat. The
109 * output queue is cleared.
110 *
111 * @param outputFormat the new output format
112 */
113 protected void setOutputFormat(Instances outputFormat) {
114
115 if (outputFormat != null) {
116 m_OutputFormat = outputFormat.stringFreeStructure();
117 m_OutputStringAtts = getStringIndices(m_OutputFormat);
118
119 // Rename the attribute
120 String relationName = outputFormat.relationName()
121 + "-" + this.getClass().getName();
122 if (this instanceof OptionHandler) {
123 String [] options = ((OptionHandler)this).getOptions();
124 for (int i = 0; i < options.length; i++) {
125 relationName += options[i].trim();
126 }
127 }
128 m_OutputFormat.setRelationName(relationName);
129 } else {
130 m_OutputFormat = null;
131 }
132 m_OutputQueue = new Queue();
133 }
134
135 /**
136 * Gets the currently set inputformat instances. This dataset may contain
137 * buffered instances.
138 *
139 * @return the input Instances.
140 */
141 protected Instances getInputFormat() {
142
143 return m_InputFormat;
144 }
145
146 /**
147 * Returns a reference to the current output format without
148 * copying it.
149 *
150 * @return a reference to the current output format
151 */
152 protected Instances outputFormatPeek() {
153
154 return m_OutputFormat;
155 }
156
157 /**
158 * Adds an output instance to the queue. The derived class should use this
159 * method for each output instance it makes available.
160 *
161 * @param instance the instance to be added to the queue
162 */
163 protected void push(Instance instance) {
164
165 if (instance != null) {
166 copyStringValues(instance, m_OutputFormat, m_OutputStringAtts);
167 instance.setDataset(m_OutputFormat);
168 m_OutputQueue.push(instance);
169 }
170 }
171
172 /**
173 * Clears the output queue.
174 */
175 protected void resetQueue() {
176
177 m_OutputQueue = new Queue();
178 }
179
180 /**
181 * Adds the supplied input instance to the inputformat dataset for
182 * later processing. Use this method rather than
183 * getInputFormat().add(instance). Or else.
184 *
185 * @param instance the <code>Instance</code> to buffer.
186 */
187 protected void bufferInput(Instance instance) {
188
189 if (instance != null) {
190 copyStringValues(instance, m_InputFormat, m_InputStringAtts);
191 instance.setDataset(m_InputFormat);
192 m_InputFormat.add(instance);
193 }
194 }
195
196 /**
197 * Returns an array containing the indices of all string attributes in the
198 * input format. This index is created during setInputFormat()
199 *
200 * @return an array containing the indices of string attributes in the
201 * input dataset.
202 */
203 protected int [] getInputStringIndex() {
204
205 return m_InputStringAtts;
206 }
207
208 /**
209 * Returns an array containing the indices of all string attributes in the
210 * output format. This index is created during setOutputFormat()
211 *
212 * @return an array containing the indices of string attributes in the
213 * output dataset.
214 */
215 protected int [] getOutputStringIndex() {
216
217 return m_OutputStringAtts;
218 }
219
220 /**
221 * Copies string values contained in the instance copied to a new
222 * dataset. The Instance must already be assigned to a dataset. This
223 * dataset and the destination dataset must have the same structure.
224 *
225 * @param instance the Instance containing the string values to copy.
226 * @param destDataset the destination set of Instances
227 * @param strAtts an array containing the indices of any string attributes
228 * in the dataset.
229 */
230 private void copyStringValues(Instance inst, Instances destDataset,
231 int []strAtts) {
232
233 if (strAtts.length == 0) {
234 return;
235 }
236 if (inst.dataset() == null) {
237 throw new IllegalArgumentException("Instance has no dataset assigned!!");
238 } else if (inst.dataset().numAttributes() != destDataset.numAttributes()) {
239 throw new IllegalArgumentException("Src and Dest differ in # of attributes!!");
240 }
241 copyStringValues(inst, true, inst.dataset(), strAtts,
242 destDataset, strAtts);
243 }
244
245 /**
246 * Takes string values referenced by an Instance and copies them from a
247 * source dataset to a destination dataset. The instance references are
248 * updated to be valid for the destination dataset. The instance may have the
249 * structure (i.e. number and attribute position) of either dataset (this
250 * affects where references are obtained from). The source dataset must
251 * have the same structure as the filter input format and the destination
252 * must have the same structure as the filter output format.
253 *
254 * @param instance the instance containing references to strings in the source
255 * dataset that will have references updated to be valid for the destination
256 * dataset.
257 * @param instSrcCompat true if the instance structure is the same as the
258 * source, or false if it is the same as the destination
259 * @param srcDataset the dataset for which the current instance string
260 * references are valid (after any position mapping if needed)
261 * @param destDataset the dataset for which the current instance string
262 * references need to be inserted (after any position mapping if needed)
263 */
264 protected void copyStringValues(Instance instance, boolean instSrcCompat,
265 Instances srcDataset, Instances destDataset) {
266
267 copyStringValues(instance, instSrcCompat, srcDataset, m_InputStringAtts,
268 destDataset, m_OutputStringAtts);
269 }
270
271 /**
272 * Takes string values referenced by an Instance and copies them from a
273 * source dataset to a destination dataset. The instance references are
274 * updated to be valid for the destination dataset. The instance may have the
275 * structure (i.e. number and attribute position) of either dataset (this
276 * affects where references are obtained from). Only works if the number
277 * of string attributes is the same in both indices (implicitly these string
278 * attributes should be semantically same but just with shifted positions).
279 *
280 * @param instance the instance containing references to strings in the source
281 * dataset that will have references updated to be valid for the destination
282 * dataset.
283 * @param instSrcCompat true if the instance structure is the same as the
284 * source, or false if it is the same as the destination (i.e. which of the
285 * string attribute indices contains the correct locations for this instance).
286 * @param srcDataset the dataset for which the current instance string
287 * references are valid (after any position mapping if needed)
288 * @param srcStrAtts an array containing the indices of string attributes
289 * in the source datset.
290 * @param destDataset the dataset for which the current instance string
291 * references need to be inserted (after any position mapping if needed)
292 * @param destStrAtts an array containing the indices of string attributes
293 * in the destination datset.
294 */
295 protected void copyStringValues(Instance instance, boolean instSrcCompat,
296 Instances srcDataset, int []srcStrAtts,
297 Instances destDataset, int []destStrAtts) {
298 if (srcDataset == destDataset) {
299 return;
300 }
301 if (srcStrAtts.length != destStrAtts.length) {
302 throw new IllegalArgumentException("Src and Dest string indices differ in length!!");
303 }
304 for (int i = 0; i < srcStrAtts.length; i++) {
305 int instIndex = instSrcCompat ? srcStrAtts[i] : destStrAtts[i];
306 Attribute src = srcDataset.attribute(srcStrAtts[i]);
307 Attribute dest = destDataset.attribute(destStrAtts[i]);
308 if (!instance.isMissing(instIndex)) {
309 //System.err.println(instance.value(srcIndex)
310 // + " " + src.numValues()
311 // + " " + dest.numValues());
312 int valIndex = dest.addStringValue(src, (int)instance.value(instIndex));
313 // setValue here shouldn't be too slow here unless your dataset has
314 // squillions of string attributes
315 instance.setValue(instIndex, (double)valIndex);
316 }
317 }
318 }
319
320 /**
321 * This will remove all buffered instances from the inputformat dataset.
322 * Use this method rather than getInputFormat().delete();
323 */
324 protected void flushInput() {
325
326 if (m_InputStringAtts.length > 0) {
327 m_InputFormat = m_InputFormat.stringFreeStructure();
328 } else {
329 // This more efficient than new Instances(m_InputFormat, 0);
330 m_InputFormat.delete();
331 }
332 }
333
334 /**
335 * @deprecated use <code>setInputFormat(Instances)</code> instead.
336 */
337 public boolean inputFormat(Instances instanceInfo) throws Exception {
338
339 return setInputFormat(instanceInfo);
340 }
341
342 /**
343 * Sets the format of the input instances. If the filter is able to
344 * determine the output format before seeing any input instances, it
345 * does so here. This default implementation clears the output format
346 * and output queue, and the new batch flag is set. Overriders should
347 * call <code>super.setInputFormat(Instances)</code>
348 *
349 * @param instanceInfo an Instances object containing the input instance
350 * structure (any instances contained in the object are ignored - only the
351 * structure is required).
352 * @return true if the outputFormat may be collected immediately
353 * @exception Exception if the inputFormat can't be set successfully
354 */
355 public boolean setInputFormat(Instances instanceInfo) throws Exception {
356
357 m_InputFormat = instanceInfo.stringFreeStructure();
358 m_InputStringAtts = getStringIndices(instanceInfo);
359 m_OutputFormat = null;
360 m_OutputQueue = new Queue();
361 m_NewBatch = true;
362 return false;
363 }
364
365 /**
366 * @deprecated use <code>getOutputFormat()</code> instead.
367 */
368 public final Instances outputFormat() {
369
370 return getOutputFormat();
371 }
372
373 /**
374 * Gets the format of the output instances. This should only be called
375 * after input() or batchFinished() has returned true. The relation
376 * name of the output instances should be changed to reflect the
377 * action of the filter (eg: add the filter name and options).
378 *
379 * @return an Instances object containing the output instance
380 * structure only.
381 * @exception NullPointerException if no input structure has been
382 * defined (or the output format hasn't been determined yet)
383 */
384 public final Instances getOutputFormat() {
385
386 if (m_OutputFormat == null) {
387 throw new NullPointerException("No output format defined.");
388 }
389 return new Instances(m_OutputFormat, 0);
390 }
391
392 /**
393 * Input an instance for filtering. Ordinarily the instance is
394 * processed and made available for output immediately. Some filters
395 * require all instances be read before producing output, in which
396 * case output instances should be collected after calling
397 * batchFinished(). If the input marks the start of a new batch, the
398 * output queue is cleared. This default implementation assumes all
399 * instance conversion will occur when batchFinished() is called.
400 *
401 * @param instance the input instance
402 * @return true if the filtered instance may now be
403 * collected with output().
404 * @exception NullPointerException if the input format has not been
405 * defined.
406 * @exception Exception if the input instance was not of the correct
407 * format or if there was a problem with the filtering.
408 */
409 public boolean input(Instance instance) throws Exception {
410
411 if (m_InputFormat == null) {
412 throw new NullPointerException("No input instance format defined");
413 }
414 if (m_NewBatch) {
415 m_OutputQueue = new Queue();
416 m_NewBatch = false;
417 }
418 bufferInput(instance);
419 return false;
420 }
421
422 /**
423 * Signify that this batch of input to the filter is finished. If
424 * the filter requires all instances prior to filtering, output()
425 * may now be called to retrieve the filtered instances. Any
426 * subsequent instances filtered should be filtered based on setting
427 * obtained from the first batch (unless the inputFormat has been
428 * re-assigned or new options have been set). This default
429 * implementation assumes all instance processing occurs during
430 * inputFormat() and input().
431 *
432 * @return true if there are instances pending output
433 * @exception NullPointerException if no input structure has been defined,
434 * @exception Exception if there was a problem finishing the batch.
435 */
436 public boolean batchFinished() throws Exception {
437
438 if (m_InputFormat == null) {
439 throw new NullPointerException("No input instance format defined");
440 }
441 flushInput();
442 m_NewBatch = true;
443 return (numPendingOutput() != 0);
444 }
445
446
447 /**
448 * Output an instance after filtering and remove from the output queue.
449 *
450 * @return the instance that has most recently been filtered (or null if
451 * the queue is empty).
452 * @exception NullPointerException if no output structure has been defined
453 */
454 public Instance output() {
455
456 if (m_OutputFormat == null) {
457 throw new NullPointerException("No output instance format defined");
458 }
459 if (m_OutputQueue.empty()) {
460 return null;
461 }
462 Instance result = (Instance)m_OutputQueue.pop();
463 // Clear out references to old strings occasionally
464 if (m_OutputQueue.empty() && m_NewBatch) {
465 if (m_OutputStringAtts.length > 0) {
466 m_OutputFormat = m_OutputFormat.stringFreeStructure();
467 }
468 }
469 return result;
470 }
471
472 /**
473 * Output an instance after filtering but do not remove from the
474 * output queue.
475 *
476 * @return the instance that has most recently been filtered (or null if
477 * the queue is empty).
478 * @exception NullPointerException if no input structure has been defined
479 */
480 public Instance outputPeek() {
481
482 if (m_OutputFormat == null) {
483 throw new NullPointerException("No output instance format defined");
484 }
485 if (m_OutputQueue.empty()) {
486 return null;
487 }
488 Instance result = (Instance)m_OutputQueue.peek();
489 return result;
490 }
491
492 /**
493 * Returns the number of instances pending output
494 *
495 * @return the number of instances pending output
496 * @exception NullPointerException if no input structure has been defined
497 */
498 public int numPendingOutput() {
499
500 if (m_OutputFormat == null) {
501 throw new NullPointerException("No output instance format defined");
502 }
503 return m_OutputQueue.size();
504 }
505
506 /**
507 * Returns whether the output format is ready to be collected
508 *
509 * @return true if the output format is set
510 */
511 public boolean isOutputFormatDefined() {
512
513 return (m_OutputFormat != null);
514 }
515
516 /**
517 * Gets an array containing the indices of all string attributes.
518 *
519 * @param insts the Instances to scan for string attributes.
520 * @return an array containing the indices of string attributes in
521 * the input structure. Will be zero-length if there are no
522 * string attributes
523 */
524 protected int [] getStringIndices(Instances insts) {
525
526 // Scan through getting the indices of String attributes
527 int [] index = new int [insts.numAttributes()];
528 int indexSize = 0;
529 for (int i = 0; i < insts.numAttributes(); i++) {
530 if (insts.attribute(i).type() == Attribute.STRING) {
531 index[indexSize++] = i;
532 }
533 }
534 int [] result = new int [indexSize];
535 System.arraycopy(index, 0, result, 0, indexSize);
536 return result;
537 }
538
539 /**
540 * Filters an entire set of instances through a filter and returns
541 * the new set.
542 *
543 * @param data the data to be filtered
544 * @param filter the filter to be used
545 * @return the filtered set of data
546 * @exception Exception if the filter can't be used successfully
547 */
548 public static Instances useFilter(Instances data,
549 Filter filter) throws Exception {
550 /*
551 System.err.println(filter.getClass().getName()
552 + " in:" + data.numInstances());
553 */
554 for (int i = 0; i < data.numInstances(); i++) {
555 filter.input(data.instance(i));
556 }
557 filter.batchFinished();
558 Instances newData = filter.getOutputFormat();
559 Instance processed;
560 while ((processed = filter.output()) != null) {
561 newData.add(processed);
562 }
563
564 /*
565 System.err.println(filter.getClass().getName()
566 + " out:" + newData.numInstances());
567 */
568 return newData;
569 }
570
571 /**
572 * Method for testing filters.
573 *
574 * @param argv should contain the following arguments: <br>
575 * -i input_file <br>
576 * -o output_file <br>
577 * -c class_index <br>
578 * or -h for help on options
579 * @exception Exception if something goes wrong or the user requests help on
580 * command options
581 */
582 public static void filterFile(Filter filter, String [] options)
583 throws Exception {
584
585 boolean debug = false;
586 Instances data = null;
587 Reader input = null;
588 PrintWriter output = null;
589 boolean helpRequest;
590
591 try {
592 helpRequest = Utils.getFlag('h', options);
593
594 if (Utils.getFlag('d', options)) {
595 debug = true;
596 }
597 String infileName = Utils.getOption('i', options);
598 String outfileName = Utils.getOption('o', options);
599 String classIndex = Utils.getOption('c', options);
600
601 if (filter instanceof OptionHandler) {
602 ((OptionHandler)filter).setOptions(options);
603 }
604
605 Utils.checkForRemainingOptions(options);
606 if (helpRequest) {
607 throw new Exception("Help requested.\n");
608 }
609 if (infileName.length() != 0) {
610 input = new BufferedReader(new FileReader(infileName));
611 } else {
612 input = new BufferedReader(new InputStreamReader(System.in));
613 }
614 if (outfileName.length() != 0) {
615 output = new PrintWriter(new FileOutputStream(outfileName));
616 } else {
617 output = new PrintWriter(System.out);
618 }
619
620 data = new Instances(input, 1);
621 if (classIndex.length() != 0) {
622 if (classIndex.equals("first")) {
623 data.setClassIndex(0);
624 } else if (classIndex.equals("last")) {
625 data.setClassIndex(data.numAttributes() - 1);
626 } else {
627 data.setClassIndex(Integer.parseInt(classIndex) - 1);
628 }
629 }
630 } catch (Exception ex) {
631 String filterOptions = "";
632 // Output the error and also the valid options
633 if (filter instanceof OptionHandler) {
634 filterOptions += "\nFilter options:\n\n";
635 Enumeration enum = ((OptionHandler)filter).listOptions();
636 while (enum.hasMoreElements()) {
637 Option option = (Option) enum.nextElement();
638 filterOptions += option.synopsis() + '\n'
639 + option.description() + "\n";
640 }
641 }
642
643 String genericOptions = "\nGeneral options:\n\n"
644 + "-h\n"
645 + "\tGet help on available options.\n"
646 + "\t(use -b -h for help on batch mode.)\n"
647 + "-i <file>\n"
648 + "\tThe name of the file containing input instances.\n"
649 + "\tIf not supplied then instances will be read from stdin.\n"
650 + "-o <file>\n"
651 + "\tThe name of the file output instances will be written to.\n"
652 + "\tIf not supplied then instances will be written to stdout.\n"
653 + "-c <class index>\n"
654 + "\tThe number of the attribute to use as the class.\n"
655 + "\t\"first\" and \"last\" are also valid entries.\n"
656 + "\tIf not supplied then no class is assigned.\n";
657
658 throw new Exception('\n' + ex.getMessage()
659 + filterOptions+genericOptions);
660 }
661
662 if (debug) {
663 System.err.println("Setting input format");
664 }
665 boolean printedHeader = false;
666 if (filter.setInputFormat(data)) {
667 if (debug) {
668 System.err.println("Getting output format");
669 }
670 output.println(filter.getOutputFormat().toString());
671 printedHeader = true;
672 }
673
674 // Pass all the instances to the filter
675 while (data.readInstance(input)) {
676 if (debug) {
677 System.err.println("Input instance to filter");
678 }
679 if (filter.input(data.instance(0))) {
680 if (debug) {
681 System.err.println("Filter said collect immediately");
682 }
683 if (!printedHeader) {
684 throw new Error("Filter didn't return true from setInputFormat() "
685 + "earlier!");
686 }
687 if (debug) {
688 System.err.println("Getting output instance");
689 }
690 output.println(filter.output().toString());
691 }
692 data.delete(0);
693 }
694
695 // Say that input has finished, and print any pending output instances
696 if (debug) {
697 System.err.println("Setting end of batch");
698 }
699 if (filter.batchFinished()) {
700 if (debug) {
701 System.err.println("Filter said collect output");
702 }
703 if (!printedHeader) {
704 if (debug) {
705 System.err.println("Getting output format");
706 }
707 output.println(filter.getOutputFormat().toString());
708 }
709 if (debug) {
710 System.err.println("Getting output instance");
711 }
712 while (filter.numPendingOutput() > 0) {
713 output.println(filter.output().toString());
714 if (debug){
715 System.err.println("Getting output instance");
716 }
717 }
718 }
719 if (debug) {
720 System.err.println("Done");
721 }
722
723 if (output != null) {
724 output.close();
725 }
726 }
727
728 /**
729 * Method for testing filters ability to process multiple batches.
730 *
731 * @param argv should contain the following arguments:<br>
732 * -i (first) input file <br>
733 * -o (first) output file <br>
734 * -r (second) input file <br>
735 * -s (second) output file <br>
736 * -c class_index <br>
737 * or -h for help on options
738 * @exception Exception if something goes wrong or the user requests help on
739 * command options
740 */
741 public static void batchFilterFile(Filter filter, String [] options)
742 throws Exception {
743
744 Instances firstData = null;
745 Instances secondData = null;
746 Reader firstInput = null;
747 Reader secondInput = null;
748 PrintWriter firstOutput = null;
749 PrintWriter secondOutput = null;
750 boolean helpRequest;
751 try {
752 helpRequest = Utils.getFlag('h', options);
753
754 String fileName = Utils.getOption('i', options);
755 if (fileName.length() != 0) {
756 firstInput = new BufferedReader(new FileReader(fileName));
757 } else {
758 throw new Exception("No first input file given.\n");
759 }
760
761 fileName = Utils.getOption('r', options);
762 if (fileName.length() != 0) {
763 secondInput = new BufferedReader(new FileReader(fileName));
764 } else {
765 throw new Exception("No second input file given.\n");
766 }
767
768 fileName = Utils.getOption('o', options);
769 if (fileName.length() != 0) {
770 firstOutput = new PrintWriter(new FileOutputStream(fileName));
771 } else {
772 firstOutput = new PrintWriter(System.out);
773 }
774
775 fileName = Utils.getOption('s', options);
776 if (fileName.length() != 0) {
777 secondOutput = new PrintWriter(new FileOutputStream(fileName));
778 } else {
779 secondOutput = new PrintWriter(System.out);
780 }
781 String classIndex = Utils.getOption('c', options);
782
783 if (filter instanceof OptionHandler) {
784 ((OptionHandler)filter).setOptions(options);
785 }
786 Utils.checkForRemainingOptions(options);
787
788 if (helpRequest) {
789 throw new Exception("Help requested.\n");
790 }
791 firstData = new Instances(firstInput, 1);
792 secondData = new Instances(secondInput, 1);
793 if (!secondData.equalHeaders(firstData)) {
794 throw new Exception("Input file formats differ.\n");
795 }
796 if (classIndex.length() != 0) {
797 if (classIndex.equals("first")) {
798 firstData.setClassIndex(0);
799 secondData.setClassIndex(0);
800 } else if (classIndex.equals("last")) {
801 firstData.setClassIndex(firstData.numAttributes() - 1);
802 secondData.setClassIndex(secondData.numAttributes() - 1);
803 } else {
804 firstData.setClassIndex(Integer.parseInt(classIndex) - 1);
805 secondData.setClassIndex(Integer.parseInt(classIndex) - 1);
806 }
807 }
808 } catch (Exception ex) {
809 String filterOptions = "";
810 // Output the error and also the valid options
811 if (filter instanceof OptionHandler) {
812 filterOptions += "\nFilter options:\n\n";
813 Enumeration enum = ((OptionHandler)filter).listOptions();
814 while (enum.hasMoreElements()) {
815 Option option = (Option) enum.nextElement();
816 filterOptions += option.synopsis() + '\n'
817 + option.description() + "\n";
818 }
819 }
820
821 String genericOptions = "\nGeneral options:\n\n"
822 + "-h\n"
823 + "\tGet help on available options.\n"
824 + "-i <filename>\n"
825 + "\tThe file containing first input instances.\n"
826 + "-o <filename>\n"
827 + "\tThe file first output instances will be written to.\n"
828 + "-r <filename>\n"
829 + "\tThe file containing second input instances.\n"
830 + "-s <filename>\n"
831 + "\tThe file second output instances will be written to.\n"
832 + "-c <class index>\n"
833 + "\tThe number of the attribute to use as the class.\n"
834 + "\t\"first\" and \"last\" are also valid entries.\n"
835 + "\tIf not supplied then no class is assigned.\n";
836
837 throw new Exception('\n' + ex.getMessage()
838 + filterOptions+genericOptions);
839 }
840 boolean printedHeader = false;
841 if (filter.setInputFormat(firstData)) {
842 firstOutput.println(filter.getOutputFormat().toString());
843 printedHeader = true;
844 }
845
846 // Pass all the instances to the filter
847 while (firstData.readInstance(firstInput)) {
848 if (filter.input(firstData.instance(0))) {
849 if (!printedHeader) {
850 throw new Error("Filter didn't return true from setInputFormat() "
851 + "earlier!");
852 }
853 firstOutput.println(filter.output().toString());
854 }
855 firstData.delete(0);
856 }
857
858 // Say that input has finished, and print any pending output instances
859 if (filter.batchFinished()) {
860 if (!printedHeader) {
861 firstOutput.println(filter.getOutputFormat().toString());
862 }
863 while (filter.numPendingOutput() > 0) {
864 firstOutput.println(filter.output().toString());
865 }
866 }
867
868 if (firstOutput != null) {
869 firstOutput.close();
870 }
871 printedHeader = false;
872 if (filter.isOutputFormatDefined()) {
873 secondOutput.println(filter.getOutputFormat().toString());
874 printedHeader = true;
875 }
876 // Pass all the second instances to the filter
877 while (secondData.readInstance(secondInput)) {
878 if (filter.input(secondData.instance(0))) {
879 if (!printedHeader) {
880 throw new Error("Filter didn't return true from"
881 + " isOutputFormatDefined() earlier!");
882 }
883 secondOutput.println(filter.output().toString());
884 }
885 secondData.delete(0);
886 }
887
888 // Say that input has finished, and print any pending output instances
889 if (filter.batchFinished()) {
890 if (!printedHeader) {
891 secondOutput.println(filter.getOutputFormat().toString());
892 }
893 while (filter.numPendingOutput() > 0) {
894 secondOutput.println(filter.output().toString());
895 }
896 }
897 if (secondOutput != null) {
898 secondOutput.close();
899 }
900 }
901
902 /**
903 * Main method for testing this class.
904 *
905 * @param argv should contain arguments to the filter: use -h for help
906 */
907 public static void main(String [] args) {
908
909 try {
910 if (args.length == 0) {
911 throw new Exception("First argument must be the class name of a Filter");
912 }
913 String fname = args[0];
914 Filter f = (Filter)Class.forName(fname).newInstance();
915 args[0] = "";
916 if (Utils.getFlag('b', args)) {
917 Filter.batchFilterFile(f, args);
918 } else {
919 Filter.filterFile(f, args);
920 }
921 } catch (Exception ex) {
922 ex.printStackTrace();
923 System.out.println(ex.getMessage());
924 }
925 }
926}
927
928
929
930
931
932
933
934
Note: See TracBrowser for help on using the repository browser.