source: trunk/gsdl/packages/kea/kea-3.0/weka/core/Attribute.java@ 8815

Last change on this file since 8815 was 8815, checked in by mdewsnip, 19 years ago

Kea 3.0, as downloaded from http://www.nzdl.org/kea but with CSTR_abstracts_test, CSTR_abstracts_train, Chinese_test, and Chinese_train directories removed.

  • Property svn:keywords set to Author Date Id Revision
File size: 19.0 KB
Line 
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * Attribute.java
19 * Copyright (C) 1999 Eibe Frank
20 *
21 */
22
23package weka.core;
24
25import java.io.*;
26import java.util.*;
27
28/**
29 * Class for handling an attribute. Once an attribute has been created,
30 * it can't be changed. <p>
31 *
32 * Three attribute types are supported:
33 * <ul>
34 * <li> numeric: <ul>
35 * This type of attribute represents a floating-point number.
36 * </ul>
37 * <li> nominal: <ul>
38 * This type of attribute represents a fixed set of nominal values.
39 * </ul>
40 * <li> string: <ul>
41 * This type of attribute represents a dynamically expanding set of
42 * nominal values. String attributes are not used by the learning
43 * schemes in Weka. They can be used, for example, to store an
44 * identifier with each instance in a dataset.
45 * </ul>
46 * </ul>
47 * Typical usage (code from the main() method of this class): <p>
48 *
49 * <code>
50 * ... <br>
51 *
52 * // Create numeric attributes "length" and "weight" <br>
53 * Attribute length = new Attribute("length"); <br>
54 * Attribute weight = new Attribute("weight"); <br><br>
55 *
56 * // Create vector to hold nominal values "first", "second", "third" <br>
57 * FastVector my_nominal_values = new FastVector(3); <br>
58 * my_nominal_values.addElement("first"); <br>
59 * my_nominal_values.addElement("second"); <br>
60 * my_nominal_values.addElement("third"); <br><br>
61 *
62 * // Create nominal attribute "position" <br>
63 * Attribute position = new Attribute("position", my_nominal_values);<br>
64 *
65 * ... <br>
66 * </code><p>
67 *
68 * @author Eibe Frank ([email protected])
69 * @version $Revision: 8815 $
70 */
71public class Attribute implements Copyable, Serializable {
72
73 /** Constant set for numeric attributes. */
74 public final static int NUMERIC = 0;
75
76 /** Constant set for nominal attributes. */
77 public final static int NOMINAL = 1;
78
79 /** Constant set for attributes with string values. */
80 public final static int STRING = 2;
81
82 /** Strings longer than this will be stored compressed. */
83 private final static int STRING_COMPRESS_THRESHOLD = 200;
84 //private final static int STRING_COMPRESS_THRESHOLD = Integer.MAX_VALUE;
85
86 /** The attribute's name. */
87 private String m_Name;
88
89 /** The attribute's type. */
90 private int m_Type;
91
92 /** The attribute's values (if nominal or string). */
93 private FastVector m_Values;
94
95 /** Mapping of values to indices (if nominal or string). */
96 private Hashtable m_Hashtable;
97
98 /** The attribute's index. */
99 private int m_Index;
100
101 /**
102 * Constructor for a numeric attribute.
103 *
104 * @param attributeName the name for the attribute
105 */
106 public Attribute(String attributeName) {
107
108 m_Name = attributeName;
109 m_Index = -1;
110 m_Values = null;
111 m_Hashtable = null;
112 m_Type = NUMERIC;
113 }
114
115 /**
116 * Constructor for nominal attributes and string attributes.
117 * If a null vector of attribute values is passed to the method,
118 * the attribute is assumed to be a string.
119 *
120 * @param attributeName the name for the attribute
121 * @param attributeValues a vector of strings denoting the
122 * attribute values. Null if the attribute is a string attribute.
123 */
124 public Attribute(String attributeName,
125 FastVector attributeValues) {
126
127 m_Name = attributeName;
128 m_Index = -1;
129 if (attributeValues == null) {
130 m_Values = new FastVector();
131 m_Hashtable = new Hashtable();
132 m_Type = STRING;
133 } else {
134 m_Values = (FastVector) attributeValues.copy();
135 m_Hashtable = new Hashtable(m_Values.size());
136 for (int i = 0; i < m_Values.size(); i++) {
137 m_Hashtable.put(m_Values.elementAt(i), new Integer(i));
138 }
139 m_Type = NOMINAL;
140 }
141 }
142
143 /**
144 * Produces a shallow copy of this attribute.
145 *
146 * @return a copy of this attribute with the same index
147 */
148 public Object copy() {
149
150 Attribute copy = new Attribute(m_Name);
151
152 copy.m_Index = m_Index;
153 if (!isNominal() && !isString())
154 return copy;
155 copy.m_Type = m_Type;
156 copy.m_Values = m_Values;
157 copy.m_Hashtable = m_Hashtable;
158
159 return copy;
160 }
161
162 /**
163 * Returns an enumeration of all the attribute's values if
164 * the attribute is nominal or a string, null otherwise.
165 *
166 * @return enumeration of all the attribute's values
167 */
168 public final Enumeration enumerateValues() {
169
170 if (isNominal() || isString()) {
171 final Enumeration ee = m_Values.elements();
172 return new Enumeration () {
173 public boolean hasMoreElements() {
174 return ee.hasMoreElements();
175 }
176 public Object nextElement() {
177 Object oo = ee.nextElement();
178 if (oo instanceof SerializedObject) {
179 return ((SerializedObject)oo).getObject();
180 } else {
181 return oo;
182 }
183 }
184 };
185 }
186 return null;
187 }
188
189 /**
190 * Tests if given attribute is equal to this attribute.
191 *
192 * @param other the Object to be compared to this attribute
193 * @return true if the given attribute is equal to this attribute
194 */
195 public final boolean equals(Object other) {
196
197 if ((other == null) || !(other.getClass().equals(this.getClass()))) {
198 return false;
199 }
200 Attribute att = (Attribute) other;
201 if (!m_Name.equals(att.m_Name)) {
202 return false;
203 }
204 if (isNumeric() && att.isNumeric()) {
205 return true;
206 }
207 if (isNumeric() || att.isNumeric()) {
208 return false;
209 }
210 if (m_Values.size() != att.m_Values.size()) {
211 return false;
212 }
213 for (int i = 0; i < m_Values.size(); i++) {
214 if (!m_Values.elementAt(i).equals(att.m_Values.elementAt(i))) {
215 return false;
216 }
217 }
218 return true;
219 }
220
221 /**
222 * Returns the index of this attribute.
223 *
224 * @return the index of this attribute
225 */
226 public final int index() {
227
228 return m_Index;
229 }
230
231 /**
232 * Returns the index of a given attribute value. (The index of
233 * the first occurence of this value.)
234 *
235 * @param value the value for which the index is to be returned
236 * @return the index of the given attribute value if attribute
237 * is nominal or a string, -1 if it is numeric or the value
238 * can't be found
239 */
240 public final int indexOfValue(String value) {
241
242 if (!isNominal() && !isString())
243 return -1;
244 Object store = value;
245 if (value.length() > STRING_COMPRESS_THRESHOLD) {
246 try {
247 store = new SerializedObject(value, true);
248 } catch (Exception ex) {
249 System.err.println("Couldn't compress string attribute value -"
250 + " searching uncompressed.");
251 }
252 }
253 Integer val = (Integer)m_Hashtable.get(store);
254 if (val == null) {
255 throw new IllegalArgumentException("Value \"" + value +
256 "\" not found in attribute " +
257 "declaration!");
258 } else {
259 return val.intValue();
260 }
261 }
262
263 /**
264 * Test if the attribute is nominal.
265 *
266 * @return true if the attribute is nominal
267 */
268 public final boolean isNominal() {
269
270 return (m_Type == NOMINAL);
271 }
272
273 /**
274 * Tests if the attribute is numeric.
275 *
276 * @return true if the attribute is numeric
277 */
278 public final boolean isNumeric() {
279
280 return (m_Type == NUMERIC);
281 }
282
283 /**
284 * Tests if the attribute is a string.
285 *
286 * @return true if the attribute is a string
287 */
288 public final boolean isString() {
289
290 return (m_Type == STRING);
291 }
292
293 /**
294 * Returns the attribute's name.
295 *
296 * @return the attribute's name as a string
297 */
298 public final String name() {
299
300 return m_Name;
301 }
302
303 /**
304 * Returns the number of attribute values. Returns 0 for numeric attributes.
305 *
306 * @return the number of attribute values
307 */
308 public final int numValues() {
309
310 if (!isNominal() && !isString()) {
311 return 0;
312 } else {
313 return m_Values.size();
314 }
315 }
316
317 /**
318 * Returns a description of this attribute in ARFF format. Quotes
319 * strings if they contain whitespace characters, or if they
320 * are a question mark.
321 *
322 * @return a description of this attribute as a string
323 */
324 public final String toString() {
325
326 StringBuffer text = new StringBuffer();
327
328 text.append("@attribute " + Utils.quote(m_Name) + " ");
329 if (isNominal()) {
330 text.append('{');
331 Enumeration enum = enumerateValues();
332 while (enum.hasMoreElements()) {
333 text.append(Utils.quote((String) enum.nextElement()));
334 if (enum.hasMoreElements())
335 text.append(',');
336 }
337 text.append('}');
338 } else {
339 if (isNumeric()) {
340 text.append("numeric");
341 } else {
342 text.append("string");
343 }
344 }
345 return text.toString();
346 }
347
348 /**
349 * Returns the attribute's type as an integer.
350 *
351 * @returns the attribute's type.
352 */
353 public final int type() {
354
355 return m_Type;
356 }
357
358 /**
359 * Returns a value of a nominal or string attribute.
360 * Returns an empty string if the attribute is neither
361 * nominal nor a string attribute.
362 *
363 * @param valIndex the value's index
364 * @return the attribute's value as a string
365 */
366 public final String value(int valIndex) {
367
368 if (!isNominal() && !isString()) {
369 return "";
370 } else {
371 Object val = m_Values.elementAt(valIndex);
372
373 // If we're storing strings compressed, uncompress it.
374 if (val instanceof SerializedObject) {
375 val = ((SerializedObject)val).getObject();
376 }
377 return (String) val;
378 }
379 }
380
381 /**
382 * Constructor for a numeric attribute with a particular index.
383 *
384 * @param attributeName the name for the attribute
385 * @param index the attribute's index
386 */
387 Attribute(String attributeName, int index) {
388
389 this(attributeName);
390
391 m_Index = index;
392 }
393
394 /**
395 * Constructor for nominal attributes and string attributes with
396 * a particular index.
397 * If a null vector of attribute values is passed to the method,
398 * the attribute is assumed to be a string.
399 *
400 * @param attributeName the name for the attribute
401 * @param attributeValues a vector of strings denoting the attribute values.
402 * Null if the attribute is a string attribute.
403 * @param index the attribute's index
404 */
405 Attribute(String attributeName, FastVector attributeValues,
406 int index) {
407
408 this(attributeName, attributeValues);
409
410 m_Index = index;
411 }
412
413 /**
414 * Adds a string value to the list of valid strings for attributes
415 * of type STRING and returns the index of the string.
416 *
417 * @param value The string value to add
418 * @return the index assigned to the string, or -1 if the attribute is not
419 * of type Attribute.STRING
420 */
421 public int addStringValue(String value) {
422
423 if (!isString()) {
424 return -1;
425 }
426 Object store = value;
427
428 if (value.length() > STRING_COMPRESS_THRESHOLD) {
429 try {
430 store = new SerializedObject(value, true);
431 } catch (Exception ex) {
432 System.err.println("Couldn't compress string attribute value -"
433 + " storing uncompressed.");
434 }
435 }
436 Integer index = (Integer)m_Hashtable.get(store);
437 if (index != null) {
438 return index.intValue();
439 } else {
440 int intIndex = m_Values.size();
441 m_Values.addElement(store);
442 m_Hashtable.put(store, new Integer(intIndex));
443 return intIndex;
444 }
445 }
446
447 /**
448 * Adds a string value to the list of valid strings for attributes
449 * of type STRING and returns the index of the string. This method is
450 * more efficient than addStringValue(String) for long strings.
451 *
452 * @param src The Attribute containing the string value to add.
453 * @param int index the index of the string value in the source attribute.
454 * @return the index assigned to the string, or -1 if the attribute is not
455 * of type Attribute.STRING
456 */
457 public int addStringValue(Attribute src, int index) {
458
459 if (!isString()) {
460 return -1;
461 }
462 Object store = src.m_Values.elementAt(index);
463 Integer oldIndex = (Integer)m_Hashtable.get(store);
464 if (oldIndex != null) {
465 return oldIndex.intValue();
466 } else {
467 int intIndex = m_Values.size();
468 m_Values.addElement(store);
469 m_Hashtable.put(store, new Integer(intIndex));
470 return intIndex;
471 }
472 }
473
474 /**
475 * Adds an attribute value. Creates a fresh list of attribute
476 * values before adding it.
477 *
478 * @param value the attribute value
479 */
480 final void addValue(String value) {
481
482 m_Values = (FastVector)m_Values.copy();
483 m_Hashtable = (Hashtable)m_Hashtable.clone();
484 forceAddValue(value);
485 }
486
487 /**
488 * Produces a shallow copy of this attribute with a new name.
489 *
490 * @param newName the name of the new attribute
491 * @return a copy of this attribute with the same index
492 */
493 final Attribute copy(String newName) {
494
495 Attribute copy = new Attribute(newName);
496
497 copy.m_Index = m_Index;
498 if (!isNominal() && !isString())
499 return copy;
500 copy.m_Type = m_Type;
501 copy.m_Values = m_Values;
502 copy.m_Hashtable = m_Hashtable;
503
504 return copy;
505 }
506
507 /**
508 * Removes a value of a nominal or string attribute. Creates a
509 * fresh list of attribute values before removing it.
510 *
511 * @param index the value's index
512 * @exception IllegalArgumentException if the attribute is not nominal
513 */
514 final void delete(int index) {
515
516 if (!isNominal() && !isString())
517 throw new IllegalArgumentException("Can only remove value of" +
518 "nominal or string attribute!");
519 else {
520 m_Values = (FastVector)m_Values.copy();
521 m_Values.removeElementAt(index);
522 Hashtable hash = new Hashtable(m_Hashtable.size());
523 Enumeration enum = hash.keys();
524 while (enum.hasMoreElements()) {
525 String string = (String)enum.nextElement();
526 Integer valIndexObject = (Integer)m_Hashtable.get(string);
527 int valIndex = valIndexObject.intValue();
528 if (valIndex > index) {
529 hash.put(string, new Integer(valIndex - 1));
530 } else if (valIndex < index) {
531 hash.put(string, valIndexObject);
532 }
533 }
534 m_Hashtable = hash;
535 }
536 }
537
538 /**
539 * Adds an attribute value.
540 *
541 * @param value the attribute value
542 */
543 final void forceAddValue(String value) {
544
545 Object store = value;
546 if (value.length() > STRING_COMPRESS_THRESHOLD) {
547 try {
548 store = new SerializedObject(value, true);
549 } catch (Exception ex) {
550 System.err.println("Couldn't compress string attribute value -"
551 + " storing uncompressed.");
552 }
553 }
554 m_Values.addElement(store);
555 m_Hashtable.put(store, new Integer(m_Values.size() - 1));
556 }
557
558 /**
559 * Sets the index of this attribute.
560 *
561 * @param the index of this attribute
562 */
563 final void setIndex(int index) {
564
565 m_Index = index;
566 }
567
568 /**
569 * Sets a value of a nominal attribute or string attribute.
570 * Creates a fresh list of attribute values before it is set.
571 *
572 * @param index the value's index
573 * @param string the value
574 * @exception IllegalArgumentException if the attribute is not nominal or
575 * string.
576 */
577 final void setValue(int index, String string) {
578
579 if (!isNominal() && !isString()) {
580 throw new IllegalArgumentException("Can only set value of nominal"+
581 "or string attribute!");
582 } else {
583 m_Values = (FastVector)m_Values.copy();
584 m_Hashtable = (Hashtable)m_Hashtable.clone();
585 Object store = string;
586 if (string.length() > STRING_COMPRESS_THRESHOLD) {
587 try {
588 store = new SerializedObject(string, true);
589 } catch (Exception ex) {
590 System.err.println("Couldn't compress string attribute value -"
591 + " storing uncompressed.");
592 }
593 }
594 m_Hashtable.remove(m_Values.elementAt(index));
595 m_Values.setElementAt(store, index);
596 m_Hashtable.put(store, new Integer(index));
597 }
598 }
599
600 /**
601 * Simple main method for testing this class.
602 */
603 public static void main(String[] ops) {
604
605 try {
606
607 // Create numeric attributes "length" and "weight"
608 Attribute length = new Attribute("length");
609 Attribute weight = new Attribute("weight");
610
611 // Create vector to hold nominal values "first", "second", "third"
612 FastVector my_nominal_values = new FastVector(3);
613 my_nominal_values.addElement("first");
614 my_nominal_values.addElement("second");
615 my_nominal_values.addElement("third");
616
617 // Create nominal attribute "position"
618 Attribute position = new Attribute("position", my_nominal_values);
619
620 // Print the name of "position"
621 System.out.println("Name of \"position\": " + position.name());
622
623 // Print the values of "position"
624 Enumeration attValues = position.enumerateValues();
625 while (attValues.hasMoreElements()) {
626 String string = (String)attValues.nextElement();
627 System.out.println("Value of \"position\": " + string);
628 }
629
630 // Shallow copy attribute "position"
631 Attribute copy = (Attribute) position.copy();
632
633 // Test if attributes are the same
634 System.out.println("Copy is the same as original: " + copy.equals(position));
635
636 // Print index of attribute "weight" (should be unset: -1)
637 System.out.println("Index of attribute \"weight\" (should be -1): " +
638 weight.index());
639
640 // Print index of value "first" of attribute "position"
641 System.out.println("Index of value \"first\" of \"position\" (should be 0): " +
642 position.indexOfValue("first"));
643
644 // Tests type of attribute "position"
645 System.out.println("\"position\" is numeric: " + position.isNumeric());
646 System.out.println("\"position\" is nominal: " + position.isNominal());
647 System.out.println("\"position\" is string: " + position.isString());
648
649 // Prints name of attribute "position"
650 System.out.println("Name of \"position\": " + position.name());
651
652 // Prints number of values of attribute "position"
653 System.out.println("Number of values for \"position\": " + position.numValues());
654
655 // Prints the values (againg)
656 for (int i = 0; i < position.numValues(); i++) {
657 System.out.println("Value " + i + ": " + position.value(i));
658 }
659
660 // Prints the attribute "position" in ARFF format
661 System.out.println(position);
662
663 // Checks type of attribute "position" using constants
664 switch (position.type()) {
665 case Attribute.NUMERIC:
666 System.out.println("\"position\" is numeric");
667 break;
668 case Attribute.NOMINAL:
669 System.out.println("\"position\" is nominal");
670 break;
671 case Attribute.STRING:
672 System.out.println("\"position\" is string");
673 break;
674 default:
675 System.out.println("\"position\" has unknown type");
676 }
677 } catch (Exception e) {
678 e.printStackTrace();
679 }
680 }
681}
682
Note: See TracBrowser for help on using the repository browser.