1 | /*
|
---|
2 | * This program is free software; you can redistribute it and/or modify
|
---|
3 | * it under the terms of the GNU General Public License as published by
|
---|
4 | * the Free Software Foundation; either version 2 of the License, or
|
---|
5 | * (at your option) any later version.
|
---|
6 | *
|
---|
7 | * This program is distributed in the hope that it will be useful,
|
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
10 | * GNU General Public License for more details.
|
---|
11 | *
|
---|
12 | * You should have received a copy of the GNU General Public License
|
---|
13 | * along with this program; if not, write to the Free Software
|
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
15 | */
|
---|
16 |
|
---|
17 | /*
|
---|
18 | * Instance.java
|
---|
19 | * Copyright (C) 1999 Eibe Frank
|
---|
20 | *
|
---|
21 | */
|
---|
22 |
|
---|
23 | package weka.core;
|
---|
24 |
|
---|
25 | import java.util.*;
|
---|
26 | import java.io.*;
|
---|
27 |
|
---|
28 | /**
|
---|
29 | * Class for handling an instance. All values (numeric, nominal, or
|
---|
30 | * string) are internally stored as floating-point numbers. If an
|
---|
31 | * attribute is nominal (or a string), the stored value is the index
|
---|
32 | * of the corresponding nominal (or string) value in the attribute's
|
---|
33 | * definition. We have chosen this approach in favor of a more elegant
|
---|
34 | * object-oriented approach because it is much faster. <p>
|
---|
35 | *
|
---|
36 | * Typical usage (code from the main() method of this class): <p>
|
---|
37 | *
|
---|
38 | * <code>
|
---|
39 | * ... <br>
|
---|
40 | *
|
---|
41 | * // Create empty instance with three attribute values <br>
|
---|
42 | * Instance inst = new Instance(3); <br><br>
|
---|
43 | *
|
---|
44 | * // Set instance's values for the attributes "length", "weight", and "position"<br>
|
---|
45 | * inst.setValue(length, 5.3); <br>
|
---|
46 | * inst.setValue(weight, 300); <br>
|
---|
47 | * inst.setValue(position, "first"); <br><br>
|
---|
48 | *
|
---|
49 | * // Set instance's dataset to be the dataset "race" <br>
|
---|
50 | * inst.setDataset(race); <br><br>
|
---|
51 | *
|
---|
52 | * // Print the instance <br>
|
---|
53 | * System.out.println("The instance: " + inst); <br>
|
---|
54 | *
|
---|
55 | * ... <br>
|
---|
56 | * </code><p>
|
---|
57 | *
|
---|
58 | * All methods that change an instance are safe, ie. a change of an
|
---|
59 | * instance does not affect any other instances. All methods that
|
---|
60 | * change an instance's attribute values clone the attribute value
|
---|
61 | * vector before it is changed. If your application heavily modifies
|
---|
62 | * instance values, it may be faster to create a new instance from scratch.
|
---|
63 | *
|
---|
64 | * @author Eibe Frank ([email protected])
|
---|
65 | * @version $Revision: 8815 $
|
---|
66 | */
|
---|
67 | public class Instance implements Copyable, Serializable {
|
---|
68 |
|
---|
69 | /** Constant representing a missing value. */
|
---|
70 | protected final static double MISSING_VALUE = Double.NaN;
|
---|
71 |
|
---|
72 | /**
|
---|
73 | * The dataset the instance has access to. Null if the instance
|
---|
74 | * doesn't have access to any dataset. Only if an instance has
|
---|
75 | * access to a dataset, it knows about the actual attribute types.
|
---|
76 | */
|
---|
77 | protected Instances m_Dataset;
|
---|
78 |
|
---|
79 | /** The instance's attribute values. */
|
---|
80 | protected double[] m_AttValues;
|
---|
81 |
|
---|
82 | /** The instance's weight. */
|
---|
83 | protected double m_Weight;
|
---|
84 |
|
---|
/**
 * Creates an instance that takes over the attribute values and the
 * weight of the given instance. The new instance is not attached
 * to any dataset, so it has no information about the attribute
 * types.
 *
 * @param instance the instance whose attribute values and weight
 * are taken over
 */
public Instance(Instance instance) {

  // The value array is handed over by reference, not duplicated here.
  this(instance.m_Weight, instance.m_AttValues);
}
|
---|
100 |
|
---|
/**
 * Creates an instance from the given weight and attribute values.
 * The array is stored by reference (it is not copied). The new
 * instance is not attached to any dataset, so it has no information
 * about the attribute types.
 *
 * @param weight the instance's weight
 * @param attValues the attribute values in internal format
 */
public Instance(double weight, double[] attValues){

  this.m_Dataset = null;
  this.m_Weight = weight;
  this.m_AttValues = attValues;
}
|
---|
115 |
|
---|
/**
 * Creates an instance of the given size whose values are all
 * missing and whose weight is one. The new instance is not attached
 * to any dataset, so it has no information about the attribute
 * types.
 *
 * @param numAttributes the number of attribute values to allocate
 */
public Instance(int numAttributes) {

  m_Dataset = null;
  m_Weight = 1;
  m_AttValues = new double[numAttributes];
  // Every slot starts out as "missing".
  Arrays.fill(m_AttValues, MISSING_VALUE);
}
|
---|
132 |
|
---|
133 | /**
|
---|
134 | * Returns the attribute with the given index.
|
---|
135 | *
|
---|
136 | * @param index the attribute's index
|
---|
137 | * @return the attribute at the given position
|
---|
138 | * @exception UnassignedDatasetException if instance doesn't have access to a
|
---|
139 | * dataset
|
---|
140 | */
|
---|
141 | public Attribute attribute(int index) {
|
---|
142 |
|
---|
143 | if (m_Dataset == null) {
|
---|
144 | throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
|
---|
145 | }
|
---|
146 | return m_Dataset.attribute(index);
|
---|
147 | }
|
---|
148 |
|
---|
149 | /**
|
---|
150 | * Returns the attribute with the given index. Does the same
|
---|
151 | * thing as attribute().
|
---|
152 | *
|
---|
153 | * @param indexOfIndex the index of the attribute's index
|
---|
154 | * @return the attribute at the given position
|
---|
155 | * @exception UnassignedDatasetException if instance doesn't have access to a
|
---|
156 | * dataset
|
---|
157 | */
|
---|
158 | public Attribute attributeSparse(int indexOfIndex) {
|
---|
159 |
|
---|
160 | if (m_Dataset == null) {
|
---|
161 | throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
|
---|
162 | }
|
---|
163 | return m_Dataset.attribute(indexOfIndex);
|
---|
164 | }
|
---|
165 |
|
---|
166 | /**
|
---|
167 | * Returns class attribute.
|
---|
168 | *
|
---|
169 | * @return the class attribute
|
---|
170 | * @exception UnassignedDatasetException if the class is not set or the
|
---|
171 | * instance doesn't have access to a dataset
|
---|
172 | */
|
---|
173 | public Attribute classAttribute() {
|
---|
174 |
|
---|
175 | if (m_Dataset == null) {
|
---|
176 | throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
|
---|
177 | }
|
---|
178 | return m_Dataset.classAttribute();
|
---|
179 | }
|
---|
180 |
|
---|
181 | /**
|
---|
182 | * Returns the class attribute's index.
|
---|
183 | *
|
---|
184 | * @return the class index as an integer
|
---|
185 | * @exception UnassignedDatasetException if instance doesn't have access to a dataset
|
---|
186 | */
|
---|
187 | public int classIndex() {
|
---|
188 |
|
---|
189 | if (m_Dataset == null) {
|
---|
190 | throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
|
---|
191 | }
|
---|
192 | return m_Dataset.classIndex();
|
---|
193 | }
|
---|
194 |
|
---|
195 | /**
|
---|
196 | * Tests if an instance's class is missing.
|
---|
197 | *
|
---|
198 | * @return true if the instance's class is missing
|
---|
199 | * @exception UnassignedClassException if the class is not set or the instance doesn't
|
---|
200 | * have access to a dataset
|
---|
201 | */
|
---|
202 | public boolean classIsMissing() {
|
---|
203 |
|
---|
204 | if (classIndex() < 0) {
|
---|
205 | throw new UnassignedClassException("Class is not set!");
|
---|
206 | }
|
---|
207 | return isMissing(classIndex());
|
---|
208 | }
|
---|
209 |
|
---|
210 | /**
|
---|
211 | * Returns an instance's class value in internal format. (ie. as a
|
---|
212 | * floating-point number)
|
---|
213 | *
|
---|
214 | * @return the corresponding value as a double (If the
|
---|
215 | * corresponding attribute is nominal (or a string) then it returns the
|
---|
216 | * value's index as a double).
|
---|
217 | * @exception UnassignedClassException if the class is not set or the instance doesn't
|
---|
218 | * have access to a dataset
|
---|
219 | */
|
---|
220 | public double classValue() {
|
---|
221 |
|
---|
222 | if (classIndex() < 0) {
|
---|
223 | throw new UnassignedClassException("Class is not set!");
|
---|
224 | }
|
---|
225 | return value(classIndex());
|
---|
226 | }
|
---|
227 |
|
---|
228 | /**
|
---|
229 | * Produces a shallow copy of this instance. The copy has
|
---|
230 | * access to the same dataset. (if you want to make a copy
|
---|
231 | * that doesn't have access to the dataset, use
|
---|
232 | * <code>new Instance(instance)</code>
|
---|
233 | *
|
---|
234 | * @return the shallow copy
|
---|
235 | */
|
---|
236 | public Object copy() {
|
---|
237 |
|
---|
238 | Instance result = new Instance(this);
|
---|
239 | result.m_Dataset = m_Dataset;
|
---|
240 | return result;
|
---|
241 | }
|
---|
242 |
|
---|
243 | /**
|
---|
244 | * Returns the dataset this instance has access to. (ie. obtains
|
---|
245 | * information about attribute types from) Null if the instance
|
---|
246 | * doesn't have access to a dataset.
|
---|
247 | *
|
---|
248 | * @return the dataset the instance has accesss to
|
---|
249 | */
|
---|
250 | public Instances dataset() {
|
---|
251 |
|
---|
252 | return m_Dataset;
|
---|
253 | }
|
---|
254 |
|
---|
255 | /**
|
---|
256 | * Deletes an attribute at the given position (0 to
|
---|
257 | * numAttributes() - 1). Only succeeds if the instance does not
|
---|
258 | * have access to any dataset because otherwise inconsistencies
|
---|
259 | * could be introduced.
|
---|
260 | *
|
---|
261 | * @param pos the attribute's position
|
---|
262 | * @exception RuntimeException if the instance has access to a
|
---|
263 | * dataset
|
---|
264 | */
|
---|
265 | public void deleteAttributeAt(int position) {
|
---|
266 |
|
---|
267 | if (m_Dataset != null) {
|
---|
268 | throw new RuntimeException("Instance has access to a dataset!");
|
---|
269 | }
|
---|
270 | forceDeleteAttributeAt(position);
|
---|
271 | }
|
---|
272 |
|
---|
273 | /**
|
---|
274 | * Returns an enumeration of all the attributes.
|
---|
275 | *
|
---|
276 | * @return enumeration of all the attributes
|
---|
277 | * @exception UnassignedDatasetException if the instance doesn't
|
---|
278 | * have access to a dataset
|
---|
279 | */
|
---|
280 | public Enumeration enumerateAttributes() {
|
---|
281 |
|
---|
282 | if (m_Dataset == null) {
|
---|
283 | throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
|
---|
284 | }
|
---|
285 | return m_Dataset.enumerateAttributes();
|
---|
286 | }
|
---|
287 |
|
---|
288 | /**
|
---|
289 | * Tests if the headers of two instances are equivalent.
|
---|
290 | *
|
---|
291 | * @param instance another instance
|
---|
292 | * @return true if the header of the given instance is
|
---|
293 | * equivalent to this instance's header
|
---|
294 | * @exception UnassignedDatasetException if instance doesn't have access to any
|
---|
295 | * dataset
|
---|
296 | */
|
---|
297 | public boolean equalHeaders(Instance inst) {
|
---|
298 |
|
---|
299 | if (m_Dataset == null) {
|
---|
300 | throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
|
---|
301 | }
|
---|
302 | return m_Dataset.equalHeaders(inst.m_Dataset);
|
---|
303 | }
|
---|
304 |
|
---|
305 | /**
|
---|
306 | * Returns the index of the attribute stored at the given position.
|
---|
307 | * Just returns the given value.
|
---|
308 | *
|
---|
309 | * @param position the position
|
---|
310 | * @return the index of the attribute stored at the given position
|
---|
311 | */
|
---|
312 | public int index(int position) {
|
---|
313 |
|
---|
314 | return position;
|
---|
315 | }
|
---|
316 |
|
---|
317 | /**
|
---|
318 | * Inserts an attribute at the given position (0 to
|
---|
319 | * numAttributes()). Only succeeds if the instance does not
|
---|
320 | * have access to any dataset because otherwise inconsistencies
|
---|
321 | * could be introduced.
|
---|
322 | *
|
---|
323 | * @param pos the attribute's position
|
---|
324 | * @exception RuntimeException if the instance has accesss to a
|
---|
325 | * dataset
|
---|
326 | * @exception IllegalArgumentException if the position is out of range
|
---|
327 | */
|
---|
328 | public void insertAttributeAt(int position) {
|
---|
329 |
|
---|
330 | if (m_Dataset != null) {
|
---|
331 | throw new RuntimeException("Instance has accesss to a dataset!");
|
---|
332 | }
|
---|
333 | if ((position < 0) ||
|
---|
334 | (position > numAttributes())) {
|
---|
335 | throw new IllegalArgumentException("Can't insert attribute: index out "+
|
---|
336 | "of range");
|
---|
337 | }
|
---|
338 | forceInsertAttributeAt(position);
|
---|
339 | }
|
---|
340 |
|
---|
341 | /**
|
---|
342 | * Tests if a specific value is "missing".
|
---|
343 | *
|
---|
344 | * @param attIndex the attribute's index
|
---|
345 | */
|
---|
346 | public boolean isMissing(int attIndex) {
|
---|
347 |
|
---|
348 | if (Double.isNaN(m_AttValues[attIndex])) {
|
---|
349 | return true;
|
---|
350 | }
|
---|
351 | return false;
|
---|
352 | }
|
---|
353 |
|
---|
354 | /**
|
---|
355 | * Tests if a specific value is "missing". Does
|
---|
356 | * the same thing as isMissing() if applied to an Instance.
|
---|
357 | *
|
---|
358 | * @param indexOfIndex the index of the attribute's index
|
---|
359 | */
|
---|
360 | public boolean isMissingSparse(int indexOfIndex) {
|
---|
361 |
|
---|
362 | if (Double.isNaN(m_AttValues[indexOfIndex])) {
|
---|
363 | return true;
|
---|
364 | }
|
---|
365 | return false;
|
---|
366 | }
|
---|
367 |
|
---|
368 | /**
|
---|
369 | * Tests if a specific value is "missing".
|
---|
370 | * The given attribute has to belong to a dataset.
|
---|
371 | *
|
---|
372 | * @param att the attribute
|
---|
373 | */
|
---|
374 | public boolean isMissing(Attribute att) {
|
---|
375 |
|
---|
376 | return isMissing(att.index());
|
---|
377 | }
|
---|
378 |
|
---|
379 | /**
|
---|
380 | * Tests if the given value codes "missing".
|
---|
381 | *
|
---|
382 | * @param val the value to be tested
|
---|
383 | * @return true if val codes "missing"
|
---|
384 | */
|
---|
385 | public static boolean isMissingValue(double val) {
|
---|
386 |
|
---|
387 | return Double.isNaN(val);
|
---|
388 | }
|
---|
389 |
|
---|
390 | /**
|
---|
391 | * Merges this instance with the given instance and returns
|
---|
392 | * the result. Dataset is set to null.
|
---|
393 | *
|
---|
394 | * @param inst the instance to be merged with this one
|
---|
395 | * @return the merged instances
|
---|
396 | */
|
---|
397 | public Instance mergeInstance(Instance inst) {
|
---|
398 |
|
---|
399 | int m = 0;
|
---|
400 | double [] newVals = new double[numAttributes() + inst.numAttributes()];
|
---|
401 | for (int j = 0; j < numAttributes(); j++, m++) {
|
---|
402 | newVals[m] = value(j);
|
---|
403 | }
|
---|
404 | for (int j = 0; j < inst.numAttributes(); j++, m++) {
|
---|
405 | newVals[m] = inst.value(j);
|
---|
406 | }
|
---|
407 | return new Instance(1.0, newVals);
|
---|
408 | }
|
---|
409 |
|
---|
410 | /**
|
---|
411 | * Returns the double that codes "missing".
|
---|
412 | *
|
---|
413 | * @return the double that codes "missing"
|
---|
414 | */
|
---|
415 | public static double missingValue() {
|
---|
416 |
|
---|
417 | return MISSING_VALUE;
|
---|
418 | }
|
---|
419 |
|
---|
420 | /**
|
---|
421 | * Returns the number of attributes.
|
---|
422 | *
|
---|
423 | * @return the number of attributes as an integer
|
---|
424 | */
|
---|
425 | public int numAttributes() {
|
---|
426 |
|
---|
427 | return m_AttValues.length;
|
---|
428 | }
|
---|
429 |
|
---|
430 | /**
|
---|
431 | * Returns the number of class labels.
|
---|
432 | *
|
---|
433 | * @return the number of class labels as an integer if the
|
---|
434 | * class attribute is nominal, 1 otherwise.
|
---|
435 | * @exception UnassignedDatasetException if instance doesn't have access to any
|
---|
436 | * dataset
|
---|
437 | */
|
---|
438 | public int numClasses() {
|
---|
439 |
|
---|
440 | if (m_Dataset == null) {
|
---|
441 | throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
|
---|
442 | }
|
---|
443 | return m_Dataset.numClasses();
|
---|
444 | }
|
---|
445 |
|
---|
446 | /**
|
---|
447 | * Returns the number of values present. Always the same as numAttributes().
|
---|
448 | *
|
---|
449 | * @return the number of values
|
---|
450 | */
|
---|
451 | public int numValues() {
|
---|
452 |
|
---|
453 | return m_AttValues.length;
|
---|
454 | }
|
---|
455 |
|
---|
456 | /**
|
---|
457 | * Replaces all missing values in the instance with the
|
---|
458 | * values contained in the given array. A deep copy of
|
---|
459 | * the vector of attribute values is performed before the
|
---|
460 | * values are replaced.
|
---|
461 | *
|
---|
462 | * @param array containing the means and modes
|
---|
463 | * @exception IllegalArgumentException if numbers of attributes are unequal
|
---|
464 | */
|
---|
465 | public void replaceMissingValues(double[] array) {
|
---|
466 |
|
---|
467 | if ((array == null) ||
|
---|
468 | (array.length != m_AttValues.length)) {
|
---|
469 | throw new IllegalArgumentException("Unequal number of attributes!");
|
---|
470 | }
|
---|
471 | freshAttributeVector();
|
---|
472 | for (int i = 0; i < m_AttValues.length; i++) {
|
---|
473 | if (isMissing(i)) {
|
---|
474 | m_AttValues[i] = array[i];
|
---|
475 | }
|
---|
476 | }
|
---|
477 | }
|
---|
478 |
|
---|
479 | /**
|
---|
480 | * Sets the class value of an instance to be "missing". A deep copy of
|
---|
481 | * the vector of attribute values is performed before the
|
---|
482 | * value is set to be missing.
|
---|
483 | *
|
---|
484 | * @exception UnassignedClassException if the class is not set
|
---|
485 | * @exception UnassignedDatasetException if the instance doesn't
|
---|
486 | * have access to a dataset
|
---|
487 | */
|
---|
488 | public void setClassMissing() {
|
---|
489 |
|
---|
490 | if (classIndex() < 0) {
|
---|
491 | throw new UnassignedClassException("Class is not set!");
|
---|
492 | }
|
---|
493 | setMissing(classIndex());
|
---|
494 | }
|
---|
495 |
|
---|
496 | /**
|
---|
497 | * Sets the class value of an instance to the given value (internal
|
---|
498 | * floating-point format). A deep copy of the vector of attribute
|
---|
499 | * values is performed before the value is set.
|
---|
500 | *
|
---|
501 | * @param value the new attribute value (If the corresponding
|
---|
502 | * attribute is nominal (or a string) then this is the new value's
|
---|
503 | * index as a double).
|
---|
504 | * @exception UnassignedClassException if the class is not set
|
---|
505 | * @exception UnaddignedDatasetException if the instance doesn't
|
---|
506 | * have access to a dataset
|
---|
507 | */
|
---|
508 | public void setClassValue(double value) {
|
---|
509 |
|
---|
510 | if (classIndex() < 0) {
|
---|
511 | throw new UnassignedClassException("Class is not set!");
|
---|
512 | }
|
---|
513 | setValue(classIndex(), value);
|
---|
514 | }
|
---|
515 |
|
---|
516 | /**
|
---|
517 | * Sets the class value of an instance to the given value. A deep
|
---|
518 | * copy of the vector of attribute values is performed before the
|
---|
519 | * value is set.
|
---|
520 | *
|
---|
521 | * @param value the new class value (If the class
|
---|
522 | * is a string attribute and the value can't be found,
|
---|
523 | * the value is added to the attribute).
|
---|
524 | * @exception UnassignedClassException if the class is not set
|
---|
525 | * @exception UnassignedDatasetException if the dataset is not set
|
---|
526 | * @exception IllegalArgumentException if the attribute is not
|
---|
527 | * nominal or a string, or the value couldn't be found for a nominal
|
---|
528 | * attribute
|
---|
529 | */
|
---|
530 | public final void setClassValue(String value) {
|
---|
531 |
|
---|
532 | if (classIndex() < 0) {
|
---|
533 | throw new UnassignedClassException("Class is not set!");
|
---|
534 | }
|
---|
535 | setValue(classIndex(), value);
|
---|
536 | }
|
---|
537 |
|
---|
538 | /**
|
---|
539 | * Sets the reference to the dataset. Does not check if the instance
|
---|
540 | * is compatible with the dataset. Note: the dataset does not know
|
---|
541 | * about this instance. If the structure of the dataset's header
|
---|
542 | * gets changed, this instance will not be adjusted automatically.
|
---|
543 | *
|
---|
544 | * @param instances the reference to the dataset
|
---|
545 | */
|
---|
546 | public final void setDataset(Instances instances) {
|
---|
547 |
|
---|
548 | m_Dataset = instances;
|
---|
549 | }
|
---|
550 |
|
---|
551 | /**
|
---|
552 | * Sets a specific value to be "missing". Performs a deep copy
|
---|
553 | * of the vector of attribute values before the value is set to
|
---|
554 | * be missing.
|
---|
555 | *
|
---|
556 | * @param attIndex the attribute's index
|
---|
557 | */
|
---|
558 | public final void setMissing(int attIndex) {
|
---|
559 |
|
---|
560 | setValue(attIndex, MISSING_VALUE);
|
---|
561 | }
|
---|
562 |
|
---|
563 | /**
|
---|
564 | * Sets a specific value to be "missing". Performs a deep copy
|
---|
565 | * of the vector of attribute values before the value is set to
|
---|
566 | * be missing. The given attribute has to belong to a dataset.
|
---|
567 | *
|
---|
568 | * @param att the attribute
|
---|
569 | */
|
---|
570 | public final void setMissing(Attribute att) {
|
---|
571 |
|
---|
572 | setMissing(att.index());
|
---|
573 | }
|
---|
574 |
|
---|
575 | /**
|
---|
576 | * Sets a specific value in the instance to the given value
|
---|
577 | * (internal floating-point format). Performs a deep copy
|
---|
578 | * of the vector of attribute values before the value is set.
|
---|
579 | *
|
---|
580 | * @param attIndex the attribute's index
|
---|
581 | * @param value the new attribute value (If the corresponding
|
---|
582 | * attribute is nominal (or a string) then this is the new value's
|
---|
583 | * index as a double).
|
---|
584 | */
|
---|
585 | public void setValue(int attIndex, double value) {
|
---|
586 |
|
---|
587 | freshAttributeVector();
|
---|
588 | m_AttValues[attIndex] = value;
|
---|
589 | }
|
---|
590 |
|
---|
591 | /**
|
---|
592 | * Sets a specific value in the instance to the given value
|
---|
593 | * (internal floating-point format). Performs a deep copy
|
---|
594 | * of the vector of attribute values before the value is set.
|
---|
595 | * Does exactly the same thing as setValue().
|
---|
596 | *
|
---|
597 | * @param indexOfIndex the index of the attribute's index
|
---|
598 | * @param value the new attribute value (If the corresponding
|
---|
599 | * attribute is nominal (or a string) then this is the new value's
|
---|
600 | * index as a double).
|
---|
601 | */
|
---|
602 | public void setValueSparse(int indexOfIndex, double value) {
|
---|
603 |
|
---|
604 | freshAttributeVector();
|
---|
605 | m_AttValues[indexOfIndex] = value;
|
---|
606 | }
|
---|
607 |
|
---|
608 | /**
|
---|
609 | * Sets a value of a nominal or string attribute to the given
|
---|
610 | * value. Performs a deep copy of the vector of attribute values
|
---|
611 | * before the value is set.
|
---|
612 | *
|
---|
613 | * @param attIndex the attribute's index
|
---|
614 | * @param value the new attribute value (If the attribute
|
---|
615 | * is a string attribute and the value can't be found,
|
---|
616 | * the value is added to the attribute).
|
---|
617 | * @exception UnassignedDatasetException if the dataset is not set
|
---|
618 | * @exception IllegalArgumentException if the selected
|
---|
619 | * attribute is not nominal or a string, or the supplied value couldn't
|
---|
620 | * be found for a nominal attribute
|
---|
621 | */
|
---|
622 | public final void setValue(int attIndex, String value) {
|
---|
623 |
|
---|
624 | int valIndex;
|
---|
625 |
|
---|
626 | if (m_Dataset == null) {
|
---|
627 | throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
|
---|
628 | }
|
---|
629 | if (!attribute(attIndex).isNominal() &&
|
---|
630 | !attribute(attIndex).isString()) {
|
---|
631 | throw new IllegalArgumentException("Attribute neither nominal nor string!");
|
---|
632 | }
|
---|
633 | valIndex = attribute(attIndex).indexOfValue(value);
|
---|
634 | if (valIndex == -1) {
|
---|
635 | if (attribute(attIndex).isNominal()) {
|
---|
636 | throw new IllegalArgumentException("Value not defined for given nominal attribute!");
|
---|
637 | } else {
|
---|
638 | attribute(attIndex).forceAddValue(value);
|
---|
639 | valIndex = attribute(attIndex).indexOfValue(value);
|
---|
640 | }
|
---|
641 | }
|
---|
642 | setValue(attIndex, (double)valIndex);
|
---|
643 | }
|
---|
644 |
|
---|
645 | /**
|
---|
646 | * Sets a specific value in the instance to the given value
|
---|
647 | * (internal floating-point format). Performs a deep copy of the
|
---|
648 | * vector of attribute values before the value is set, so if you are
|
---|
649 | * planning on calling setValue many times it may be faster to
|
---|
650 | * create a new instance using toDoubleArray. The given attribute
|
---|
651 | * has to belong to a dataset.
|
---|
652 | *
|
---|
653 | * @param att the attribute
|
---|
654 | * @param value the new attribute value (If the corresponding
|
---|
655 | * attribute is nominal (or a string) then this is the new value's
|
---|
656 | * index as a double).
|
---|
657 | */
|
---|
658 | public final void setValue(Attribute att, double value) {
|
---|
659 |
|
---|
660 | setValue(att.index(), value);
|
---|
661 | }
|
---|
662 |
|
---|
663 | /**
|
---|
664 | * Sets a value of an nominal or string attribute to the given
|
---|
665 | * value. Performs a deep copy of the vector of attribute values
|
---|
666 | * before the value is set, so if you are planning on calling setValue many
|
---|
667 | * times it may be faster to create a new instance using toDoubleArray.
|
---|
668 | * The given attribute has to belong to a dataset.
|
---|
669 | *
|
---|
670 | * @param att the attribute
|
---|
671 | * @param value the new attribute value (If the attribute
|
---|
672 | * is a string attribute and the value can't be found,
|
---|
673 | * the value is added to the attribute).
|
---|
674 | * @exception IllegalArgumentException if the the attribute is not
|
---|
675 | * nominal or a string, or the value couldn't be found for a nominal
|
---|
676 | * attribute
|
---|
677 | */
|
---|
678 | public final void setValue(Attribute att, String value) {
|
---|
679 |
|
---|
680 | if (!att.isNominal() &&
|
---|
681 | !att.isString()) {
|
---|
682 | throw new IllegalArgumentException("Attribute neither nominal nor string!");
|
---|
683 | }
|
---|
684 | int valIndex = att.indexOfValue(value);
|
---|
685 | if (valIndex == -1) {
|
---|
686 | if (att.isNominal()) {
|
---|
687 | throw new IllegalArgumentException("Value not defined for given nominal attribute!");
|
---|
688 | } else {
|
---|
689 | att.forceAddValue(value);
|
---|
690 | valIndex = att.indexOfValue(value);
|
---|
691 | }
|
---|
692 | }
|
---|
693 | setValue(att.index(), (double)valIndex);
|
---|
694 | }
|
---|
695 |
|
---|
696 | /**
|
---|
697 | * Sets the weight of an instance.
|
---|
698 | *
|
---|
699 | * @param weight the weight
|
---|
700 | */
|
---|
701 | public final void setWeight(double weight) {
|
---|
702 |
|
---|
703 | m_Weight = weight;
|
---|
704 | }
|
---|
705 |
|
---|
706 | /**
|
---|
707 | * Returns the value of a nominal (or string) attribute
|
---|
708 | * for the instance.
|
---|
709 | *
|
---|
710 | * @param attIndex the attribute's index
|
---|
711 | * @return the value as a string
|
---|
712 | * @exception IllegalArgumentException if the attribute is not a nominal
|
---|
713 | * (or string) attribute.
|
---|
714 | * @exception UnassignedDatasetException if the instance doesn't belong
|
---|
715 | * to a dataset.
|
---|
716 | */
|
---|
717 | public final String stringValue(int attIndex) {
|
---|
718 |
|
---|
719 | if (m_Dataset == null) {
|
---|
720 | throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
|
---|
721 | }
|
---|
722 | if (!m_Dataset.attribute(attIndex).isNominal() &&
|
---|
723 | !m_Dataset.attribute(attIndex).isString()) {
|
---|
724 | throw new IllegalArgumentException("Attribute neither nominal nor string!");
|
---|
725 | }
|
---|
726 | return m_Dataset.attribute(attIndex).
|
---|
727 | value((int) value(attIndex));
|
---|
728 | }
|
---|
729 |
|
---|
730 | /**
|
---|
731 | * Returns the value of a nominal (or string) attribute
|
---|
732 | * for the instance.
|
---|
733 | *
|
---|
734 | * @param att the attribute
|
---|
735 | * @return the value as a string
|
---|
736 | * @exception IllegalArgumentException if the attribute is not a nominal
|
---|
737 | * (or string) attribute.
|
---|
738 | * @exception UnassignedDatasetException if the instance doesn't belong
|
---|
739 | * to a dataset.
|
---|
740 | */
|
---|
741 | public final String stringValue(Attribute att) {
|
---|
742 |
|
---|
743 | return stringValue(att.index());
|
---|
744 | }
|
---|
745 |
|
---|
746 | /**
|
---|
747 | * Returns the values of each attribute as an array of doubles.
|
---|
748 | *
|
---|
749 | * @return an array containing all the instance attribute values
|
---|
750 | */
|
---|
751 | public double[] toDoubleArray() {
|
---|
752 |
|
---|
753 | double[] newValues = new double[m_AttValues.length];
|
---|
754 | System.arraycopy(m_AttValues, 0, newValues, 0,
|
---|
755 | m_AttValues.length);
|
---|
756 | return newValues;
|
---|
757 | }
|
---|
758 |
|
---|
759 | /**
|
---|
760 | * Returns the description of one instance. If the instance
|
---|
761 | * doesn't have access to a dataset, it returns the internal
|
---|
762 | * floating-point values. Quotes string
|
---|
763 | * values that contain whitespace characters.
|
---|
764 | *
|
---|
765 | * @return the instance's description as a string
|
---|
766 | */
|
---|
767 | public String toString() {
|
---|
768 |
|
---|
769 | StringBuffer text = new StringBuffer();
|
---|
770 |
|
---|
771 | for (int i = 0; i < m_AttValues.length; i++) {
|
---|
772 | if (i > 0) text.append(",");
|
---|
773 | text.append(toString(i));
|
---|
774 | }
|
---|
775 |
|
---|
776 | return text.toString();
|
---|
777 | }
|
---|
778 |
|
---|
779 | /**
|
---|
780 | * Returns the description of one value of the instance as a
|
---|
781 | * string. If the instance doesn't have access to a dataset, it
|
---|
782 | * returns the internal floating-point value. Quotes string
|
---|
783 | * values that contain whitespace characters, or if they
|
---|
784 | * are a question mark.
|
---|
785 | *
|
---|
786 | * @param attIndex the attribute's index
|
---|
787 | * @return the value's description as a string
|
---|
788 | */
|
---|
789 | public final String toString(int attIndex) {
|
---|
790 |
|
---|
791 | StringBuffer text = new StringBuffer();
|
---|
792 |
|
---|
793 | if (isMissing(attIndex)) {
|
---|
794 | text.append("?");
|
---|
795 | } else {
|
---|
796 | if (m_Dataset == null) {
|
---|
797 | text.append(Utils.doubleToString(m_AttValues[attIndex],6));
|
---|
798 | } else {
|
---|
799 | if (m_Dataset.attribute(attIndex).isNominal() ||
|
---|
800 | m_Dataset.attribute(attIndex).isString()) {
|
---|
801 | text.append(Utils.quote(stringValue(attIndex)));
|
---|
802 | } else {
|
---|
803 | text.append(Utils.doubleToString(value(attIndex),6));
|
---|
804 | }
|
---|
805 | }
|
---|
806 | }
|
---|
807 | return text.toString();
|
---|
808 | }
|
---|
809 |
|
---|
810 | /**
|
---|
811 | * Returns the description of one value of the instance as a
|
---|
812 | * string. If the instance doesn't have access to a dataset it
|
---|
813 | * returns the internal floating-point value. Quotes string
|
---|
814 | * values that contain whitespace characters, or if they
|
---|
815 | * are a question mark.
|
---|
816 | * The given attribute has to belong to a dataset.
|
---|
817 | *
|
---|
818 | * @param att the attribute
|
---|
819 | * @return the value's description as a string
|
---|
820 | */
|
---|
821 | public final String toString(Attribute att) {
|
---|
822 |
|
---|
823 | return toString(att.index());
|
---|
824 | }
|
---|
825 |
|
---|
826 | /**
|
---|
827 | * Returns an instance's attribute value in internal format.
|
---|
828 | *
|
---|
829 | * @param attIndex the attribute's index
|
---|
830 | * @return the specified value as a double (If the corresponding
|
---|
831 | * attribute is nominal (or a string) then it returns the value's index as a
|
---|
832 | * double).
|
---|
833 | */
|
---|
834 | public double value(int attIndex) {
|
---|
835 |
|
---|
836 | return m_AttValues[attIndex];
|
---|
837 | }
|
---|
838 |
|
---|
839 | /**
|
---|
840 | * Returns an instance's attribute value in internal format.
|
---|
841 | * Does exactly the same thing as value() if applied to an Instance.
|
---|
842 | *
|
---|
843 | * @param indexOfIndex the index of the attribute's index
|
---|
844 | * @return the specified value as a double (If the corresponding
|
---|
845 | * attribute is nominal (or a string) then it returns the value's index as a
|
---|
846 | * double).
|
---|
847 | */
|
---|
848 | public double valueSparse(int indexOfIndex) {
|
---|
849 |
|
---|
850 | return m_AttValues[indexOfIndex];
|
---|
851 | }
|
---|
852 |
|
---|
853 | /**
|
---|
854 | * Returns an instance's attribute value in internal format.
|
---|
855 | * The given attribute has to belong to a dataset.
|
---|
856 | *
|
---|
857 | * @param att the attribute
|
---|
858 | * @return the specified value as a double (If the corresponding
|
---|
859 | * attribute is nominal (or a string) then it returns the value's index as a
|
---|
860 | * double).
|
---|
861 | */
|
---|
862 | public double value(Attribute att) {
|
---|
863 |
|
---|
864 | return value(att.index());
|
---|
865 | }
|
---|
866 |
|
---|
867 | /**
|
---|
868 | * Returns the instance's weight.
|
---|
869 | *
|
---|
870 | * @return the instance's weight as a double
|
---|
871 | */
|
---|
872 | public final double weight() {
|
---|
873 |
|
---|
874 | return m_Weight;
|
---|
875 | }
|
---|
876 |
|
---|
877 | /**
|
---|
878 | * Deletes an attribute at the given position (0 to
|
---|
879 | * numAttributes() - 1).
|
---|
880 | *
|
---|
881 | * @param pos the attribute's position
|
---|
882 | */
|
---|
883 |
|
---|
884 | void forceDeleteAttributeAt(int position) {
|
---|
885 |
|
---|
886 | double[] newValues = new double[m_AttValues.length - 1];
|
---|
887 |
|
---|
888 | System.arraycopy(m_AttValues, 0, newValues, 0, position);
|
---|
889 | if (position < m_AttValues.length - 1) {
|
---|
890 | System.arraycopy(m_AttValues, position + 1,
|
---|
891 | newValues, position,
|
---|
892 | m_AttValues.length - (position + 1));
|
---|
893 | }
|
---|
894 | m_AttValues = newValues;
|
---|
895 | }
|
---|
896 |
|
---|
897 | /**
|
---|
898 | * Inserts an attribute at the given position
|
---|
899 | * (0 to numAttributes()) and sets its value to be missing.
|
---|
900 | *
|
---|
901 | * @param pos the attribute's position
|
---|
902 | */
|
---|
903 | void forceInsertAttributeAt(int position) {
|
---|
904 |
|
---|
905 | double[] newValues = new double[m_AttValues.length + 1];
|
---|
906 |
|
---|
907 | System.arraycopy(m_AttValues, 0, newValues, 0, position);
|
---|
908 | newValues[position] = MISSING_VALUE;
|
---|
909 | System.arraycopy(m_AttValues, position, newValues,
|
---|
910 | position + 1, m_AttValues.length - position);
|
---|
911 | m_AttValues = newValues;
|
---|
912 | }
|
---|
913 |
|
---|
/**
 * Protected no-argument constructor for use by subclasses.
 * Performs no initialisation of its own.
 */
protected Instance() {
  super();
}
|
---|
919 |
|
---|
920 | /**
|
---|
921 | * Clones the attribute vector of the instance and
|
---|
922 | * overwrites it with the clone.
|
---|
923 | */
|
---|
924 | private void freshAttributeVector() {
|
---|
925 |
|
---|
926 | m_AttValues = toDoubleArray();
|
---|
927 | }
|
---|
928 |
|
---|
929 | /**
|
---|
930 | * Main method for testing this class.
|
---|
931 | */
|
---|
932 | public static void main(String[] options) {
|
---|
933 |
|
---|
934 | try {
|
---|
935 |
|
---|
936 | // Create numeric attributes "length" and "weight"
|
---|
937 | Attribute length = new Attribute("length");
|
---|
938 | Attribute weight = new Attribute("weight");
|
---|
939 |
|
---|
940 | // Create vector to hold nominal values "first", "second", "third"
|
---|
941 | FastVector my_nominal_values = new FastVector(3);
|
---|
942 | my_nominal_values.addElement("first");
|
---|
943 | my_nominal_values.addElement("second");
|
---|
944 | my_nominal_values.addElement("third");
|
---|
945 |
|
---|
946 | // Create nominal attribute "position"
|
---|
947 | Attribute position = new Attribute("position", my_nominal_values);
|
---|
948 |
|
---|
949 | // Create vector of the above attributes
|
---|
950 | FastVector attributes = new FastVector(3);
|
---|
951 | attributes.addElement(length);
|
---|
952 | attributes.addElement(weight);
|
---|
953 | attributes.addElement(position);
|
---|
954 |
|
---|
955 | // Create the empty dataset "race" with above attributes
|
---|
956 | Instances race = new Instances("race", attributes, 0);
|
---|
957 |
|
---|
958 | // Make position the class attribute
|
---|
959 | race.setClassIndex(position.index());
|
---|
960 |
|
---|
961 | // Create empty instance with three attribute values
|
---|
962 | Instance inst = new Instance(3);
|
---|
963 |
|
---|
964 | // Set instance's values for the attributes "length", "weight", and "position"
|
---|
965 | inst.setValue(length, 5.3);
|
---|
966 | inst.setValue(weight, 300);
|
---|
967 | inst.setValue(position, "first");
|
---|
968 |
|
---|
969 | // Set instance's dataset to be the dataset "race"
|
---|
970 | inst.setDataset(race);
|
---|
971 |
|
---|
972 | // Print the instance
|
---|
973 | System.out.println("The instance: " + inst);
|
---|
974 |
|
---|
975 | // Print the first attribute
|
---|
976 | System.out.println("First attribute: " + inst.attribute(0));
|
---|
977 |
|
---|
978 | // Print the class attribute
|
---|
979 | System.out.println("Class attribute: " + inst.classAttribute());
|
---|
980 |
|
---|
981 | // Print the class index
|
---|
982 | System.out.println("Class index: " + inst.classIndex());
|
---|
983 |
|
---|
984 | // Say if class is missing
|
---|
985 | System.out.println("Class is missing: " + inst.classIsMissing());
|
---|
986 |
|
---|
987 | // Print the instance's class value in internal format
|
---|
988 | System.out.println("Class value (internal format): " + inst.classValue());
|
---|
989 |
|
---|
990 | // Print a shallow copy of this instance
|
---|
991 | Instance copy = (Instance) inst.copy();
|
---|
992 | System.out.println("Shallow copy: " + copy);
|
---|
993 |
|
---|
994 | // Set dataset for shallow copy
|
---|
995 | copy.setDataset(inst.dataset());
|
---|
996 | System.out.println("Shallow copy with dataset set: " + copy);
|
---|
997 |
|
---|
998 | // Unset dataset for copy, delete first attribute, and insert it again
|
---|
999 | copy.setDataset(null);
|
---|
1000 | copy.deleteAttributeAt(0);
|
---|
1001 | copy.insertAttributeAt(0);
|
---|
1002 | copy.setDataset(inst.dataset());
|
---|
1003 | System.out.println("Copy with first attribute deleted and inserted: " + copy);
|
---|
1004 |
|
---|
1005 | // Enumerate attributes (leaving out the class attribute)
|
---|
1006 | System.out.println("Enumerating attributes (leaving out class):");
|
---|
1007 | Enumeration enum = inst.enumerateAttributes();
|
---|
1008 | while (enum.hasMoreElements()) {
|
---|
1009 | Attribute att = (Attribute) enum.nextElement();
|
---|
1010 | System.out.println(att);
|
---|
1011 | }
|
---|
1012 |
|
---|
1013 | // Headers are equivalent?
|
---|
1014 | System.out.println("Header of original and copy equivalent: " +
|
---|
1015 | inst.equalHeaders(copy));
|
---|
1016 |
|
---|
1017 | // Test for missing values
|
---|
1018 | System.out.println("Length of copy missing: " + copy.isMissing(length));
|
---|
1019 | System.out.println("Weight of copy missing: " + copy.isMissing(weight.index()));
|
---|
1020 | System.out.println("Length of copy missing: " +
|
---|
1021 | Instance.isMissingValue(copy.value(length)));
|
---|
1022 | System.out.println("Missing value coded as: " + Instance.missingValue());
|
---|
1023 |
|
---|
1024 | // Prints number of attributes and classes
|
---|
1025 | System.out.println("Number of attributes: " + copy.numAttributes());
|
---|
1026 | System.out.println("Number of classes: " + copy.numClasses());
|
---|
1027 |
|
---|
1028 | // Replace missing values
|
---|
1029 | double[] meansAndModes = {2, 3, 0};
|
---|
1030 | copy.replaceMissingValues(meansAndModes);
|
---|
1031 | System.out.println("Copy with missing value replaced: " + copy);
|
---|
1032 |
|
---|
1033 | // Setting and getting values and weights
|
---|
1034 | copy.setClassMissing();
|
---|
1035 | System.out.println("Copy with missing class: " + copy);
|
---|
1036 | copy.setClassValue(0);
|
---|
1037 | System.out.println("Copy with class value set to first value: " + copy);
|
---|
1038 | copy.setClassValue("third");
|
---|
1039 | System.out.println("Copy with class value set to \"third\": " + copy);
|
---|
1040 | copy.setMissing(1);
|
---|
1041 | System.out.println("Copy with second attribute set to be missing: " + copy);
|
---|
1042 | copy.setMissing(length);
|
---|
1043 | System.out.println("Copy with length set to be missing: " + copy);
|
---|
1044 | copy.setValue(0, 0);
|
---|
1045 | System.out.println("Copy with first attribute set to 0: " + copy);
|
---|
1046 | copy.setValue(weight, 1);
|
---|
1047 | System.out.println("Copy with weight attribute set to 1: " + copy);
|
---|
1048 | copy.setValue(position, "second");
|
---|
1049 | System.out.println("Copy with position set to \"second\": " + copy);
|
---|
1050 | copy.setValue(2, "first");
|
---|
1051 | System.out.println("Copy with last attribute set to \"first\": " + copy);
|
---|
1052 | System.out.println("Current weight of instance copy: " + copy.weight());
|
---|
1053 | copy.setWeight(2);
|
---|
1054 | System.out.println("Current weight of instance copy (set to 2): " + copy.weight());
|
---|
1055 | System.out.println("Last value of copy: " + copy.toString(2));
|
---|
1056 | System.out.println("Value of position for copy: " + copy.toString(position));
|
---|
1057 | System.out.println("Last value of copy (internal format): " + copy.value(2));
|
---|
1058 | System.out.println("Value of position for copy (internal format): " +
|
---|
1059 | copy.value(position));
|
---|
1060 | } catch (Exception e) {
|
---|
1061 | e.printStackTrace();
|
---|
1062 | }
|
---|
1063 | }
|
---|
1064 | }
|
---|