1 | /*
|
---|
2 | * This program is free software; you can redistribute it and/or modify
|
---|
3 | * it under the terms of the GNU General Public License as published by
|
---|
4 | * the Free Software Foundation; either version 2 of the License, or
|
---|
5 | * (at your option) any later version.
|
---|
6 | *
|
---|
7 | * This program is distributed in the hope that it will be useful,
|
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
10 | * GNU General Public License for more details.
|
---|
11 | *
|
---|
12 | * You should have received a copy of the GNU General Public License
|
---|
13 | * along with this program; if not, write to the Free Software
|
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
15 | */
|
---|
16 |
|
---|
17 | /*
|
---|
18 | * AttributeStats.java
|
---|
19 | * Copyright (C) 1999 Len Trigg
|
---|
20 | *
|
---|
21 | */
|
---|
22 |
|
---|
23 | package weka.core;
|
---|
24 |
|
---|
25 | /**
|
---|
26 | * A Utility class that contains summary information on an
|
---|
27 | * the values that appear in a dataset for a particular attribute.
|
---|
28 | *
|
---|
29 | * @author <a href="mailto:[email protected]">Len Trigg</a>
|
---|
30 | * @version $Revision: 8815 $
|
---|
31 | */
|
---|
32 | public class AttributeStats {
|
---|
33 |
|
---|
34 | /** The number of int-like values */
|
---|
35 | public int intCount = 0;
|
---|
36 |
|
---|
37 | /** The number of real-like values (i.e. have a fractional part) */
|
---|
38 | public int realCount = 0;
|
---|
39 |
|
---|
40 | /** The number of missing values */
|
---|
41 | public int missingCount = 0;
|
---|
42 |
|
---|
43 | /** The number of distinct values */
|
---|
44 | public int distinctCount = 0;
|
---|
45 |
|
---|
46 | /** The number of values that only appear once */
|
---|
47 | public int uniqueCount = 0;
|
---|
48 |
|
---|
49 | /** The total number of values (i.e. number of instances) */
|
---|
50 | public int totalCount = 0;
|
---|
51 |
|
---|
52 | /** Stats on numeric value distributions */
|
---|
53 | // perhaps Stats should be moved from weka.experiment to weka.core
|
---|
54 | public weka.experiment.Stats numericStats;
|
---|
55 |
|
---|
56 | /** Counts of each nominal value */
|
---|
57 | public int [] nominalCounts;
|
---|
58 |
|
---|
59 | /**
|
---|
60 | * Updates the counters for one more observed distinct value.
|
---|
61 | *
|
---|
62 | * @param value the value that has just been seen
|
---|
63 | * @param count the number of times the value appeared
|
---|
64 | */
|
---|
65 | protected void addDistinct(double value, int count) {
|
---|
66 |
|
---|
67 | if (count > 0) {
|
---|
68 | if (count == 1) {
|
---|
69 | uniqueCount++;
|
---|
70 | }
|
---|
71 | if (Utils.eq(value, (double)((int)value))) {
|
---|
72 | intCount += count;
|
---|
73 | } else {
|
---|
74 | realCount += count;
|
---|
75 | }
|
---|
76 | if (nominalCounts != null) {
|
---|
77 | nominalCounts[(int)value] = count;
|
---|
78 | }
|
---|
79 | if (numericStats != null) {
|
---|
80 | numericStats.add(value, count);
|
---|
81 | numericStats.calculateDerived();
|
---|
82 | }
|
---|
83 | }
|
---|
84 | distinctCount++;
|
---|
85 | }
|
---|
86 |
|
---|
87 | /**
|
---|
88 | * Returns a human readable representation of this AttributeStats instance.
|
---|
89 | *
|
---|
90 | * @return a String represtinging these AttributeStats.
|
---|
91 | */
|
---|
92 | public String toString() {
|
---|
93 |
|
---|
94 | StringBuffer sb = new StringBuffer();
|
---|
95 | sb.append(Utils.padLeft("Type", 4)).append(Utils.padLeft("Nom", 5));
|
---|
96 | sb.append(Utils.padLeft("Int", 5)).append(Utils.padLeft("Real", 5));
|
---|
97 | sb.append(Utils.padLeft("Missing", 12));
|
---|
98 | sb.append(Utils.padLeft("Unique", 12));
|
---|
99 | sb.append(Utils.padLeft("Dist", 6));
|
---|
100 | if (nominalCounts != null) {
|
---|
101 | sb.append(' ');
|
---|
102 | for (int i = 0; i < nominalCounts.length; i++) {
|
---|
103 | sb.append(Utils.padLeft("C[" + i + "]", 5));
|
---|
104 | }
|
---|
105 | }
|
---|
106 | sb.append('\n');
|
---|
107 |
|
---|
108 | long percent;
|
---|
109 | percent = Math.round(100.0 * intCount / totalCount);
|
---|
110 | if (nominalCounts != null) {
|
---|
111 | sb.append(Utils.padLeft("Nom", 4)).append(' ');
|
---|
112 | sb.append(Utils.padLeft("" + percent, 3)).append("% ");
|
---|
113 | sb.append(Utils.padLeft("" + 0, 3)).append("% ");
|
---|
114 | } else {
|
---|
115 | sb.append(Utils.padLeft("Num", 4)).append(' ');
|
---|
116 | sb.append(Utils.padLeft("" + 0, 3)).append("% ");
|
---|
117 | sb.append(Utils.padLeft("" + percent, 3)).append("% ");
|
---|
118 | }
|
---|
119 | percent = Math.round(100.0 * realCount / totalCount);
|
---|
120 | sb.append(Utils.padLeft("" + percent, 3)).append("% ");
|
---|
121 | sb.append(Utils.padLeft("" + missingCount, 5)).append(" /");
|
---|
122 | percent = Math.round(100.0 * missingCount / totalCount);
|
---|
123 | sb.append(Utils.padLeft("" + percent, 3)).append("% ");
|
---|
124 | sb.append(Utils.padLeft("" + uniqueCount, 5)).append(" /");
|
---|
125 | percent = Math.round(100.0 * uniqueCount / totalCount);
|
---|
126 | sb.append(Utils.padLeft("" + percent, 3)).append("% ");
|
---|
127 | sb.append(Utils.padLeft("" + distinctCount, 5)).append(' ');
|
---|
128 | if (nominalCounts != null) {
|
---|
129 | for (int i = 0; i < nominalCounts.length; i++) {
|
---|
130 | sb.append(Utils.padLeft("" + nominalCounts[i], 5));
|
---|
131 | }
|
---|
132 | }
|
---|
133 | sb.append('\n');
|
---|
134 | return sb.toString();
|
---|
135 | }
|
---|
136 | }
|
---|