source: trunk/gsdl/packages/kea/kea-3.0/weka/core/AttributeStats.java@ 8815

Last change on this file since 8815 was 8815, checked in by mdewsnip, 19 years ago

Kea 3.0, as downloaded from http://www.nzdl.org/kea but with CSTR_abstracts_test, CSTR_abstracts_train, Chinese_test, and Chinese_train directories removed.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.3 KB
Line 
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * AttributeStats.java
19 * Copyright (C) 1999 Len Trigg
20 *
21 */
22
23package weka.core;
24
25/**
26 * A utility class that contains summary information on
27 * the values that appear in a dataset for a particular attribute.
28 *
29 * @author <a href="mailto:[email protected]">Len Trigg</a>
30 * @version $Revision: 8815 $
31 */
32public class AttributeStats {
33
34 /** The number of int-like values */
35 public int intCount = 0;
36
37 /** The number of real-like values (i.e. have a fractional part) */
38 public int realCount = 0;
39
40 /** The number of missing values */
41 public int missingCount = 0;
42
43 /** The number of distinct values */
44 public int distinctCount = 0;
45
46 /** The number of values that only appear once */
47 public int uniqueCount = 0;
48
49 /** The total number of values (i.e. number of instances) */
50 public int totalCount = 0;
51
52 /** Stats on numeric value distributions */
53 // perhaps Stats should be moved from weka.experiment to weka.core
54 public weka.experiment.Stats numericStats;
55
56 /** Counts of each nominal value */
57 public int [] nominalCounts;
58
59 /**
60 * Updates the counters for one more observed distinct value.
61 *
62 * @param value the value that has just been seen
63 * @param count the number of times the value appeared
64 */
65 protected void addDistinct(double value, int count) {
66
67 if (count > 0) {
68 if (count == 1) {
69 uniqueCount++;
70 }
71 if (Utils.eq(value, (double)((int)value))) {
72 intCount += count;
73 } else {
74 realCount += count;
75 }
76 if (nominalCounts != null) {
77 nominalCounts[(int)value] = count;
78 }
79 if (numericStats != null) {
80 numericStats.add(value, count);
81 numericStats.calculateDerived();
82 }
83 }
84 distinctCount++;
85 }
86
87 /**
88 * Returns a human readable representation of this AttributeStats instance.
89 *
90 * @return a String represtinging these AttributeStats.
91 */
92 public String toString() {
93
94 StringBuffer sb = new StringBuffer();
95 sb.append(Utils.padLeft("Type", 4)).append(Utils.padLeft("Nom", 5));
96 sb.append(Utils.padLeft("Int", 5)).append(Utils.padLeft("Real", 5));
97 sb.append(Utils.padLeft("Missing", 12));
98 sb.append(Utils.padLeft("Unique", 12));
99 sb.append(Utils.padLeft("Dist", 6));
100 if (nominalCounts != null) {
101 sb.append(' ');
102 for (int i = 0; i < nominalCounts.length; i++) {
103 sb.append(Utils.padLeft("C[" + i + "]", 5));
104 }
105 }
106 sb.append('\n');
107
108 long percent;
109 percent = Math.round(100.0 * intCount / totalCount);
110 if (nominalCounts != null) {
111 sb.append(Utils.padLeft("Nom", 4)).append(' ');
112 sb.append(Utils.padLeft("" + percent, 3)).append("% ");
113 sb.append(Utils.padLeft("" + 0, 3)).append("% ");
114 } else {
115 sb.append(Utils.padLeft("Num", 4)).append(' ');
116 sb.append(Utils.padLeft("" + 0, 3)).append("% ");
117 sb.append(Utils.padLeft("" + percent, 3)).append("% ");
118 }
119 percent = Math.round(100.0 * realCount / totalCount);
120 sb.append(Utils.padLeft("" + percent, 3)).append("% ");
121 sb.append(Utils.padLeft("" + missingCount, 5)).append(" /");
122 percent = Math.round(100.0 * missingCount / totalCount);
123 sb.append(Utils.padLeft("" + percent, 3)).append("% ");
124 sb.append(Utils.padLeft("" + uniqueCount, 5)).append(" /");
125 percent = Math.round(100.0 * uniqueCount / totalCount);
126 sb.append(Utils.padLeft("" + percent, 3)).append("% ");
127 sb.append(Utils.padLeft("" + distinctCount, 5)).append(' ');
128 if (nominalCounts != null) {
129 for (int i = 0; i < nominalCounts.length; i++) {
130 sb.append(Utils.padLeft("" + nominalCounts[i], 5));
131 }
132 }
133 sb.append('\n');
134 return sb.toString();
135 }
136}
Note: See TracBrowser for help on using the repository browser.