source: trunk/gsdl/packages/kea/kea-3.0/weka/core/Statistics.java@ 8815

Last change on this file since 8815 was 8815, checked in by mdewsnip, 19 years ago

Kea 3.0, as downloaded from http://www.nzdl.org/kea but with CSTR_abstracts_test, CSTR_abstracts_train, Chinese_test, and Chinese_train directories removed.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.7 KB
Line 
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * Statistics.java
19 * Copyright (C) 1999 Eibe Frank
20 *
21 */
22
23package weka.core;
24
25/**
26 * Class implementing some distributions, tests, etc. Most of the
27 * code is adapted from Gary Perlman's unixstat.
28 *
29 * @author Eibe Frank ([email protected])
30 * @version $Revision: 8815 $
31 */
32public class Statistics {
33
34 /** Some constants */
35 private static double logSqrtPi = Math.log(Math.sqrt(Math.PI));
36 private static double rezSqrtPi = 1/Math.sqrt(Math.PI);
37 private static double bigx = 20.0;
38
39 /**
40 * Computes standard error for observed values of a binomial
41 * random variable.
42 *
43 * @param p the probability of success
44 * @param n the size of the sample
45 * @return the standard error
46 */
47 public static double binomialStandardError(double p, int n) {
48
49 if (n == 0) {
50 return 0;
51 }
52 return Math.sqrt((p*(1-p))/(double) n);
53 }
54
55 /**
56 * Returns chi-squared probability for given value and degrees
57 * of freedom. (The probability that the chi-squared variate
58 * will be greater than x for the given degrees of freedom.)
59 * Adapted from unixstat by Gary Perlman.
60 *
61 * @param x the value
62 * @param df the number of degrees of freedom
63 */
64 public static double chiSquaredProbability(double x, int df) {
65
66 double a, y = 0, s, e, c, z, val;
67 boolean even;
68
69 if (x <= 0 || df < 1)
70 return (1);
71 a = 0.5 * x;
72 even = (((int)(2*(df/2))) == df);
73 if (df > 1)
74 y = Math.exp(-a); //((-a < -bigx) ? 0.0 : Math.exp (-a));
75 s = (even ? y : (2.0 * normalProbability(-Math.sqrt (x))));
76 if (df > 2){
77 x = 0.5 * (df - 1.0);
78 z = (even ? 1.0 : 0.5);
79 if (a > bigx){
80 e = (even ? 0.0 : logSqrtPi);
81 c = Math.log (a);
82 while (z <= x){
83 e = Math.log (z) + e;
84 val = c*z-a-e;
85 s += Math.exp (val); //((val < -bigx) ? 0.0 : Math.exp (val));
86 z += 1.0;
87 }
88 return (s);
89 }else{
90 e = (even ? 1.0 : (rezSqrtPi / Math.sqrt (a)));
91 c = 0.0;
92 while (z <= x){
93 e = e * (a / z);
94 c = c + e;
95 z += 1.0;
96 }
97 return (c * y + s);
98 }
99 }else{
100 return (s);
101 }
102 }
103
104 /**
105 * Critical value for given probability of F-distribution.
106 * Adapted from unixstat by Gary Perlman.
107 *
108 * @param p the probability
109 * @param df1 the first number of degrees of freedom
110 * @param df2 the second number of degrees of freedom
111 * @return the critical value for the given probability
112 */
113 public static double FCriticalValue(double p, int df1, int df2) {
114
115 double fval;
116 double maxf = 99999.0; /* maximum possible F ratio */
117 double minf = .000001; /* minimum possible F ratio */
118
119 if (p <= 0.0 || p >= 1.0)
120 return (0.0);
121
122 fval = 1.0 / p; /* the smaller the p, the larger the F */
123
124 while (Math.abs (maxf - minf) > .000001) {
125 if (FProbability(fval, df1, df2) < p) /* F too large */
126 maxf = fval;
127 else /* F too small */
128 minf = fval;
129 fval = (maxf + minf) * 0.5;
130 }
131
132 return (fval);
133 }
134
135 /**
136 * Computes probability of F-ratio.
137 * Adapted from unixstat by Gary Perlman.
138 * Collected Algorithms of the CACM
139 * Algorithm 322
140 * Egon Dorrer
141 *
142 * @param F the F-ratio
143 * @param df1 the first number of degrees of freedom
144 * @param df2 the second number of degrees of freedom
145 * @return the probability of the F-ratio.
146 */
147 public static double FProbability(double F, int df1, int df2) {
148
149 int i, j;
150 int a, b;
151 double w, y, z, d, p;
152
153 if ((Math.abs(F) < 10e-10) || df1 <= 0 || df2 <= 0)
154 return (1.0);
155 a = (df1%2 == 1) ? 1 : 2;
156 b = (df2%2 == 1) ? 1 : 2;
157 w = (F * df1) / df2;
158 z = 1.0 / (1.0 + w);
159 if (a == 1)
160 if (b == 1) {
161 p = Math.sqrt (w);
162 y = 1/Math.PI; /* 1 / 3.14159 */
163 d = y * z / p;
164 p = 2.0 * y * Math.atan (p);
165 } else {
166 p = Math.sqrt (w * z);
167 d = 0.5 * p * z / w;
168 } else if (b == 1) {
169 p = Math.sqrt (z);
170 d = 0.5 * z * p;
171 p = 1.0 - p;
172 } else {
173 d = z * z;
174 p = w * z;
175 }
176 y = 2.0 * w / z;
177 for (j = b + 2; j <= df2; j += 2) {
178 d *= (1.0 + a / (j - 2.0)) * z;
179 p = (a == 1 ? p + d * y / (j - 1.0) : (p + w) * z);
180 }
181 y = w * z;
182 z = 2.0 / z;
183 b = df2 - 2;
184 for (i = a + 2; i <= df1; i += 2) {
185 j = i + b;
186 d *= y * j / (i - 2.0);
187 p -= z * d / j;
188 }
189
190 // correction for approximation errors suggested in certification
191 if (p < 0.0)
192 p = 0.0;
193 else if (p > 1.0)
194 p = 1.0;
195 return (1.0-p);
196 }
197
198 /**
199 * Returns probability that the standardized normal variate Z (mean = 0, standard
200 * deviation = 1) is less than z.
201 * Adapted from unixstat by Gary Perlman.
202 *
203 * @param the z-value
204 * @return the probability of the z value according to the normal pdf
205 */
206 public static double normalProbability(double z) {
207
208 double y, x, w;
209
210 if (z == 0.0)
211 x = 0.0;
212 else{
213 y = 0.5 * Math.abs (z);
214 if (y >= 3.0)
215 x = 1.0;
216 else if (y < 1.0){
217 w = y*y;
218 x = ((((((((0.000124818987 * w
219 -0.001075204047) * w +0.005198775019) * w
220 -0.019198292004) * w +0.059054035642) * w
221 -0.151968751364) * w +0.319152932694) * w
222 -0.531923007300) * w +0.797884560593) * y * 2.0;
223 }else{
224 y -= 2.0;
225 x = (((((((((((((-0.000045255659 * y
226 +0.000152529290) * y -0.000019538132) * y
227 -0.000676904986) * y +0.001390604284) * y
228 -0.000794620820) * y -0.002034254874) * y
229 +0.006549791214) * y -0.010557625006) * y
230 +0.011630447319) * y -0.009279453341) * y
231 +0.005353579108) * y -0.002141268741) * y
232 +0.000535310849) * y +0.999936657524;
233 }
234 }
235
236 return (z > 0.0 ? ((x + 1.0) / 2.0) : ((1.0 - x) / 2.0));
237 }
238
239 /**
240 * Computes absolute size of half of a student-t confidence interval
241 * for given degrees of freedom, probability, and observed value.
242 *
243 * @param df the number of degrees of freedom
244 * @param p the probability
245 * @param se the observed value
246 * @return absolute size of half of a student-t confidence interval
247 */
248 public static double studentTConfidenceInterval(int df, double p,
249 double se) {
250
251 return Math.sqrt(FCriticalValue(p, 1, df))*se;
252 }
253
254 /**
255 * Main method for testing this class.
256 */
257 public static void main(String[] ops) {
258
259 System.out.println("Binomial standard error (0.5, 100): " +
260 Statistics.binomialStandardError(0.5, 100));
261 System.out.println("Chi-squared probability (2.558, 10): " +
262 Statistics.chiSquaredProbability(2.558, 10));
263 System.out.println("Normal probability (0.2): " +
264 Statistics.normalProbability(0.2));
265 System.out.println("F critical value (0.05, 4, 5): " +
266 Statistics.FCriticalValue(0.05, 4, 5));
267 System.out.println("F probability (5.1922, 4, 5): " +
268 Statistics.FProbability(5.1922, 4, 5));
269 System.out.println("Student-t confidence interval (9, 0.01, 2): " +
270 Statistics.studentTConfidenceInterval(9, 0.01, 2));
271 }
272}
273
274
275
276
277
278
279
280
Note: See TracBrowser for help on using the repository browser.