Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trunk/gsdl/packages/kea/kea-3.0/weka/core/Statistics.java@ 8815

Last change on this file since 8815 was 8815, checked in by mdewsnip, 19 years ago
Kea 3.0, as downloaded from http://www.nzdl.org/kea but with CSTR_abstracts_test, CSTR_abstracts_train, Chinese_test, and Chinese_train directories removed.
Property svn:keywords set to `Author Date Id Revision`
File size: 7.7 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* Statistics.java
19	* Copyright (C) 1999 Eibe Frank
20	*
21	*/
22
23	package weka.core;
24
25	/**
26	* Class implementing some distributions, tests, etc. Most of the
27	* code is adapted from Gary Perlman's unixstat.
28	*
29	* @author Eibe Frank ([email protected])
30	* @version $Revision: 8815 $
31	*/
32	public class Statistics {
33
34	/** Some constants */
35	private static double logSqrtPi = Math.log(Math.sqrt(Math.PI));
36	private static double rezSqrtPi = 1/Math.sqrt(Math.PI);
37	private static double bigx = 20.0;
38
39	/**
40	* Computes standard error for observed values of a binomial
41	* random variable.
42	*
43	* @param p the probability of success
44	* @param n the size of the sample
45	* @return the standard error
46	*/
47	public static double binomialStandardError(double p, int n) {
48
49	if (n == 0) {
50	return 0;
51	}
52	return Math.sqrt((p*(1-p))/(double) n);
53	}
54
55	/**
56	* Returns chi-squared probability for given value and degrees
57	* of freedom. (The probability that the chi-squared variate
58	* will be greater than x for the given degrees of freedom.)
59	* Adapted from unixstat by Gary Perlman.
60	*
61	* @param x the value
62	* @param df the number of degrees of freedom
63	*/
64	public static double chiSquaredProbability(double x, int df) {
65
66	double a, y = 0, s, e, c, z, val;
67	boolean even;
68
69	if (x <= 0 \|\| df < 1)
70	return (1);
71	a = 0.5 * x;
72	even = (((int)(2*(df/2))) == df);
73	if (df > 1)
74	y = Math.exp(-a); //((-a < -bigx) ? 0.0 : Math.exp (-a));
75	s = (even ? y : (2.0 * normalProbability(-Math.sqrt (x))));
76	if (df > 2){
77	x = 0.5 * (df - 1.0);
78	z = (even ? 1.0 : 0.5);
79	if (a > bigx){
80	e = (even ? 0.0 : logSqrtPi);
81	c = Math.log (a);
82	while (z <= x){
83	e = Math.log (z) + e;
84	val = c*z-a-e;
85	s += Math.exp (val); //((val < -bigx) ? 0.0 : Math.exp (val));
86	z += 1.0;
87	}
88	return (s);
89	}else{
90	e = (even ? 1.0 : (rezSqrtPi / Math.sqrt (a)));
91	c = 0.0;
92	while (z <= x){
93	e = e * (a / z);
94	c = c + e;
95	z += 1.0;
96	}
97	return (c * y + s);
98	}
99	}else{
100	return (s);
101	}
102	}
103
104	/**
105	* Critical value for given probability of F-distribution.
106	* Adapted from unixstat by Gary Perlman.
107	*
108	* @param p the probability
109	* @param df1 the first number of degrees of freedom
110	* @param df2 the second number of degrees of freedom
111	* @return the critical value for the given probability
112	*/
113	public static double FCriticalValue(double p, int df1, int df2) {
114
115	double fval;
116	double maxf = 99999.0; /* maximum possible F ratio */
117	double minf = .000001; /* minimum possible F ratio */
118
119	if (p <= 0.0 \|\| p >= 1.0)
120	return (0.0);
121
122	fval = 1.0 / p; /* the smaller the p, the larger the F */
123
124	while (Math.abs (maxf - minf) > .000001) {
125	if (FProbability(fval, df1, df2) < p) /* F too large */
126	maxf = fval;
127	else /* F too small */
128	minf = fval;
129	fval = (maxf + minf) * 0.5;
130	}
131
132	return (fval);
133	}
134
135	/**
136	* Computes probability of F-ratio.
137	* Adapted from unixstat by Gary Perlman.
138	* Collected Algorithms of the CACM
139	* Algorithm 322
140	* Egon Dorrer
141	*
142	* @param F the F-ratio
143	* @param df1 the first number of degrees of freedom
144	* @param df2 the second number of degrees of freedom
145	* @return the probability of the F-ratio.
146	*/
147	public static double FProbability(double F, int df1, int df2) {
148
149	int i, j;
150	int a, b;
151	double w, y, z, d, p;
152
153	if ((Math.abs(F) < 10e-10) \|\| df1 <= 0 \|\| df2 <= 0)
154	return (1.0);
155	a = (df1%2 == 1) ? 1 : 2;
156	b = (df2%2 == 1) ? 1 : 2;
157	w = (F * df1) / df2;
158	z = 1.0 / (1.0 + w);
159	if (a == 1)
160	if (b == 1) {
161	p = Math.sqrt (w);
162	y = 1/Math.PI; /* 1 / 3.14159 */
163	d = y * z / p;
164	p = 2.0 * y * Math.atan (p);
165	} else {
166	p = Math.sqrt (w * z);
167	d = 0.5 * p * z / w;
168	} else if (b == 1) {
169	p = Math.sqrt (z);
170	d = 0.5 * z * p;
171	p = 1.0 - p;
172	} else {
173	d = z * z;
174	p = w * z;
175	}
176	y = 2.0 * w / z;
177	for (j = b + 2; j <= df2; j += 2) {
178	d = (1.0 + a / (j - 2.0)) z;
179	p = (a == 1 ? p + d * y / (j - 1.0) : (p + w) * z);
180	}
181	y = w * z;
182	z = 2.0 / z;
183	b = df2 - 2;
184	for (i = a + 2; i <= df1; i += 2) {
185	j = i + b;
186	d = y j / (i - 2.0);
187	p -= z * d / j;
188	}
189
190	// correction for approximation errors suggested in certification
191	if (p < 0.0)
192	p = 0.0;
193	else if (p > 1.0)
194	p = 1.0;
195	return (1.0-p);
196	}
197
198	/**
199	* Returns probability that the standardized normal variate Z (mean = 0, standard
200	* deviation = 1) is less than z.
201	* Adapted from unixstat by Gary Perlman.
202	*
203	* @param the z-value
204	* @return the probability of the z value according to the normal pdf
205	*/
206	public static double normalProbability(double z) {
207
208	double y, x, w;
209
210	if (z == 0.0)
211	x = 0.0;
212	else{
213	y = 0.5 * Math.abs (z);
214	if (y >= 3.0)
215	x = 1.0;
216	else if (y < 1.0){
217	w = y*y;
218	x = ((((((((0.000124818987 * w
219	-0.001075204047) * w +0.005198775019) * w
220	-0.019198292004) * w +0.059054035642) * w
221	-0.151968751364) * w +0.319152932694) * w
222	-0.531923007300) * w +0.797884560593) * y * 2.0;
223	}else{
224	y -= 2.0;
225	x = (((((((((((((-0.000045255659 * y
226	+0.000152529290) * y -0.000019538132) * y
227	-0.000676904986) * y +0.001390604284) * y
228	-0.000794620820) * y -0.002034254874) * y
229	+0.006549791214) * y -0.010557625006) * y
230	+0.011630447319) * y -0.009279453341) * y
231	+0.005353579108) * y -0.002141268741) * y
232	+0.000535310849) * y +0.999936657524;
233	}
234	}
235
236	return (z > 0.0 ? ((x + 1.0) / 2.0) : ((1.0 - x) / 2.0));
237	}
238
239	/**
240	* Computes absolute size of half of a student-t confidence interval
241	* for given degrees of freedom, probability, and observed value.
242	*
243	* @param df the number of degrees of freedom
244	* @param p the probability
245	* @param se the observed value
246	* @return absolute size of half of a student-t confidence interval
247	*/
248	public static double studentTConfidenceInterval(int df, double p,
249	double se) {
250
251	return Math.sqrt(FCriticalValue(p, 1, df))*se;
252	}
253
254	/**
255	* Main method for testing this class.
256	*/
257	public static void main(String[] ops) {
258
259	System.out.println("Binomial standard error (0.5, 100): " +
260	Statistics.binomialStandardError(0.5, 100));
261	System.out.println("Chi-squared probability (2.558, 10): " +
262	Statistics.chiSquaredProbability(2.558, 10));
263	System.out.println("Normal probability (0.2): " +
264	Statistics.normalProbability(0.2));
265	System.out.println("F critical value (0.05, 4, 5): " +
266	Statistics.FCriticalValue(0.05, 4, 5));
267	System.out.println("F probability (5.1922, 4, 5): " +
268	Statistics.FProbability(5.1922, 4, 5));
269	System.out.println("Student-t confidence interval (9, 0.01, 2): " +
270	Statistics.studentTConfidenceInterval(9, 0.01, 2));
271	}
272	}
273
274
275
276
277
278
279
280

Note: See TracBrowser for help on using the repository browser.

Download in other formats: