| 1 |
/** |
|---|
| 2 |
* Cumulative Probability Distribution Functions |
|---|
| 3 |
* |
|---|
| 4 |
* Copyright: Based on the CEPHES math library, which is |
|---|
| 5 |
* Copyright (C) 1994 Stephen L. Moshier (moshier@world.std.com). |
|---|
| 6 |
* License: BSD style: $(LICENSE) |
|---|
| 7 |
* Authors: Stephen L. Moshier (original C code), Don Clugston |
|---|
| 8 |
*/ |
|---|
| 9 |
|
|---|
| 10 |
/** |
|---|
| 11 |
* Macros: |
|---|
| 12 |
* NAN = $(RED NAN) |
|---|
| 13 |
* SUP = <span style="vertical-align:super;font-size:smaller">$0</span> |
|---|
| 14 |
* GAMMA = Γ |
|---|
| 15 |
* INTEGRAL = ∫ |
|---|
| 16 |
* INTEGRATE = $(BIG ∫<sub>$(SMALL $1)</sub><sup>$2</sup>) |
|---|
| 17 |
* POWER = $1<sup>$2</sup> |
|---|
| 18 |
* BIGSUM = $(BIG Σ <sup>$2</sup><sub>$(SMALL $1)</sub>) |
|---|
| 19 |
* CHOOSE = $(BIG () <sup>$(SMALL $1)</sup><sub>$(SMALL $2)</sub> $(BIG )) |
|---|
| 20 |
* TABLE_SV = <table border=1 cellpadding=4 cellspacing=0> |
|---|
| 21 |
* <caption>Special Values</caption> |
|---|
| 22 |
* $0</table> |
|---|
| 23 |
* SVH = $(TR $(TH $1) $(TH $2)) |
|---|
| 24 |
* SV = $(TR $(TD $1) $(TD $2)) |
|---|
| 25 |
*/ |
|---|
| 26 |
|
|---|
| 27 |
module tango.math.Probability; |
|---|
| 28 |
static import tango.math.ErrorFunction; |
|---|
| 29 |
private import tango.math.GammaFunction; |
|---|
| 30 |
private import tango.math.Math; |
|---|
| 31 |
private import tango.math.IEEE; |
|---|
| 32 |
|
|---|
| 33 |
|
|---|
| 34 |
/*** |
|---|
| 35 |
Cumulative distribution function for the Normal distribution, and its complement. |
|---|
| 36 |
|
|---|
| 37 |
The normal (or Gaussian, or bell-shaped) distribution is |
|---|
| 38 |
defined as: |
|---|
| 39 |
|
|---|
| 40 |
normalDist(x) = 1/$(SQRT) π $(INTEGRAL -$(INFINITY), x) exp( - $(POWER t, 2)/2) dt |
|---|
| 41 |
= 0.5 + 0.5 * erf(x/sqrt(2)) |
|---|
| 42 |
= 0.5 * erfc(- x/sqrt(2)) |
|---|
| 43 |
|
|---|
| 44 |
Note that |
|---|
| 45 |
normalDistribution(x) = 1 - normalDistribution(-x). |
|---|
| 46 |
|
|---|
| 47 |
Accuracy: |
|---|
| 48 |
Within a few bits of machine resolution over the entire |
|---|
| 49 |
range. |
|---|
| 50 |
|
|---|
| 51 |
References: |
|---|
| 52 |
$(LINK http://www.netlib.org/cephes/ldoubdoc.html), |
|---|
| 53 |
G. Marsaglia, "Evaluating the Normal Distribution", |
|---|
| 54 |
Journal of Statistical Software <b>11</b>, (July 2004). |
|---|
| 55 |
*/ |
|---|
| 56 |
real normalDistribution(real a) |
|---|
| 57 |
{ |
|---|
| 58 |
return tango.math.ErrorFunction.normalDistributionImpl(a); |
|---|
| 59 |
} |
|---|
| 60 |
|
|---|
| 61 |
/** ditto */ |
|---|
| 62 |
real normalDistributionCompl(real a) |
|---|
| 63 |
{ |
|---|
| 64 |
return -tango.math.ErrorFunction.normalDistributionImpl(-a); |
|---|
| 65 |
} |
|---|
| 66 |
|
|---|
| 67 |
/****************************** |
|---|
| 68 |
* Inverse of Normal distribution function |
|---|
| 69 |
* |
|---|
| 70 |
* Returns the argument, x, for which the area under the |
|---|
| 71 |
* Normal probability density function (integrated from |
|---|
| 72 |
* minus infinity to x) is equal to p. |
|---|
| 73 |
* |
|---|
| 74 |
* For small arguments 0 < p < exp(-2), the program computes |
|---|
| 75 |
* z = sqrt( -2 log(p) ); then the approximation is |
|---|
| 76 |
* x = z - log(z)/z - (1/z) P(1/z) / Q(1/z) . |
|---|
| 77 |
* For larger arguments, x/sqrt(2 pi) = w + w^3 R(w^2)/S(w^2)) , |
|---|
| 78 |
* where w = p - 0.5 . |
|---|
| 79 |
*/ |
|---|
| 80 |
real normalDistributionInv(real p) |
|---|
| 81 |
{ |
|---|
| 82 |
return tango.math.ErrorFunction.normalDistributionInvImpl(p); |
|---|
| 83 |
} |
|---|
| 84 |
|
|---|
| 85 |
/** ditto */ |
|---|
| 86 |
real normalDistributionComplInv(real p) |
|---|
| 87 |
{ |
|---|
| 88 |
return -tango.math.ErrorFunction.normalDistributionInvImpl(-p); |
|---|
| 89 |
} |
|---|
| 90 |
|
|---|
| 91 |
debug(UnitTest) { |
|---|
| 92 |
unittest { |
|---|
| 93 |
assert(feqrel(normalDistributionInv(normalDistribution(0.1)),0.1L)>=real.mant_dig-4); |
|---|
| 94 |
assert(feqrel(normalDistributionComplInv(normalDistributionCompl(0.1)),0.1L)>=real.mant_dig-4); |
|---|
| 95 |
} |
|---|
| 96 |
} |
|---|
| 97 |
|
|---|
| 98 |
/** Student's t cumulative distribution function |
|---|
| 99 |
* |
|---|
| 100 |
* Computes the integral from minus infinity to t of the Student |
|---|
| 101 |
* t distribution with integer nu > 0 degrees of freedom: |
|---|
| 102 |
* |
|---|
| 103 |
* $(GAMMA)( (nu+1)/2) / ( sqrt(nu π) $(GAMMA)(nu/2) ) * |
|---|
| 104 |
* $(INTEGRATE -∞, t) $(POWER (1+$(POWER x, 2)/nu), -(nu+1)/2) dx |
|---|
| 105 |
* |
|---|
| 106 |
* Can be used to test whether the means of two normally distributed populations |
|---|
| 107 |
* are equal. |
|---|
| 108 |
* |
|---|
| 109 |
* It is related to the incomplete beta integral: |
|---|
| 110 |
* 1 - studentsDistribution(nu,t) = 0.5 * betaDistribution( nu/2, 1/2, z ) |
|---|
| 111 |
* where |
|---|
| 112 |
* z = nu/(nu + t<sup>2</sup>). |
|---|
| 113 |
* |
|---|
| 114 |
* For t < -1.6, this is the method of computation. For higher t, |
|---|
| 115 |
* a direct method is derived from integration by parts. |
|---|
| 116 |
* Since the function is symmetric about t=0, the area under the |
|---|
| 117 |
* right tail of the density is found by calling the function |
|---|
| 118 |
* with -t instead of t. |
|---|
| 119 |
*/ |
|---|
| 120 |
real studentsTDistribution(int nu, real t) |
|---|
| 121 |
in{ |
|---|
| 122 |
assert(nu>0); |
|---|
| 123 |
} |
|---|
| 124 |
body{ |
|---|
| 125 |
/* Based on code from Cephes Math Library Release 2.3: January, 1995 |
|---|
| 126 |
Copyright 1984, 1995 by Stephen L. Moshier |
|---|
| 127 |
*/ |
|---|
| 128 |
|
|---|
| 129 |
if ( nu <= 0 ) return NaN(TANGO_NAN.STUDENTSDDISTRIBUTION_DOMAIN); // domain error -- or should it return 0? |
|---|
| 130 |
if ( t == 0.0 ) return 0.5; |
|---|
| 131 |
|
|---|
| 132 |
real rk, z, p; |
|---|
| 133 |
|
|---|
| 134 |
if ( t < -1.6 ) { |
|---|
| 135 |
rk = nu; |
|---|
| 136 |
z = rk / (rk + t * t); |
|---|
| 137 |
return 0.5L * betaIncomplete( 0.5L*rk, 0.5L, z ); |
|---|
| 138 |
} |
|---|
| 139 |
|
|---|
| 140 |
/* compute integral from -t to + t */ |
|---|
| 141 |
|
|---|
| 142 |
rk = nu; /* degrees of freedom */ |
|---|
| 143 |
|
|---|
| 144 |
real x; |
|---|
| 145 |
if (t < 0) x = -t; else x = t; |
|---|
| 146 |
z = 1.0L + ( x * x )/rk; |
|---|
| 147 |
|
|---|
| 148 |
real f, tz; |
|---|
| 149 |
int j; |
|---|
| 150 |
|
|---|
| 151 |
if ( nu & 1) { |
|---|
| 152 |
/* computation for odd nu */ |
|---|
| 153 |
real xsqk = x/sqrt(rk); |
|---|
| 154 |
p = atan( xsqk ); |
|---|
| 155 |
if ( nu > 1 ) { |
|---|
| 156 |
f = 1.0L; |
|---|
| 157 |
tz = 1.0L; |
|---|
| 158 |
j = 3; |
|---|
| 159 |
while( (j<=(nu-2)) && ( (tz/f) > real.epsilon ) ) { |
|---|
| 160 |
tz *= (j-1)/( z * j ); |
|---|
| 161 |
f += tz; |
|---|
| 162 |
j += 2; |
|---|
| 163 |
} |
|---|
| 164 |
p += f * xsqk/z; |
|---|
| 165 |
} |
|---|
| 166 |
p *= 2.0L/PI; |
|---|
| 167 |
} else { |
|---|
| 168 |
/* computation for even nu */ |
|---|
| 169 |
f = 1.0L; |
|---|
| 170 |
tz = 1.0L; |
|---|
| 171 |
j = 2; |
|---|
| 172 |
|
|---|
| 173 |
while ( ( j <= (nu-2) ) && ( (tz/f) > real.epsilon ) ) { |
|---|
| 174 |
tz *= (j - 1)/( z * j ); |
|---|
| 175 |
f += tz; |
|---|
| 176 |
j += 2; |
|---|
| 177 |
} |
|---|
| 178 |
p = f * x/sqrt(z*rk); |
|---|
| 179 |
} |
|---|
| 180 |
if ( t < 0.0L ) |
|---|
| 181 |
p = -p; /* note destruction of relative accuracy */ |
|---|
| 182 |
|
|---|
| 183 |
p = 0.5L + 0.5L * p; |
|---|
| 184 |
return p; |
|---|
| 185 |
} |
|---|
| 186 |
|
|---|
| 187 |
/** Inverse of Student's t distribution |
|---|
| 188 |
* |
|---|
| 189 |
* Given probability p and degrees of freedom nu, |
|---|
| 190 |
* finds the argument t such that the one-sided |
|---|
| 191 |
* studentsDistribution(nu,t) is equal to p. |
|---|
| 192 |
* |
|---|
| 193 |
* Params: |
|---|
| 194 |
* nu = degrees of freedom. Must be >1 |
|---|
| 195 |
* p = probability. 0 < p < 1 |
|---|
| 196 |
*/ |
|---|
| 197 |
real studentsTDistributionInv(int nu, real p ) |
|---|
| 198 |
in { |
|---|
| 199 |
assert(nu>0); |
|---|
| 200 |
assert(p>=0.0L && p<=1.0L); |
|---|
| 201 |
} |
|---|
| 202 |
body |
|---|
| 203 |
{ |
|---|
| 204 |
if (p==0) return -real.infinity; |
|---|
| 205 |
if (p==1) return real.infinity; |
|---|
| 206 |
|
|---|
| 207 |
real rk, z; |
|---|
| 208 |
rk = nu; |
|---|
| 209 |
|
|---|
| 210 |
if ( p > 0.25L && p < 0.75L ) { |
|---|
| 211 |
if ( p == 0.5L ) return 0; |
|---|
| 212 |
z = 1.0L - 2.0L * p; |
|---|
| 213 |
z = betaIncompleteInv( 0.5L, 0.5L*rk, fabs(z) ); |
|---|
| 214 |
real t = sqrt( rk*z/(1.0L-z) ); |
|---|
| 215 |
if( p < 0.5L ) |
|---|
| 216 |
t = -t; |
|---|
| 217 |
return t; |
|---|
| 218 |
} |
|---|
| 219 |
int rflg = -1; // sign of the result |
|---|
| 220 |
if (p >= 0.5L) { |
|---|
| 221 |
p = 1.0L - p; |
|---|
| 222 |
rflg = 1; |
|---|
| 223 |
} |
|---|
| 224 |
z = betaIncompleteInv( 0.5L*rk, 0.5L, 2.0L*p ); |
|---|
| 225 |
|
|---|
| 226 |
if (z<0) return rflg * real.infinity; |
|---|
| 227 |
return rflg * sqrt( rk/z - rk ); |
|---|
| 228 |
} |
|---|
| 229 |
|
|---|
| 230 |
debug(UnitTest) { |
|---|
| 231 |
unittest { |
|---|
| 232 |
|
|---|
| 233 |
// There are simple forms for nu = 1 and nu = 2. |
|---|
| 234 |
|
|---|
| 235 |
// if (nu == 1), tDistribution(x) = 0.5 + atan(x)/PI |
|---|
| 236 |
// so tDistributionInv(p) = tan( PI * (p-0.5) ); |
|---|
| 237 |
// nu==2: tDistribution(x) = 0.5 * (1 + x/ sqrt(2+x*x) ) |
|---|
| 238 |
|
|---|
| 239 |
assert(studentsTDistribution(1, -0.4)== 0.5 + atan(-0.4)/PI); |
|---|
| 240 |
assert(studentsTDistribution(2, 0.9) == 0.5L * (1 + 0.9L/sqrt(2.0L + 0.9*0.9)) ); |
|---|
| 241 |
assert(studentsTDistribution(2, -5.4) == 0.5L * (1 - 5.4L/sqrt(2.0L + 5.4*5.4)) ); |
|---|
| 242 |
|
|---|
| 243 |
// return true if a==b to given number of places. |
|---|
| 244 |
bool isfeqabs(real a, real b, real diff) |
|---|
| 245 |
{ |
|---|
| 246 |
return fabs(a-b) < diff; |
|---|
| 247 |
} |
|---|
| 248 |
|
|---|
| 249 |
// Check a few spot values with statsoft.com (Mathworld values are wrong!!) |
|---|
| 250 |
// According to statsoft.com, studentsDistributionInv(10, 0.995)= 3.16927. |
|---|
| 251 |
|
|---|
| 252 |
// The remaining values listed here are from Excel, and are unlikely to be accurate |
|---|
| 253 |
// in the last decimal places. However, they are helpful as a sanity check. |
|---|
| 254 |
|
|---|
| 255 |
// Microsoft Excel 2003 gives TINV(2*(1-0.995), 10) == 3.16927267160917 |
|---|
| 256 |
assert(isfeqabs(studentsTDistributionInv(10, 0.995), 3.169_272_67L, 0.000_000_005L)); |
|---|
| 257 |
|
|---|
| 258 |
|
|---|
| 259 |
assert(isfeqabs(studentsTDistributionInv(8, 0.6), 0.261_921_096_769_043L,0.000_000_000_05L)); |
|---|
| 260 |
// -TINV(2*0.4, 18) == -0.257123042655869 |
|---|
| 261 |
|
|---|
| 262 |
assert(isfeqabs(studentsTDistributionInv(18, 0.4), -0.257_123_042_655_869L, 0.000_000_000_05L)); |
|---|
| 263 |
assert( feqrel(studentsTDistribution(18, studentsTDistributionInv(18, 0.4L)),0.4L) |
|---|
| 264 |
> real.mant_dig-5 ); |
|---|
| 265 |
assert( feqrel(studentsTDistribution(11, studentsTDistributionInv(11, 0.9L)),0.9L) |
|---|
| 266 |
> real.mant_dig-2); |
|---|
| 267 |
} |
|---|
| 268 |
} |
|---|
| 269 |
|
|---|
| 270 |
/** The F distribution, its complement, and inverse. |
|---|
| 271 |
* |
|---|
| 272 |
* The F density function (also known as Snedcor's density or the |
|---|
| 273 |
* variance ratio density) is the density |
|---|
| 274 |
* of x = (u1/df1)/(u2/df2), where u1 and u2 are random |
|---|
| 275 |
* variables having $(POWER χ,2) distributions with df1 |
|---|
| 276 |
* and df2 degrees of freedom, respectively. |
|---|
| 277 |
* |
|---|
| 278 |
* fDistribution returns the area from zero to x under the F density |
|---|
| 279 |
* function. The complementary function, |
|---|
| 280 |
* fDistributionCompl, returns the area from x to ∞ under the F density function. |
|---|
| 281 |
* |
|---|
| 282 |
* The inverse of the complemented F distribution, |
|---|
| 283 |
* fDistributionComplInv, finds the argument x such that the integral |
|---|
| 284 |
* from x to infinity of the F density is equal to the given probability y. |
|---|
| 285 |
* |
|---|
| 286 |
* Can be used to test whether the means of multiple normally distributed |
|---|
| 287 |
* populations, all with the same standard deviation, are equal; |
|---|
| 288 |
* or to test that the standard deviations of two normally distributed |
|---|
| 289 |
* populations are equal. |
|---|
| 290 |
* |
|---|
| 291 |
* Params: |
|---|
| 292 |
* df1 = Degrees of freedom of the first variable. Must be >= 1 |
|---|
| 293 |
* df2 = Degrees of freedom of the second variable. Must be >= 1 |
|---|
| 294 |
* x = Must be >= 0 |
|---|
| 295 |
*/ |
|---|
| 296 |
real fDistribution(int df1, int df2, real x) |
|---|
| 297 |
in { |
|---|
| 298 |
assert(df1>=1 && df2>=1); |
|---|
| 299 |
assert(x>=0); |
|---|
| 300 |
} |
|---|
| 301 |
body{ |
|---|
| 302 |
real a = cast(real)(df1); |
|---|
| 303 |
real b = cast(real)(df2); |
|---|
| 304 |
real w = a * x; |
|---|
| 305 |
w = w/(b + w); |
|---|
| 306 |
return betaIncomplete(0.5L*a, 0.5L*b, w); |
|---|
| 307 |
} |
|---|
| 308 |
|
|---|
| 309 |
/** ditto */ |
|---|
| 310 |
real fDistributionCompl(int df1, int df2, real x) |
|---|
| 311 |
in { |
|---|
| 312 |
assert(df1>=1 && df2>=1); |
|---|
| 313 |
assert(x>=0); |
|---|
| 314 |
} |
|---|
| 315 |
body{ |
|---|
| 316 |
real a = cast(real)(df1); |
|---|
| 317 |
real b = cast(real)(df2); |
|---|
| 318 |
real w = b / (b + a * x); |
|---|
| 319 |
return betaIncomplete( 0.5L*b, 0.5L*a, w ); |
|---|
| 320 |
} |
|---|
| 321 |
|
|---|
| 322 |
/* |
|---|
| 323 |
* Inverse of complemented F distribution |
|---|
| 324 |
* |
|---|
| 325 |
* Finds the F density argument x such that the integral |
|---|
| 326 |
* from x to infinity of the F density is equal to the |
|---|
| 327 |
* given probability p. |
|---|
| 328 |
* |
|---|
| 329 |
* This is accomplished using the inverse beta integral |
|---|
| 330 |
* function and the relations |
|---|
| 331 |
* |
|---|
| 332 |
* z = betaIncompleteInv( df2/2, df1/2, p ), |
|---|
| 333 |
* x = df2 (1-z) / (df1 z). |
|---|
| 334 |
* |
|---|
| 335 |
* Note that the following relations hold for the inverse of |
|---|
| 336 |
* the uncomplemented F distribution: |
|---|
| 337 |
* |
|---|
| 338 |
* z = betaIncompleteInv( df1/2, df2/2, p ), |
|---|
| 339 |
* x = df2 z / (df1 (1-z)). |
|---|
| 340 |
*/ |
|---|
| 341 |
|
|---|
| 342 |
/** ditto */ |
|---|
| 343 |
real fDistributionComplInv(int df1, int df2, real p ) |
|---|
| 344 |
in { |
|---|
| 345 |
assert(df1>=1 && df2>=1); |
|---|
| 346 |
assert(p>=0 && p<=1.0); |
|---|
| 347 |
} |
|---|
| 348 |
body{ |
|---|
| 349 |
real a = df1; |
|---|
| 350 |
real b = df2; |
|---|
| 351 |
/* Compute probability for x = 0.5. */ |
|---|
| 352 |
real w = betaIncomplete( 0.5L*b, 0.5L*a, 0.5L ); |
|---|
| 353 |
/* If that is greater than p, then the solution w < .5. |
|---|
| 354 |
Otherwise, solve at 1-p to remove cancellation in (b - b*w). */ |
|---|
| 355 |
if ( w > p || p < 0.001L) { |
|---|
| 356 |
w = betaIncompleteInv( 0.5L*b, 0.5L*a, p ); |
|---|
| 357 |
return (b - b*w)/(a*w); |
|---|
| 358 |
} else { |
|---|
| 359 |
w = betaIncompleteInv( 0.5L*a, 0.5L*b, 1.0L - p ); |
|---|
| 360 |
return b*w/(a*(1.0L-w)); |
|---|
| 361 |
} |
|---|
| 362 |
} |
|---|
| 363 |
|
|---|
| 364 |
debug(UnitTest) { |
|---|
| 365 |
unittest { |
|---|
| 366 |
// fDistCompl(df1, df2, x) = Excel's FDIST(x, df1, df2) |
|---|
| 367 |
assert(fabs(fDistributionCompl(6, 4, 16.5) - 0.00858719177897249L)< 0.0000000000005L); |
|---|
| 368 |
assert(fabs((1-fDistribution(12, 23, 0.1)) - 0.99990562845505L)< 0.0000000000005L); |
|---|
| 369 |
assert(fabs(fDistributionComplInv(8, 34, 0.2) - 1.48267037661408L)< 0.0000000005L); |
|---|
| 370 |
assert(fabs(fDistributionComplInv(4, 16, 0.008) - 5.043_537_593_48596L)< 0.0000000005L); |
|---|
| 371 |
// Regression test: This one used to fail because of a bug in the definition of MINLOG. |
|---|
| 372 |
assert(feqrel(fDistributionCompl(4, 16, fDistributionComplInv(4,16, 0.008)), 0.008L)>=real.mant_dig-3); |
|---|
| 373 |
} |
|---|
| 374 |
} |
|---|
| 375 |
|
|---|
| 376 |
/** $(POWER χ,2) cumulative distribution function and its complement. |
|---|
| 377 |
* |
|---|
| 378 |
* Returns the area under the left hand tail (from 0 to x) |
|---|
| 379 |
* of the Chi square probability density function with |
|---|
| 380 |
* v degrees of freedom. The complement returns the area under |
|---|
| 381 |
* the right hand tail (from x to ∞). |
|---|
| 382 |
* |
|---|
| 383 |
* chiSqrDistribution(x | v) = ($(INTEGRATE 0, x) |
|---|
| 384 |
* $(POWER t, v/2-1) $(POWER e, -t/2) dt ) |
|---|
| 385 |
* / $(POWER 2, v/2) $(GAMMA)(v/2) |
|---|
| 386 |
* |
|---|
| 387 |
* chiSqrDistributionCompl(x | v) = ($(INTEGRATE x, ∞) |
|---|
| 388 |
* $(POWER t, v/2-1) $(POWER e, -t/2) dt ) |
|---|
| 389 |
* / $(POWER 2, v/2) $(GAMMA)(v/2) |
|---|
| 390 |
* |
|---|
| 391 |
* Params: |
|---|
| 392 |
* v = degrees of freedom. Must be positive. |
|---|
| 393 |
* x = the $(POWER χ,2) variable. Must be positive. |
|---|
| 394 |
* |
|---|
| 395 |
*/ |
|---|
| 396 |
real chiSqrDistribution(real v, real x) |
|---|
| 397 |
in { |
|---|
| 398 |
assert(x>=0); |
|---|
| 399 |
assert(v>=1.0); |
|---|
| 400 |
} |
|---|
| 401 |
body{ |
|---|
| 402 |
return gammaIncomplete( 0.5*v, 0.5*x); |
|---|
| 403 |
} |
|---|
| 404 |
|
|---|
| 405 |
/** ditto */ |
|---|
| 406 |
real chiSqrDistributionCompl(real v, real x) |
|---|
| 407 |
in { |
|---|
| 408 |
assert(x>=0); |
|---|
| 409 |
assert(v>=1.0); |
|---|
| 410 |
} |
|---|
| 411 |
body{ |
|---|
| 412 |
return gammaIncompleteCompl( 0.5L*v, 0.5L*x ); |
|---|
| 413 |
} |
|---|
| 414 |
|
|---|
| 415 |
/** |
|---|
| 416 |
* Inverse of complemented $(POWER χ, 2) distribution |
|---|
| 417 |
* |
|---|
| 418 |
* Finds the $(POWER χ, 2) argument x such that the integral |
|---|
| 419 |
* from x to ∞ of the $(POWER χ, 2) density is equal |
|---|
| 420 |
* to the given cumulative probability p. |
|---|
| 421 |
* |
|---|
| 422 |
* Params: |
|---|
| 423 |
* p = Cumulative probability. 0<= p <=1. |
|---|
| 424 |
* v = Degrees of freedom. Must be positive. |
|---|
| 425 |
* |
|---|
| 426 |
*/ |
|---|
| 427 |
real chiSqrDistributionComplInv(real v, real p) |
|---|
| 428 |
in { |
|---|
| 429 |
assert(p>=0 && p<=1.0L); |
|---|
| 430 |
assert(v>=1.0L); |
|---|
| 431 |
} |
|---|
| 432 |
body |
|---|
| 433 |
{ |
|---|
| 434 |
return 2.0 * gammaIncompleteComplInv( 0.5*v, p); |
|---|
| 435 |
} |
|---|
| 436 |
|
|---|
| 437 |
debug(UnitTest) { |
|---|
| 438 |
unittest { |
|---|
| 439 |
assert(feqrel(chiSqrDistributionCompl(3.5L, chiSqrDistributionComplInv(3.5L, 0.1L)), 0.1L)>=real.mant_dig-5); |
|---|
| 440 |
assert(chiSqrDistribution(19.02L, 0.4L) + chiSqrDistributionCompl(19.02L, 0.4L) ==1.0L); |
|---|
| 441 |
} |
|---|
| 442 |
} |
|---|
| 443 |
|
|---|
| 444 |
/** |
|---|
| 445 |
* The Γ distribution and its complement |
|---|
| 446 |
* |
|---|
| 447 |
* The Γ distribution is defined as the integral from 0 to x of the |
|---|
| 448 |
* gamma probability density function. The complementary function returns the |
|---|
| 449 |
* integral from x to ∞ |
|---|
| 450 |
* |
|---|
| 451 |
* gammaDistribution = ($(INTEGRATE 0, x) $(POWER t, b-1)$(POWER e, -at) dt) $(POWER a, b)/Γ(b) |
|---|
| 452 |
* |
|---|
| 453 |
* x must be greater than 0. |
|---|
| 454 |
*/ |
|---|
| 455 |
real gammaDistribution(real a, real b, real x) |
|---|
| 456 |
in { |
|---|
| 457 |
assert(x>=0); |
|---|
| 458 |
} |
|---|
| 459 |
body { |
|---|
| 460 |
return gammaIncomplete(b, a*x); |
|---|
| 461 |
} |
|---|
| 462 |
|
|---|
| 463 |
/** ditto */ |
|---|
| 464 |
real gammaDistributionCompl(real a, real b, real x ) |
|---|
| 465 |
in { |
|---|
| 466 |
assert(x>=0); |
|---|
| 467 |
} |
|---|
| 468 |
body { |
|---|
| 469 |
return gammaIncompleteCompl( b, a * x ); |
|---|
| 470 |
} |
|---|
| 471 |
|
|---|
| 472 |
debug(UnitTest) { |
|---|
| 473 |
unittest { |
|---|
| 474 |
assert(gammaDistribution(7,3,0.18)+gammaDistributionCompl(7,3,0.18)==1); |
|---|
| 475 |
} |
|---|
| 476 |
} |
|---|
| 477 |
|
|---|
| 478 |
/********************** |
|---|
| 479 |
* Beta distribution and its inverse |
|---|
| 480 |
* |
|---|
| 481 |
* Returns the incomplete beta integral of the arguments, evaluated |
|---|
| 482 |
* from zero to x. The function is defined as |
|---|
| 483 |
* |
|---|
| 484 |
* betaDistribution = Γ(a+b)/(Γ(a) Γ(b)) * |
|---|
| 485 |
* $(INTEGRATE 0, x) $(POWER t, a-1)$(POWER (1-t),b-1) dt |
|---|
| 486 |
* |
|---|
| 487 |
* The domain of definition is 0 <= x <= 1. In this |
|---|
| 488 |
* implementation a and b are restricted to positive values. |
|---|
| 489 |
* The integral from x to 1 may be obtained by the symmetry |
|---|
| 490 |
* relation |
|---|
| 491 |
* |
|---|
| 492 |
* betaDistributionCompl(a, b, x ) = betaDistribution( b, a, 1-x ) |
|---|
| 493 |
* |
|---|
| 494 |
* The integral is evaluated by a continued fraction expansion |
|---|
| 495 |
* or, when b*x is small, by a power series. |
|---|
| 496 |
* |
|---|
| 497 |
* The inverse finds the value of x for which betaDistribution(a,b,x) - y = 0 |
|---|
| 498 |
*/ |
|---|
| 499 |
real betaDistribution(real a, real b, real x ) |
|---|
| 500 |
{ |
|---|
| 501 |
return betaIncomplete(a, b, x ); |
|---|
| 502 |
} |
|---|
| 503 |
|
|---|
| 504 |
/** ditto */ |
|---|
| 505 |
real betaDistributionCompl(real a, real b, real x) |
|---|
| 506 |
{ |
|---|
| 507 |
return betaIncomplete(b, a, 1-x); |
|---|
| 508 |
} |
|---|
| 509 |
|
|---|
| 510 |
/** ditto */ |
|---|
| 511 |
real betaDistributionInv(real a, real b, real y) |
|---|
| 512 |
{ |
|---|
| 513 |
return betaIncompleteInv(a, b, y); |
|---|
| 514 |
} |
|---|
| 515 |
|
|---|
| 516 |
/** ditto */ |
|---|
| 517 |
real betaDistributionComplInv(real a, real b, real y) |
|---|
| 518 |
{ |
|---|
| 519 |
return 1-betaIncompleteInv(b, a, y); |
|---|
| 520 |
} |
|---|
| 521 |
|
|---|
| 522 |
debug(UnitTest) { |
|---|
| 523 |
unittest { |
|---|
| 524 |
assert(feqrel(betaDistributionInv(2, 6, betaDistribution(2,6, 0.7L)),0.7L)>=real.mant_dig-3); |
|---|
| 525 |
assert(feqrel(betaDistributionComplInv(1.3, 8, betaDistributionCompl(1.3,8, 0.01L)),0.01L)>=real.mant_dig-4); |
|---|
| 526 |
} |
|---|
| 527 |
} |
|---|
| 528 |
|
|---|
| 529 |
/** |
|---|
| 530 |
* The Poisson distribution, its complement, and inverse |
|---|
| 531 |
* |
|---|
| 532 |
* k is the number of events. m is the mean. |
|---|
| 533 |
* The Poisson distribution is defined as the sum of the first k terms of |
|---|
| 534 |
* the Poisson density function. |
|---|
| 535 |
* The complement returns the sum of the terms k+1 to ∞. |
|---|
| 536 |
* |
|---|
| 537 |
* poissonDistribution = $(BIGSUM j=0, k) $(POWER e, -m) $(POWER m, j)/j! |
|---|
| 538 |
* |
|---|
| 539 |
* poissonDistributionCompl = $(BIGSUM j=k+1, ∞) $(POWER e, -m) $(POWER m, j)/j! |
|---|
| 540 |
* |
|---|
| 541 |
* The terms are not summed directly; instead the incomplete |
|---|
| 542 |
* gamma integral is employed, according to the relation |
|---|
| 543 |
* |
|---|
| 544 |
* y = poissonDistribution( k, m ) = gammaIncompleteCompl( k+1, m ). |
|---|
| 545 |
* |
|---|
| 546 |
* The arguments must both be positive. |
|---|
| 547 |
*/ |
|---|
| 548 |
real poissonDistribution(int k, real m ) |
|---|
| 549 |
in { |
|---|
| 550 |
assert(k>=0); |
|---|
| 551 |
assert(m>0); |
|---|
| 552 |
} |
|---|
| 553 |
body { |
|---|
| 554 |
return gammaIncompleteCompl( k+1.0, m ); |
|---|
| 555 |
} |
|---|
| 556 |
|
|---|
| 557 |
/** ditto */ |
|---|
| 558 |
real poissonDistributionCompl(int k, real m ) |
|---|
| 559 |
in { |
|---|
| 560 |
assert(k>=0); |
|---|
| 561 |
assert(m>0); |
|---|
| 562 |
} |
|---|
| 563 |
body { |
|---|
| 564 |
return gammaIncomplete( k+1.0, m ); |
|---|
| 565 |
} |
|---|
| 566 |
|
|---|
| 567 |
/** ditto */ |
|---|
| 568 |
real poissonDistributionInv( int k, real p ) |
|---|
| 569 |
in { |
|---|
| 570 |
assert(k>=0); |
|---|
| 571 |
assert(p>=0.0 && p<=1.0); |
|---|
| 572 |
} |
|---|
| 573 |
body { |
|---|
| 574 |
return gammaIncompleteComplInv(k+1, p); |
|---|
| 575 |
} |
|---|
| 576 |
|
|---|
| 577 |
debug(UnitTest) { |
|---|
| 578 |
unittest { |
|---|
| 579 |
// = Excel's POISSON(k, m, TRUE) |
|---|
| 580 |
assert( fabs(poissonDistribution(5, 6.3) |
|---|
| 581 |
- 0.398771730072867L) < 0.000000000000005L); |
|---|
| 582 |
assert( feqrel(poissonDistributionInv(8, poissonDistribution(8, 2.7e3L)), 2.7e3L)>=real.mant_dig-2); |
|---|
| 583 |
assert( poissonDistribution(2, 8.4e-5) + poissonDistributionCompl(2, 8.4e-5) == 1.0L); |
|---|
| 584 |
} |
|---|
| 585 |
} |
|---|
| 586 |
|
|---|
| 587 |
/*********************************** |
|---|
| 588 |
* Binomial distribution and complemented binomial distribution |
|---|
| 589 |
* |
|---|
| 590 |
* The binomial distribution is defined as the sum of the terms 0 through k |
|---|
| 591 |
* of the Binomial probability density. |
|---|
| 592 |
* The complement returns the sum of the terms k+1 through n. |
|---|
| 593 |
* |
|---|
| 594 |
binomialDistribution = $(BIGSUM j=0, k) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j) |
|---|
| 595 |
|
|---|
| 596 |
binomialDistributionCompl = $(BIGSUM j=k+1, n) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j) |
|---|
| 597 |
* |
|---|
| 598 |
* The terms are not summed directly; instead the incomplete |
|---|
| 599 |
* beta integral is employed, according to the formula |
|---|
| 600 |
* |
|---|
| 601 |
* y = binomialDistribution( k, n, p ) = betaDistribution( n-k, k+1, 1-p ). |
|---|
| 602 |
* |
|---|
| 603 |
* The arguments must be positive, with p ranging from 0 to 1, and k<=n. |
|---|
| 604 |
*/ |
|---|
| 605 |
real binomialDistribution(int k, int n, real p ) |
|---|
| 606 |
in { |
|---|
| 607 |
assert(p>=0 && p<=1.0); // domain error |
|---|
| 608 |
assert(k>=0 && k<=n); |
|---|
| 609 |
} |
|---|
| 610 |
body{ |
|---|
| 611 |
real dk, dn, q; |
|---|
| 612 |
if( k == n ) |
|---|
| 613 |
return 1.0L; |
|---|
| 614 |
|
|---|
| 615 |
q = 1.0L - p; |
|---|
| 616 |
dn = n - k; |
|---|
| 617 |
if ( k == 0 ) { |
|---|
| 618 |
return pow( q, dn ); |
|---|
| 619 |
} else { |
|---|
| 620 |
return betaIncomplete( dn, k + 1, q ); |
|---|
| 621 |
} |
|---|
| 622 |
} |
|---|
| 623 |
|
|---|
| 624 |
debug(UnitTest) { |
|---|
| 625 |
unittest { |
|---|
| 626 |
// = Excel's BINOMDIST(k, n, p, TRUE) |
|---|
| 627 |
assert( fabs(binomialDistribution(8, 12, 0.5) |
|---|
| 628 |
- 0.927001953125L) < 0.0000000000005L); |
|---|
| 629 |
assert( fabs(binomialDistribution(0, 3, 0.008L) |
|---|
| 630 |
- 0.976191488L) < 0.00000000005L); |
|---|
| 631 |
assert(binomialDistribution(7,7, 0.3)==1.0); |
|---|
| 632 |
} |
|---|
| 633 |
} |
|---|
| 634 |
|
|---|
| 635 |
/** ditto */ |
|---|
| 636 |
real binomialDistributionCompl(int k, int n, real p ) |
|---|
| 637 |
in { |
|---|
| 638 |
assert(p>=0 && p<=1.0); // domain error |
|---|
| 639 |
assert(k>=0 && k<=n); |
|---|
| 640 |
} |
|---|
| 641 |
body{ |
|---|
| 642 |
if ( k == n ) { |
|---|
| 643 |
return 0; |
|---|
| 644 |
} |
|---|
| 645 |
real dn = n - k; |
|---|
| 646 |
if ( k == 0 ) { |
|---|
| 647 |
if ( p < .01L ) |
|---|
| 648 |
return -expm1( dn * log1p(-p) ); |
|---|
| 649 |
else |
|---|
| 650 |
return 1.0L - pow( 1.0L-p, dn ); |
|---|
| 651 |
} else { |
|---|
| 652 |
return betaIncomplete( k+1, dn, p ); |
|---|
| 653 |
} |
|---|
| 654 |
} |
|---|
| 655 |
|
|---|
| 656 |
debug(UnitTest){ |
|---|
| 657 |
unittest { |
|---|
| 658 |
// = Excel's (1 - BINOMDIST(k, n, p, TRUE)) |
|---|
| 659 |
assert( fabs(1.0L-binomialDistributionCompl(0, 15, 0.003) |
|---|
| 660 |
- 0.955932824838906L) < 0.0000000000000005L); |
|---|
| 661 |
assert( fabs(1.0L-binomialDistributionCompl(0, 25, 0.2) |
|---|
| 662 |
- 0.00377789318629572L) < 0.000000000000000005L); |
|---|
| 663 |
assert( fabs(1.0L-binomialDistributionCompl(8, 12, 0.5) |
|---|
| 664 |
- 0.927001953125L) < 0.00000000000005L); |
|---|
| 665 |
assert(binomialDistributionCompl(7,7, 0.3)==0.0); |
|---|
| 666 |
} |
|---|
| 667 |
} |
|---|
| 668 |
|
|---|
| 669 |
/** Inverse binomial distribution |
|---|
| 670 |
* |
|---|
| 671 |
* Finds the event probability p such that the sum of the |
|---|
| 672 |
* terms 0 through k of the Binomial probability density |
|---|
| 673 |
* is equal to the given cumulative probability y. |
|---|
| 674 |
* |
|---|
| 675 |
* This is accomplished using the inverse beta integral |
|---|
| 676 |
* function and the relation |
|---|
| 677 |
* |
|---|
| 678 |
* 1 - p = betaDistributionInv( n-k, k+1, y ). |
|---|
| 679 |
* |
|---|
| 680 |
* The arguments must be positive, with 0 <= y <= 1, and k <= n. |
|---|
| 681 |
*/ |
|---|
| 682 |
real binomialDistributionInv( int k, int n, real y ) |
|---|
| 683 |
in { |
|---|
| 684 |
assert(y>=0 && y<=1.0); // domain error |
|---|
| 685 |
assert(k>=0 && k<=n); |
|---|
| 686 |
} |
|---|
| 687 |
body{ |
|---|
| 688 |
real dk, p; |
|---|
| 689 |
real dn = n - k; |
|---|
| 690 |
if ( k == 0 ) { |
|---|
| 691 |
if( y > 0.8L ) |
|---|
| 692 |
p = -expm1( log1p(y-1.0L) / dn ); |
|---|
| 693 |
else |
|---|
| 694 |
p = 1.0L - pow( y, 1.0L/dn ); |
|---|
| 695 |
} else { |
|---|
| 696 |
dk = k + 1; |
|---|
| 697 |
p = betaIncomplete( dn, dk, y ); |
|---|
| 698 |
if( p > 0.5 ) |
|---|
| 699 |
p = betaIncompleteInv( dk, dn, 1.0L-y ); |
|---|
| 700 |
else |
|---|
| 701 |
p = 1.0 - betaIncompleteInv( dn, dk, y ); |
|---|
| 702 |
} |
|---|
| 703 |
return p; |
|---|
| 704 |
} |
|---|
| 705 |
|
|---|
| 706 |
debug(UnitTest){ |
|---|
| 707 |
unittest { |
|---|
| 708 |
real w = binomialDistribution(9, 15, 0.318L); |
|---|
| 709 |
assert(feqrel(binomialDistributionInv(9, 15, w), 0.318L)>=real.mant_dig-3); |
|---|
| 710 |
w = binomialDistribution(5, 35, 0.718L); |
|---|
| 711 |
assert(feqrel(binomialDistributionInv(5, 35, w), 0.718L)>=real.mant_dig-3); |
|---|
| 712 |
w = binomialDistribution(0, 24, 0.637L); |
|---|
| 713 |
assert(feqrel(binomialDistributionInv(0, 24, w), 0.637L)>=real.mant_dig-3); |
|---|
| 714 |
w = binomialDistributionInv(0, 59, 0.962L); |
|---|
| 715 |
assert(feqrel(binomialDistribution(0, 59, w), 0.962L)>=real.mant_dig-5); |
|---|
| 716 |
} |
|---|
| 717 |
} |
|---|
| 718 |
|
|---|
| 719 |
/** Negative binomial distribution and its inverse |
|---|
| 720 |
* |
|---|
| 721 |
* Returns the sum of the terms 0 through k of the negative |
|---|
| 722 |
* binomial distribution: |
|---|
| 723 |
* |
|---|
| 724 |
* $(BIGSUM j=0, k) $(CHOOSE n+j-1, j-1) $(POWER p, n) $(POWER (1-p), j) |
|---|
| 725 |
* |
|---|
| 726 |
* In a sequence of Bernoulli trials, this is the probability |
|---|
| 727 |
* that k or fewer failures precede the n-th success. |
|---|
| 728 |
* |
|---|
| 729 |
* The arguments must be positive, with 0 < p < 1 and r>0. |
|---|
| 730 |
* |
|---|
| 731 |
* The inverse finds the argument y such |
|---|
| 732 |
* that negativeBinomialDistribution(k,n,y) is equal to p. |
|---|
| 733 |
* |
|---|
| 734 |
* The Geometric Distribution is a special case of the negative binomial |
|---|
| 735 |
* distribution. |
|---|
| 736 |
* ----------------------- |
|---|
| 737 |
* geometricDistribution(k, p) = negativeBinomialDistribution(k, 1, p); |
|---|
| 738 |
* ----------------------- |
|---|
| 739 |
* References: |
|---|
| 740 |
* $(LINK http://mathworld.wolfram.com/NegativeBinomialDistribution.html) |
|---|
| 741 |
*/ |
|---|
| 742 |
|
|---|
| 743 |
real negativeBinomialDistribution(int k, int n, real p ) |
|---|
| 744 |
in { |
|---|
| 745 |
assert(p>=0 && p<=1.0); // domain error |
|---|
| 746 |
assert(k>=0); |
|---|
| 747 |
} |
|---|
| 748 |
body{ |
|---|
| 749 |
if ( k == 0 ) return pow( p, n ); |
|---|
| 750 |
return betaIncomplete( n, k + 1, p ); |
|---|
| 751 |
} |
|---|
| 752 |
|
|---|
| 753 |
/** ditto */ |
|---|
| 754 |
real negativeBinomialDistributionInv(int k, int n, real p ) |
|---|
| 755 |
in { |
|---|
| 756 |
assert(p>=0 && p<=1.0); // domain error |
|---|
| 757 |
assert(k>=0); |
|---|
| 758 |
} |
|---|
| 759 |
body{ |
|---|
| 760 |
return betaIncompleteInv(n, k + 1, p); |
|---|
| 761 |
} |
|---|
| 762 |
|
|---|
| 763 |
debug(UnitTest) { |
|---|
| 764 |
unittest { |
|---|
| 765 |
// Value obtained by sum of terms of MS Excel 2003's NEGBINOMDIST. |
|---|
| 766 |
assert( fabs(negativeBinomialDistribution(10, 20, 0.2) - 3.830_52E-08)< 0.000_005e-08); |
|---|
| 767 |
assert(feqrel(negativeBinomialDistributionInv(14, 208, negativeBinomialDistribution(14, 208, 1e-4L)), 1e-4L)>=real.mant_dig-3); |
|---|
| 768 |
} |
|---|
| 769 |
} |
|---|