| 101 | | OnlineMeanSD meanSd; |
|---|
| 102 | | uint len; |
|---|
| 103 | | foreach(elem; data) { |
|---|
| 104 | | len++; |
|---|
| 105 | | meanSd.put(elem); |
|---|
| 106 | | } |
|---|
| 107 | | |
|---|
| 108 | | real t = (meanSd.mean - mean) / (meanSd.stdev / sqrt(cast(real) len)); |
|---|
| 109 | | if(alt == Alt.LESS) |
|---|
| 110 | | return studentsTCDF(t, data.length - 1); |
|---|
| 111 | | else if(alt == Alt.GREATER) |
|---|
| 112 | | return studentsTCDF(-t, data.length - 1); |
|---|
| 113 | | else |
|---|
| 114 | | return 2 * min(studentsTCDF(t, data.length - 1), |
|---|
| 115 | | studentsTCDF(-t, data.length - 1)); |
|---|
| 116 | | } |
|---|
| 117 | | |
|---|
| 118 | | unittest { |
|---|
| 119 | | assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2), .2302)); |
|---|
| 120 | | assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.LESS), .8849)); |
|---|
| 121 | | assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.GREATER), .1151)); |
|---|
| | 146 | return pairedTTest(data, repeat(0), testMean, alt, confLevel); |
|---|
| | 147 | |
|---|
| | 148 | } |
|---|
| | 149 | |
|---|
| | 150 | unittest { |
|---|
| | 151 | auto t1 = studentsTTest([1, 2, 3, 4, 5].dup, 2); |
|---|
| | 152 | assert(approxEqual(t1.testStat, 1.4142)); |
|---|
| | 153 | assert(approxEqual(t1.p, 0.2302)); |
|---|
| | 154 | assert(approxEqual(t1.lowerBound, 1.036757)); |
|---|
| | 155 | assert(approxEqual(t1.upperBound, 4.963243)); |
|---|
| | 156 | |
|---|
| | 157 | auto t2 = studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.LESS); |
|---|
| | 158 | assert(approxEqual(t2, .8849)); |
|---|
| | 159 | assert(approxEqual(t2.testStat, 1.4142)); |
|---|
| | 160 | assert(t2.lowerBound == -real.infinity); |
|---|
| | 161 | assert(approxEqual(t2.upperBound, 4.507443)); |
|---|
| | 162 | |
|---|
| | 163 | auto t3 = studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.GREATER); |
|---|
| | 164 | assert(approxEqual(t3, .1151)); |
|---|
| | 165 | assert(approxEqual(t3.testStat, 1.4142)); |
|---|
| | 166 | assert(approxEqual(t3.lowerBound, 1.492557)); |
|---|
| | 167 | assert(t3.upperBound == real.infinity); |
|---|
| | 168 | |
|---|
| 163 | | assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup), 0.06985)); |
|---|
| 164 | | assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.LESS), |
|---|
| 165 | | 0.965)); |
|---|
| 166 | | assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.GREATER), |
|---|
| 167 | | 0.03492)); |
|---|
| | 233 | auto t2 = studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup); |
|---|
| | 234 | assert(approxEqual(t2, 0.06985)); |
|---|
| | 235 | assert(approxEqual(t2.testStat, 2.0567)); |
|---|
| | 236 | assert(approxEqual(t2.lowerBound, -0.3595529)); |
|---|
| | 237 | assert(approxEqual(t2.upperBound, 7.5595529)); |
|---|
| | 238 | |
|---|
| | 239 | |
|---|
| | 240 | auto t5 = studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, 0, Alt.LESS); |
|---|
| | 241 | assert(approxEqual(t5, 0.965)); |
|---|
| | 242 | assert(approxEqual(t5.testStat, 2.0567)); |
|---|
| | 243 | assert(approxEqual(t5.upperBound, 6.80857)); |
|---|
| | 244 | assert(t5.lowerBound == -real.infinity); |
|---|
| | 245 | |
|---|
| | 246 | auto t6 = studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, 0, Alt.GREATER); |
|---|
| | 247 | assert(approxEqual(t6, 0.03492)); |
|---|
| | 248 | assert(approxEqual(t6.testStat, 2.0567)); |
|---|
| | 249 | assert(approxEqual(t6.lowerBound, 0.391422)); |
|---|
| | 250 | assert(t6.upperBound == real.infinity); |
|---|
| 200 | | if(alt == Alt.LESS) |
|---|
| 201 | | return studentsTCDF(t, df); |
|---|
| 202 | | else if(alt == Alt.GREATER) |
|---|
| 203 | | return studentsTCDF(-t, df); |
|---|
| 204 | | else |
|---|
| 205 | | return 2 * min(studentsTCDF(t, df), studentsTCDF(-t, df)); |
|---|
| 206 | | } |
|---|
| 207 | | |
|---|
| 208 | | unittest { |
|---|
| 209 | | // Values from R. |
|---|
| 210 | | assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup), 0.2159)); |
|---|
| 211 | | assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.LESS), |
|---|
| 212 | | 0.1079)); |
|---|
| 213 | | assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.GREATER), |
|---|
| 214 | | 0.892)); |
|---|
| | 288 | |
|---|
| | 289 | ConfInt ret; |
|---|
| | 290 | ret.testStat = t; |
|---|
| | 291 | if(alt == Alt.LESS) { |
|---|
| | 292 | ret.p = studentsTCDF(t, df); |
|---|
| | 293 | ret.lowerBound = -real.infinity; |
|---|
| | 294 | ret.upperBound = meanDiff + testMean - invStudentsTCDF(1 - confLevel, df) * sx1x2; |
|---|
| | 295 | } else if(alt == Alt.GREATER) { |
|---|
| | 296 | ret.p = studentsTCDF(-t, df); |
|---|
| | 297 | ret.lowerBound = meanDiff + testMean + invStudentsTCDF(1 - confLevel, df) * sx1x2; |
|---|
| | 298 | ret.upperBound = real.infinity; |
|---|
| | 299 | } else { |
|---|
| | 300 | ret.p = 2 * min(studentsTCDF(t, df), studentsTCDF(-t, df)); |
|---|
| | 301 | real delta = invStudentsTCDF(0.5 * (1 - confLevel), df) * sx1x2; |
|---|
| | 302 | ret.upperBound = meanDiff + testMean - delta; |
|---|
| | 303 | ret.lowerBound = meanDiff + testMean + delta; |
|---|
| | 304 | } |
|---|
| | 305 | return ret; |
|---|
| | 306 | } |
|---|
| | 307 | |
|---|
| | 308 | unittest { |
|---|
| | 309 | // Values from R. |
|---|
| | 310 | auto t1 = welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, 2); |
|---|
| | 311 | assert(approxEqual(t1, 0.02285)); |
|---|
| | 312 | assert(approxEqual(t1.testStat, -2.8099)); |
|---|
| | 313 | assert(approxEqual(t1.lowerBound, -4.979316)); |
|---|
| | 314 | assert(approxEqual(t1.upperBound, 1.312649)); |
|---|
| | 315 | |
|---|
| | 316 | auto t2 = welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, -1, Alt.LESS); |
|---|
| | 317 | assert(approxEqual(t2, 0.2791)); |
|---|
| | 318 | assert(approxEqual(t2.testStat, -0.6108)); |
|---|
| | 319 | assert(t2.lowerBound == -real.infinity); |
|---|
| | 320 | assert(approxEqual(t2.upperBound, 0.7035534)); |
|---|
| | 321 | |
|---|
| | 322 | auto t3 = welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, 0.5, Alt.GREATER); |
|---|
| | 323 | assert(approxEqual(t3, 0.9372)); |
|---|
| | 324 | assert(approxEqual(t3.testStat, -1.7104)); |
|---|
| | 325 | assert(approxEqual(t3.lowerBound, -4.37022)); |
|---|
| | 326 | assert(t3.upperBound == real.infinity); |
|---|
| | 327 | |
|---|
| 257 | | assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.LESS), 0.0889)); |
|---|
| 258 | | assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.GREATER), 0.9111)); |
|---|
| 259 | | assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.TWOSIDE), 0.1778)); |
|---|
| 260 | | assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.LESS, 1), 0.01066)); |
|---|
| 261 | | assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.GREATER, 1), 0.9893)); |
|---|
| 262 | | assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.TWOSIDE, 1), 0.02131)); |
|---|
| | 388 | auto t1 = pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 1); |
|---|
| | 389 | assert(approxEqual(t1.p, 0.02131)); |
|---|
| | 390 | assert(approxEqual(t1.testStat, -3.6742)); |
|---|
| | 391 | assert(approxEqual(t1.lowerBound, -2.1601748)); |
|---|
| | 392 | assert(approxEqual(t1.upperBound, 0.561748)); |
|---|
| | 393 | |
|---|
| | 394 | assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 0, Alt.LESS), 0.0889)); |
|---|
| | 395 | assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 0, Alt.GREATER), 0.9111)); |
|---|
| | 396 | assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 0, Alt.TWOSIDE), 0.1778)); |
|---|
| | 397 | assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 1, Alt.LESS), 0.01066)); |
|---|
| | 398 | assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 1, Alt.GREATER), 0.9893)); |
|---|
| 266 | | /**Wilcoxon rank-sum test statistic. This is a non-parametric test for a |
|---|
| 267 | | * difference in the mean ranks of two sets of numbers. The tieSum parameter is |
|---|
| 268 | | * mostly for use internally, and if included will place a value in the |
|---|
| 269 | | * dereference that can be used in wilcoxonRankSumPval() to adjust for ties in |
|---|
| 270 | | * the input data. |
|---|
| | 402 | /**A test for normality of the distribution of a range of values. Based on |
|---|
| | 403 | * the assumption that normally distributed values will have a sample skewness |
|---|
| | 404 | * and sample kurtosis very close to zero. |
|---|
| | 405 | * |
|---|
| | 406 | * Returns: A TestRes with the K statistic, which is Chi-Square distributed |
|---|
| | 407 | * with 2 degrees of freedom under the null, and the P-value for the alternative |
|---|
| | 408 | * that the data has skewness and kurtosis not equal to zero against the null |
|---|
| | 409 | * that skewness and kurtosis are near zero. A normal distribution always has |
|---|
| | 410 | * skewness and kurtosis that converge to zero as sample size goes to infinity. |
|---|
| | 411 | * |
|---|
| | 412 | * References: |
|---|
| | 413 | * D'Agostino, Ralph B., Albert Belanger, and Ralph B. D'Agostino, Jr. |
|---|
| | 414 | * "A Suggestion for Using Powerful and Informative Tests of Normality", |
|---|
| | 415 | * The American Statistician, Vol. 44, No. 4. (Nov., 1990), pp. 316-321. |
|---|
| | 416 | */ |
|---|
| | 417 | TestRes dAgostinoK(T)(T range) |
|---|
| | 418 | if(realIterable!(T)) { |
|---|
| | 419 | // You are not meant to understand this. I sure don't. I just implemented |
|---|
| | 420 | // these formulas off of Wikipedia, which got them from: |
|---|
| | 421 | |
|---|
| | 422 | // D'Agostino, Ralph B., Albert Belanger, and Ralph B. D'Agostino, Jr. |
|---|
| | 423 | // "A Suggestion for Using Powerful and Informative Tests of Normality", |
|---|
| | 424 | // The American Statistician, Vol. 44, No. 4. (Nov., 1990), pp. 316-321. |
|---|
| | 425 | |
|---|
| | 426 | // Amazing. I didn't even realize things this complicated were possible |
|---|
| | 427 | // in 1990, before widespread computer algebra systems. |
|---|
| | 428 | |
|---|
| | 429 | // Notation from Wikipedia. Keeping same notation for simplicity. |
|---|
| | 430 | real sqrtb1 = void, b2 = void, n = void; |
|---|
| | 431 | { |
|---|
| | 432 | auto summ = summary(range); |
|---|
| | 433 | sqrtb1 = summ.skew; |
|---|
| | 434 | b2 = summ.kurtosis + 3; |
|---|
| | 435 | n = summ.N; |
|---|
| | 436 | } |
|---|
| | 437 | |
|---|
| | 438 | // Calculate transformed skewness. |
|---|
| | 439 | real Y = sqrtb1 * sqrt((n + 1) * (n + 3) / (6 * (n - 2))); |
|---|
| | 440 | real beta2b1Numer = 3 * (n * n + 27 * n - 70) * (n + 1) * (n + 3); |
|---|
| | 441 | real beta2b1Denom = (n - 2) * (n + 5) * (n + 7) * (n + 9); |
|---|
| | 442 | real beta2b1 = beta2b1Numer / beta2b1Denom; |
|---|
| | 443 | real Wsq = -1 + sqrt(2 * (beta2b1 - 1)); |
|---|
| | 444 | real delta = 1.0L / sqrt(log(sqrt(Wsq))); |
|---|
| | 445 | real alpha = sqrt( 2.0L / (Wsq - 1)); |
|---|
| | 446 | real Zb1 = delta * log(Y / alpha + sqrt(pow(Y / alpha, 2) + 1)); |
|---|
| | 447 | |
|---|
| | 448 | // Calculate transformed kurtosis. |
|---|
| | 449 | real Eb2 = 3 * (n - 1) / (n + 1); |
|---|
| | 450 | real sigma2b2 = (24 * n * (n - 2) * (n - 3)) / ( |
|---|
| | 451 | (n + 1) * (n + 1) * (n + 3) * (n + 5)); |
|---|
| | 452 | real x = (b2 - Eb2) / sqrt(sigma2b2); |
|---|
| | 453 | |
|---|
| | 454 | real sqBeta1b2 = 6 * (n * n - 5 * n + 2) / ((n + 7) * (n + 9)) * |
|---|
| | 455 | sqrt((6 * (n + 3) * (n + 5)) / (n * (n - 2) * (n - 3))); |
|---|
| | 456 | real A = 6 + 8 / sqBeta1b2 * (2 / sqBeta1b2 + sqrt(1 + 4 / (sqBeta1b2 * sqBeta1b2))); |
|---|
| | 457 | real Zb2 = ((1 - 2 / (9 * A)) - |
|---|
| | 458 | cbrt((1 - 2 / A) / (1 + x * sqrt(2 / (A - 4)))) ) * |
|---|
| | 459 | sqrt(9 * A / 2); |
|---|
| | 460 | |
|---|
| | 461 | real K2 = Zb1 * Zb1 + Zb2 * Zb2; |
|---|
| | 462 | return TestRes(K2, chiSqrCDFR(K2, 2)); |
|---|
| | 463 | } |
|---|
| | 464 | |
|---|
| | 465 | unittest { |
|---|
| | 466 | // Values from R's fBasics package. |
|---|
| | 467 | int[] arr1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]; |
|---|
| | 468 | int[] arr2 = [8,6,7,5,3,0,9,8,6,7,5,3,0,9,3,6,2,4,3,6,8]; |
|---|
| | 469 | |
|---|
| | 470 | auto r1 = dAgostinoK(arr1); |
|---|
| | 471 | auto r2 = dAgostinoK(arr2); |
|---|
| | 472 | |
|---|
| | 473 | assert(approxEqual(r1.testStat, 3.1368)); |
|---|
| | 474 | assert(approxEqual(r1.p, 0.2084)); |
|---|
| | 475 | writeln("Passed dAgostinoK test."); |
|---|
| | 476 | } |
|---|
| | 477 | |
|---|
| | 478 | |
|---|
| | 479 | /**Computes Wilcoxon rank sum test statistic and P-value for |
|---|
| | 480 | * a set of observations against another set, using the given alternative. |
|---|
| | 481 | * Alt.LESS means that sample1 is stochastically less than sample2. |
|---|
| | 482 | * Alt.GREATER means sample1 is stochastically greater than sample2. |
|---|
| | 483 | * Alt.TWOSIDE means sample1 is stochastically less than or greater than |
|---|
| | 484 | * sample2. |
|---|
| | 485 | * |
|---|
| | 486 | * exactThresh is the threshold value of (n1 + n2) at which this function |
|---|
| | 487 | * switches from exact to approximate computation of the p-value. Exact |
|---|
| | 488 | * computation is very slow for large datasets, and the asymptotic |
|---|
| | 489 | * approximation is good enough for all practical purposes at sample sizes much |
|---|
| | 490 | * smaller than this. Do not set exactThresh to more than 200, as the exact |
|---|
| | 491 | * calculation is both very slow and not numerically stable past this point, |
|---|
| | 492 | * and the asymptotic calculation is very good for N this large. To disable |
|---|
| | 493 | * exact calculation entirely, set exactThresh to 0. |
|---|
| | 494 | * |
|---|
| | 495 | * Notes: Exact p-value computation is never used when ties are present in the |
|---|
| | 496 | * data, because it is not computationally feasible. |
|---|
| 274 | | * Returns: The Wilcoxon test statistic W.*/ |
|---|
| 275 | | real wilcoxonRankSum(T)(T sample1, T sample2, real* tieSum = null) |
|---|
| | 500 | * This test is also known as the Mann-Whitney test. |
|---|
| | 501 | * |
|---|
| | 502 | * Returns: A TestRes containing the W test statistic and the P-value against |
|---|
| | 503 | * the given alternative.*/ |
|---|
| | 504 | TestRes wilcoxonRankSum(T)(T sample1, T sample2, Alt alt = Alt.TWOSIDE, |
|---|
| | 505 | uint exactThresh = 50) if(isInputRange!(T) && dstats.base.hasLength!(T)) { |
|---|
| | 506 | |
|---|
| | 507 | real tieSum; |
|---|
| | 508 | real W = wilcoxonRankSumW(sample1, sample2, &tieSum); |
|---|
| | 509 | real p = wilcoxonRankSumPval(W, sample1.length, sample2.length, alt, tieSum, |
|---|
| | 510 | exactThresh); |
|---|
| | 511 | return TestRes(W, p); |
|---|
| | 512 | } |
|---|
| | 513 | |
|---|
| | 514 | unittest { |
|---|
| | 515 | // Values from R. |
|---|
| | 516 | |
|---|
| | 517 | assert(wilcoxonRankSum([1, 2, 3, 4, 5].dup, [2, 4, 6, 8, 10].dup).testStat == 5); |
|---|
| | 518 | assert(wilcoxonRankSum([2, 4, 6, 8, 10].dup, [1, 2, 3, 4, 5].dup).testStat == 20); |
|---|
| | 519 | assert(wilcoxonRankSum([3, 7, 21, 5, 9].dup, [2, 4, 6, 8, 10].dup).testStat == 15); |
|---|
| | 520 | |
|---|
| | 521 | // Simple stuff (no ties) first. Testing approximate |
|---|
| | 522 | // calculation first. |
|---|
| | 523 | assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| | 524 | Alt.TWOSIDE, 0), 0.9273)); |
|---|
| | 525 | assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| | 526 | Alt.LESS, 0), 0.6079)); |
|---|
| | 527 | assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| | 528 | Alt.GREATER, 0), 0.4636)); |
|---|
| | 529 | assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| | 530 | Alt.TWOSIDE, 0), 0.4113)); |
|---|
| | 531 | assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| | 532 | Alt.LESS, 0), 0.2057)); |
|---|
| | 533 | assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| | 534 | Alt.GREATER, 0), 0.8423)); |
|---|
| | 535 | assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| | 536 | Alt.TWOSIDE, 0), .6745)); |
|---|
| | 537 | assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| | 538 | Alt.LESS, 0), .3372)); |
|---|
| | 539 | assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| | 540 | Alt.GREATER, 0), .7346)); |
|---|
| | 541 | |
|---|
| | 542 | // Now, lots of ties. |
|---|
| | 543 | assert(approxEqual(wilcoxonRankSum([1,2,3,4,5].dup, [2,3,4,5,6].dup, |
|---|
| | 544 | Alt.TWOSIDE, 0), 0.3976)); |
|---|
| | 545 | assert(approxEqual(wilcoxonRankSum([1,2,3,4,5].dup, [2,3,4,5,6].dup, |
|---|
| | 546 | Alt.LESS, 0), 0.1988)); |
|---|
| | 547 | assert(approxEqual(wilcoxonRankSum([1,2,3,4,5].dup, [2,3,4,5,6].dup, |
|---|
| | 548 | Alt.GREATER, 0), 0.8548)); |
|---|
| | 549 | assert(approxEqual(wilcoxonRankSum([1,2,1,1,2].dup, [1,2,3,1,1].dup, |
|---|
| | 550 | Alt.TWOSIDE, 0), 0.9049)); |
|---|
| | 551 | assert(approxEqual(wilcoxonRankSum([1,2,1,1,2].dup, [1,2,3,1,1].dup, |
|---|
| | 552 | Alt.LESS, 0), 0.4524)); |
|---|
| | 553 | assert(approxEqual(wilcoxonRankSum([1,2,1,1,2].dup, [1,2,3,1,1].dup, |
|---|
| | 554 | Alt.GREATER, 0), 0.64)); |
|---|
| | 555 | |
|---|
| | 556 | // Now, testing the exact calculation on the same data. |
|---|
| | 557 | assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| | 558 | Alt.TWOSIDE), 0.9307)); |
|---|
| | 559 | assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| | 560 | Alt.LESS), 0.6039)); |
|---|
| | 561 | assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| | 562 | Alt.GREATER), 0.4654)); |
|---|
| | 563 | assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| | 564 | Alt.TWOSIDE), 0.4286)); |
|---|
| | 565 | assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| | 566 | Alt.LESS), 0.2143)); |
|---|
| | 567 | assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| | 568 | Alt.GREATER), 0.8355)); |
|---|
| | 569 | assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| | 570 | Alt.TWOSIDE), .6905)); |
|---|
| | 571 | assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| | 572 | Alt.LESS), .3452)); |
|---|
| | 573 | assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| | 574 | Alt.GREATER), .7262)); |
|---|
| | 575 | writeln("Passed wilcoxonRankSum test."); |
|---|
| | 576 | } |
|---|
| | 577 | |
|---|
| | 578 | real wilcoxonRankSumW(T)(T sample1, T sample2, real* tieSum = null) |
|---|
| 369 | | /**Computes Wilcoxon rank sum test P-value for |
|---|
| 370 | | * a set of observations against another set, using the given alternative. |
|---|
| 371 | | * Alt.LESS means that mean rank(sample1) < mean rank(sample2). Alt.GREATER means |
|---|
| 372 | | * mean rank(sample1) > mean rank(sample2). Alt.TWOSIDE means mean rank(sample1) != |
|---|
| 373 | | * mean rank(sample2). |
|---|
| 374 | | * |
|---|
| 375 | | * tieSum is a parameter that is used internally to adjust for |
|---|
| 376 | | * ties in the data. It is computed by wilcoxonRankSum(). |
|---|
| 377 | | * exactThresh is the threshold value of (n1 + n2) at which this function |
|---|
| 378 | | * switches from exact to approximate computation of the p-value. Exact |
|---|
| 379 | | * computation is very slow for large datasets, and for anything but very small |
|---|
| 380 | | * datasets, the asymptotic approximation is good enough for all practical |
|---|
| 381 | | * purposes. Do not set exactThresh to more than 200, as the exact |
|---|
| 382 | | * calculation is both very slow and not numerically stable past this point, |
|---|
| 383 | | * and the asymptotic calculation is very good for N this large. To disable |
|---|
| 384 | | * exact calculation entirely, set exactThresh to 0. |
|---|
| 385 | | * |
|---|
| 386 | | * Note: Exact p-value computation is never used when tieSum > 0, i.e. when |
|---|
| 387 | | * there were ties in the data, because it is not computationally feasible. |
|---|
| 388 | | * In these cases, exactThresh will be ignored. |
|---|
| 389 | | * |
|---|
| 390 | | * Input ranges for this function must define a length. |
|---|
| 391 | | * |
|---|
| 392 | | * Returns: The p-value against the given alternative.*/ |
|---|
| 393 | | real wilcoxonRankSumPval(T)(T sample1, T sample2, Alt alt = Alt.TWOSIDE, |
|---|
| 394 | | uint exactThresh = 50) if(isInputRange!(T) && dstats.base.hasLength!(T)) { |
|---|
| 395 | | |
|---|
| 396 | | real tieSum; |
|---|
| 397 | | real W = wilcoxonRankSum(sample1, sample2, &tieSum); |
|---|
| 398 | | return wilcoxonRankSumPval(W, sample1.length, sample2.length, alt, tieSum, |
|---|
| 399 | | exactThresh); |
|---|
| 400 | | } |
|---|
| 401 | | |
|---|
| 402 | | unittest { |
|---|
| 403 | | // Values from R. Simple stuff (no ties) first. Testing approximate |
|---|
| 404 | | // calculation first. |
|---|
| 405 | | assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| 406 | | Alt.TWOSIDE, 0), 0.9273)); |
|---|
| 407 | | assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| 408 | | Alt.LESS, 0), 0.6079)); |
|---|
| 409 | | assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| 410 | | Alt.GREATER, 0), 0.4636)); |
|---|
| 411 | | assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| 412 | | Alt.TWOSIDE, 0), 0.4113)); |
|---|
| 413 | | assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| 414 | | Alt.LESS, 0), 0.2057)); |
|---|
| 415 | | assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| 416 | | Alt.GREATER, 0), 0.8423)); |
|---|
| 417 | | assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| 418 | | Alt.TWOSIDE, 0), .6745)); |
|---|
| 419 | | assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| 420 | | Alt.LESS, 0), .3372)); |
|---|
| 421 | | assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| 422 | | Alt.GREATER, 0), .7346)); |
|---|
| 423 | | |
|---|
| 424 | | // Now, lots of ties. |
|---|
| 425 | | assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup, |
|---|
| 426 | | Alt.TWOSIDE, 0), 0.3976)); |
|---|
| 427 | | assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup, |
|---|
| 428 | | Alt.LESS, 0), 0.1988)); |
|---|
| 429 | | assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup, |
|---|
| 430 | | Alt.GREATER, 0), 0.8548)); |
|---|
| 431 | | assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup, |
|---|
| 432 | | Alt.TWOSIDE, 0), 0.9049)); |
|---|
| 433 | | assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup, |
|---|
| 434 | | Alt.LESS, 0), 0.4524)); |
|---|
| 435 | | assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup, |
|---|
| 436 | | Alt.GREATER, 0), 0.64)); |
|---|
| 437 | | |
|---|
| 438 | | // Now, testing the exact calculation on the same data. |
|---|
| 439 | | assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| 440 | | Alt.TWOSIDE), 0.9307)); |
|---|
| 441 | | assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| 442 | | Alt.LESS), 0.6039)); |
|---|
| 443 | | assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, |
|---|
| 444 | | Alt.GREATER), 0.4654)); |
|---|
| 445 | | assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| 446 | | Alt.TWOSIDE), 0.4286)); |
|---|
| 447 | | assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| 448 | | Alt.LESS), 0.2143)); |
|---|
| 449 | | assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, |
|---|
| 450 | | Alt.GREATER), 0.8355)); |
|---|
| 451 | | assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| 452 | | Alt.TWOSIDE), .6905)); |
|---|
| 453 | | assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| 454 | | Alt.LESS), .3452)); |
|---|
| 455 | | assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, |
|---|
| 456 | | Alt.GREATER), .7262)); |
|---|
| 457 | | writeln("Passed wilcoxonRankSumPval test."); |
|---|
| 458 | | } |
|---|
| 459 | | |
|---|
| 460 | | |
|---|
| 461 | | /* Used internally by wilcoxonRankSumPval. This function uses dynamic |
|---|
| | 647 | /* Used internally by wilcoxonRankSum. This function uses dynamic |
|---|
| 563 | | /**Wilcoxon signed-rank statistic. This is a non-parametric test for a |
|---|
| 564 | | * difference between paired sets of numbers. Since this is a paired test, |
|---|
| 565 | | * before.length must equal after.length. The tieSum parameter is |
|---|
| 566 | | * mostly for use internally, and if included will place a value in the |
|---|
| 567 | | * dereference that can be used in wilcoxonRankSumPval() to adjust for ties in |
|---|
| 568 | | * the input data. |
|---|
| 569 | | * |
|---|
| 570 | | * Input ranges for this function must define a length. |
|---|
| 571 | | * |
|---|
| 572 | | * Returns: The Wilcoxon test statistic W.*/ |
|---|
| 573 | | real wilcoxonSignedRank(T, U)(T before, U after, real* tieSum = null) |
|---|
| | 749 | /**Computes a test statistic and P-value for a Wilcoxon signed rank test against |
|---|
| | 750 | * the given alternative. Alt.LESS means that elements of before are stochastically |
|---|
| | 751 | * less than corresponding elements of after. Alt.GREATER means elements of |
|---|
| | 752 | * before are typically greater than corresponding elements of after. |
|---|
| | 753 | * Alt.TWOSIDE means there is a significant difference in either direction. |
|---|
| | 754 | * |
|---|
| | 755 | * exactThresh is the threshold value of before.length at which this function |
|---|
| | 756 | * switches from exact to approximate computation of the p-value. Do not set |
|---|
| | 757 | * exactThresh to more than 200, as the exact calculation is both very slow and |
|---|
| | 758 | * not numerically stable past this point, and the asymptotic calculation is |
|---|
| | 759 | * very good for N this large. To disable exact calculation entirely, set |
|---|
| | 760 | * exactThresh to 0. |
|---|
| | 761 | * |
|---|
| | 762 | * Notes: Exact p-value computation is never used when ties are present, |
|---|
| | 763 | * because it is not computationally feasible. |
|---|
| | 764 | * |
|---|
| | 765 | * The input ranges for this function must define a length and must be |
|---|
| | 766 | * forward ranges. |
|---|
| | 767 | * |
|---|
| | 768 | * Returns: A TestRes of the W statistic and the p-value against the given |
|---|
| | 769 | * alternative.*/ |
|---|
| | 770 | TestRes wilcoxonSignedRank(T, U)(T before, U after, Alt alt = Alt.TWOSIDE, uint exactThresh = 50) |
|---|
| | 771 | if(realInput!(T) && dstats.base.hasLength!(T) && isForwardRange!(T) && |
|---|
| | 772 | realInput!(U) && dstats.base.hasLength!(U) && isForwardRange!(U)) |
|---|
| | 773 | in { |
|---|
| | 774 | assert(before.length == after.length); |
|---|
| | 775 | } body { |
|---|
| | 776 | real tieSum; |
|---|
| | 777 | real W = wilcoxonSignedRankW(before, after, &tieSum); |
|---|
| | 778 | ulong N = before.length; |
|---|
| | 779 | while(!before.empty && !after.empty) { |
|---|
| | 780 | if(before.front == after.front) |
|---|
| | 781 | N--; |
|---|
| | 782 | before.popFront; |
|---|
| | 783 | after.popFront; |
|---|
| | 784 | |
|---|
| | 785 | } |
|---|
| | 786 | return TestRes(W, wilcoxonSignedRankPval(W, N, alt, tieSum, exactThresh)); |
|---|
| | 787 | } |
|---|
| | 788 | |
|---|
| | 789 | unittest { |
|---|
| | 790 | // Values from R. |
|---|
| | 791 | alias approxEqual ae; |
|---|
| | 792 | assert(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup).testStat == 7.5); |
|---|
| | 793 | assert(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup).testStat == 6); |
|---|
| | 794 | assert(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup).testStat == 5); |
|---|
| | 795 | |
|---|
| | 796 | // With ties, normal approx. |
|---|
| | 797 | assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup), 1)); |
|---|
| | 798 | assert(ae(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup), 0.7865)); |
|---|
| | 799 | assert(ae(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup), 0.5879)); |
|---|
| | 800 | assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.LESS), 0.5562)); |
|---|
| | 801 | assert(ae(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.LESS), 0.3932)); |
|---|
| | 802 | assert(ae(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.LESS), 0.2940)); |
|---|
| | 803 | assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.GREATER), 0.5562)); |
|---|
| | 804 | assert(ae(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.GREATER), 0.706)); |
|---|
| | 805 | assert(ae(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.GREATER), 0.7918)); |
|---|
| | 806 | |
|---|
| | 807 | // Exact. |
|---|
| | 808 | assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,16,32].dup), 0.625)); |
|---|
| | 809 | assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.LESS), 0.3125)); |
|---|
| | 810 | assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.GREATER), 0.7812)); |
|---|
| | 811 | assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup), 0.8125)); |
|---|
| | 812 | assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.LESS), 0.6875)); |
|---|
| | 813 | assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.GREATER), 0.4062)); |
|---|
| | 814 | writeln("Passed wilcoxonSignedRank unittest."); |
|---|
| | 815 | } |
|---|
| | 816 | |
|---|
| | 817 | real wilcoxonSignedRankW(T, U)(T before, U after, real* tieSum = null) |
|---|
| 631 | | unittest { |
|---|
| 632 | | assert(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup) == 7.5); |
|---|
| 633 | | assert(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup) == 6); |
|---|
| 634 | | assert(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup) == 5); |
|---|
| 635 | | writeln("Passed wilcoxonSignedRank unittest."); |
|---|
| 636 | | } |
|---|
| 637 | | |
|---|
| 638 | | /**Computes a P-value for a Wilcoxon signed rank test score against the given |
|---|
| 639 | | * alternative. Alt.LESS means that elements of before are typically less |
|---|
| 640 | | * than corresponding elements of after. Alt.GREATER means elements of |
|---|
| 641 | | * before are typically greater than corresponding elements of after. |
|---|
| 642 | | * Alt.TWOSIDE means there is a significant difference in either direction. |
|---|
| 643 | | * |
|---|
| 644 | | * exactThresh is the threshold value of before.length at which this function |
|---|
| 645 | | * switches from exact to approximate computation of the p-value. Do not set |
|---|
| 646 | | * exactThresh to more than 200, as the exact calculation is both very slow and |
|---|
| 647 | | * not numerically stable past this point, and the asymptotic calculation is |
|---|
| 648 | | * very good for N this large. To disable exact calculation entirely, set |
|---|
| 649 | | * exactThresh to 0. |
|---|
| 650 | | * |
|---|
| 651 | | * Notes: Exact p-value computation is never used when tieSum > 0, i.e. when |
|---|
| 652 | | * there were ties in the data, because it is not computationally feasible. |
|---|
| 653 | | * In these cases, exactThresh will be ignored. |
|---|
| 654 | | * |
|---|
| 655 | | * May give a different answer than calling wilcoxonSignedRank, and then |
|---|
| 656 | | * passing the W value into wilcoxonSignedRankPval in the presence of ties |
|---|
| 657 | | * or equal pairs. |
|---|
| 658 | | * |
|---|
| 659 | | * The input ranges for this function must define a length and must be |
|---|
| 660 | | * forward ranges. |
|---|
| 661 | | * |
|---|
| 662 | | * Returns: The p-value against the given alternative.*/ |
|---|
| 663 | | real wilcoxonSignedRankPval(T)(T before, T after, |
|---|
| 664 | | Alt alt = Alt.TWOSIDE, uint exactThresh = 50) |
|---|
| 665 | | if(realInput!(T) && dstats.base.hasLength!(T) && isForwardRange!(T)) |
|---|
| 666 | | in { |
|---|
| 667 | | assert(before.length == after.length); |
|---|
| 668 | | } body { |
|---|
| 669 | | real tieSum; |
|---|
| 670 | | real W = wilcoxonSignedRank(before, after, &tieSum); |
|---|
| 671 | | ulong N = before.length; |
|---|
| 672 | | while(!before.empty && !after.empty) { |
|---|
| 673 | | if(before.front == after.front) |
|---|
| 674 | | N--; |
|---|
| 675 | | before.popFront; |
|---|
| 676 | | after.popFront; |
|---|
| 677 | | |
|---|
| 678 | | } |
|---|
| 679 | | return wilcoxonSignedRankPval(W, N, alt, tieSum, exactThresh); |
|---|
| 680 | | } |
|---|
| 681 | | |
|---|
| 682 | | unittest { |
|---|
| 683 | | // Values from R. |
|---|
| 684 | | alias approxEqual ae; |
|---|
| 685 | | // With ties, normal approx. |
|---|
| 686 | | assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup), 1)); |
|---|
| 687 | | assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup), 0.7865)); |
|---|
| 688 | | assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup), 0.5879)); |
|---|
| 689 | | assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.LESS), 0.5562)); |
|---|
| 690 | | assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.LESS), 0.3932)); |
|---|
| 691 | | assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.LESS), 0.2940)); |
|---|
| 692 | | assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.GREATER), 0.5562)); |
|---|
| 693 | | assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.GREATER), 0.706)); |
|---|
| 694 | | assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.GREATER), 0.7918)); |
|---|
| 695 | | |
|---|
| 696 | | // Exact. |
|---|
| 697 | | assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup), 0.625)); |
|---|
| 698 | | assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.LESS), 0.3125)); |
|---|
| 699 | | assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.GREATER), 0.7812)); |
|---|
| 700 | | assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup), 0.8125)); |
|---|
| 701 | | assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.LESS), 0.6875)); |
|---|
| 702 | | assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.GREATER), 0.4062)); |
|---|
| 703 | | writeln("Passed wilcoxonSignedRankPval unittest."); |
|---|
| 704 | | } |
|---|
| 705 | | |
|---|
| 706 | | /**Computes Wilcoxon signed rank test P-value for |
|---|
| 707 | | * a set of observations against another set, using the given alternative. |
|---|
| 708 | | * Alt.LESS means that elements of before are typically less than corresponding |
|---|
| 709 | | * elements of after. Alt.GREATER means elements of before are typically |
|---|
| 710 | | * greater than corresponding elements of after. Alt.TWOSIDE means that |
|---|
| 711 | | * there is a significant difference in either direction. |
|---|
| 712 | | * |
|---|
| 713 | | * tieSum is a parameter that is used internally to adjust for |
|---|
| 714 | | * ties in the data. It is computed by wilcoxonSignedRank(). |
|---|
| 715 | | * exactThresh is the threshold value of N at which this function |
|---|
| 716 | | * switches from exact to approximate computation of the p-value. Exact |
|---|
| 717 | | * computation is very slow for large datasets, and for anything but very small |
|---|
| 718 | | * datasets, the asymptotic approximation is good enough for all practical |
|---|
| 719 | | * purposes. Do not set exactThresh to more than 200, as the exact |
|---|
| 720 | | * calculation is both very slow and not numerically stable past this point, |
|---|
| 721 | | * and the asymptotic calculation is very good for N this large. To disable |
|---|
| 722 | | * exact calculation entirely, set exactThresh to 0. |
|---|
| 723 | | * |
|---|
| 724 | | * Note: Exact p-value computation is never used when tieSum > 0, i.e. when |
|---|
| 725 | | * there were ties in the data, because it is not computationally feasible. |
|---|
| 726 | | * In these cases, exactThresh will be ignored. |
|---|
| 727 | | * |
|---|
| 728 | | * Returns: The p-value against the given alternative.*/ |
|---|
| 1087 | | * reference distribution. This implementation uses a signed D value to indicate |
|---|
| 1088 | | * the direction of the difference between distributions. |
|---|
| 1089 | | * To get the D value used in standard notation, |
|---|
| 1090 | | * simply take the absolute value of this D value.*/ |
|---|
| 1091 | | real ksTest(T, U)(T F, U Fprime) |
|---|
| | 1243 | * reference distribution. |
|---|
| | 1244 | * |
|---|
| | 1245 | * Returns: A TestRes with the K-S D value and a P value for the null that |
|---|
| | 1246 | * FPrime is distribuuted identically to F against the alternative that it isn't. |
|---|
| | 1247 | * This implementation uses a signed D value to indicate the direction of the |
|---|
| | 1248 | * difference between distributions. To get the D value used in standard |
|---|
| | 1249 | * notation, simply take the absolute value of this D value. |
|---|
| | 1250 | * |
|---|
| | 1251 | * Bugs: Exact calculation not implemented. Uses asymptotic approximation.*/ |
|---|
| | 1252 | TestRes ksTest(T, U)(T F, U Fprime) |
|---|
| | 1253 | if(realInput!(T) && realInput!(U)) { |
|---|
| | 1254 | real D = ksTestD(F, Fprime); |
|---|
| | 1255 | return TestRes(D, ksPval(F.length, Fprime.length, D)); |
|---|
| | 1256 | } |
|---|
| | 1257 | |
|---|
| | 1258 | unittest { |
|---|
| | 1259 | assert(approxEqual(ksTest([1,2,3,4,5].dup, [1,2,3,4,5].dup).testStat, 0)); |
|---|
| | 1260 | assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,2,3,5].dup).testStat, -.2)); |
|---|
| | 1261 | assert(approxEqual(ksTest([-1,0,2,8, 6].dup, [1,2,2,3,5].dup).testStat, .4)); |
|---|
| | 1262 | assert(approxEqual(ksTest([1,2,3,4,5].dup, [1,2,2,3,5,7,8].dup).testStat, .2857)); |
|---|
| | 1263 | assert(approxEqual(ksTestDestructive([1, 2, 3, 4, 4, 4, 5].dup, |
|---|
| | 1264 | [1, 2, 3, 4, 5, 5, 5].dup).testStat, .2857)); |
|---|
| | 1265 | |
|---|
| | 1266 | assert(approxEqual(ksTest([1, 2, 3, 4, 4, 4, 5].dup, [1, 2, 3, 4, 5, 5, 5].dup), |
|---|
| | 1267 | .9375)); |
|---|
| | 1268 | assert(approxEqual(ksTestDestructive([1, 2, 3, 4, 4, 4, 5].dup, |
|---|
| | 1269 | [1, 2, 3, 4, 5, 5, 5].dup), .9375)); |
|---|
| | 1270 | writeln("Passed ksTest 2-sample test."); |
|---|
| | 1271 | } |
|---|
| | 1272 | |
|---|
| | 1273 | template isArrayLike(T) { |
|---|
| | 1274 | enum bool isArrayLike = hasSwappableElements!(T) && hasAssignableElements!(T) |
|---|
| | 1275 | && dstats.base.hasLength!(T) && isRandomAccessRange!(T); |
|---|
| | 1276 | } |
|---|
| | 1277 | |
|---|
| | 1278 | /**One-sample KS test against a reference distribution, doesn't modify input |
|---|
| | 1279 | * data. Takes a function pointer or delegate for the CDF of refernce |
|---|
| | 1280 | * distribution. |
|---|
| | 1281 | * |
|---|
| | 1282 | * Returns: A TestRes with the K-S D value and a P value for the null that |
|---|
| | 1283 | * Femp is a sample from F against the alternative that it isn't. This |
|---|
| | 1284 | * implementation uses a signed D value to indicate the direction of the |
|---|
| | 1285 | * difference between distributions. To get the D value used in standard |
|---|
| | 1286 | * notation, simply take the absolute value of this D value. |
|---|
| | 1287 | * |
|---|
| | 1288 | * Bugs: Exact calculation not implemented. Uses asymptotic approximation. |
|---|
| | 1289 | * |
|---|
| | 1290 | * Examples: |
|---|
| | 1291 | * --- |
|---|
| | 1292 | * auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); |
|---|
| | 1293 | * auto empirical = [1, 2, 3, 4, 5]; |
|---|
| | 1294 | * real res = ksTest(empirical, stdNormal); |
|---|
| | 1295 | * --- |
|---|
| | 1296 | */ |
|---|
| | 1297 | TestRes ksTest(T, Func)(T Femp, Func F) |
|---|
| | 1298 | if(realInput!(T) && (is(Func == function) || is(Func == delegate))) { |
|---|
| | 1299 | real D = ksTestD(Femp, F); |
|---|
| | 1300 | return TestRes(D, ksPval(Femp.length, D)); |
|---|
| | 1301 | } |
|---|
| | 1302 | |
|---|
| | 1303 | unittest { |
|---|
| | 1304 | auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); |
|---|
| | 1305 | assert(approxEqual(ksTest([1,2,3,4,5].dup, stdNormal).testStat, -.8413)); |
|---|
| | 1306 | assert(approxEqual(ksTestDestructive([-1,0,2,8, 6].dup, stdNormal).testStat, -.5772)); |
|---|
| | 1307 | auto lotsOfTies = [5,1,2,2,2,2,2,2,3,4].dup; |
|---|
| | 1308 | assert(approxEqual(ksTest(lotsOfTies, stdNormal).testStat, -0.8772)); |
|---|
| | 1309 | |
|---|
| | 1310 | assert(approxEqual(ksTest([0,1,2,3,4].dup, stdNormal), .03271)); |
|---|
| | 1311 | |
|---|
| | 1312 | auto uniform01 = parametrize!(uniformCDF)(0, 1); |
|---|
| | 1313 | assert(approxEqual(ksTestDestructive([0.1, 0.3, 0.5, 0.9, 1].dup, uniform01), 0.7591)); |
|---|
| | 1314 | |
|---|
| | 1315 | writeln("Passed ksTest 1-sample test."); |
|---|
| | 1316 | } |
|---|
| | 1317 | |
|---|
| | 1318 | /**Same as ksTest, except sorts in place, avoiding memory allocations.*/ |
|---|
| | 1319 | TestRes ksTestDestructive(T, U)(T F, U Fprime) |
|---|
| | 1320 | if(isArrayLike!(T) && isArrayLike!(U)) { |
|---|
| | 1321 | real D = ksTestDDestructive(F, Fprime); |
|---|
| | 1322 | return TestRes(D, ksPval(F.length, Fprime.length, D)); |
|---|
| | 1323 | } |
|---|
| | 1324 | |
|---|
| | 1325 | ///Ditto. |
|---|
| | 1326 | TestRes ksTestDestructive(T, Func)(T Femp, Func F) |
|---|
| | 1327 | if(isArrayLike!(T) && (is(Func == function) || is(Func == delegate))) { |
|---|
| | 1328 | real D = ksTestDDestructive(Femp, F); |
|---|
| | 1329 | return TestRes(D, ksPval(Femp.length, D)); |
|---|
| | 1330 | } |
|---|
| | 1331 | |
|---|
| | 1332 | real ksTestD(T, U)(T F, U Fprime) |
|---|
| 1130 | | unittest { |
|---|
| 1131 | | // Values from R. |
|---|
| 1132 | | assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,3,4,5].dup), 0)); |
|---|
| 1133 | | assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,2,3,5].dup), -.2)); |
|---|
| 1134 | | assert(approxEqual(ksTestDestructive([-1,0,2,8, 6].dup, [1,2,2,3,5].dup), .4)); |
|---|
| 1135 | | assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,2,3,5,7,8].dup), .2857)); |
|---|
| 1136 | | assert(approxEqual(ksTestDestructive([1, 2, 3, 4, 4, 4, 5].dup, |
|---|
| 1137 | | [1, 2, 3, 4, 5, 5, 5].dup), .2857)); |
|---|
| 1138 | | writeln("Passed 2-sample ksTestDestructive."); |
|---|
| 1139 | | } |
|---|
| 1140 | | |
|---|
| 1141 | | /**One-sample KS test against a reference distribution, doesn't modify input |
|---|
| 1142 | | * data. Takes a function pointer or delegate for the CDF of refernce |
|---|
| 1143 | | * distribution. |
|---|
| 1144 | | * |
|---|
| 1145 | | * Examples: |
|---|
| 1146 | | * --- |
|---|
| 1147 | | * auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); |
|---|
| 1148 | | * auto empirical = [1, 2, 3, 4, 5]; |
|---|
| 1149 | | * real D = ksTest(empirical, stdNormal); |
|---|
| 1150 | | * --- |
|---|
| 1151 | | */ |
|---|
| 1152 | | real ksTest(T, Func)(T Femp, Func F) |
|---|
| | 1367 | real ksTestD(T, Func)(T Femp, Func F) |
|---|
| 1207 | | /**KS-test, returns 2-sided P-val instead of D. |
|---|
| 1208 | | * Bugs: Exact calculation not implemented. Uses asymptotic approximation.*/ |
|---|
| 1209 | | real ksPval(T, U)(T F, U Fprime) |
|---|
| 1210 | | if(realInput!(T) && realInput!(U)) { |
|---|
| 1211 | | return ksPvalD(F.length, Fprime.length, ksTest(F, Fprime)); |
|---|
| 1212 | | } |
|---|
| 1213 | | |
|---|
| 1214 | | unittest { |
|---|
| 1215 | | assert(approxEqual(ksPval([1, 2, 3, 4, 4, 4, 5].dup, [1, 2, 3, 4, 5, 5, 5].dup), |
|---|
| 1216 | | .9375)); |
|---|
| 1217 | | writeln("Passed ksPval 2-sample test."); |
|---|
| 1218 | | } |
|---|
| 1219 | | |
|---|
| 1220 | | /**One-sided K-S test, returns P-val instead of D. |
|---|
| 1221 | | * Bugs: Exact calculation not implemented. Uses asymptotic approximation.*/ |
|---|
| 1222 | | real ksPval(T, Func)(T Femp, Func F) |
|---|
| 1223 | | if(realInput!(T) && (is(Func == function) || is(Func == delegate))) { |
|---|
| 1224 | | return ksPvalD(Femp.length, ksTest(Femp, F)); |
|---|
| 1225 | | } |
|---|
| 1226 | | |
|---|
| 1227 | | unittest { |
|---|
| 1228 | | auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); |
|---|
| 1229 | | assert(approxEqual(ksPval([0,1,2,3,4].dup, stdNormal), .03271)); |
|---|
| 1230 | | |
|---|
| 1231 | | auto uniform01 = parametrize!(uniformCDF)(0, 1); |
|---|
| 1232 | | assert(approxEqual(ksPval([0.1, 0.3, 0.5, 0.9, 1].dup, uniform01), 0.7591)); |
|---|
| 1233 | | |
|---|
| 1234 | | writeln("Passed ksPval 1-sample test."); |
|---|
| | 1403 | /**Fisher's Exact test for difference in odds between rows/columns |
|---|
| | 1404 | * in a 2x2 contingency table. Specifically, this function tests the odds |
|---|
| | 1405 | * ratio, which is defined, for a contingency table c, as (c[0][0] * c[1][1]) |
|---|
| | 1406 | * / (c[1][0] * c[0][1]). Alternatives are Alt.LESS, meaning true odds ratio |
|---|
| | 1407 | * < 1, Alt.GREATER, meaning true odds ratio > 1, and Alt.TWOSIDE, meaning |
|---|
| | 1408 | * true odds ratio != 1. |
|---|
| | 1409 | * |
|---|
| | 1410 | * Accepts a 2x2 contingency table as an array of arrays of uints. |
|---|
| | 1411 | * For now, only does 2x2 contingency tables. |
|---|
| | 1412 | * |
|---|
| | 1413 | * Returns: A TestRes of the odds ratio and the P-value against the given |
|---|
| | 1414 | * alternative. |
|---|
| | 1415 | * |
|---|
| | 1416 | * Examples: |
|---|
| | 1417 | * --- |
|---|
| | 1418 | * real res = fisherExact([[2u, 7], [8, 2]], Alt.LESS); |
|---|
| | 1419 | * assert(approxEqual(res.p, 0.01852)); // Odds ratio is very small in this case. |
|---|
| | 1420 | * assert(approxEqual(res.testStat, 4.0 / 56.0)); |
|---|
| | 1421 | * --- |
|---|
| | 1422 | * */ |
|---|
| | 1423 | TestRes fisherExact(T)(const T[2][2] contingencyTable, Alt alt = Alt.TWOSIDE) |
|---|
| | 1424 | if(isIntegral!(T)) { |
|---|
| | 1425 | |
|---|
| | 1426 | static real fisherLower(const uint[2][2] contingencyTable) { |
|---|
| | 1427 | alias contingencyTable c; |
|---|
| | 1428 | return hypergeometricCDF(c[0][0], c[0][0] + c[0][1], c[1][0] + c[1][1], |
|---|
| | 1429 | c[0][0] + c[1][0]); |
|---|
| | 1430 | } |
|---|
| | 1431 | |
|---|
| | 1432 | static real fisherUpper(const uint[2][2] contingencyTable) { |
|---|
| | 1433 | alias contingencyTable c; |
|---|
| | 1434 | return hypergeometricCDFR(c[0][0], c[0][0] + c[0][1], c[1][0] + c[1][1], |
|---|
| | 1435 | c[0][0] + c[1][0]); |
|---|
| | 1436 | } |
|---|
| | 1437 | |
|---|
| | 1438 | |
|---|
| | 1439 | alias contingencyTable c; |
|---|
| | 1440 | real oddsRatio = cast(real) c[0][0] * c[1][1] / c[0][1] / c[1][0]; |
|---|
| | 1441 | if(alt == Alt.LESS) |
|---|
| | 1442 | return TestRes(oddsRatio, fisherLower(contingencyTable)); |
|---|
| | 1443 | else if(alt == Alt.GREATER) |
|---|
| | 1444 | return TestRes(oddsRatio, fisherUpper(contingencyTable)); |
|---|
| | 1445 | |
|---|
| | 1446 | |
|---|
| | 1447 | immutable uint n1 = c[0][0] + c[0][1], |
|---|
| | 1448 | n2 = c[1][0] + c[1][1], |
|---|
| | 1449 | n = c[0][0] + c[1][0]; |
|---|
| | 1450 | |
|---|
| | 1451 | immutable uint mode = |
|---|
| | 1452 | cast(uint) ((cast(real) (n + 1) * (n1 + 1)) / (n1 + n2 + 2)); |
|---|
| | 1453 | immutable real pExact = hypergeometricPMF(c[0][0], n1, n2, n); |
|---|
| | 1454 | immutable real pMode = hypergeometricPMF(mode, n1, n2, n); |
|---|
| | 1455 | |
|---|
| | 1456 | if(approxEqual(pExact, pMode, 1e-7)) { |
|---|
| | 1457 | return TestRes(oddsRatio, 1); |
|---|
| | 1458 | } else if(c[0][0] < mode) { |
|---|
| | 1459 | immutable real pLower = hypergeometricCDF(c[0][0], n1, n2, n); |
|---|
| | 1460 | |
|---|
| | 1461 | // Special case to prevent binary search from getting stuck. |
|---|
| | 1462 | if(hypergeometricPMF(n, n1, n2, n) > pExact) { |
|---|
| | 1463 | return TestRes(oddsRatio, pLower); |
|---|
| | 1464 | } |
|---|
| | 1465 | |
|---|
| | 1466 | // Binary search for where to begin upper half. |
|---|
| | 1467 | uint min = mode, max = n, guess = uint.max; |
|---|
| | 1468 | while(min != max) { |
|---|
| | 1469 | guess = (max == min + 1 && guess == min) ? max : |
|---|
| | 1470 | (cast(ulong) max + cast(ulong) min) / 2UL; |
|---|
| | 1471 | |
|---|
| | 1472 | immutable real pGuess = hypergeometricPMF(guess, n1, n2, n); |
|---|
| | 1473 | if(pGuess <= pExact && |
|---|
| | 1474 | hypergeometricPMF(guess - 1, n1, n2, n) > pExact) { |
|---|
| | 1475 | break; |
|---|
| | 1476 | } else if(pGuess < pExact) { |
|---|
| | 1477 | max = guess; |
|---|
| | 1478 | } else min = guess; |
|---|
| | 1479 | } |
|---|
| | 1480 | if(guess == uint.max && min == max) |
|---|
| | 1481 | guess = min; |
|---|
| | 1482 | |
|---|
| | 1483 | auto p = std.algorithm.min(pLower + |
|---|
| | 1484 | hypergeometricCDFR(guess, n1, n2, n), 1.0L); |
|---|
| | 1485 | return TestRes(oddsRatio, p); |
|---|
| | 1486 | } else { |
|---|
| | 1487 | immutable real pUpper = hypergeometricCDFR(c[0][0], n1, n2, n); |
|---|
| | 1488 | |
|---|
| | 1489 | // Special case to prevent binary search from getting stuck. |
|---|
| | 1490 | if(hypergeometricPMF(0, n1, n2, n) > pExact) { |
|---|
| | 1491 | return TestRes(oddsRatio, pUpper); |
|---|
| | 1492 | } |
|---|
| | 1493 | |
|---|
| | 1494 | // Binary search for where to begin lower half. |
|---|
| | 1495 | uint min = 0, max = mode, guess = uint.max; |
|---|
| | 1496 | while(min != max) { |
|---|
| | 1497 | guess = (max == min + 1 && guess == min) ? max : |
|---|
| | 1498 | (cast(ulong) max + cast(ulong) min) / 2UL; |
|---|
| | 1499 | real pGuess = hypergeometricPMF(guess, n1, n2, n); |
|---|
| | 1500 | |
|---|
| | 1501 | if(pGuess <= pExact && |
|---|
| | 1502 | hypergeometricPMF(guess + 1, n1, n2, n) > pExact) { |
|---|
| | 1503 | break; |
|---|
| | 1504 | } else if(pGuess <= pExact) { |
|---|
| | 1505 | min = guess; |
|---|
| | 1506 | } else max = guess; |
|---|
| | 1507 | } |
|---|
| | 1508 | |
|---|
| | 1509 | if(guess == uint.max && min == max) |
|---|
| | 1510 | guess = min; |
|---|
| | 1511 | |
|---|
| | 1512 | auto p = std.algorithm.min(pUpper + |
|---|
| | 1513 | hypergeometricCDF(guess, n1, n2, n), 1.0L); |
|---|
| | 1514 | return TestRes(oddsRatio, p); |
|---|
| | 1515 | } |
|---|
| 1252 | | } |
|---|
| 1253 | | |
|---|
| 1254 | | |
|---|
| 1255 | | /**Fisher's Exact test for difference in odds between rows/columns |
|---|
| 1256 | | * in a 2x2 contingency table. Specifically, this function tests the odds |
|---|
| 1257 | | * ratio, which is defined, for a contingency table c, as (c[0][0] * c[1][1]) |
|---|
| 1258 | | * / (c[1][0] * c[0][1]). Alternatives are Alt.LESS, meaning true odds ratio |
|---|
| 1259 | | * < 1, Alt.GREATER, meaning true odds ratio > 1, and Alt.TWOSIDE, meaning |
|---|
| 1260 | | * true odds ratio != 1. |
|---|
| 1261 | | * |
|---|
| 1262 | | * Accepts a 2x2 contingency table as an array of arrays of uints. |
|---|
| 1263 | | * For now, only does 2x2 contingency tables. |
|---|
| 1264 | | * Examples: |
|---|
| 1265 | | * --- |
|---|
| 1266 | | * real res = fisherExact([[2u, 7], [8, 2]], Alt.LESS); |
|---|
| 1267 | | * assert(approxEqual(res, 0.01852)); // Odds ratio is very small in this case. |
|---|
| 1268 | | * --- |
|---|
| 1269 | | * */ |
|---|
| 1270 | | real fisherExact(T)(const T[2][2] contingencyTable, Alt alt = Alt.TWOSIDE) |
|---|
| 1271 | | if(isIntegral!(T)) { |
|---|
| 1272 | | |
|---|
| 1273 | | static real fisherLower(const uint[2][2] contingencyTable) { |
|---|
| 1274 | | alias contingencyTable c; |
|---|
| 1275 | | return hypergeometricCDF(c[0][0], c[0][0] + c[0][1], c[1][0] + c[1][1], |
|---|
| 1276 | | c[0][0] + c[1][0]); |
|---|
| 1277 | | } |
|---|
| 1278 | | |
|---|
| 1279 | | static real fisherUpper(const uint[2][2] contingencyTable) { |
|---|
| 1280 | | alias contingencyTable c; |
|---|
| 1281 | | return hypergeometricCDFR(c[0][0], c[0][0] + c[0][1], c[1][0] + c[1][1], |
|---|
| 1282 | | c[0][0] + c[1][0]); |
|---|
| 1283 | | } |
|---|
| 1284 | | |
|---|
| 1285 | | |
|---|
| 1286 | | if(alt == Alt.LESS) |
|---|
| 1287 | | return fisherLower(contingencyTable); |
|---|
| 1288 | | else if(alt == Alt.GREATER) |
|---|
| 1289 | | return fisherUpper(contingencyTable); |
|---|
| 1290 | | |
|---|
| 1291 | | alias contingencyTable c; |
|---|
| 1292 | | |
|---|
| 1293 | | immutable uint n1 = c[0][0] + c[0][1], |
|---|
| 1294 | | n2 = c[1][0] + c[1][1], |
|---|
| 1295 | | n = c[0][0] + c[1][0]; |
|---|
| 1296 | | |
|---|
| 1297 | | immutable uint mode = |
|---|
| 1298 | | cast(uint) ((cast(real) (n + 1) * (n1 + 1)) / (n1 + n2 + 2)); |
|---|
| 1299 | | immutable real pExact = hypergeometricPMF(c[0][0], n1, n2, n); |
|---|
| 1300 | | immutable real pMode = hypergeometricPMF(mode, n1, n2, n); |
|---|
| 1301 | | |
|---|
| 1302 | | if(approxEqual(pExact, pMode, 1e-7)) { |
|---|
| 1303 | | return 1; |
|---|
| 1304 | | } else if(c[0][0] < mode) { |
|---|
| 1305 | | immutable real pLower = hypergeometricCDF(c[0][0], n1, n2, n); |
|---|
| 1306 | | |
|---|
| 1307 | | // Special case to prevent binary search from getting stuck. |
|---|
| 1308 | | if(hypergeometricPMF(n, n1, n2, n) > pExact) { |
|---|
| 1309 | | return pLower; |
|---|
| 1310 | | } |
|---|
| 1311 | | |
|---|
| 1312 | | // Binary search for where to begin upper half. |
|---|
| 1313 | | uint min = mode, max = n, guess = uint.max; |
|---|
| 1314 | | while(min != max) { |
|---|
| 1315 | | guess = (max == min + 1 && guess == min) ? max : |
|---|
| 1316 | | (cast(ulong) max + cast(ulong) min) / 2UL; |
|---|
| 1317 | | |
|---|
| 1318 | | immutable real pGuess = hypergeometricPMF(guess, n1, n2, n); |
|---|
| 1319 | | if(pGuess <= pExact && |
|---|
| 1320 | | hypergeometricPMF(guess - 1, n1, n2, n) > pExact) { |
|---|
| 1321 | | break; |
|---|
| 1322 | | } else if(pGuess < pExact) { |
|---|
| 1323 | | max = guess; |
|---|
| 1324 | | } else min = guess; |
|---|
| 1325 | | } |
|---|
| 1326 | | if(guess == uint.max && min == max) |
|---|
| 1327 | | guess = min; |
|---|
| 1328 | | |
|---|
| 1329 | | return std.algorithm.min(pLower + |
|---|
| 1330 | | hypergeometricCDFR(guess, n1, n2, n), 1.0L); |
|---|
| 1331 | | } else { |
|---|
| 1332 | | immutable real pUpper = hypergeometricCDFR(c[0][0], n1, n2, n); |
|---|
| 1333 | | |
|---|
| 1334 | | // Special case to prevent binary search from getting stuck. |
|---|
| 1335 | | if(hypergeometricPMF(0, n1, n2, n) > pExact) { |
|---|
| 1336 | | return pUpper; |
|---|
| 1337 | | } |
|---|
| 1338 | | |
|---|
| 1339 | | // Binary search for where to begin lower half. |
|---|
| 1340 | | uint min = 0, max = mode, guess = uint.max; |
|---|
| 1341 | | while(min != max) { |
|---|
| 1342 | | guess = (max == min + 1 && guess == min) ? max : |
|---|
| 1343 | | (cast(ulong) max + cast(ulong) min) / 2UL; |
|---|
| 1344 | | real pGuess = hypergeometricPMF(guess, n1, n2, n); |
|---|
| 1345 | | |
|---|
| 1346 | | if(pGuess <= pExact && |
|---|
| 1347 | | hypergeometricPMF(guess + 1, n1, n2, n) > pExact) { |
|---|
| 1348 | | break; |
|---|
| 1349 | | } else if(pGuess <= pExact) { |
|---|
| 1350 | | min = guess; |
|---|
| 1351 | | } else max = guess; |
|---|
| 1352 | | } |
|---|
| 1353 | | |
|---|
| 1354 | | if(guess == uint.max && min == max) |
|---|
| 1355 | | guess = min; |
|---|
| 1356 | | |
|---|
| 1357 | | return std.algorithm.min(pUpper + |
|---|
| 1358 | | hypergeometricCDF(guess, n1, n2, n), 1.0L); |
|---|
| 1359 | | } |
|---|
| 1530 | | /**Calculates the significance (P-value) of the given Pearson correlation |
|---|
| 1531 | | * coefficient against the given alternative, for an input vector of length N. |
|---|
| 1532 | | * Alternatives are Alt.LESS, meaning the true cor is < 0, Alt.GREATER, meaning |
|---|
| 1533 | | * the true cor is > 0, and Alt.TWOSIDE, meaning the true cor != 0. |
|---|
| 1534 | | */ |
|---|
| 1535 | | real pcorSig(real cor, ulong N, Alt alt = Alt.TWOSIDE) { |
|---|
| 1536 | | real t = cor / sqrt((1 - cor * cor) / (N - 2)); |
|---|
| 1537 | | |
|---|
| | 1704 | /**Tests the hypothesis that the Pearson correlation between two ranges is |
|---|
| | 1705 | * different from some 0. Alternatives are |
|---|
| | 1706 | * Alt.LESS (pcor(range1, range2) < 0), Alt.GREATER (pcor(range1, range2) |
|---|
| | 1707 | * > 0) and Alt.TWOSIDE (pcor(range1, range2) != 0). |
|---|
| | 1708 | * |
|---|
| | 1709 | * Returns: A ConfInt of the estimated Pearson correlation of the two ranges, |
|---|
| | 1710 | * the P-value against the given alternative, and the confidence interval of |
|---|
| | 1711 | * the correlation at the level specified by confLevel.*/ |
|---|
| | 1712 | ConfInt pcorTest(T, U)(T range1, U range2, Alt alt = Alt.TWOSIDE, real confLevel = 0.95) |
|---|
| | 1713 | if(realInput!(T) && realInput!(U)) { |
|---|
| | 1714 | OnlinePcor res; |
|---|
| | 1715 | while(!range1.empty && !range2.empty) { |
|---|
| | 1716 | res.put(range1.front, range2.front); |
|---|
| | 1717 | range1.popFront; |
|---|
| | 1718 | range2.popFront; |
|---|
| | 1719 | } |
|---|
| | 1720 | return finishPearsonSpearman(res.cor, res.N, alt, confLevel); |
|---|
| | 1721 | } |
|---|
| | 1722 | |
|---|
| | 1723 | unittest { |
|---|
| | 1724 | // Values from R. |
|---|
| | 1725 | auto t1 = pcorTest([1,2,3,4,5].dup, [2,1,4,3,5].dup, Alt.TWOSIDE); |
|---|
| | 1726 | auto t2 = pcorTest([1,2,3,4,5].dup, [2,1,4,3,5].dup, Alt.LESS); |
|---|
| | 1727 | auto t3 = pcorTest([1,2,3,4,5].dup, [2,1,4,3,5].dup, Alt.GREATER); |
|---|
| | 1728 | |
|---|
| | 1729 | assert(approxEqual(t1.testStat, 0.8)); |
|---|
| | 1730 | assert(approxEqual(t2.testStat, 0.8)); |
|---|
| | 1731 | assert(approxEqual(t3.testStat, 0.8)); |
|---|
| | 1732 | |
|---|
| | 1733 | assert(approxEqual(t1.p, 0.1041)); |
|---|
| | 1734 | assert(approxEqual(t2.p, 0.948)); |
|---|
| | 1735 | assert(approxEqual(t3.p, 0.05204)); |
|---|
| | 1736 | |
|---|
| | 1737 | assert(approxEqual(t1.lowerBound, -0.2796400)); |
|---|
| | 1738 | assert(approxEqual(t3.lowerBound, -0.06438567)); |
|---|
| | 1739 | assert(approxEqual(t2.lowerBound, -1)); |
|---|
| | 1740 | |
|---|
| | 1741 | assert(approxEqual(t1.upperBound, 0.9861962)); |
|---|
| | 1742 | assert(approxEqual(t2.upperBound, 0.9785289)); |
|---|
| | 1743 | assert(approxEqual(t3.upperBound, 1)); |
|---|
| | 1744 | |
|---|
| | 1745 | writeln("Passed pcorSig test."); |
|---|
| | 1746 | } |
|---|
| | 1747 | |
|---|
| | 1748 | /**Tests the hypothesis that the Spearman correlation between two ranges is |
|---|
| | 1749 | * different from some 0. Alternatives are |
|---|
| | 1750 | * Alt.LESS (scor(range1, range2) < 0), Alt.GREATER (scor(range1, range2) |
|---|
| | 1751 | * > 0) and Alt.TWOSIDE (scor(range1, range2) != 0). |
|---|
| | 1752 | * |
|---|
| | 1753 | * Returns: A TestRes containing the Spearman correlation coefficient and |
|---|
| | 1754 | * the P-value for the given alternative. |
|---|
| | 1755 | * |
|---|
| | 1756 | * Bugs: Exact P-value computation not yet implemented. Uses asymptotic |
|---|
| | 1757 | * approximation only. This is good enough for most practical purposes given |
|---|
| | 1758 | * reasonably large N, but is not perfectly accurate. Not valid for data with |
|---|
| | 1759 | * very large amounts of ties. */ |
|---|
| | 1760 | TestRes scorTest(T, U)(T range1, U range2, Alt alt = Alt.TWOSIDE) |
|---|
| | 1761 | if(isInputRange!(T) && isInputRange!(U) && |
|---|
| | 1762 | dstats.base.hasLength!(T) && dstats.base.hasLength!(U)) { |
|---|
| | 1763 | real N = range1.length; |
|---|
| | 1764 | return finishPearsonSpearman(scor(range1, range2), N, alt, 0); |
|---|
| | 1765 | } |
|---|
| | 1766 | |
|---|
| | 1767 | unittest { |
|---|
| | 1768 | // Values from R. |
|---|
| | 1769 | int[] arr1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]; |
|---|
| | 1770 | int[] arr2 = [8,6,7,5,3,0,9,8,6,7,5,3,0,9,3,6,2,4,3,6,8]; |
|---|
| | 1771 | auto t1 = scorTest(arr1, arr2, Alt.TWOSIDE); |
|---|
| | 1772 | auto t2 = scorTest(arr1, arr2, Alt.LESS); |
|---|
| | 1773 | auto t3 = scorTest(arr1, arr2, Alt.GREATER); |
|---|
| | 1774 | |
|---|
| | 1775 | assert(approxEqual(t1.testStat, -0.1769406)); |
|---|
| | 1776 | assert(approxEqual(t2.testStat, -0.1769406)); |
|---|
| | 1777 | assert(approxEqual(t3.testStat, -0.1769406)); |
|---|
| | 1778 | |
|---|
| | 1779 | assert(approxEqual(t1.p, 0.4429)); |
|---|
| | 1780 | assert(approxEqual(t3.p, 0.7785)); |
|---|
| | 1781 | assert(approxEqual(t2.p, 0.2215)); |
|---|
| | 1782 | |
|---|
| | 1783 | writeln("Passed scorSig test."); |
|---|
| | 1784 | } |
|---|
| | 1785 | |
|---|
| | 1786 | private ConfInt finishPearsonSpearman(real cor, real N, Alt alt, real confLevel) { |
|---|
| | 1787 | real denom = sqrt((1 - cor * cor) / (N - 2)); |
|---|
| | 1788 | real t = cor / denom; |
|---|
| | 1789 | ConfInt ret; |
|---|
| | 1790 | ret.testStat = cor; |
|---|
| | 1791 | real sqN = sqrt(N - 3); |
|---|
| | 1792 | real z = sqN * atanh(cor); |
|---|
| 1613 | | assert(approxEqual(kcorSig(0.2105263, 20), 0.2086, 0.0, .01)); |
|---|
| 1614 | | assert(approxEqual(kcorSig(0.2105263, 20, Alt.LESS), 0.907, 0.0, .01)); |
|---|
| 1615 | | assert(approxEqual(kcorSig(0.2105263, 20, Alt.GREATER), 0.1043, 0.0, .01)); |
|---|
| 1616 | | writeln("Passed kcorSig test."); |
|---|
| | 1855 | |
|---|
| | 1856 | int[] arr1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]; |
|---|
| | 1857 | int[] arr2 = [8,6,7,5,3,0,9,8,6,7,5,3,0,9,3,6,2,4,3,6,8]; |
|---|
| | 1858 | auto t1 = kcorTest(arr1, arr2, Alt.TWOSIDE); |
|---|
| | 1859 | auto t2 = kcorTest(arr1, arr2, Alt.LESS); |
|---|
| | 1860 | auto t3 = kcorTest(arr1, arr2, Alt.GREATER); |
|---|
| | 1861 | |
|---|
| | 1862 | assert(approxEqual(t1.testStat, -.1448010)); |
|---|
| | 1863 | assert(approxEqual(t2.testStat, -.1448010)); |
|---|
| | 1864 | assert(approxEqual(t3.testStat, -.1448010)); |
|---|
| | 1865 | |
|---|
| | 1866 | assert(approxEqual(t1.p, 0.3757, 0.0, 0.02)); |
|---|
| | 1867 | assert(approxEqual(t3.p, 0.8122, 0.0, 0.02)); |
|---|
| | 1868 | assert(approxEqual(t2.p, 0.1878, 0.0, 0.02)); |
|---|
| | 1869 | |
|---|
| | 1870 | writeln("Passed scorSig test."); |
|---|