Changeset 48

Show
Ignore:
Timestamp:
05/07/09 00:46:04 (3 years ago)
Author:
dsimcha
Message:

Overhaul tests. Change kurtosis, skewness to more widely used, possibly less biased definitions. This only matters for small samples.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/distrib.d

    r40 r48  
    12541254 
    12551255/// 
    1256 real cauchyPMF(real X, real X0 = 0, real gamma = 1)
     1256real cauchyPMF(real X, real X0 = 0, real gamma = 1) pure nothrow
    12571257    real toSquare = (X - X0) / gamma; 
    12581258    return 1.0L / ( 
     
    12901290 
    12911291/// 
    1292 real invCauchyCDF(real p, real X0 = 0, real gamma = 1)
     1292real invCauchyCDF(real p, real X0 = 0, real gamma = 1) pure nothrow
    12931293    return X0 + gamma * tan(PI * (p - 0.5L)); 
    12941294} 
     
    13091309 
    13101310/// 
    1311 real laplaceCDF(real X, real mu = 0, real b = 1)
     1311real laplaceCDF(real X, real mu = 0, real b = 1) pure nothrow
    13121312    real diff = (X - mu); 
    13131313    real sign = (diff > 0) ? 1 : -1; 
     
    13231323 
    13241324/// 
    1325 real laplaceCDFR(real X, real mu = 0, real b = 1)
     1325real laplaceCDFR(real X, real mu = 0, real b = 1) pure nothrow
    13261326    real diff = (mu - X); 
    13271327    real sign = (diff > 0) ? 1 : -1; 
     
    13521352 
    13531353/**Kolmogorov distribution.  Used in Kolmogorov-Smirnov testing.*/ 
    1354 real kolmDist(real X)
     1354real kolmDist(real X) pure nothrow
    13551355    if(X == 0) { 
    13561356        //Handle as a special case.  Otherwise, get NAN b/c of divide by zero. 
  • trunk/gamma.d

    r28 r48  
    7878 * $(NAN) if sign is indeterminate. 
    7979 */ 
    80 real sgnGamma(real x) 
     80real sgnGamma(real x) pure nothrow 
    8181{ 
    8282    /* Author: Don Clugston. */ 
     
    529529// Continued fraction expansion #1 for incomplete beta integral 
    530530// Use when x < (a+1)/(a+b+2) 
    531 real betaDistExpansion1(real a, real b, real x ) 
     531real betaDistExpansion1(real a, real b, real x ) pure nothrow 
    532532{ 
    533533    real xk, pk, pkm1, pkm2, qk, qkm1, qkm2; 
     
    612612// Continued fraction expansion #2 for incomplete beta integral 
    613613// Use when x > (a+1)/(a+b+2) 
    614 real betaDistExpansion2(real a, real b, real x ) 
     614real betaDistExpansion2(real a, real b, real x ) pure nothrow 
    615615{ 
    616616    real  xk, pk, pkm1, pkm2, qk, qkm1, qkm2; 
  • trunk/random.d

    r45 r48  
    210210    foreach(ref elem; observ) 
    211211    elem = rNorm(); 
    212     auto ksRes = ksPval(observ, parametrize!(normalCDF)(0.0L, 1.0L)); 
    213     writeln("100k samples from normal(0, 1):  K-S P-val:  ", ksRes); 
     212    auto ksRes = ksTest(observ, parametrize!(normalCDF)(0.0L, 1.0L)); 
     213    writeln("100k samples from normal(0, 1):  K-S P-val:  ", ksRes.p); 
    214214    writeln("\tMean Expected: 0  Observed:  ", mean(observ)); 
    215215    writeln("\tMedian Expected: 0  Observed:  ", median(observ)); 
     
    229229    foreach(ref elem; observ) 
    230230    elem = rCauchy(2, 5); 
    231     auto ksRes = ksPval(observ, parametrize!(cauchyCDF)(2.0L, 5.0L)); 
    232     writeln("100k samples from Cauchy(2, 5):  K-S P-val:  ", ksRes); 
     231    auto ksRes = ksTest(observ, parametrize!(cauchyCDF)(2.0L, 5.0L)); 
     232    writeln("100k samples from Cauchy(2, 5):  K-S P-val:  ", ksRes.p); 
    233233    writeln("\tMean Expected: N/A  Observed:  ", mean(observ)); 
    234234    writeln("\tMedian Expected: 2  Observed:  ", median(observ)); 
     
    251251    foreach(ref elem; observ) 
    252252    elem = rStudentT(5); 
    253     auto ksRes = ksPval(observ, parametrize!(studentsTCDF)(5)); 
    254     writeln("100k samples from T(5):  K-S P-val:  ", ksRes); 
     253    auto ksRes = ksTest(observ, parametrize!(studentsTCDF)(5)); 
     254    writeln("100k samples from T(5):  K-S P-val:  ", ksRes.p); 
    255255    writeln("\tMean Expected: 0  Observed:  ", mean(observ)); 
    256256    writeln("\tMedian Expected: 0  Observed:  ", median(observ)); 
     
    271271    foreach(ref elem; observ) 
    272272    elem = rFisher(5, 7); 
    273     auto ksRes = ksPval(observ, parametrize!(fisherCDF)(5, 7)); 
    274     writeln("100k samples from fisher(5, 7):  K-S P-val:  ", ksRes); 
     273    auto ksRes = ksTest(observ, parametrize!(fisherCDF)(5, 7)); 
     274    writeln("100k samples from fisher(5, 7):  K-S P-val:  ", ksRes.p); 
    275275    writeln("\tMean Expected: ",  7.0 / 5, "  Observed:  ", mean(observ)); 
    276276    writeln("\tMedian Expected: ??  Observed:  ", median(observ)); 
     
    290290    foreach(ref elem; observ) 
    291291    elem = rChiSquare(df); 
    292     auto ksRes = ksPval(observ, parametrize!(chiSqrCDF)(5)); 
    293     writeln("100k samples from Chi-Square:  K-S P-val:  ", ksRes); 
     292    auto ksRes = ksTest(observ, parametrize!(chiSqrCDF)(5)); 
     293    writeln("100k samples from Chi-Square:  K-S P-val:  ", ksRes.p); 
    294294    writeln("\tMean Expected: ", df, "  Observed:  ", mean(observ)); 
    295295    writeln("\tMedian Expected: ", df - (2.0L / 3.0L), "  Observed:  ", median(observ)); 
     
    715715        foreach(ref elem; observ) 
    716716        elem = rHypergeometric(n1, n2, n); 
    717         auto ksRes = ksPval(observ, parametrize!(hypergeometricCDF)(n1, n2, n)); 
     717        auto ksRes = ksTest(observ, parametrize!(hypergeometricCDF)(n1, n2, n)); 
    718718        writeln("100k samples from hypergeom.(", n1, ", ", n2, ", ", n, "):"); 
    719719        writeln("\tMean Expected: ", n * cast(real) n1 / (n1 + n2), 
     
    821821    foreach(ref elem; observ) 
    822822    elem = rLaplace(); 
    823     auto ksRes = ksPval(observ, parametrize!(laplaceCDF)(0.0L, 1.0L)); 
    824     writeln("100k samples from Laplace(0, 1):  K-S P-val:  ", ksRes); 
     823    auto ksRes = ksTest(observ, parametrize!(laplaceCDF)(0.0L, 1.0L)); 
     824    writeln("100k samples from Laplace(0, 1):  K-S P-val:  ", ksRes.p); 
    825825    writeln("\tMean Expected: 0  Observed:  ", mean(observ)); 
    826826    writeln("\tMedian Expected: 0  Observed:  ", median(observ)); 
     
    841841    foreach(ref elem; observ) 
    842842    elem = rExponential(2.0L); 
    843     auto ksRes = ksPval(observ, parametrize!(gammaCDF)(2, 1)); 
    844     writeln("100k samples from exponential(2):  K-S P-val:  ", ksRes); 
     843    auto ksRes = ksTest(observ, parametrize!(gammaCDF)(2, 1)); 
     844    writeln("100k samples from exponential(2):  K-S P-val:  ", ksRes.p); 
    845845    writeln("\tMean Expected: 0.5  Observed:  ", mean(observ)); 
    846846    writeln("\tMedian Expected: 0.3465  Observed:  ", median(observ)); 
     
    900900    foreach(ref elem; observ) 
    901901    elem = rGamma(2.0L, 3.0L); 
    902     auto ksRes = ksPval(observ, parametrize!(gammaCDF)(2, 3)); 
    903     writeln("100k samples from gamma(2, 3):  K-S P-val:  ", ksRes); 
     902    auto ksRes = ksTest(observ, parametrize!(gammaCDF)(2, 3)); 
     903    writeln("100k samples from gamma(2, 3):  K-S P-val:  ", ksRes.p); 
    904904    writeln("\tMean Expected: 1.5  Observed:  ", mean(observ)); 
    905905    writeln("\tMedian Expected: ??  Observed:  ", median(observ)); 
     
    963963        foreach(ref elem; observ) 
    964964        elem = rBeta(a, b); 
    965         auto ksRes = ksPval(observ, paramBeta(a, b)); 
     965        auto ksRes = ksTest(observ, paramBeta(a, b)); 
    966966        auto summ = summary(observ); 
    967         writeln("100k samples from beta(", a, ", ", b, "):  K-S P-val:  ", ksRes); 
     967        writeln("100k samples from beta(", a, ", ", b, "):  K-S P-val:  ", ksRes.p); 
    968968        writeln("\tMean Expected: ", a / (a + b), " Observed:  ", summ.mean); 
    969969        writeln("\tMedian Expected: ??  Observed:  ", median(observ)); 
     
    988988    foreach(ref elem; observ) 
    989989    elem = rLogistic(2.0L, 3.0L); 
    990     auto ksRes = ksPval(observ, parametrize!(logisticCDF)(2, 3)); 
    991     writeln("100k samples from logistic(2, 3):  K-S P-val:  ", ksRes); 
     990    auto ksRes = ksTest(observ, parametrize!(logisticCDF)(2, 3)); 
     991    writeln("100k samples from logistic(2, 3):  K-S P-val:  ", ksRes.p); 
    992992    writeln("\tMean Expected: 2  Observed:  ", mean(observ)); 
    993993    writeln("\tMedian Expected: 2  Observed:  ", median(observ)); 
     
    10071007    foreach(ref elem; observ) 
    10081008    elem = rLogNorm(-2.0L, 1.0L); 
    1009     auto ksRes = ksPval(observ, parametrize!(logNormalCDF)(-2, 1)); 
    1010     writeln("100k samples from log-normal(-2, 1):  K-S P-val:  ", ksRes); 
     1009    auto ksRes = ksTest(observ, parametrize!(logNormalCDF)(-2, 1)); 
     1010    writeln("100k samples from log-normal(-2, 1):  K-S P-val:  ", ksRes.p); 
    10111011    writeln("\tMean Expected: ", exp(-1.5), "  Observed:  ", mean(observ)); 
    10121012    writeln("\tMedian Expected: ", exp(-2.0L), "  Observed:  ", median(observ)); 
     
    10281028    foreach(ref elem; observ) 
    10291029    elem = rWeibull(2.0L, 3.0L); 
    1030     auto ksRes = ksPval(observ, parametrize!(weibullCDF)(2.0, 3.0)); 
    1031     writeln("100k samples from weibull(2, 3):  K-S P-val:  ", ksRes); 
     1030    auto ksRes = ksTest(observ, parametrize!(weibullCDF)(2.0, 3.0)); 
     1031    writeln("100k samples from weibull(2, 3):  K-S P-val:  ", ksRes.p); 
    10321032    delete observ; 
    10331033} 
     
    10571057        elem = rWald(4, 7); 
    10581058    } 
    1059     auto ksRes = ksPval(observ, parametrize!(waldCDF)(4, 7)); 
    1060     writeln("100k samples from wald(4, 7):  K-S P-val:  ", ksRes); 
     1059    auto ksRes = ksTest(observ, parametrize!(waldCDF)(4, 7)); 
     1060    writeln("100k samples from wald(4, 7):  K-S P-val:  ", ksRes.p); 
    10611061    writeln("\tMean Expected: ", 4, "  Observed:  ", mean(observ)); 
    10621062    writeln("\tMedian Expected: ??  Observed:  ", median(observ)); 
     
    10771077        elem = rRayleigh(3); 
    10781078    } 
    1079     auto ksRes = ksPval(observ, parametrize!(rayleighCDF)(3)); 
    1080     writeln("100k samples from rayleigh(3):  K-S P-val:  ", ksRes); 
    1081 } 
    1082  
    1083  
     1079    auto ksRes = ksTest(observ, parametrize!(rayleighCDF)(3)); 
     1080    writeln("100k samples from rayleigh(3):  K-S P-val:  ", ksRes.p); 
     1081} 
     1082 
     1083 
  • trunk/summary.d

    r34 r48  
    497497    /// 
    498498    real skewness() const { 
    499         real var = var()
     499        real var = _m2 - _mean * _mean
    500500        real numerator = _m3 - 3 * _mean * _m2 + 2 * _mean * _mean * _mean; 
    501501        return numerator / pow(var, 1.5L); 
     
    506506        real mean4 = mean * mean; 
    507507        mean4 *= mean4; 
    508         real vari = var()
     508        real vari = _m2 - _mean * _mean
    509509        return (_m4 - 4 * _mean * _m3 + 6 * _mean * _mean * _m2 - 3 * mean4) / 
    510510               (vari * vari) - 3; 
     
    549549 
    550550unittest { 
    551     // Values from Octave
    552     assert(approxEqual(kurtosis([1, 1, 1, 1, 10].dup), -.92)); 
    553     assert(approxEqual(kurtosis([2.5, 3.5, 4.5, 5.5].dup), -2.0775)); 
    554     assert(approxEqual(kurtosis([1,2,2,2,2,2,100].dup), 0.79523)); 
     551    // Values from Matlab
     552    assert(approxEqual(kurtosis([1, 1, 1, 1, 10].dup), 0.25)); 
     553    assert(approxEqual(kurtosis([2.5, 3.5, 4.5, 5.5].dup), -1.36)); 
     554    assert(approxEqual(kurtosis([1,2,2,2,2,2,100].dup), 2.1657)); 
    555555    writefln("Passed kurtosis unittest."); 
    556556} 
     
    573573    // Values from Octave. 
    574574    assert(approxEqual(skewness([1,2,3,4,5].dup), 0)); 
    575     assert(approxEqual(skewness([3,1,4,1,5,9,2,6,5].dup), 0.45618)); 
    576     assert(approxEqual(skewness([2,7,1,8,2,8,1,8,2,8,4,5,9].dup), -0.076783)); 
     575    assert(approxEqual(skewness([3,1,4,1,5,9,2,6,5].dup), 0.5443)); 
     576    assert(approxEqual(skewness([2,7,1,8,2,8,1,8,2,8,4,5,9].dup), -0.0866)); 
    577577 
    578578    // Test handling of ranges that are not arrays. 
    579579    string[] stringy = ["3", "1", "4", "1", "5", "9", "2", "6", "5"]; 
    580580    auto intified = map!(to!(int, string))(stringy); 
    581     assert(approxEqual(skewness(intified), 0.45618)); 
     581    assert(approxEqual(skewness(intified), 0.5443)); 
    582582    writeln("Passed skewness test."); 
    583583} 
  • trunk/tests.d

    r45 r48  
    6767 
    6868import dstats.base, dstats.distrib, dstats.alloc, dstats.summary, dstats.sort, 
    69        std.algorithm, std.functional, std.range, std.c.stdlib; 
     69       dstats.cor, std.algorithm, std.functional, std.range, std.c.stdlib, 
     70       std.conv : text; 
    7071 
    7172version(unittest) { 
     
    9192} 
    9293 
     94/**A plain old data struct for returning the results of hypothesis tests.*/ 
     95struct TestRes { 
     96 
     97    /// The test statistic.  What exactly this is is specific to the test. 
     98    real testStat; 
     99 
     100    /**The P-value against the provided alternative.  This struct can 
     101     * be implicitly converted to just the P-value via alias this.*/ 
     102    real p; 
     103 
     104    /// Allow implicit conversion to the P-value. 
     105    alias p this; 
     106 
     107    /// 
     108    string toString() { 
     109        return text("Test Statistic = ", testStat, "\nP = ", p); 
     110    } 
     111} 
     112 
     113/**A plain old data struct for returning the results of hypothesis tests 
     114 * that also produce confidence intervals.  Contains, can implicitly convert 
     115 * to, a TestRes.*/ 
     116struct ConfInt { 
     117    /// 
     118    TestRes testRes; 
     119 
     120    ///  Lower bound of the confidence interval at the level specified. 
     121    real lowerBound; 
     122 
     123    ///  Upper bound of the confidence interval at the level specified. 
     124    real upperBound; 
     125 
     126    alias testRes this; 
     127 
     128    /// 
     129    string toString() { 
     130        return text("Test Statistic = ", testRes.testStat, "\nP = ", testRes.p, 
     131                "\nLower Confidence Bound = ", lowerBound, 
     132                "\nUpper Confidence Bound = ", upperBound); 
     133    } 
     134} 
     135 
    93136/**One-sample Student's T-test for difference between mean of data and 
    94137 * a fixed value.  Alternatives are Alt.LESS, meaning mean(data) < mean, 
     
    96139 * != mean. 
    97140 * 
    98  * Returns:  The p-value against the given alternative.*/ 
    99 real studentsTTest(T)(T data, real mean, Alt alt = Alt.TWOSIDE) 
     141 * Returns:  A ConfInt containing T, the P-value and the boundaries of 
     142 * the confidence interval for mean(T) at the level specified.*/ 
     143ConfInt studentsTTest(T)(T data, real testMean = 0, Alt alt = Alt.TWOSIDE, 
     144    real confLevel = 0.95) 
    100145if(realIterable!(T)) { 
    101     OnlineMeanSD meanSd; 
    102     uint len; 
    103     foreach(elem; data) { 
    104         len++; 
    105         meanSd.put(elem); 
    106     } 
    107  
    108     real t = (meanSd.mean - mean) / (meanSd.stdev / sqrt(cast(real) len)); 
    109     if(alt == Alt.LESS) 
    110         return studentsTCDF(t, data.length - 1); 
    111     else if(alt == Alt.GREATER) 
    112         return studentsTCDF(-t, data.length - 1); 
    113     else 
    114         return 2 * min(studentsTCDF(t, data.length - 1), 
    115                        studentsTCDF(-t, data.length - 1)); 
    116 
    117  
    118 unittest { 
    119     assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2), .2302)); 
    120     assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.LESS), .8849)); 
    121     assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.GREATER), .1151)); 
     146    return pairedTTest(data, repeat(0), testMean, alt, confLevel); 
     147 
     148
     149 
     150unittest { 
     151    auto t1 = studentsTTest([1, 2, 3, 4, 5].dup, 2); 
     152    assert(approxEqual(t1.testStat, 1.4142)); 
     153    assert(approxEqual(t1.p, 0.2302)); 
     154    assert(approxEqual(t1.lowerBound, 1.036757)); 
     155    assert(approxEqual(t1.upperBound, 4.963243)); 
     156 
     157    auto t2 = studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.LESS); 
     158    assert(approxEqual(t2, .8849)); 
     159    assert(approxEqual(t2.testStat, 1.4142)); 
     160    assert(t2.lowerBound == -real.infinity); 
     161    assert(approxEqual(t2.upperBound, 4.507443)); 
     162 
     163    auto t3 = studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.GREATER); 
     164    assert(approxEqual(t3, .1151)); 
     165    assert(approxEqual(t3.testStat, 1.4142)); 
     166    assert(approxEqual(t3.lowerBound, 1.492557)); 
     167    assert(t3.upperBound == real.infinity); 
     168 
    122169    writeln("Passed 1-sample studentsTTest test."); 
    123170} 
     
    127174 * mean(sample1) < mean(sample2), Alt.GREATER, meaning mean(sample1) > 
    128175 * mean(sample2), and Alt.TWOSIDE, meaning mean(sample1) != mean(sample2). 
    129  * Returns:  The p-value against the given alternative.*/ 
    130 real studentsTTest(T, U)(T sample1, U sample2, Alt alt = Alt.TWOSIDE) 
     176 * 
     177 * Returns:  A ConfInt containing the T statistic, the P-value, and the 
     178 * boundaries of the confidence interval for the difference between means 
     179 * of sample1 and sample2 at the specified level.*/ 
     180ConfInt studentsTTest(T, U)(T sample1, U sample2, real testMean = 0, 
     181    Alt alt = Alt.TWOSIDE, real confLevel = 0.95) 
    131182if(realIterable!(T) && realIterable!(U)) { 
    132183    size_t n1, n2; 
     
    143194    real sx1x2 = sqrt(((n1 - 1) * s1summ.var + (n2 - 1) * s2summ.var) / 
    144195                 (n1 + n2 - 2)); 
    145     real t = (s1summ.mean - s2summ.mean) / 
    146              (sx1x2 * sqrt((1.0L / n1) + (1.0L / n2))); 
    147     if(alt == Alt.LESS) 
    148         return studentsTCDF(t, n1 + n2 - 2); 
    149     else if(alt == Alt.GREATER) 
    150         return studentsTCDF(-t, n1 + n2 - 2); 
    151     else 
    152         return 2 * min(studentsTCDF(t, n1 + n2 - 2), 
    153                        studentsTCDF(-t, n1 + n2 - 2)); 
     196    real normSd = (sx1x2 * sqrt((1.0L / n1) + (1.0L / n2))); 
     197    real meanDiff = s1summ.mean - s2summ.mean; 
     198    ConfInt ret; 
     199    ret.testStat = meanDiff / normSd; 
     200    if(alt == Alt.LESS) { 
     201        ret.p = studentsTCDF(ret.testStat, n1 + n2 - 2); 
     202        real delta = invStudentsTCDF(1 - confLevel, n1 + n2 - 2) * normSd; 
     203        ret.lowerBound = -real.infinity; 
     204        ret.upperBound = meanDiff - delta; 
     205    } else if(alt == Alt.GREATER) { 
     206        ret.p = studentsTCDF(-ret.testStat, n1 + n2 - 2); 
     207        real delta = invStudentsTCDF(1 - confLevel, n1 + n2 - 2) * normSd; 
     208        ret.lowerBound = meanDiff + delta; 
     209        ret.upperBound = real.infinity; 
     210    } else { 
     211        ret.p = 2 * min(studentsTCDF(ret.testStat, n1 + n2 - 2), 
     212                       studentsTCDF(-ret.testStat, n1 + n2 - 2)); 
     213        real delta = invStudentsTCDF(0.5 * (1 - confLevel), n1 + n2 - 2) * normSd; 
     214        ret.lowerBound = meanDiff + delta; 
     215        ret.upperBound = meanDiff - delta; 
     216    } 
     217    return ret; 
    154218} 
    155219 
    156220unittest { 
    157221    // Values from R. 
    158     assert(approxEqual(studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup), 0.2346)); 
    159     assert(approxEqual(studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.LESS), 
     222    auto t1 = studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup); 
     223    assert(approxEqual(t1, 0.2346)); 
     224    assert(approxEqual(t1.testStat, -1.274)); 
     225    assert(approxEqual(t1.lowerBound, -5.088787)); 
     226    assert(approxEqual(t1.upperBound, 1.422120)); 
     227 
     228 
     229    assert(approxEqual(studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, 0, Alt.LESS), 
    160230           0.1173)); 
    161     assert(approxEqual(studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.GREATER), 
     231    assert(approxEqual(studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, 0, Alt.GREATER), 
    162232           0.8827)); 
    163     assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup), 0.06985)); 
    164     assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.LESS), 
    165            0.965)); 
    166     assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.GREATER), 
    167            0.03492)); 
     233    auto t2 = studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup); 
     234    assert(approxEqual(t2, 0.06985)); 
     235    assert(approxEqual(t2.testStat, 2.0567)); 
     236    assert(approxEqual(t2.lowerBound, -0.3595529)); 
     237    assert(approxEqual(t2.upperBound, 7.5595529)); 
     238 
     239 
     240    auto t5 = studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, 0, Alt.LESS); 
     241    assert(approxEqual(t5, 0.965)); 
     242    assert(approxEqual(t5.testStat, 2.0567)); 
     243    assert(approxEqual(t5.upperBound, 6.80857)); 
     244    assert(t5.lowerBound == -real.infinity); 
     245 
     246    auto t6 = studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, 0, Alt.GREATER); 
     247    assert(approxEqual(t6, 0.03492)); 
     248    assert(approxEqual(t6.testStat, 2.0567)); 
     249    assert(approxEqual(t6.lowerBound, 0.391422)); 
     250    assert(t6.upperBound == real.infinity); 
    168251    writeln("Passed 2-sample studentsTTest test."); 
    169252} 
     
    174257 * meaning mean(sample1) > mean(sample2), and Alt.TWOSIDE, meaning mean(sample1) 
    175258 * != mean(sample2). 
    176  * Returns:  The p-value against the given alternative.*/ 
    177 real welchTTest(T, U)(T sample1, U sample2, Alt alt = Alt.TWOSIDE) 
     259 * 
     260 * Returns:  A ConfInt containing the T statistic, the P-value, and the 
     261 * boundaries of the confidence interval for the difference between means 
     262 * of sample1 and sample2 at the specified level.*/ 
     263ConfInt welchTTest(T, U)(T sample1, U sample2, real testMean = 0, 
     264    Alt alt = Alt.TWOSIDE, real confLevel = 0.95) 
    178265if(realIterable!(T) && realIterable!(U)) { 
    179266    size_t n1, n2; 
     
    190277    auto v1 = s1summ.var, v2 = s2summ.var; 
    191278    real sx1x2 = sqrt(v1 / n1 + v2 / n2); 
    192     real t = (s1summ.mean - s2summ.mean) / sx1x2; 
     279    real meanDiff = s1summ.mean - s2summ.mean - testMean; 
     280    real t = meanDiff / sx1x2; 
    193281    real numerator = v1 / n1 + v2 / n2; 
    194282    numerator *= numerator; 
     
    198286    denom2 = denom2 * denom2 / (n2 - 1); 
    199287    real df = numerator / (denom1 + denom2); 
    200     if(alt == Alt.LESS) 
    201         return studentsTCDF(t, df); 
    202     else if(alt == Alt.GREATER) 
    203         return studentsTCDF(-t, df); 
    204     else 
    205         return 2 * min(studentsTCDF(t, df), studentsTCDF(-t, df)); 
    206 
    207  
    208 unittest { 
    209         // Values from R. 
    210     assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup), 0.2159)); 
    211     assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.LESS), 
    212            0.1079)); 
    213     assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.GREATER), 
    214            0.892)); 
     288 
     289    ConfInt ret; 
     290    ret.testStat = t; 
     291    if(alt == Alt.LESS) { 
     292        ret.p = studentsTCDF(t, df); 
     293        ret.lowerBound = -real.infinity; 
     294        ret.upperBound = meanDiff + testMean - invStudentsTCDF(1 - confLevel, df) * sx1x2; 
     295    } else if(alt == Alt.GREATER) { 
     296        ret.p = studentsTCDF(-t, df); 
     297        ret.lowerBound = meanDiff + testMean + invStudentsTCDF(1 - confLevel, df) * sx1x2; 
     298        ret.upperBound = real.infinity; 
     299    } else { 
     300        ret.p = 2 * min(studentsTCDF(t, df), studentsTCDF(-t, df)); 
     301        real delta = invStudentsTCDF(0.5 * (1 - confLevel), df) * sx1x2; 
     302        ret.upperBound = meanDiff + testMean - delta; 
     303        ret.lowerBound = meanDiff + testMean + delta; 
     304    } 
     305    return ret; 
     306
     307 
     308unittest { 
     309    // Values from R. 
     310    auto t1 = welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, 2); 
     311    assert(approxEqual(t1, 0.02285)); 
     312    assert(approxEqual(t1.testStat, -2.8099)); 
     313    assert(approxEqual(t1.lowerBound, -4.979316)); 
     314    assert(approxEqual(t1.upperBound, 1.312649)); 
     315 
     316    auto t2 = welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, -1, Alt.LESS); 
     317    assert(approxEqual(t2, 0.2791)); 
     318    assert(approxEqual(t2.testStat, -0.6108)); 
     319    assert(t2.lowerBound == -real.infinity); 
     320    assert(approxEqual(t2.upperBound, 0.7035534)); 
     321 
     322    auto t3 = welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, 0.5, Alt.GREATER); 
     323    assert(approxEqual(t3, 0.9372)); 
     324    assert(approxEqual(t3.testStat, -1.7104)); 
     325    assert(approxEqual(t3.lowerBound, -4.37022)); 
     326    assert(t3.upperBound == real.infinity); 
     327 
    215328    assert(approxEqual(welchTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup), 0.06616)); 
    216     assert(approxEqual(welchTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.LESS)
    217           0.967)); 
    218     assert(approxEqual(welchTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.GREATER)
    219           0.03308)); 
     329    assert(approxEqual(welchTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, 0
     330        Alt.LESS), 0.967)); 
     331    assert(approxEqual(welchTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, 0
     332        Alt.GREATER), 0.03308)); 
    220333    writeln("Passed welchTTest test."); 
    221334} 
     
    228341 * equal to testMean. 
    229342 * 
    230  * Returns:  The p-value against the given alternative.*/ 
    231 real pairedTTest(T, U)(T before, U after, Alt alt = Alt.TWOSIDE, real testMean = 0) 
     343 * 
     344 * Returns:  A ConfInt containing the T statistic, the P-value, and the 
     345 * boundaries of the confidence interval for the mean difference between 
     346 * corresponding elements of sample1 and sample2 at the specified level.*/ 
     347ConfInt pairedTTest(T, U)(T before, U after, real testMean = 0, 
     348    Alt alt = Alt.TWOSIDE, real confLevel = 0.95) 
    232349if(realInput!(T) && realInput!(U)) { 
    233350    OnlineMeanSD msd; 
     
    240357        len++; 
    241358    } 
    242     real t = (msd.mean - testMean) / msd.stdev * sqrt(cast(real) len); 
    243  
     359 
     360    ConfInt ret; 
     361    ret.testStat = (msd.mean - testMean) / msd.stdev * sqrt(cast(real) len); 
     362    auto sampleMean = msd.mean; 
     363    auto sampleSd = msd.stdev; 
     364    real normSd = sampleSd / sqrt(cast(real) len); 
     365    ret.testStat = (sampleMean - testMean) / normSd; 
    244366    if(alt == Alt.LESS) { 
    245         return studentsTCDF(t, len - 1); 
     367        ret.p = studentsTCDF(ret.testStat, len - 1); 
     368        real delta = invStudentsTCDF(1 - confLevel, len - 1) * normSd; 
     369        ret.lowerBound = -real.infinity; 
     370        ret.upperBound = sampleMean - delta; 
    246371    } else if(alt == Alt.GREATER) { 
    247         return studentsTCDF(-t, len - 1); 
    248     } else if(t > 0) { 
    249         return 2 * studentsTCDF(-t, len - 1); 
     372        ret.p = studentsTCDF(-ret.testStat, len - 1); 
     373        real delta = invStudentsTCDF(1 - confLevel, len - 1) * normSd; 
     374        ret.lowerBound = sampleMean + delta; 
     375        ret.upperBound = real.infinity; 
    250376    } else { 
    251         return 2 * studentsTCDF(t, len - 1); 
    252     } 
     377        ret.p = 2 * min(studentsTCDF(ret.testStat, len - 1), 
     378                       studentsTCDF(-ret.testStat, len - 1)); 
     379        real delta = invStudentsTCDF(0.5 * (1 - confLevel), len - 1) * normSd; 
     380        ret.lowerBound = sampleMean + delta; 
     381        ret.upperBound = sampleMean - delta; 
     382    } 
     383    return ret; 
    253384} 
    254385 
    255386unittest { 
    256387    // Values from R. 
    257     assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.LESS), 0.0889)); 
    258     assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.GREATER), 0.9111)); 
    259     assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.TWOSIDE), 0.1778)); 
    260     assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.LESS, 1), 0.01066)); 
    261     assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.GREATER, 1), 0.9893)); 
    262     assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.TWOSIDE, 1), 0.02131)); 
     388    auto t1 = pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 1); 
     389    assert(approxEqual(t1.p, 0.02131)); 
     390    assert(approxEqual(t1.testStat, -3.6742)); 
     391    assert(approxEqual(t1.lowerBound, -2.1601748)); 
     392    assert(approxEqual(t1.upperBound, 0.561748)); 
     393 
     394    assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 0, Alt.LESS), 0.0889)); 
     395    assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 0, Alt.GREATER), 0.9111)); 
     396    assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 0, Alt.TWOSIDE), 0.1778)); 
     397    assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 1, Alt.LESS), 0.01066)); 
     398    assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, 1, Alt.GREATER), 0.9893)); 
    263399    writeln("Passed pairedTTest unittest."); 
    264400} 
    265401 
    266 /**Wilcoxon rank-sum test statistic.  This is a non-parametric test for a 
    267  * difference in the mean ranks of two sets of numbers.  The tieSum parameter is 
    268  * mostly for use internally, and if included will place a value in the 
    269  * dereference that can be used in wilcoxonRankSumPval() to adjust for ties in 
    270  * the input data. 
     402/**A test for normality of the distribution of a range of values.  Based on 
     403 * the assumption that normally distributed values will have a sample skewness 
     404 * and sample kurtosis very close to zero. 
     405 * 
     406 * Returns:  A TestRes with the K statistic, which is Chi-Square distributed 
     407 * with 2 degrees of freedom under the null, and the P-value for the alternative 
     408 * that the data has skewness and kurtosis not equal to zero against the null 
     409 * that skewness and kurtosis are near zero.  A normal distribution always has 
     410 * skewness and kurtosis that converge to zero as sample size goes to infinity. 
     411 * 
     412 * References: 
     413 * D'Agostino, Ralph B., Albert Belanger, and Ralph B. D'Agostino, Jr. 
     414 * "A Suggestion for Using Powerful and Informative Tests of Normality", 
     415 * The American Statistician, Vol. 44, No. 4. (Nov., 1990), pp. 316-321. 
     416 */ 
     417TestRes dAgostinoK(T)(T range) 
     418if(realIterable!(T)) { 
     419    // You are not meant to understand this.  I sure don't.  I just implemented 
     420    // these formulas off of Wikipedia, which got them from: 
     421 
     422    // D'Agostino, Ralph B., Albert Belanger, and Ralph B. D'Agostino, Jr. 
     423    // "A Suggestion for Using Powerful and Informative Tests of Normality", 
     424    // The American Statistician, Vol. 44, No. 4. (Nov., 1990), pp. 316-321. 
     425 
     426    // Amazing.  I didn't even realize things this complicated were possible 
     427    // in 1990, before widespread computer algebra systems. 
     428 
     429    // Notation from Wikipedia.  Keeping same notation for simplicity. 
     430    real sqrtb1 = void, b2 = void, n = void; 
     431    { 
     432        auto summ = summary(range); 
     433        sqrtb1 = summ.skew; 
     434        b2 = summ.kurtosis + 3; 
     435        n = summ.N; 
     436    } 
     437 
     438    // Calculate transformed skewness. 
     439    real Y = sqrtb1 * sqrt((n + 1) * (n + 3) / (6 * (n - 2))); 
     440    real beta2b1Numer = 3 * (n * n + 27 * n - 70) * (n + 1) * (n + 3); 
     441    real beta2b1Denom = (n - 2) * (n + 5) * (n + 7) * (n + 9); 
     442    real beta2b1 = beta2b1Numer / beta2b1Denom; 
     443    real Wsq = -1 + sqrt(2 * (beta2b1 - 1)); 
     444    real delta = 1.0L / sqrt(log(sqrt(Wsq))); 
     445    real alpha = sqrt( 2.0L / (Wsq - 1)); 
     446    real Zb1 = delta * log(Y / alpha + sqrt(pow(Y / alpha, 2) + 1)); 
     447 
     448    // Calculate transformed kurtosis. 
     449    real Eb2 = 3 * (n - 1) / (n + 1); 
     450    real sigma2b2 = (24 * n * (n - 2) * (n - 3)) / ( 
     451        (n + 1) * (n + 1) * (n + 3) * (n + 5)); 
     452    real x = (b2 - Eb2) / sqrt(sigma2b2); 
     453 
     454    real sqBeta1b2 = 6 * (n * n - 5 * n + 2) / ((n + 7) * (n + 9)) * 
     455         sqrt((6 * (n + 3) * (n + 5)) / (n * (n - 2) * (n - 3))); 
     456    real A = 6 + 8 / sqBeta1b2 * (2 / sqBeta1b2 + sqrt(1 + 4 / (sqBeta1b2 * sqBeta1b2))); 
     457    real Zb2 = ((1 - 2 / (9 * A)) - 
     458        cbrt((1 - 2 / A) / (1 + x * sqrt(2 / (A - 4)))) ) * 
     459        sqrt(9 * A / 2); 
     460 
     461    real K2 = Zb1 * Zb1 + Zb2 * Zb2; 
     462    return TestRes(K2, chiSqrCDFR(K2, 2)); 
     463
     464 
     465unittest { 
     466    // Values from R's fBasics package. 
     467    int[] arr1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]; 
     468    int[] arr2 = [8,6,7,5,3,0,9,8,6,7,5,3,0,9,3,6,2,4,3,6,8]; 
     469 
     470    auto r1 = dAgostinoK(arr1); 
     471    auto r2 = dAgostinoK(arr2); 
     472 
     473    assert(approxEqual(r1.testStat, 3.1368)); 
     474    assert(approxEqual(r1.p, 0.2084)); 
     475    writeln("Passed dAgostinoK test."); 
     476
     477 
     478 
     479/**Computes Wilcoxon rank sum test statistic and P-value for 
     480 * a set of observations against another set, using the given alternative. 
     481 * Alt.LESS means that sample1 is stochastically less than sample2. 
     482 * Alt.GREATER means sample1 is stochastically greater than sample2. 
     483 * Alt.TWOSIDE means sample1 is stochastically less than or greater than 
     484 * sample2. 
     485 * 
     486 * exactThresh is the threshold value of (n1 + n2) at which this function 
     487 * switches from exact to approximate computation of the p-value.  Exact 
     488 * computation is very slow for large datasets, and the asymptotic 
     489 * approximation is good enough for all practical purposes at sample sizes much 
     490 * smaller than this.  Do not set exactThresh to more than 200, as the exact 
     491 * calculation is both very slow and not numerically stable past this point, 
     492 * and the asymptotic calculation is very good for N this large.  To disable 
     493 * exact calculation entirely, set exactThresh to 0. 
     494 * 
     495 * Notes:  Exact p-value computation is never used when ties are present in the 
     496 * data, because it is not computationally feasible. 
    271497 * 
    272498 * Input ranges for this function must define a length. 
    273499 * 
    274  * Returns:  The Wilcoxon test statistic W.*/ 
    275 real wilcoxonRankSum(T)(T sample1, T sample2, real* tieSum = null) 
     500 * This test is also known as the Mann-Whitney test. 
     501 * 
     502 * Returns:  A TestRes containing the W test statistic and the P-value against 
     503 * the given alternative.*/ 
     504TestRes wilcoxonRankSum(T)(T sample1, T sample2, Alt alt = Alt.TWOSIDE, 
     505    uint exactThresh = 50) if(isInputRange!(T) && dstats.base.hasLength!(T)) { 
     506 
     507    real tieSum; 
     508    real W = wilcoxonRankSumW(sample1, sample2, &tieSum); 
     509    real p = wilcoxonRankSumPval(W, sample1.length, sample2.length, alt, tieSum, 
     510                               exactThresh); 
     511    return TestRes(W, p); 
     512
     513 
     514 unittest { 
     515     // Values from R. 
     516 
     517    assert(wilcoxonRankSum([1, 2, 3, 4, 5].dup, [2, 4, 6, 8, 10].dup).testStat == 5); 
     518    assert(wilcoxonRankSum([2, 4, 6, 8, 10].dup, [1, 2, 3, 4, 5].dup).testStat == 20); 
     519    assert(wilcoxonRankSum([3, 7, 21, 5, 9].dup, [2, 4, 6, 8, 10].dup).testStat == 15); 
     520 
     521     // Simple stuff (no ties) first.  Testing approximate 
     522     // calculation first. 
     523     assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
     524           Alt.TWOSIDE, 0), 0.9273)); 
     525     assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
     526           Alt.LESS, 0), 0.6079)); 
     527     assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
     528           Alt.GREATER, 0), 0.4636)); 
     529     assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
     530            Alt.TWOSIDE, 0), 0.4113)); 
     531     assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
     532            Alt.LESS, 0), 0.2057)); 
     533     assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
     534            Alt.GREATER, 0), 0.8423)); 
     535     assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
     536            Alt.TWOSIDE, 0), .6745)); 
     537     assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
     538            Alt.LESS, 0), .3372)); 
     539     assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
     540            Alt.GREATER, 0), .7346)); 
     541 
     542    // Now, lots of ties. 
     543    assert(approxEqual(wilcoxonRankSum([1,2,3,4,5].dup, [2,3,4,5,6].dup, 
     544           Alt.TWOSIDE, 0), 0.3976)); 
     545    assert(approxEqual(wilcoxonRankSum([1,2,3,4,5].dup, [2,3,4,5,6].dup, 
     546           Alt.LESS, 0), 0.1988)); 
     547    assert(approxEqual(wilcoxonRankSum([1,2,3,4,5].dup, [2,3,4,5,6].dup, 
     548           Alt.GREATER, 0), 0.8548)); 
     549    assert(approxEqual(wilcoxonRankSum([1,2,1,1,2].dup, [1,2,3,1,1].dup, 
     550           Alt.TWOSIDE, 0), 0.9049)); 
     551    assert(approxEqual(wilcoxonRankSum([1,2,1,1,2].dup, [1,2,3,1,1].dup, 
     552           Alt.LESS, 0), 0.4524)); 
     553    assert(approxEqual(wilcoxonRankSum([1,2,1,1,2].dup, [1,2,3,1,1].dup, 
     554           Alt.GREATER, 0), 0.64)); 
     555 
     556    // Now, testing the exact calculation on the same data. 
     557     assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
     558       Alt.TWOSIDE), 0.9307)); 
     559     assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
     560           Alt.LESS), 0.6039)); 
     561     assert(approxEqual(wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
     562           Alt.GREATER), 0.4654)); 
     563     assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
     564            Alt.TWOSIDE), 0.4286)); 
     565     assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
     566            Alt.LESS), 0.2143)); 
     567     assert(approxEqual(wilcoxonRankSum([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
     568            Alt.GREATER), 0.8355)); 
     569     assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
     570            Alt.TWOSIDE), .6905)); 
     571     assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
     572            Alt.LESS), .3452)); 
     573     assert(approxEqual(wilcoxonRankSum([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
     574            Alt.GREATER), .7262)); 
     575    writeln("Passed wilcoxonRankSum test."); 
     576
     577 
     578real wilcoxonRankSumW(T)(T sample1, T sample2, real* tieSum = null) 
    276579if(isInputRange!(T) && dstats.base.hasLength!(T)) { 
    277580    ulong n1 = sample1.length, n2 = sample2.length, N = n1 + n2; 
     
    310613} 
    311614 
    312 unittest { 
    313     assert(wilcoxonRankSum([1, 2, 3, 4, 5].dup, [2, 4, 6, 8, 10].dup) == 5); 
    314     assert(wilcoxonRankSum([2, 4, 6, 8, 10].dup, [1, 2, 3, 4, 5].dup) == 20); 
    315     assert(wilcoxonRankSum([3, 7, 21, 5, 9].dup, [2, 4, 6, 8, 10].dup) == 15); 
    316     writeln("Passed wilcoxonRankSum test."); 
    317 } 
    318  
    319 /**Computes a P-value for a Wilcoxon rank sum test score against the given 
    320  * alternative. Alt.LESS means that mean rank(sample1) < mean rank(sample2). 
    321  * Alt.GREATER means mean rank(sample1) > mean rank(sample2).  Alt.TWOSIDE means 
    322  * mean rank(sample1) != mean rank(sample2). 
    323  * 
    324  * exactThresh 
    325  * is the threshold value of (n1 + n2) at which this function switches from 
    326  * exact to approximate computation of the p-value.   Do not set 
    327  * exactThresh to more than 200, as the exact calculation is both very slow and 
    328  * not numerically stable past this point, and the asymptotic calculation is 
    329  * very good for N this large.  To disable exact calculation entirely, set 
    330  * exactThresh to 0. 
    331  * 
    332  * Note:  Exact p-value computation is never used when tieSum > 0, i.e. when 
    333  * there were ties in the data, because it is not computationally feasible. 
    334  * In these cases, exactThresh will be ignored. 
    335  * 
    336  * Returns:  The p-value against the given alternative.*/ 
    337615real wilcoxonRankSumPval(T)(T w, ulong n1, ulong n2, Alt alt = Alt.TWOSIDE, 
    338616                           real tieSum = 0,  uint exactThresh = 50) { 
     
    361639    assert(approxEqual(wilcoxonRankSumPval(1500, 50, 50, Alt.LESS), .957903)); 
    362640    assert(approxEqual(wilcoxonRankSumPval(8500, 100, 200, Alt.LESS), .01704)); 
    363     auto w = wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup); 
     641    auto w = wilcoxonRankSumW([2,4,6,8,12].dup, [1,3,5,7,11,9].dup); 
    364642    assert(approxEqual(wilcoxonRankSumPval(w, 5, 6), 0.9273)); 
    365643    assert(approxEqual(wilcoxonRankSumPval(w, 5, 6, Alt.GREATER), 0.4636)); 
     
    367645} 
    368646 
    369 /**Computes Wilcoxon rank sum test P-value for 
    370  * a set of observations against another set, using the given alternative. 
    371  * Alt.LESS means that mean rank(sample1) < mean rank(sample2).  Alt.GREATER means 
    372  * mean rank(sample1) > mean rank(sample2).  Alt.TWOSIDE means mean rank(sample1) != 
    373  * mean rank(sample2). 
    374  * 
    375  * tieSum is a parameter that is used  internally to adjust for 
    376  * ties in the data.  It is computed by wilcoxonRankSum(). 
    377  * exactThresh is the threshold value of (n1 + n2) at which this function 
    378  * switches from exact to approximate computation of the p-value.  Exact 
    379  * computation is very slow for large datasets, and for anything but very small 
    380  * datasets, the asymptotic approximation is good enough for all practical 
    381  * purposes.  Do not set exactThresh to more than 200, as the exact 
    382  * calculation is both very slow and not numerically stable past this point, 
    383  * and the asymptotic calculation is very good for N this large.  To disable 
    384  * exact calculation entirely, set exactThresh to 0. 
    385  * 
    386  * Note:  Exact p-value computation is never used when tieSum > 0, i.e. when 
    387  * there were ties in the data, because it is not computationally feasible. 
    388  * In these cases, exactThresh will be ignored. 
    389  * 
    390  * Input ranges for this function must define a length. 
    391  * 
    392  * Returns:  The p-value against the given alternative.*/ 
    393 real wilcoxonRankSumPval(T)(T sample1, T sample2, Alt alt = Alt.TWOSIDE, 
    394     uint exactThresh = 50) if(isInputRange!(T) && dstats.base.hasLength!(T)) { 
    395  
    396     real tieSum; 
    397     real W = wilcoxonRankSum(sample1, sample2, &tieSum); 
    398     return wilcoxonRankSumPval(W, sample1.length, sample2.length, alt, tieSum, 
    399                                exactThresh); 
    400 
    401  
    402  unittest { 
    403      // Values from R.  Simple stuff (no ties) first.  Testing approximate 
    404      // calculation first. 
    405      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
    406            Alt.TWOSIDE, 0), 0.9273)); 
    407      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
    408            Alt.LESS, 0), 0.6079)); 
    409      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
    410            Alt.GREATER, 0), 0.4636)); 
    411      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
    412             Alt.TWOSIDE, 0), 0.4113)); 
    413      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
    414             Alt.LESS, 0), 0.2057)); 
    415      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
    416             Alt.GREATER, 0), 0.8423)); 
    417      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
    418             Alt.TWOSIDE, 0), .6745)); 
    419      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
    420             Alt.LESS, 0), .3372)); 
    421      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
    422             Alt.GREATER, 0), .7346)); 
    423  
    424     // Now, lots of ties. 
    425     assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup, 
    426            Alt.TWOSIDE, 0), 0.3976)); 
    427     assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup, 
    428            Alt.LESS, 0), 0.1988)); 
    429     assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup, 
    430            Alt.GREATER, 0), 0.8548)); 
    431     assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup, 
    432            Alt.TWOSIDE, 0), 0.9049)); 
    433     assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup, 
    434            Alt.LESS, 0), 0.4524)); 
    435     assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup, 
    436            Alt.GREATER, 0), 0.64)); 
    437  
    438     // Now, testing the exact calculation on the same data. 
    439      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
    440        Alt.TWOSIDE), 0.9307)); 
    441      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
    442            Alt.LESS), 0.6039)); 
    443      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 
    444            Alt.GREATER), 0.4654)); 
    445      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
    446             Alt.TWOSIDE), 0.4286)); 
    447      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
    448             Alt.LESS), 0.2143)); 
    449      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 
    450             Alt.GREATER), 0.8355)); 
    451      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
    452             Alt.TWOSIDE), .6905)); 
    453      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
    454             Alt.LESS), .3452)); 
    455      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 
    456             Alt.GREATER), .7262)); 
    457     writeln("Passed wilcoxonRankSumPval test."); 
    458  } 
    459  
    460  
    461 /* Used internally by wilcoxonRankSumPval.  This function uses dynamic 
     647/* Used internally by wilcoxonRankSum.  This function uses dynamic 
    462648 * programming to count the number of combinations of numbers [1..N] that sum 
    463649 * of length n1 that sum to <= W in O(N * W * n1) time.*/ 
     
    561747} 
    562748 
    563 /**Wilcoxon signed-rank statistic.  This is a non-parametric test for a 
    564  * difference between paired sets of numbers.  Since this is a paired test, 
    565  * before.length must equal after.length.  The tieSum parameter is 
    566  * mostly for use internally, and if included will place a value in the 
    567  * dereference that can be used in wilcoxonRankSumPval() to adjust for ties in 
    568  * the input data. 
    569  * 
    570  * Input ranges for this function must define a length. 
    571  * 
    572  * Returns:  The Wilcoxon test statistic W.*/ 
    573 real wilcoxonSignedRank(T, U)(T before, U after, real* tieSum = null) 
     749/**Computes a test statistic and P-value for a Wilcoxon signed rank test against 
     750 * the given alternative. Alt.LESS means that elements of before are stochastically 
     751 * less than corresponding elements of after.  Alt.GREATER means elements of 
     752 * before are typically greater than corresponding elements of after. 
     753 * Alt.TWOSIDE means there is a significant difference in either direction. 
     754 * 
     755 * exactThresh is the threshold value of before.length at which this function 
     756 * switches from exact to approximate computation of the p-value.   Do not set 
     757 * exactThresh to more than 200, as the exact calculation is both very slow and 
     758 * not numerically stable past this point, and the asymptotic calculation is 
     759 * very good for N this large.  To disable exact calculation entirely, set 
     760 * exactThresh to 0. 
     761 * 
     762 * Notes:  Exact p-value computation is never used when ties are present, 
     763 * because it is not computationally feasible. 
     764 * 
     765 * The input ranges for this function must define a length and must be 
     766 * forward ranges. 
     767 * 
     768 * Returns:  A TestRes of the W statistic and the p-value against the given 
     769 * alternative.*/ 
     770TestRes wilcoxonSignedRank(T, U)(T before, U after, Alt alt = Alt.TWOSIDE, uint exactThresh = 50) 
     771if(realInput!(T) && dstats.base.hasLength!(T) && isForwardRange!(T)  && 
     772 realInput!(U) && dstats.base.hasLength!(U) && isForwardRange!(U)) 
     773in { 
     774    assert(before.length == after.length); 
     775} body { 
     776    real tieSum; 
     777    real W = wilcoxonSignedRankW(before, after, &tieSum); 
     778    ulong N = before.length; 
     779    while(!before.empty && !after.empty) { 
     780        if(before.front == after.front) 
     781            N--; 
     782        before.popFront; 
     783        after.popFront; 
     784 
     785    } 
     786    return TestRes(W, wilcoxonSignedRankPval(W, N, alt, tieSum, exactThresh)); 
     787
     788 
     789unittest { 
     790    // Values from R. 
     791    alias approxEqual ae; 
     792    assert(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup).testStat == 7.5); 
     793    assert(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup).testStat == 6); 
     794    assert(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup).testStat == 5); 
     795 
     796    // With ties, normal approx. 
     797    assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup), 1)); 
     798    assert(ae(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup), 0.7865)); 
     799    assert(ae(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup), 0.5879)); 
     800    assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.LESS), 0.5562)); 
     801    assert(ae(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.LESS), 0.3932)); 
     802    assert(ae(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.LESS), 0.2940)); 
     803    assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.GREATER), 0.5562)); 
     804    assert(ae(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.GREATER), 0.706)); 
     805    assert(ae(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.GREATER), 0.7918)); 
     806 
     807    // Exact. 
     808    assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,16,32].dup), 0.625)); 
     809    assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.LESS), 0.3125)); 
     810    assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.GREATER), 0.7812)); 
     811    assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup), 0.8125)); 
     812    assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.LESS), 0.6875)); 
     813    assert(ae(wilcoxonSignedRank([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.GREATER), 0.4062)); 
     814    writeln("Passed wilcoxonSignedRank unittest."); 
     815
     816 
     817real wilcoxonSignedRankW(T, U)(T before, U after, real* tieSum = null) 
    574818if(realInput!(T) && realInput!(U) && 
    575819   dstats.base.hasLength!(T) && dstats.base.hasLength!(U)) { 
     
    629873} 
    630874 
    631 unittest { 
    632     assert(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup) == 7.5); 
    633     assert(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup) == 6); 
    634     assert(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup) == 5); 
    635     writeln("Passed wilcoxonSignedRank unittest."); 
    636 } 
    637  
    638 /**Computes a P-value for a Wilcoxon signed rank test score against the given 
    639  * alternative. Alt.LESS means that elements of before are typically less 
    640  * than corresponding elements of after.  Alt.GREATER means elements of 
    641  * before are typically greater than corresponding elements of after. 
    642  * Alt.TWOSIDE means there is a significant difference in either direction. 
    643  * 
    644  * exactThresh is the threshold value of before.length at which this function 
    645  * switches from exact to approximate computation of the p-value.   Do not set 
    646  * exactThresh to more than 200, as the exact calculation is both very slow and 
    647  * not numerically stable past this point, and the asymptotic calculation is 
    648  * very good for N this large.  To disable exact calculation entirely, set 
    649  * exactThresh to 0. 
    650  * 
    651  * Notes:  Exact p-value computation is never used when tieSum > 0, i.e. when 
    652  * there were ties in the data, because it is not computationally feasible. 
    653  * In these cases, exactThresh will be ignored. 
    654  * 
    655  * May give a different answer than calling wilcoxonSignedRank, and then 
    656  * passing the W value into wilcoxonSignedRankPval in the presence of ties 
    657  * or equal pairs. 
    658  * 
    659  * The input ranges for this function must define a length and must be 
    660  * forward ranges. 
    661  * 
    662  * Returns:  The p-value against the given alternative.*/ 
    663 real wilcoxonSignedRankPval(T)(T before, T after, 
    664       Alt alt = Alt.TWOSIDE, uint exactThresh = 50) 
    665 if(realInput!(T) && dstats.base.hasLength!(T) && isForwardRange!(T)) 
    666 in { 
    667     assert(before.length == after.length); 
    668 } body { 
    669     real tieSum; 
    670     real W = wilcoxonSignedRank(before, after, &tieSum); 
    671     ulong N = before.length; 
    672     while(!before.empty && !after.empty) { 
    673         if(before.front == after.front) 
    674             N--; 
    675         before.popFront; 
    676         after.popFront; 
    677  
    678     } 
    679     return wilcoxonSignedRankPval(W, N, alt, tieSum, exactThresh); 
    680 } 
    681  
    682 unittest { 
    683     // Values from R. 
    684     alias approxEqual ae; 
    685     // With ties, normal approx. 
    686     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup), 1)); 
    687     assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup), 0.7865)); 
    688     assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup), 0.5879)); 
    689     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.LESS), 0.5562)); 
    690     assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.LESS), 0.3932)); 
    691     assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.LESS), 0.2940)); 
    692     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.GREATER), 0.5562)); 
    693     assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.GREATER), 0.706)); 
    694     assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.GREATER), 0.7918)); 
    695  
    696     // Exact. 
    697     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup), 0.625)); 
    698     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.LESS), 0.3125)); 
    699     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.GREATER), 0.7812)); 
    700     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup), 0.8125)); 
    701     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.LESS), 0.6875)); 
    702     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.GREATER), 0.4062)); 
    703     writeln("Passed wilcoxonSignedRankPval unittest."); 
    704 } 
    705  
    706 /**Computes Wilcoxon signed rank test P-value for 
    707  * a set of observations against another set, using the given alternative. 
    708  * Alt.LESS means that elements of before are typically less than corresponding 
    709  * elements of after.  Alt.GREATER means elements of before are typically 
    710  * greater than corresponding elements of after.  Alt.TWOSIDE means that 
    711  * there is a significant difference in either direction. 
    712  * 
    713  * tieSum is a parameter that is used  internally to adjust for 
    714  * ties in the data.  It is computed by wilcoxonSignedRank(). 
    715  * exactThresh is the threshold value of N at which this function 
    716  * switches from exact to approximate computation of the p-value.  Exact 
    717  * computation is very slow for large datasets, and for anything but very small 
    718  * datasets, the asymptotic approximation is good enough for all practical 
    719  * purposes.  Do not set exactThresh to more than 200, as the exact 
    720  * calculation is both very slow and not numerically stable past this point, 
    721  * and the asymptotic calculation is very good for N this large.  To disable 
    722  * exact calculation entirely, set exactThresh to 0. 
    723  * 
    724  * Note:  Exact p-value computation is never used when tieSum > 0, i.e. when 
    725  * there were ties in the data, because it is not computationally feasible. 
    726  * In these cases, exactThresh will be ignored. 
    727  * 
    728  * Returns:  The p-value against the given alternative.*/ 
    729875real wilcoxonSignedRankPval(T)(T W, ulong N, Alt alt = Alt.TWOSIDE, 
    730876     real tieSum = 0, uint exactThresh = 50) 
     
    835981 * Alt.GREATER, meaning elements of before are typically greater than 
    836982 * elements of after, and Alt.TWOSIDE, meaning that there is a significant 
    837  * difference in either direction.*/ 
    838 real signTest(T)(T before, T after, Alt alt = Alt.TWOSIDE) 
    839 if(realInput!(T)) { 
     983 * difference in either direction. 
     984 * 
     985 * Returns:  A TestRes with the proportion of elements of before that were 
     986 * greater than the corresponding element of after, and the P-value against 
     987 * the given alternative.*/ 
     988TestRes signTest(T, U)(T before, U after, Alt alt = Alt.TWOSIDE) 
     989if(realInput!(T) && realInput!(U)) { 
    840990    uint greater, less; 
    841991    while(!before.empty && !after.empty) { 
     
    848998        after.popFront; 
    849999    } 
     1000    real propGreater = cast(real) greater / (greater + less); 
    8501001    if(alt == Alt.LESS) { 
    851         return binomialCDF(greater, less + greater, 0.5); 
     1002        return TestRes(propGreater, binomialCDF(greater, less + greater, 0.5)); 
    8521003    } else if(alt == Alt.GREATER) { 
    853         return binomialCDF(less, less + greater, 0.5); 
     1004        return TestRes(propGreater, binomialCDF(less, less + greater, 0.5)); 
    8541005    } else if(less > greater) { 
    855         return 2 * binomialCDF(greater, less + greater, 0.5); 
     1006        return TestRes(propGreater, 2 * binomialCDF(greater, less + greater, 0.5)); 
    8561007    } else if(greater > less) { 
    857         return 2 * binomialCDF(less, less + greater, 0.5); 
    858     } else return 1
     1008        return TestRes(propGreater, 2 * binomialCDF(less, less + greater, 0.5)); 
     1009    } else return TestRes(propGreater, 1)
    8591010} 
    8601011 
     
    8671018    assert(ae(signTest([5,3,4,6,8].dup, [1,2,3,4,5].dup, Alt.LESS), 1)); 
    8681019    assert(ae(signTest([5,3,4,6,8].dup, [1,2,3,4,5].dup), 0.0625)); 
    869 
    870  
    871 /**Sign test for differences between a set of values and an a priori 
    872  * median.  This is a very robust  but very low power test. 
    873  * Alternatives are Alt.LESS, meaning elements 
    874  * of data are typically less than median, 
    875  * Alt.GREATER, meaning elements of data are typically greater than 
    876  * median, and Alt.TWOSIDE, meaning that there is a significant 
    877  * difference in either direction.*/ 
    878 real signTest(T, U)(T data, U median, Alt alt = Alt.TWOSIDE) 
    879 if(realInput!(T) && isNumeric!(U)) { 
    880     uint greater, less; 
    881     foreach(elem; data) { 
    882         if(elem < median) 
    883             less++; 
    884         else if(median < elem) 
    885             greater++; 
    886         // Ignore equals. 
    887     } 
    888     if(alt == Alt.LESS) { 
    889         return binomialCDF(greater, less + greater, 0.5); 
    890     } else if(alt == Alt.GREATER) { 
    891         return binomialCDF(less, less + greater, 0.5); 
    892     } else if(less > greater) { 
    893         return 2 * binomialCDF(greater, less + greater, 0.5); 
    894     } else if(greater > less) { 
    895         return 2 * binomialCDF(less, less + greater, 0.5); 
    896     } else return 1; 
    897 
    898  
    899 unittest { 
    900     assert(approxEqual(signTest([1,2,6,7,9].dup, 2), 0.625)); 
     1020 
     1021    assert(approxEqual(signTest([1,2,6,7,9].dup, repeat(2)), 0.625)); 
     1022    assert(ae(signTest([1,2,6,7,9].dup, repeat(2)).testStat, 0.75)); 
    9011023    writeln("Passed signTest unittest."); 
     1024} 
     1025 
     1026///For chiSqrFit, is expected value range counts or proportions? 
     1027enum Expected { 
     1028    /// 
     1029    COUNT, 
     1030 
     1031    /// 
     1032    PROPORTION 
    9021033} 
    9031034 
     
    9061037 * for testing whether a set of observations fits a discrete distribution. 
    9071038 * 
    908  * Returns:  The P-value for the alternative hypothesis that observed is not 
    909  * a sample from expected against the null that observed is a sample from 
    910  * expected. 
    911  * 
    912  * Notes:  The chi-square test relies on asymptotic statistical properties 
     1039 * Returns:  A TestRes of the chi-square statistic and the P-value for the 
     1040 * alternative hypothesis that observed is not a sample from expected against 
     1041 * the null that observed is a sample from expected. 
     1042 * 
     1043 * Notes:  By default, expected is assumed to be a range of expected counts. 
     1044 * By passing Expected.PROPORTION in as the last parameter, 
     1045 * expected counts will be calculated from expected proportions internally. 
     1046 * If this feature is used, observed must be a forward range because it will 
     1047 * be iterated over twice. 
     1048 * 
     1049 * The chi-square test relies on asymptotic statistical properties 
    9131050 * and is therefore not considered valid when expected values are below 5. 
    9141051 * 
     
    9221059 * 
    9231060 * uint[] observed = [980, 1028, 1001, 964, 1102]; 
    924  * auto expected = repeat(cast(real) sum(observed) / observed.length); 
    925  * auto pVal = chiSqrFit(observed, expected); 
    926  * assert(approxEqual(pVal, 0.0207)); 
     1061 * auto expected = repeat(1.0L / observed.length); 
     1062 * auto res2 = chiSqrFit(observed, expected, Expected.PROPORTION); 
     1063 * assert(approxEqual(res2, 0.0207)); 
     1064 * assert(approxEqual(res2.testStat, 11.59)); 
    9271065 * --- 
    9281066 */ 
    929 real chiSqrFit(T, U)(T observed, U expected) 
    930 if(realInput!(T) && realInput!(U)) { 
     1067TestRes chiSqrFit(T, U)(T observed, U expected, Expected countProp = Expected.COUNT) 
     1068if(realInput!(T) && realInput!(U)) 
     1069in { 
     1070    if(countProp == Expected.COUNT) { 
     1071        assert(isForwardRange!(U)); 
     1072    } 
     1073} body { 
    9311074    uint len = 0; 
    9321075    real chiSq = 0; 
     1076    real multiplier = 1; 
     1077 
     1078    if(countProp == Expected.PROPORTION) { 
     1079        multiplier = 0; 
     1080        foreach(elem; observed) { 
     1081            multiplier += elem; 
     1082        } 
     1083    } 
     1084 
    9331085    while(!observed.empty && !expected.empty) { 
    934         real e = expected.front
     1086        real e = expected.front * multiplier
    9351087        real diff = cast(real) observed.front - e; 
    9361088        chiSq += (diff * diff) / e; 
     
    9391091        len++; 
    9401092    } 
    941     return chiSqrCDFR(chiSq, len - 1); 
     1093    return TestRes(chiSq, chiSqrCDFR(chiSq, len - 1)); 
    9421094} 
    9431095 
     
    9471099    uint[] observed = [980, 1028, 1001, 964, 1102]; 
    9481100    auto expected = repeat(cast(real) sum(observed) / observed.length); 
    949     auto pVal = chiSqrFit(observed, expected); 
    950     assert(approxEqual(pVal, 0.0207)); 
     1101    auto res = chiSqrFit(observed, expected); 
     1102    assert(approxEqual(res, 0.0207)); 
     1103    assert(approxEqual(res.testStat, 11.59)); 
     1104 
     1105    expected = repeat(1.0L / observed.length); 
     1106    auto res2 = chiSqrFit(observed, expected, Expected.PROPORTION); 
     1107    assert(approxEqual(res2, 0.0207)); 
     1108    assert(approxEqual(res2.testStat, 11.59)); 
    9511109    writeln("Passed chiSqrFit test."); 
    9521110} 
     
    10801238} 
    10811239 
    1082  
    1083  
    10841240/**Performs a Kolmogorov-Smirnov (K-S) 2-sample test and returns 
    10851241 * the D value.  The K-S test is a non-parametric test for a difference between 
    10861242 * two empirical distributions or between an empirical distribution and a 
    1087  * reference distribution. This implementation uses a signed D value to indicate 
    1088  * the direction of the difference between distributions. 
    1089  * To get the D value used in standard notation, 
    1090  * simply take the absolute value of this D value.*/ 
    1091 real ksTest(T, U)(T F, U Fprime) 
     1243 * reference distribution. 
     1244 * 
     1245 * Returns:  A TestRes with the K-S D value and a P value for the null that 
     1246 * FPrime is distribuuted identically to F against the alternative that it isn't. 
     1247 * This implementation uses a signed D value to indicate the direction of the 
     1248 * difference between distributions.  To get the D value used in standard 
     1249 * notation, simply take the absolute value of this D value. 
     1250 * 
     1251 * Bugs:  Exact calculation not implemented.  Uses asymptotic approximation.*/ 
     1252TestRes ksTest(T, U)(T F, U Fprime) 
     1253if(realInput!(T) && realInput!(U)) { 
     1254    real D = ksTestD(F, Fprime); 
     1255    return TestRes(D, ksPval(F.length, Fprime.length, D)); 
     1256
     1257 
     1258unittest { 
     1259    assert(approxEqual(ksTest([1,2,3,4,5].dup, [1,2,3,4,5].dup).testStat, 0)); 
     1260    assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,2,3,5].dup).testStat, -.2)); 
     1261    assert(approxEqual(ksTest([-1,0,2,8, 6].dup, [1,2,2,3,5].dup).testStat, .4)); 
     1262    assert(approxEqual(ksTest([1,2,3,4,5].dup, [1,2,2,3,5,7,8].dup).testStat, .2857)); 
     1263    assert(approxEqual(ksTestDestructive([1, 2, 3, 4, 4, 4, 5].dup, 
     1264           [1, 2, 3, 4, 5, 5, 5].dup).testStat, .2857)); 
     1265 
     1266    assert(approxEqual(ksTest([1, 2, 3, 4, 4, 4, 5].dup, [1, 2, 3, 4, 5, 5, 5].dup), 
     1267           .9375)); 
     1268    assert(approxEqual(ksTestDestructive([1, 2, 3, 4, 4, 4, 5].dup, 
     1269        [1, 2, 3, 4, 5, 5, 5].dup), .9375)); 
     1270    writeln("Passed ksTest 2-sample test."); 
     1271
     1272 
     1273template isArrayLike(T) { 
     1274    enum bool isArrayLike = hasSwappableElements!(T) && hasAssignableElements!(T) 
     1275        && dstats.base.hasLength!(T) && isRandomAccessRange!(T); 
     1276
     1277 
     1278/**One-sample KS test against a reference distribution, doesn't modify input 
     1279 * data.  Takes a function pointer or delegate for the CDF of refernce 
     1280 * distribution. 
     1281 * 
     1282 * Returns:  A TestRes with the K-S D value and a P value for the null that 
     1283 * Femp is a sample from F against the alternative that it isn't. This 
     1284 * implementation uses a signed D value to indicate the direction of the 
     1285 * difference between distributions.  To get the D value used in standard 
     1286 * notation, simply take the absolute value of this D value. 
     1287 * 
     1288 * Bugs:  Exact calculation not implemented.  Uses asymptotic approximation. 
     1289 * 
     1290 * Examples: 
     1291 * --- 
     1292 * auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); 
     1293 * auto empirical = [1, 2, 3, 4, 5]; 
     1294 * real res = ksTest(empirical, stdNormal); 
     1295 * --- 
     1296 */ 
     1297TestRes ksTest(T, Func)(T Femp, Func F) 
     1298if(realInput!(T) && (is(Func == function) || is(Func == delegate))) { 
     1299    real D = ksTestD(Femp, F); 
     1300    return TestRes(D, ksPval(Femp.length, D)); 
     1301
     1302 
     1303unittest { 
     1304    auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); 
     1305    assert(approxEqual(ksTest([1,2,3,4,5].dup, stdNormal).testStat, -.8413)); 
     1306    assert(approxEqual(ksTestDestructive([-1,0,2,8, 6].dup, stdNormal).testStat, -.5772)); 
     1307    auto lotsOfTies = [5,1,2,2,2,2,2,2,3,4].dup; 
     1308    assert(approxEqual(ksTest(lotsOfTies, stdNormal).testStat, -0.8772)); 
     1309 
     1310    assert(approxEqual(ksTest([0,1,2,3,4].dup, stdNormal), .03271)); 
     1311 
     1312    auto uniform01 = parametrize!(uniformCDF)(0, 1); 
     1313    assert(approxEqual(ksTestDestructive([0.1, 0.3, 0.5, 0.9, 1].dup, uniform01), 0.7591)); 
     1314 
     1315    writeln("Passed ksTest 1-sample test."); 
     1316
     1317 
     1318/**Same as ksTest, except sorts in place, avoiding memory allocations.*/ 
     1319TestRes ksTestDestructive(T, U)(T F, U Fprime) 
     1320if(isArrayLike!(T) && isArrayLike!(U)) { 
     1321    real D = ksTestDDestructive(F, Fprime); 
     1322    return TestRes(D, ksPval(F.length, Fprime.length, D)); 
     1323
     1324 
     1325///Ditto. 
     1326TestRes ksTestDestructive(T, Func)(T Femp, Func F) 
     1327if(isArrayLike!(T) && (is(Func == function) || is(Func == delegate))) { 
     1328    real D =  ksTestDDestructive(Femp, F); 
     1329    return TestRes(D, ksPval(Femp.length, D)); 
     1330
     1331 
     1332real ksTestD(T, U)(T F, U Fprime) 
    10921333if(isInputRange!(T) && isInputRange!(U)) { 
    10931334    auto TAState = TempAlloc.getState; 
     
    10961337        TempAlloc.free(TAState); 
    10971338    } 
    1098     return ksTestDestructive(tempdup(F), tempdup(Fprime)); 
    1099 
    1100  
    1101 /**Same as ksTest, but sorts input data in place instead of duplicating 
    1102  * data.  Therefore, less "safe" but doesn't allocate memory.  F, 
    1103  * Fprime must both be random access ranges w/ lengths.*/ 
    1104 real ksTestDestructive(T, U)(T F, U Fprime) 
    1105 if(isRandomAccessRange!(U) && isRandomAccessRange!(T) && 
    1106    dstats.base.hasLength!(T) && dstats.base.hasLength!(U)) { 
     1339    return ksTestDDestructive(tempdup(F), tempdup(Fprime)); 
     1340
     1341 
     1342real ksTestDDestructive(T, U)(T F, U Fprime) 
     1343if(isArrayLike!(T) && isArrayLike!(U)) { 
    11071344    qsort(F); 
    11081345    qsort(Fprime); 
     
    11281365} 
    11291366 
    1130 unittest { 
    1131     // Values from R. 
    1132     assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,3,4,5].dup), 0)); 
    1133     assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,2,3,5].dup), -.2)); 
    1134     assert(approxEqual(ksTestDestructive([-1,0,2,8, 6].dup, [1,2,2,3,5].dup), .4)); 
    1135     assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,2,3,5,7,8].dup), .2857)); 
    1136     assert(approxEqual(ksTestDestructive([1, 2, 3, 4, 4, 4, 5].dup, 
    1137            [1, 2, 3, 4, 5, 5, 5].dup), .2857)); 
    1138     writeln("Passed 2-sample ksTestDestructive."); 
    1139 
    1140  
    1141 /**One-sample KS test against a reference distribution, doesn't modify input 
    1142  * data.  Takes a function pointer or delegate for the CDF of refernce 
    1143  * distribution. 
    1144  * 
    1145  * Examples: 
    1146  * --- 
    1147  * auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); 
    1148  * auto empirical = [1, 2, 3, 4, 5]; 
    1149  * real D = ksTest(empirical, stdNormal); 
    1150  * --- 
    1151  */ 
    1152 real ksTest(T, Func)(T Femp, Func F) 
     1367real ksTestD(T, Func)(T Femp, Func F) 
    11531368if(realInput!(T) && (is(Func == function) || is(Func == delegate))) { 
    11541369    scope(exit) TempAlloc.free; 
    1155     return ksTestDestructive(tempdup(Femp), F); 
    1156 
    1157  
    1158 /**One-sample KS test, sorts in place.  Femp must be a random access range 
    1159  * with length.*/ 
    1160 real ksTestDestructive(T, Func)(T Femp, Func F) 
    1161 if(isRandomAccessRange!(T) && dstats.base.hasLength!(T) && 
    1162     (is(Func == function) || is(Func == delegate))) { 
     1370    return ksTestDDestructive(tempdup(Femp), F); 
     1371
     1372 
     1373real ksTestDDestructive(T, Func)(T Femp, Func F) 
     1374if(isArrayLike!(T) && (is(Func == function) || is(Func == delegate))) { 
    11631375    qsort(Femp); 
    11641376    real D = 0; 
     
    11731385} 
    11741386 
    1175 unittest { 
    1176     // Testing against values from R. 
    1177     auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); 
    1178     assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, stdNormal), -.8413)); 
    1179     assert(approxEqual(ksTestDestructive([-1,0,2,8, 6].dup, stdNormal), -.5772)); 
    1180     auto lotsOfTies = [5,1,2,2,2,2,2,2,3,4].dup; 
    1181     assert(approxEqual(ksTestDestructive(lotsOfTies, stdNormal), -0.8772)); 
    1182     writeln("Passed 1-sample ksTestDestructive."); 
    1183 
    1184  
    1185 /**Computes 2-sided P-val given D, N, Nprime.  N is the number of observations 
    1186  * in the first empirical distribution, Nprime is the number of observations 
    1187  * in the second empirical distribution. 
    1188  * Bugs:  Exact calculation not implemented.  Uses asymptotic approximation.*/ 
    1189 real ksPvalD(ulong N, ulong Nprime, real D) 
     1387real ksPval(ulong N, ulong Nprime, real D) 
    11901388in { 
    11911389    assert(D >= -1); 
     
    11951393} 
    11961394 
    1197 /**One-sided P-val. 
    1198  * Bugs:  Exact calculation not implemented.  Uses asymptotic approximation.*/ 
    1199 real ksPvalD(ulong N, real D) 
     1395real ksPval(ulong N, real D) 
    12001396in { 
    12011397    assert(D >= -1); 
     
    12051401} 
    12061402 
    1207 /**KS-test, returns 2-sided P-val instead of D. 
    1208  * Bugs:  Exact calculation not implemented.  Uses asymptotic approximation.*/ 
    1209 real ksPval(T, U)(T F, U Fprime) 
    1210 if(realInput!(T) && realInput!(U)) { 
    1211     return ksPvalD(F.length, Fprime.length, ksTest(F, Fprime)); 
    1212 
    1213  
    1214 unittest { 
    1215     assert(approxEqual(ksPval([1, 2, 3, 4, 4, 4, 5].dup, [1, 2, 3, 4, 5, 5, 5].dup), 
    1216            .9375)); 
    1217     writeln("Passed ksPval 2-sample test."); 
    1218 
    1219  
    1220 /**One-sided K-S test, returns P-val instead of D. 
    1221  * Bugs:  Exact calculation not implemented.  Uses asymptotic approximation.*/ 
    1222 real ksPval(T, Func)(T Femp, Func F) 
    1223 if(realInput!(T) && (is(Func == function) || is(Func == delegate))) { 
    1224     return ksPvalD(Femp.length, ksTest(Femp, F)); 
    1225 
    1226  
    1227 unittest { 
    1228     auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); 
    1229     assert(approxEqual(ksPval([0,1,2,3,4].dup, stdNormal), .03271)); 
    1230  
    1231     auto uniform01 = parametrize!(uniformCDF)(0, 1); 
    1232     assert(approxEqual(ksPval([0.1, 0.3, 0.5, 0.9, 1].dup, uniform01), 0.7591)); 
    1233  
    1234     writeln("Passed ksPval 1-sample test."); 
     1403/**Fisher's Exact test for difference in odds between rows/columns 
     1404 * in a 2x2 contingency table.  Specifically, this function tests the odds 
     1405 * ratio, which is defined, for a contingency table c, as (c[0][0] * c[1][1]) 
     1406 *  / (c[1][0] * c[0][1]).  Alternatives are Alt.LESS, meaning true odds ratio 
     1407 * < 1, Alt.GREATER, meaning true odds ratio > 1, and Alt.TWOSIDE, meaning 
     1408 * true odds ratio != 1. 
     1409 * 
     1410 * Accepts a 2x2 contingency table as an array of arrays of uints. 
     1411 * For now, only does 2x2 contingency tables. 
     1412 * 
     1413 * Returns:  A TestRes of the odds ratio and the P-value against the given 
     1414 * alternative. 
     1415 * 
     1416 * Examples: 
     1417 * --- 
     1418 * real res = fisherExact([[2u, 7], [8, 2]], Alt.LESS); 
     1419 * assert(approxEqual(res.p, 0.01852));  // Odds ratio is very small in this case. 
     1420 * assert(approxEqual(res.testStat, 4.0 / 56.0)); 
     1421 * --- 
     1422 * */ 
     1423TestRes fisherExact(T)(const T[2][2] contingencyTable, Alt alt = Alt.TWOSIDE) 
     1424if(isIntegral!(T)) { 
     1425 
     1426    static real fisherLower(const uint[2][2] contingencyTable) { 
     1427        alias contingencyTable c; 
     1428        return hypergeometricCDF(c[0][0], c[0][0] + c[0][1], c[1][0] + c[1][1], 
     1429                                 c[0][0] + c[1][0]); 
     1430    } 
     1431 
     1432    static real fisherUpper(const uint[2][2] contingencyTable) { 
     1433        alias contingencyTable c; 
     1434        return hypergeometricCDFR(c[0][0], c[0][0] + c[0][1], c[1][0] + c[1][1], 
     1435                                 c[0][0] + c[1][0]); 
     1436    } 
     1437 
     1438 
     1439    alias contingencyTable c; 
     1440    real oddsRatio = cast(real) c[0][0] * c[1][1] / c[0][1] / c[1][0]; 
     1441    if(alt == Alt.LESS) 
     1442        return TestRes(oddsRatio, fisherLower(contingencyTable)); 
     1443    else if(alt == Alt.GREATER) 
     1444        return TestRes(oddsRatio, fisherUpper(contingencyTable)); 
     1445 
     1446 
     1447    immutable uint n1 = c[0][0] + c[0][1], 
     1448                   n2 = c[1][0] + c[1][1], 
     1449                   n  = c[0][0] + c[1][0]; 
     1450 
     1451    immutable uint mode = 
     1452        cast(uint) ((cast(real) (n + 1) * (n1 + 1)) / (n1 + n2 + 2)); 
     1453    immutable real pExact = hypergeometricPMF(c[0][0], n1, n2, n); 
     1454    immutable real pMode = hypergeometricPMF(mode, n1, n2, n); 
     1455 
     1456    if(approxEqual(pExact, pMode, 1e-7)) { 
     1457        return TestRes(oddsRatio, 1); 
     1458    } else if(c[0][0] < mode) { 
     1459        immutable real pLower = hypergeometricCDF(c[0][0], n1, n2, n); 
     1460 
     1461        // Special case to prevent binary search from getting stuck. 
     1462        if(hypergeometricPMF(n, n1, n2, n) > pExact) { 
     1463            return TestRes(oddsRatio, pLower); 
     1464        } 
     1465 
     1466        // Binary search for where to begin upper half. 
     1467        uint min = mode, max = n, guess = uint.max; 
     1468        while(min != max) { 
     1469            guess = (max == min + 1 && guess == min) ? max : 
     1470                    (cast(ulong) max + cast(ulong) min) / 2UL; 
     1471 
     1472            immutable real pGuess = hypergeometricPMF(guess, n1, n2, n); 
     1473            if(pGuess <= pExact && 
     1474                hypergeometricPMF(guess - 1, n1, n2, n) > pExact) { 
     1475                break; 
     1476            } else if(pGuess < pExact) { 
     1477                max = guess; 
     1478            } else min = guess; 
     1479        } 
     1480        if(guess == uint.max && min == max) 
     1481            guess = min; 
     1482 
     1483        auto p = std.algorithm.min(pLower + 
     1484               hypergeometricCDFR(guess, n1, n2, n), 1.0L); 
     1485        return TestRes(oddsRatio, p); 
     1486    } else { 
     1487        immutable real pUpper = hypergeometricCDFR(c[0][0], n1, n2, n); 
     1488 
     1489        // Special case to prevent binary search from getting stuck. 
     1490        if(hypergeometricPMF(0, n1, n2, n) > pExact) { 
     1491            return TestRes(oddsRatio, pUpper); 
     1492        } 
     1493 
     1494        // Binary search for where to begin lower half. 
     1495        uint min = 0, max = mode, guess = uint.max; 
     1496        while(min != max) { 
     1497            guess = (max == min + 1 && guess == min) ? max : 
     1498                    (cast(ulong) max + cast(ulong) min) / 2UL; 
     1499            real pGuess = hypergeometricPMF(guess, n1, n2, n); 
     1500 
     1501            if(pGuess <= pExact && 
     1502                hypergeometricPMF(guess + 1, n1, n2, n) > pExact) { 
     1503                break; 
     1504            } else if(pGuess <= pExact) { 
     1505                min = guess; 
     1506            } else max = guess; 
     1507        } 
     1508 
     1509        if(guess == uint.max && min == max) 
     1510            guess = min; 
     1511 
     1512        auto p = std.algorithm.min(pUpper + 
     1513               hypergeometricCDF(guess, n1, n2, n), 1.0L); 
     1514        return TestRes(oddsRatio, p); 
     1515    } 
    12351516} 
    12361517 
    12371518/**Convenience function.  Converts a dynamic array to a static one, then 
    12381519 * calls the overload.*/ 
    1239 real fisherExact(T)(const T[][] contingencyTable, Alt alt = Alt.TWOSIDE) 
     1520TestRes fisherExact(T)(const T[][] contingencyTable, Alt alt = Alt.TWOSIDE) 
    12401521if(isIntegral!(T)) 
    12411522in { 
     
    12501531    newTable[1][0] = contingencyTable[1][0]; 
    12511532    return fisherExact(newTable, alt); 
    1252 } 
    1253  
    1254  
    1255 /**Fisher's Exact test for difference in odds between rows/columns 
    1256  * in a 2x2 contingency table.  Specifically, this function tests the odds 
    1257  * ratio, which is defined, for a contingency table c, as (c[0][0] * c[1][1]) 
    1258  *  / (c[1][0] * c[0][1]).  Alternatives are Alt.LESS, meaning true odds ratio 
    1259  * < 1, Alt.GREATER, meaning true odds ratio > 1, and Alt.TWOSIDE, meaning 
    1260  * true odds ratio != 1. 
    1261  * 
    1262  * Accepts a 2x2 contingency table as an array of arrays of uints. 
    1263  * For now, only does 2x2 contingency tables. 
    1264  * Examples: 
    1265  * --- 
    1266  * real res = fisherExact([[2u, 7], [8, 2]], Alt.LESS); 
    1267  * assert(approxEqual(res, 0.01852));  // Odds ratio is very small in this case. 
    1268  * --- 
    1269  * */ 
    1270 real fisherExact(T)(const T[2][2] contingencyTable, Alt alt = Alt.TWOSIDE) 
    1271 if(isIntegral!(T)) { 
    1272  
    1273     static real fisherLower(const uint[2][2] contingencyTable) { 
    1274         alias contingencyTable c; 
    1275         return hypergeometricCDF(c[0][0], c[0][0] + c[0][1], c[1][0] + c[1][1], 
    1276                                  c[0][0] + c[1][0]); 
    1277     } 
    1278  
    1279     static real fisherUpper(const uint[2][2] contingencyTable) { 
    1280         alias contingencyTable c; 
    1281         return hypergeometricCDFR(c[0][0], c[0][0] + c[0][1], c[1][0] + c[1][1], 
    1282                                  c[0][0] + c[1][0]); 
    1283     } 
    1284  
    1285  
    1286     if(alt == Alt.LESS) 
    1287         return fisherLower(contingencyTable); 
    1288     else if(alt == Alt.GREATER) 
    1289         return fisherUpper(contingencyTable); 
    1290  
    1291     alias contingencyTable c; 
    1292  
    1293     immutable uint n1 = c[0][0] + c[0][1], 
    1294                    n2 = c[1][0] + c[1][1], 
    1295                    n  = c[0][0] + c[1][0]; 
    1296  
    1297     immutable uint mode = 
    1298         cast(uint) ((cast(real) (n + 1) * (n1 + 1)) / (n1 + n2 + 2)); 
    1299     immutable real pExact = hypergeometricPMF(c[0][0], n1, n2, n); 
    1300     immutable real pMode = hypergeometricPMF(mode, n1, n2, n); 
    1301  
    1302     if(approxEqual(pExact, pMode, 1e-7)) { 
    1303         return 1; 
    1304     } else if(c[0][0] < mode) { 
    1305         immutable real pLower = hypergeometricCDF(c[0][0], n1, n2, n); 
    1306  
    1307         // Special case to prevent binary search from getting stuck. 
    1308         if(hypergeometricPMF(n, n1, n2, n) > pExact) { 
    1309             return pLower; 
    1310         } 
    1311  
    1312         // Binary search for where to begin upper half. 
    1313         uint min = mode, max = n, guess = uint.max; 
    1314         while(min != max) { 
    1315             guess = (max == min + 1 && guess == min) ? max : 
    1316                     (cast(ulong) max + cast(ulong) min) / 2UL; 
    1317  
    1318             immutable real pGuess = hypergeometricPMF(guess, n1, n2, n); 
    1319             if(pGuess <= pExact && 
    1320                 hypergeometricPMF(guess - 1, n1, n2, n) > pExact) { 
    1321                 break; 
    1322             } else if(pGuess < pExact) { 
    1323                 max = guess; 
    1324             } else min = guess; 
    1325         } 
    1326         if(guess == uint.max && min == max) 
    1327             guess = min; 
    1328  
    1329         return std.algorithm.min(pLower + 
    1330                hypergeometricCDFR(guess, n1, n2, n), 1.0L); 
    1331     } else { 
    1332         immutable real pUpper = hypergeometricCDFR(c[0][0], n1, n2, n); 
    1333  
    1334         // Special case to prevent binary search from getting stuck. 
    1335         if(hypergeometricPMF(0, n1, n2, n) > pExact) { 
    1336             return pUpper; 
    1337         } 
    1338  
    1339         // Binary search for where to begin lower half. 
    1340         uint min = 0, max = mode, guess = uint.max; 
    1341         while(min != max) { 
    1342             guess = (max == min + 1 && guess == min) ? max : 
    1343                     (cast(ulong) max + cast(ulong) min) / 2UL; 
    1344             real pGuess = hypergeometricPMF(guess, n1, n2, n); 
    1345  
    1346             if(pGuess <= pExact && 
    1347                 hypergeometricPMF(guess + 1, n1, n2, n) > pExact) { 
    1348                 break; 
    1349             } else if(pGuess <= pExact) { 
    1350                 min = guess; 
    1351             } else max = guess; 
    1352         } 
    1353  
    1354         if(guess == uint.max && min == max) 
    1355             guess = min; 
    1356  
    1357         return std.algorithm.min(pUpper + 
    1358                hypergeometricCDF(guess, n1, n2, n), 1.0L); 
    1359     } 
    13601533} 
    13611534 
     
    13941567 
    13951568    auto res = fisherExact([[19000u, 80000], [20000, 90000]]); 
     1569    assert(approxEqual(res.testStat, 1.068731)); 
    13961570    assert(approxEqual(res, 3.319e-9)); 
    13971571    res = fisherExact([[18000u, 80000], [20000, 90000]]); 
     
    15281702} 
    15291703 
    1530 /**Calculates the significance (P-value) of the given Pearson correlation 
    1531  * coefficient against the given alternative, for an input vector of length N. 
    1532  * Alternatives are Alt.LESS, meaning the true cor is < 0, Alt.GREATER, meaning 
    1533  * the true cor is > 0, and Alt.TWOSIDE, meaning the true cor != 0. 
    1534  */ 
    1535 real pcorSig(real cor, ulong N, Alt alt = Alt.TWOSIDE) { 
    1536     real t = cor / sqrt((1 - cor * cor) / (N - 2)); 
    1537  
     1704/**Tests the hypothesis that the Pearson correlation between two ranges is 
     1705 * different from some 0.  Alternatives are 
     1706 * Alt.LESS (pcor(range1, range2) < 0), Alt.GREATER (pcor(range1, range2) 
     1707 * > 0) and Alt.TWOSIDE (pcor(range1, range2) != 0). 
     1708 * 
     1709 * Returns:  A ConfInt of the estimated Pearson correlation of the two ranges, 
     1710 * the P-value against the given alternative, and the confidence interval of 
     1711 * the correlation at the level specified by confLevel.*/ 
     1712ConfInt pcorTest(T, U)(T range1, U range2, Alt alt = Alt.TWOSIDE, real confLevel = 0.95) 
     1713if(realInput!(T) && realInput!(U)) { 
     1714    OnlinePcor res; 
     1715    while(!range1.empty && !range2.empty) { 
     1716        res.put(range1.front, range2.front); 
     1717        range1.popFront; 
     1718        range2.popFront; 
     1719    } 
     1720    return finishPearsonSpearman(res.cor, res.N, alt, confLevel); 
     1721
     1722 
     1723unittest { 
     1724    // Values from R. 
     1725    auto t1 = pcorTest([1,2,3,4,5].dup, [2,1,4,3,5].dup, Alt.TWOSIDE); 
     1726    auto t2 = pcorTest([1,2,3,4,5].dup, [2,1,4,3,5].dup, Alt.LESS); 
     1727    auto t3 = pcorTest([1,2,3,4,5].dup, [2,1,4,3,5].dup, Alt.GREATER); 
     1728 
     1729    assert(approxEqual(t1.testStat, 0.8)); 
     1730    assert(approxEqual(t2.testStat, 0.8)); 
     1731    assert(approxEqual(t3.testStat, 0.8)); 
     1732 
     1733    assert(approxEqual(t1.p, 0.1041)); 
     1734    assert(approxEqual(t2.p, 0.948)); 
     1735    assert(approxEqual(t3.p, 0.05204)); 
     1736 
     1737    assert(approxEqual(t1.lowerBound, -0.2796400)); 
     1738    assert(approxEqual(t3.lowerBound, -0.06438567)); 
     1739    assert(approxEqual(t2.lowerBound, -1)); 
     1740 
     1741    assert(approxEqual(t1.upperBound, 0.9861962)); 
     1742    assert(approxEqual(t2.upperBound, 0.9785289)); 
     1743    assert(approxEqual(t3.upperBound, 1)); 
     1744 
     1745    writeln("Passed pcorSig test."); 
     1746
     1747 
     1748/**Tests the hypothesis that the Spearman correlation between two ranges is 
     1749 * different from some 0.  Alternatives are 
     1750 * Alt.LESS (scor(range1, range2) < 0), Alt.GREATER (scor(range1, range2) 
     1751 * > 0) and Alt.TWOSIDE (scor(range1, range2) != 0). 
     1752 * 
     1753 * Returns:  A TestRes containing the Spearman correlation coefficient and 
     1754 * the P-value for the given alternative. 
     1755 * 
     1756 * Bugs:  Exact P-value computation not yet implemented.  Uses asymptotic 
     1757 * approximation only.  This is good enough for most practical purposes given 
     1758 * reasonably large N, but is not perfectly accurate.  Not valid for data with 
     1759 * very large amounts of ties.  */ 
     1760TestRes scorTest(T, U)(T range1, U range2, Alt alt = Alt.TWOSIDE) 
     1761if(isInputRange!(T) && isInputRange!(U) && 
     1762   dstats.base.hasLength!(T) && dstats.base.hasLength!(U)) { 
     1763    real N = range1.length; 
     1764    return finishPearsonSpearman(scor(range1, range2), N, alt, 0); 
     1765
     1766 
     1767unittest { 
     1768    // Values from R. 
     1769    int[] arr1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]; 
     1770    int[] arr2 = [8,6,7,5,3,0,9,8,6,7,5,3,0,9,3,6,2,4,3,6,8]; 
     1771    auto t1 = scorTest(arr1, arr2, Alt.TWOSIDE); 
     1772    auto t2 = scorTest(arr1, arr2, Alt.LESS); 
     1773    auto t3 = scorTest(arr1, arr2, Alt.GREATER); 
     1774 
     1775    assert(approxEqual(t1.testStat, -0.1769406)); 
     1776    assert(approxEqual(t2.testStat, -0.1769406)); 
     1777    assert(approxEqual(t3.testStat, -0.1769406)); 
     1778 
     1779    assert(approxEqual(t1.p, 0.4429)); 
     1780    assert(approxEqual(t3.p, 0.7785)); 
     1781    assert(approxEqual(t2.p, 0.2215)); 
     1782 
     1783    writeln("Passed scorSig test."); 
     1784
     1785 
     1786private ConfInt finishPearsonSpearman(real cor, real N, Alt alt, real confLevel) { 
     1787    real denom = sqrt((1 - cor * cor) / (N - 2)); 
     1788    real t = cor / denom; 
     1789    ConfInt ret; 
     1790    ret.testStat = cor; 
     1791    real sqN = sqrt(N - 3); 
     1792    real z = sqN * atanh(cor); 
    15381793    switch(alt) { 
    15391794        case Alt.TWOSIDE: 
    1540             return 2 * min(studentsTCDF(t, N - 2), studentsTCDFR(t, N - 2)); 
     1795            ret.p = 2 * min(studentsTCDF(t, N - 2), studentsTCDFR(t, N - 2)); 
     1796            real deltaZ = invNormalCDF(0.5 * (1 - confLevel)); 
     1797            ret.lowerBound = tanh((z + deltaZ) / sqN); 
     1798            ret.upperBound = tanh((z - deltaZ) / sqN); 
     1799            break; 
    15411800        case Alt.LESS: 
    1542             return studentsTCDF(t, N - 2); 
     1801            ret.p = studentsTCDF(t, N - 2); 
     1802            real deltaZ = invNormalCDF(1 - confLevel); 
     1803            ret.lowerBound = -1; 
     1804            ret.upperBound = tanh((z - deltaZ) / sqN); 
     1805            break; 
    15431806        case Alt.GREATER: 
    1544             return studentsTCDFR(t, N - 2); 
     1807            ret.p = studentsTCDFR(t, N - 2); 
     1808            real deltaZ = invNormalCDF(1 - confLevel); 
     1809            ret.lowerBound = tanh((z + deltaZ) / sqN); 
     1810            ret.upperBound = 1; 
     1811            break; 
    15451812        default: 
    15461813            assert(0); 
    15471814    } 
    1548 
    1549  
    1550 unittest { 
    1551     // Values from R. 
    1552     assert(approxEqual(pcorSig(0.9, 5), .03739)); 
    1553     assert(approxEqual(pcorSig(0.9, 5, Alt.GREATER), .01869)); 
    1554     assert(approxEqual(pcorSig(0.9, 5, Alt.LESS), .9813)); 
    1555     writeln("Passed pcorSig test."); 
    1556 
    1557  
    1558 /**Calculates the significance (P-value) of the given Spearman rho correlation 
    1559  * coefficient against the given alternative, for an input vector of length N. 
    1560  * Alternatives are Alt.LESS, meaning the true rho is < 0, Alt.GREATER, meaning 
    1561  * the true rho is > 0, and Alt.TWOSIDE, meaning the true rho != 0. 
     1815    return ret; 
     1816
     1817 
     1818/**Tests the hypothesis that the Kendall correlation between two ranges is 
     1819 * different from some 0.  Alternatives are 
     1820 * Alt.LESS (kcor(range1, range2) < 0), Alt.GREATER (kcor(range1, range2) 
     1821 * > 0) and Alt.TWOSIDE (kcor(range1, range2) != 0). 
     1822 * 
     1823 * Returns:  A TestRes containing the Kendall correlation coefficient and 
     1824 * the P-value for the given alternative. 
    15621825 * 
    15631826 * Bugs:  Exact computation not yet implemented.  Uses asymptotic approximation 
     
    15651828 * large N, but is not perfectly accurate.  Not valid for data with very large 
    15661829 * amounts of ties.  */ 
    1567 real scorSig(real rho, ulong N, Alt alt = Alt.TWOSIDE) { 
    1568     return pcorSig(rho, N, alt); 
    1569 
    1570  
    1571 unittest { 
    1572     // Values from R.  The epsilon here will be relatively large because 
    1573     // I'm comparing my approximate function to R's exact function.  R's 
    1574     // approximate function does not use a continuity correction, and is 
    1575     // therefore quite bad. 
    1576     assert(approxEqual(scorSig(0.3984962, 20), 0.08226, 0.0, .01)); 
    1577     assert(approxEqual(scorSig(0.3984962, 20, Alt.LESS), 0.9592, 0.0, .01)); 
    1578     assert(approxEqual(scorSig(0.3984962, 20, Alt.GREATER), 0.04113, 0.0, .01)); 
    1579     writeln("Passed scorSig test."); 
    1580 
    1581  
    1582 /**Calculates the significance (P-value) of the given Kendall tau correlation 
    1583  * coefficient against the given alternative, for an input vector of length N. 
    1584  * Alternatives are Alt.LESS, meaning the true tau is < 0, Alt.GREATER, meaning 
    1585  * the true tau is > 0, and Alt.TWOSIDE, meaning the true tau != 0. 
    1586  * 
    1587  * Bugs:  Exact computation not yet implemented.  Uses asymptotic approximation 
    1588  * only.  This is good enough for most practical purposes given reasonably 
    1589  * large N, but is not perfectly accurate.  Not valid for data with very large 
    1590  * amounts of ties.  */ 
    1591 real kcorSig(real tau, ulong N, Alt alt = Alt.TWOSIDE) { 
     1830TestRes kcorTest(T, U)(T range1, U range2, Alt alt = Alt.TWOSIDE) 
     1831if(isInputRange!(T) && isInputRange!(U) && dstats.base.hasLength!(T) 
     1832 && dstats.base.hasLength!(U)) { 
     1833    real tau = kcor(range1, range2); 
     1834    real N = range1.length; 
    15921835    real cc = 2.0L / (N * (N - 1));  // Continuity correction. 
    15931836    real sd = sqrt(cast(real) (4 * N + 10) / (9 * N * (N - 1))); 
     
    15951838    switch(alt) { 
    15961839        case Alt.TWOSIDE: 
    1597             return 2 * min(normalCDF(tau + cc, 0, sd), 
    1598                            normalCDFR(tau - cc, 0, sd))
     1840            return TestRes(tau, 2 * min(normalCDF(tau + cc, 0, sd), 
     1841                           normalCDFR(tau - cc, 0, sd)))
    15991842        case Alt.LESS: 
    1600             return normalCDF(tau + cc, 0, sd); 
     1843            return TestRes(tau, normalCDF(tau + cc, 0, sd)); 
    16011844        case Alt.GREATER: 
    1602             return normalCDFR(tau - cc, 0, sd); 
     1845            return TestRes(tau, normalCDFR(tau - cc, 0, sd)); 
    16031846        default: 
    16041847            assert(0); 
     
    16071850 
    16081851unittest { 
    1609     // Values from R.  The epsilon here will be relatively large because 
    1610     // I'm comparing my approximate function to R's exact function.  R's 
    1611     // approximate function does not use a continuity correction, and is 
     1852    // Values from R.  The epsilon for P-vals will be relatively large because 
     1853    // R's approximate function does not use a continuity correction, and is 
    16121854    // therefore quite bad. 
    1613     assert(approxEqual(kcorSig(0.2105263, 20), 0.2086, 0.0, .01)); 
    1614     assert(approxEqual(kcorSig(0.2105263, 20, Alt.LESS), 0.907, 0.0, .01)); 
    1615     assert(approxEqual(kcorSig(0.2105263, 20, Alt.GREATER), 0.1043, 0.0, .01)); 
    1616     writeln("Passed kcorSig test."); 
     1855 
     1856    int[] arr1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]; 
     1857    int[] arr2 = [8,6,7,5,3,0,9,8,6,7,5,3,0,9,3,6,2,4,3,6,8]; 
     1858    auto t1 = kcorTest(arr1, arr2, Alt.TWOSIDE); 
     1859    auto t2 = kcorTest(arr1, arr2, Alt.LESS); 
     1860    auto t3 = kcorTest(arr1, arr2, Alt.GREATER); 
     1861 
     1862    assert(approxEqual(t1.testStat, -.1448010)); 
     1863    assert(approxEqual(t2.testStat, -.1448010)); 
     1864    assert(approxEqual(t3.testStat, -.1448010)); 
     1865 
     1866    assert(approxEqual(t1.p, 0.3757, 0.0, 0.02)); 
     1867    assert(approxEqual(t3.p, 0.8122, 0.0, 0.02)); 
     1868    assert(approxEqual(t2.p, 0.1878, 0.0, 0.02)); 
     1869 
     1870    writeln("Passed scorSig test."); 
    16171871} 
    16181872 
     
    16211875 * This is the most basic, intuitive version of the false discovery rate 
    16221876 * statistic, and assumes all hypotheses are independent. 
     1877 * 
    16231878 * Returns:   An array of Q-values with indices 
    16241879 * corresponding to the indices of the p-values passed in.*/ 
     
    16271882    // Not optimized at all because I can't imagine anyone writing code where 
    16281883    // FDR calculations are the main bottleneck. 
    1629     auto p = tempdup(pVals);  scope(exit) TempAlloc.free; 
    1630     auto perm = newStack!(uint)(pVals.length);  scope(exit) TempAlloc.free; 
     1884    mixin(newFrame); 
     1885    auto p = tempdup(pVals); 
     1886    auto perm = newStack!(uint)(pVals.length); 
    16311887    foreach(i, ref elem; perm) 
    16321888        elem = i;