# root/trunk/random.d

Revision 234, 39.2 kB (checked in by dsimcha, 4 years ago) |
---|

Line | |
---|---|

1 | /**Generates random samples from various probability distributions. |

2 | * These are mostly D ports of the NumPy random number generators.*/ |

3 | |

4 | /* This library is a D port of a large portion of the Numpy random number |

5 | * library. A few distributions were excluded because they were too obscure |

6 | * to be tested properly. They may be included at some point in the future. |

7 | * |

8 | * Port to D copyright 2009 David Simcha. |

9 | * |

10 | * The original C code is available under the licenses below. No additional |

11 | * restrictions shall apply to this D translation. Eventually, I will try to |

12 | * discuss the licensing issues with the original authors of Numpy and |

13 | * make this sane enough that this module can be included in Phobos without |

14 | * concern. For now, it's free enough that you can at least use it in |

15 | * personal projects without any serious issues. |

16 | * |

17 | * Main Numpy license: |

18 | * |

19 | * Copyright (c) 2005-2009, NumPy Developers. |

20 | * All rights reserved. |

21 | * |

22 | * Redistribution and use in source and binary forms, with or without |

23 | * modification, are permitted provided that the following conditions are |

24 | * met: |

25 | * |

26 | * * Redistributions of source code must retain the above copyright |

27 | * notice, this list of conditions and the following disclaimer. |

28 | * |

29 | * * Redistributions in binary form must reproduce the above |

30 | * copyright notice, this list of conditions and the following |

31 | * disclaimer in the documentation and/or other materials provided |

32 | * with the distribution. |

33 | * |

34 | * * Neither the name of the NumPy Developers nor the names of any |

35 | * contributors may be used to endorse or promote products derived |

36 | * from this software without specific prior written permission. |

37 | * |

38 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |

39 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |

40 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |

41 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |

42 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |

43 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |

44 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |

45 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |

46 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |

47 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |

48 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |

49 | * |

50 | * distribution.c license: |

51 | * |

52 | * Copyright 2005 Robert Kern (robert.kern@gmail.com) |

53 | * |

54 | * Permission is hereby granted, free of charge, to any person obtaining a |

55 | * copy of this software and associated documentation files (the |

56 | * "Software"), to deal in the Software without restriction, including |

57 | * without limitation the rights to use, copy, modify, merge, publish, |

58 | * distribute, sublicense, and/or sell copies of the Software, and to |

59 | * permit persons to whom the Software is furnished to do so, subject to |

60 | * the following conditions: |

61 | * |

62 | * The above copyright notice and this permission notice shall be included |

63 | * in all copies or substantial portions of the Software. |

64 | * |

65 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |

66 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |

67 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |

68 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |

69 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |

70 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |

71 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |

72 | */ |

73 | |

74 | /* The implementations of rHypergeometricHyp() and rHypergeometricHrua() |

75 | * were adapted from Ivan Frohne's rv.py which has this |

76 | * license: |

77 | * |

78 | * Copyright 1998 by Ivan Frohne; Wasilla, Alaska, U.S.A. |

79 | * All Rights Reserved |

80 | * |

81 | * Permission to use, copy, modify and distribute this software and its |

82 | * documentation for any purpose, free of charge, is granted subject to the |

83 | * following conditions: |

84 | * The above copyright notice and this permission notice shall be included in |

85 | * all copies or substantial portions of the software. |

86 | * |

87 | * THE SOFTWARE AND DOCUMENTATION IS PROVIDED WITHOUT WARRANTY OF ANY KIND, |

88 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO MERCHANTABILITY, FITNESS |

89 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR |

90 | * OR COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM OR DAMAGES IN A CONTRACT |

91 | * ACTION, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |

92 | * SOFTWARE OR ITS DOCUMENTATION. |

93 | */ |

94 | |

95 | /* References: |

96 | * |

97 | * Devroye, Luc. _Non-Uniform Random Variate Generation_. |

98 | * Springer-Verlag, New York, 1986. |

99 | * http://cgm.cs.mcgill.ca/~luc/rnbookindex.html |

100 | * |

101 | * Kachitvichyanukul, V. and Schmeiser, B. W. Binomial Random Variate |

102 | * Generation. Communications of the ACM, 31, 2 (February, 1988) 216. |

103 | * |

104 | * Hoermann, W. The Transformed Rejection Method for Generating Poisson Random |

105 | * Variables. Insurance: Mathematics and Economics, (to appear) |

106 | * http://citeseer.csail.mit.edu/151115.html |

107 | * |

108 | * Marsaglia, G. and Tsang, W. W. A Simple Method for Generating Gamma |

109 | * Variables. ACM Transactions on Mathematical Software, Vol. 26, No. 3, |

110 | * September 2000, Pages 363-372. |

111 | */ |

112 | |

113 | |

114 | /* Unit tests are non-deterministic. They prove that the distributions |

115 | * are reasonable by using K-S tests and summary stats, but cannot |

116 | * deterministically prove correctness.*/ |

117 | |

118 | module dstats.random; |

119 | |

120 | import std.math, std.algorithm, dstats.distrib, std.traits, std.typetuple, |

121 | std.exception, std.mathspecial; |

122 | public import std.random; //For uniform distrib. |

123 | |

124 | import dstats.alloc, dstats.base; |

125 | |

126 | version(unittest) { |

127 | import std.stdio, dstats.tests, dstats.summary, std.range; |

128 | void main() {} |

129 | } |

130 | |

131 | /**Convenience function to allow one-statement creation of arrays of random |

132 | * numbers. |

133 | * |

134 | * Examples: |

135 | * --- |

136 | * // Create an array of 10 random numbers distributed Normal(0, 1). |

137 | * auto normals = randArray!rNorm(10, 0, 1); |

138 | * --- |

139 | */ |

auto randArray(alias randFun, Args...)(size_t N, Args args) {
    // Infer the element type from randFun's return type and forward to
    // the explicitly-typed overload below.
    return randArray!(typeof(randFun(args)), randFun, Args)(N, args);
}

144 | |

unittest {
    // Just check if it compiles: exercises both the type-inferring and
    // multi-parameter instantiations of randArray.
    auto nums = randArray!rNorm(5, 0, 1);
    auto nums2 = randArray!rBinomial(10, 5, 0.5);
}

150 | |

151 | /**Allows the creation of an array of random numbers with an explicitly |

152 | * specified type. Useful, for example, when single-precision floats are all |

153 | * you need. |

154 | * |

155 | * Examples: |

156 | * --- |

157 | * // Create an array of 10 floats distributed Normal(0, 1). |

158 | * float[] normals = randArray!(float, rNorm)(10, 0, 1); |

159 | * --- |

160 | */ |

R[] randArray(R, alias randFun, Args...)(size_t N, Args args) {
    // newVoid returns uninitialized storage; every slot is assigned below
    // before the array is returned.
    auto result = newVoid!(R)(N);
    foreach(i; 0 .. N) {
        result[i] = randFun(args);
    }
    return result;
}

169 | |

170 | /// |

struct RandRange(alias randFun, T...) {
private:
    T args;                        // Distribution parameters forwarded to randFun.
    double normData = double.nan;  // TLS stuff for normal, kept per-range so the
                                   // sequence is repeatable (see popFront).
    typeof(randFun(args)) frontElem;  // Cached current element.
public:
    enum bool empty = false;       // Infinite range: never empty.

    this(T args) {
        this.args = args;
        popFront;                  // Prime frontElem with the first variate.
    }

    // Returns the current variate without advancing the range.
    @property typeof(randFun(args)) front() {
        return frontElem;
    }

    void popFront() {
        /* This is a kludge to make the contents of this range deterministic
         * given the state of the underlying random number generator without
         * a massive redesign. We store the state in this struct and
         * swap w/ the TLS data for rNorm on each call to popFront. This has to
         * be done no matter what distribution we're using b/c a lot of others
         * rely on the normal.*/
        auto lastNormPtr = &lastNorm; // Cache ptr once, avoid repeated TLS lookup.
        auto temp = *lastNormPtr; // Store old state.
        *lastNormPtr = normData; // Replace it.
        this.frontElem = randFun(args);
        normData = *lastNormPtr; // Capture the (possibly updated) cache back.
        *lastNormPtr = temp;     // Restore the thread-global state untouched.
    }

    // Forward-range support: copying the struct copies args, normData and
    // (if present among args) the RNG itself, so saved copies replay.
    @property typeof(this) save() {
        return this;
    }
}

207 | |

208 | /**Turn a random number generator function into an infinite range. |

209 | * Params is a tuple of the distribution parameters. This is specified |

210 | * in the same order as when calling the function directly. |

211 | * |

212 | * The sequence generated by this range is deterministic and repeatable given |

213 | * the state of the underlying random number generator. If the underlying |

214 | * random number generator is explicitly specified, as opposed to using the |

215 | * default thread-local global RNG, it is copied when the struct is copied. |

216 | * See below for an example of this behavior. |

217 | * |

218 | * Examples: |

219 | * --- |

220 | * // Print out some summary statistics for 10,000 Poisson-distributed |

221 | * // random numbers w/ Poisson parameter 2. |

222 | * auto gen = Random(unpredictableSeed); |

223 | * auto pois1k = take(randRange!rPoisson(2, gen), 10_000); |

224 | * writeln( summary(pois1k) ); |

225 | * writeln( summary(pois1k) ); // Exact same results as first call. |

226 | * --- |

227 | */ |

RandRange!(randFun, T) randRange(alias randFun, T...)(T params) {
    // Deliberately bypass the ctor (it's screwy) and initialize the
    // fields by hand, then prime the first element.
    typeof(return) result;
    result.args = params;
    result.popFront;
    return result;
}

235 | |

unittest {
    // The thing to test here is that the results are deterministic given
    // an underlying RNG: each block materializes the same finite range
    // twice and both passes must yield identical arrays, because the
    // range carries its own copy of the RNG and the cached normal state.

    {
        auto norms = take(randRange!rNorm(0, 1, Random(unpredictableSeed)), 99);
        auto arr1 = toArray(norms);
        auto arr2 = toArray(norms);
        assert(arr1 == arr2);
    }

    {
        auto binomSmall = take(randRange!rBinomial(20, 0.5, Random(unpredictableSeed)), 99);
        auto arr1 = toArray(binomSmall);
        auto arr2 = toArray(binomSmall);
        assert(arr1 == arr2);
    }

    {
        // Large n exercises the BTPE code path, which keeps extra TLS state.
        auto binomLarge = take(randRange!rBinomial(20000, 0.4, Random(unpredictableSeed)), 99);
        auto arr1 = toArray(binomLarge);
        auto arr2 = toArray(binomLarge);
        assert(arr1 == arr2);
    }
    writeln("Passed RandRange test.");
}

262 | |

263 | // Thread local data for normal distrib. that is preserved across calls. |

264 | private static double lastNorm = double.nan; |

265 | |

266 | /// |

double rNorm(RGen = Random)(double mean, double sd, ref RGen gen = rndGen) {
    dstatsEnforce(sd > 0, "Standard deviation must be > 0 for rNorm.");

    // Marsaglia's polar method produces two independent N(0, 1) deviates
    // per rejection loop.  One is returned immediately; the spare is
    // cached (unscaled) in the TLS variable lastNorm and consumed by the
    // next call, whatever mean/sd that call requests.
    double lr = lastNorm;
    if (!isNaN(lr)) {
        lastNorm = double.nan;   // Mark the cache empty.
        return lr * sd + mean;   // Scale the cached standard deviate.
    }

    // Rejection-sample a point uniformly inside the unit circle;
    // r2 == 0 is rejected to avoid log(0) below.
    double x1 = void, x2 = void, r2 = void;
    do {
        x1 = uniform(-1.0L, 1.0L, gen);
        x2 = uniform(-1.0L, 1.0L, gen);
        r2 = x1 * x1 + x2 * x2;
    } while (r2 > 1.0L || r2 == 0.0L);
    double f = sqrt(-2.0L * log(r2) / r2);
    lastNorm = f * x1;           // Save the spare deviate for the next call.
    return f * x2 * sd + mean;
}

286 | |

287 | |

unittest {
    // Non-deterministic sanity check: 100k N(0, 1) draws are compared to
    // the normal CDF via a K-S test and their moments printed for manual
    // inspection (see the module note on non-deterministic tests).
    auto observ = randArray!rNorm(100_000, 0, 1);
    auto ksRes = ksTest(observ, parametrize!(normalCDF)(0.0L, 1.0L));
    auto summ = summary(observ);

    writeln("100k samples from normal(0, 1): K-S P-val: ", ksRes.p);
    writeln("\tMean Expected: 0 Observed: ", summ.mean);
    writeln("\tMedian Expected: 0 Observed: ", median(observ));
    writeln("\tStdev Expected: 1 Observed: ", summ.stdev);
    writeln("\tKurtosis Expected: 0 Observed: ", summ.kurtosis);
    writeln("\tSkewness Expected: 0 Observed: ", summ.skewness);
}

300 | |

301 | /// |

double rCauchy(RGen = Random)(double X0, double gamma, ref RGen gen = rndGen) {
    dstatsEnforce(gamma > 0, "gamma must be > 0 for Cauchy distribution.");

    // The ratio of two independent standard normals is standard Cauchy;
    // scale by gamma and shift by the location parameter X0.
    immutable numer = rNorm(0, 1, gen);
    immutable denom = rNorm(0, 1, gen);
    return (numer / denom) * gamma + X0;
}

307 | |

unittest {
    // Non-deterministic sanity check; the Cauchy distribution has no
    // finite moments, so only the K-S p-value and median are meaningful.
    auto observ = randArray!rCauchy(100_000, 2, 5);
    auto ksRes = ksTest(observ, parametrize!(cauchyCDF)(2.0L, 5.0L));

    auto summ = summary(observ);
    writeln("100k samples from Cauchy(2, 5): K-S P-val: ", ksRes.p);
    writeln("\tMean Expected: N/A Observed: ", summ.mean);
    writeln("\tMedian Expected: 2 Observed: ", median(observ));
    writeln("\tStdev Expected: N/A Observed: ", summ.stdev);
    writeln("\tKurtosis Expected: N/A Observed: ", summ.kurtosis);
    writeln("\tSkewness Expected: N/A Observed: ", summ.skewness);
}

320 | |

321 | /// |

double rStudentT(RGen = Random)(double df, ref RGen gen = rndGen) {
    dstatsEnforce(df > 0, "Student's T distribution must have >0 degrees of freedom.");

    // T(df) as a standard normal divided by the square root of a scaled
    // gamma variate (equivalent to chi-square(df)/df under the hood).
    double z = rNorm(0, 1, gen);
    double g = stdGamma(df / 2, gen);
    return sqrt(df / 2) * z / sqrt(g);
}

330 | |

unittest {
    // Non-deterministic sanity check against the T(5) CDF and its
    // theoretical moments.
    auto observ = randArray!rStudentT(100_000, 5);
    auto ksRes = ksTest(observ, parametrize!(studentsTCDF)(5));

    auto summ = summary(observ);
    writeln("100k samples from T(5): K-S P-val: ", ksRes.p);
    writeln("\tMean Expected: 0 Observed: ", summ.mean);
    writeln("\tMedian Expected: 0 Observed: ", median(observ));
    writeln("\tStdev Expected: 1.2909 Observed: ", summ.stdev);
    writeln("\tKurtosis Expected: 6 Observed: ", summ.kurtosis);
    writeln("\tSkewness Expected: 0 Observed: ", summ.skewness);
}

343 | |

344 | /// |

double rFisher(RGen = Random)(double df1, double df2, ref RGen gen = rndGen) {
    dstatsEnforce(df1 > 0 && df2 > 0,
        "df1 and df2 must be >0 for the Fisher distribution.");

    // F(df1, df2) is the ratio of two chi-square variates, each scaled by
    // its degrees of freedom.
    immutable numer = rChiSquare(df1, gen) * df2;
    immutable denom = rChiSquare(df2, gen) * df1;
    return numer / denom;
}

352 | |

unittest {
    // Non-deterministic sanity check against the F(5, 7) CDF.
    auto observ = randArray!rFisher(100_000, 5, 7);
    auto ksRes = ksTest(observ, parametrize!(fisherCDF)(5, 7));
    writeln("100k samples from fisher(5, 7): K-S P-val: ", ksRes.p);
    // Mean of F(df1, df2) is df2 / (df2 - 2); 7.0 / 5 here.
    writeln("\tMean Expected: ", 7.0 / 5, " Observed: ", mean(observ));
    writeln("\tMedian Expected: ?? Observed: ", median(observ));
    writeln("\tStdev Expected: ?? Observed: ", stdev(observ));
    writeln("\tKurtosis Expected: ?? Observed: ", kurtosis(observ));
    writeln("\tSkewness Expected: ?? Observed: ", skewness(observ));
    delete observ;  // Note: delete is deprecated in newer D releases.
}

364 | |

365 | /// |

double rChiSquare(RGen = Random)(double df, ref RGen gen = rndGen) {
    dstatsEnforce(df > 0, "df must be > 0 for chiSquare distribution.");

    // Chi-square(df) is Gamma(df / 2) scaled by 2.
    return stdGamma(df / 2.0L, gen) * 2.0;
}

371 | |

unittest {
    // Non-deterministic sanity check against the chi-square(5) CDF and
    // its theoretical summary statistics.
    double df = 5;
    double[] observ = new double[100_000];
    foreach(ref elem; observ)
        elem = rChiSquare(df);
    auto ksRes = ksTest(observ, parametrize!(chiSqrCDF)(5));
    writeln("100k samples from Chi-Square: K-S P-val: ", ksRes.p);
    writeln("\tMean Expected: ", df, " Observed: ", mean(observ));
    // df - 2/3 is the standard approximation to the chi-square median.
    writeln("\tMedian Expected: ", df - (2.0L / 3.0L), " Observed: ", median(observ));
    writeln("\tStdev Expected: ", sqrt(2 * df), " Observed: ", stdev(observ));
    writeln("\tKurtosis Expected: ", 12 / df, " Observed: ", kurtosis(observ));
    writeln("\tSkewness Expected: ", sqrt(8 / df), " Observed: ", skewness(observ));
    delete observ;  // Note: delete is deprecated in newer D releases.
}

386 | |

387 | /// |

int rPoisson(RGen = Random)(double lam, ref RGen gen = rndGen) {
    dstatsEnforce(lam > 0, "lambda must be >0 for Poisson distribution.");

    // Multiplication method (ported from NumPy's rk_poisson_mult):
    // multiply uniforms until the product drops below exp(-lam).  Expected
    // cost grows with lam, so it is only used for small lam.
    static int poissonMult(ref RGen gen, double lam) {
        double U = void;

        double enlam = exp(-lam);
        int X = 0;
        double prod = 1.0;
        while (true) {
            U = uniform(0.0L, 1.0L, gen);
            prod *= U;
            if (prod > enlam) {
                X += 1;
            } else {
                return X;
            }
        }
        assert(0);  // Loop only exits via return.
    }

    enum double LS2PI = 0.91893853320467267;           // log(sqrt(2*pi)); unused here.
    enum double TWELFTH = 0.083333333333333333333333;  // 1/12; unused here.

    // Hoermann's PTRS transformed-rejection sampler (ported from NumPy's
    // rk_poisson_ptrs); O(1) expected cost, used for lam >= 10.
    static int poissonPtrs(ref RGen gen, double lam) {
        int k;
        double U = void, V = void, us = void;

        double slam = sqrt(lam);
        double loglam = log(lam);
        // Magic constants below are from Hoermann's PTRS paper.
        double b = 0.931 + 2.53*slam;
        double a = -0.059 + 0.02483*b;
        double invalpha = 1.1239 + 1.1328/(b-3.4);
        double vr = 0.9277 - 3.6224/(b-2);

        while (true) {
            U = uniform(-0.5L, 0.5L, gen);
            V = uniform(0.0L, 1.0L, gen);
            us = 0.5 - abs(U);
            // Transformed candidate value.
            k = cast(int) floor((2*a/us + b)*U + lam + 0.43);
            // Fast acceptance (squeeze).
            if ((us >= 0.07) && (V <= vr)) {
                return k;
            }
            // Fast rejection.
            if ((k < 0) || ((us < 0.013) && (V > us))) {
                continue;
            }
            // Full acceptance test against the log Poisson pmf.
            if ((log(V) + log(invalpha) - log(a/(us*us)+b)) <=
                    (-lam + k*loglam - lgamma(k+1))) {
                return k;
            }
        }
        assert(0);  // Loop only exits via return.
    }


    if (lam >= 10) {
        return poissonPtrs(gen, lam);
    } else if (lam == 0) {
        // Unreachable given the enforce above; Poisson(0) is identically 0.
        return 0;
    } else {
        return poissonMult(gen, lam);
    }
}

450 | |

unittest {
    // Non-deterministic sanity check of Poisson(15) summary statistics;
    // lambda = 15 exercises the PTRS code path (lam >= 10).
    double lambda = 15L;
    int[] observ = new int[100_000];
    foreach(ref elem; observ)
        elem = rPoisson(lambda);
    writeln("100k samples from poisson(", lambda, "):");
    writeln("\tMean Expected: ", lambda,
        " Observed: ", mean(observ));
    writeln("\tMedian Expected: ?? Observed: ", median(observ));
    writeln("\tStdev Expected: ", sqrt(lambda),
        " Observed: ", stdev(observ));
    writeln("\tKurtosis Expected: ", 1 / lambda,
        " Observed: ", kurtosis(observ));
    writeln("\tSkewness Expected: ", 1 / sqrt(lambda),
        " Observed: ", skewness(observ));
    delete observ;  // Note: delete is deprecated in newer D releases.
}

468 | |

469 | /// |

int rBernoulli(RGen = Random)(double P = 0.5, ref RGen gen = rndGen) {
    dstatsEnforce(P >= 0 && P <= 1, "P must be between 0, 1 for Bernoulli distribution.");

    // One draw from U[0, 1): success (1) with probability P, else 0.
    immutable double draw = uniform(0.0L, 1.0L, gen);
    return (draw <= P) ? 1 : 0;
}

476 | |

// Cached setup constants for the binomial samplers (rBinomialBtpe and
// rBinomialInversion), keyed by (nsave, psave).  The field names mirror
// the scratch variables of NumPy's rk_binomial_* functions; which fields
// are meaningful depends on which sampler last initialized the state
// (inversion reuses r, c and m for qn, np and the search bound).
private struct BinoState {
    bool has_binomial;  // True once a sampler has filled in this state.
    int nsave;          // n of the cached parameter set.
    double psave;       // p of the cached parameter set.
    int m;
    double r,q,fm,p1,xm,xl,xr,c,laml,lamr,p2,p3,p4;
    double a,u,v,s,F,rho,t,A,nrq,x1,x2,f1,f2,z,z2,w,w2,x;
}

485 | |

private BinoState* binoState() {
    // One lazily-allocated BinoState per thread; only the pointer lives
    // directly in TLS, the struct itself sits on the heap.
    static BinoState* stateTLS;
    if (stateTLS is null) {
        stateTLS = new BinoState;
    }
    return stateTLS;
}

497 | |

498 | |

/* Kachitvichyanukul & Schmeiser's BTPE algorithm, ported from NumPy's
 * rk_binomial_btpe (distributions.c).  The dispatcher rBinomial only calls
 * this when n * min(p, 1 - p) > 30.  Setup constants for the
 * triangle/parallelogram/exponential-tail envelope are cached in TLS
 * (binoState) so repeated draws with the same (n, p) skip initialization.
 */
private int rBinomialBtpe(RGen = Random)(int n, double p, ref RGen gen = rndGen) {
    auto state = binoState;
    double r,q,fm,p1,xm,xl,xr,c,laml,lamr,p2,p3,p4;
    double a,u,v,s,F,rho,t,A,nrq,x1,x2,f1,f2,z,z2,w,w2,x;
    int m,y,k,i;

    if (!(state.has_binomial) ||
        (state.nsave != n) ||
        (state.psave != p)) {
        /* initialize */
        state.nsave = n;
        state.psave = p;
        state.has_binomial = 1;
        state.r = r = min(p, 1.0-p);      // Work with r <= 0.5; flipped at Step60.
        state.q = q = 1.0 - r;
        state.fm = fm = n*r+r;
        state.m = m = cast(int)floor(state.fm);  // Mode of the distribution.
        state.p1 = p1 = floor(2.195*sqrt(n*r*q)-4.6*q) + 0.5;  // Triangle half-width.
        state.xm = xm = m + 0.5;
        state.xl = xl = xm - p1;
        state.xr = xr = xm + p1;
        state.c = c = 0.134 + 20.5/(15.3 + m);
        a = (fm - xl)/(fm-xl*r);
        state.laml = laml = a*(1.0 + a/2.0);     // Left tail decay rate.
        a = (xr - fm)/(xr*q);
        state.lamr = lamr = a*(1.0 + a/2.0);     // Right tail decay rate.
        state.p2 = p2 = p1*(1.0 + 2.0*c);        // Cumulative region areas.
        state.p3 = p3 = p2 + c/laml;
        state.p4 = p4 = p3 + c/lamr;
    } else {
        r = state.r;
        q = state.q;
        fm = state.fm;
        m = state.m;
        p1 = state.p1;
        xm = state.xm;
        xl = state.xl;
        xr = state.xr;
        c = state.c;
        laml = state.laml;
        lamr = state.lamr;
        p2 = state.p2;
        p3 = state.p3;
        p4 = state.p4;
    }

    /* sigh ... */
Step10:
    // Draw a region selector u and an acceptance uniform v.
    nrq = n*r*q;
    u = uniform(0.0L, p4, gen);
    v = uniform(0.0L, 1.0L, gen);
    if (u > p1) goto Step20;
    // Central triangular region: always accepted.
    y = cast(int)floor(xm - p1*v + u);
    goto Step60;

Step20:
    if (u > p2) goto Step30;
    // Parallelogram region.
    x = xl + (u - p1)/c;
    v = v*c + 1.0 - fabs(m - x + 0.5)/p1;
    if (v > 1.0) goto Step10;
    y = cast(int)floor(x);
    goto Step50;

Step30:
    if (u > p3) goto Step40;
    // Left exponential tail.
    y = cast(int)floor(xl + log(v)/laml);
    if (y < 0) goto Step10;
    v = v*(u-p2)*laml;
    goto Step50;

Step40:
    // Right exponential tail.
    y = cast(int)floor(xr - log(v)/lamr);
    if (y > n) goto Step10;
    v = v*(u-p3)*lamr;

Step50:
    // Acceptance test.  For candidates near the mode, evaluate
    // f(y)/f(m) exactly as a product of pmf ratios; otherwise use the
    // squeeze test at Step52.
    k = cast(int) fabs(y - m);
    if ((k > 20) && (k < ((nrq)/2.0 - 1))) goto Step52;

    s = r/q;
    a = s*(n+1);
    F = 1.0;
    // BUGFIX: the loops must start one past m (resp. y) — the i == m term
    // is f(m)/f(m) == 1.  Starting at i == m (as before) included a
    // spurious factor and divided by zero when m == 0.
    if (m < y) {
        for (i=m+1; i<=y; i++) {
            F *= (a/i - s);
        }
    } else if (m > y) {
        for (i=y+1; i<=m; i++) {
            F /= (a/i - s);
        }
    }
    // BUGFIX: this acceptance test applies to all three cases (m < y,
    // m > y, m == y).  It was previously nested so it only ran for
    // m == y, letting the other cases fall through into Step52.
    if (v > F) goto Step10;
    goto Step60;

Step52:
    // Squeeze test: cheap bounds on log(f(y)/f(m)) before the full
    // Stirling-series evaluation below.
    rho = (k/(nrq))*((k*(k/3.0 + 0.625) + 0.16666666666666666)/nrq + 0.5);
    t = -k*k/(2*nrq);
    A = log(v);
    if (A < (t - rho)) goto Step60;  // Accept without the expensive test.
    if (A > (t + rho)) goto Step10;  // Reject without the expensive test.

    // Final acceptance test using a Stirling-series expansion of the
    // log pmf ratio.
    x1 = y+1;
    f1 = m+1;
    z = n+1-m;
    w = n-y+1;
    x2 = x1*x1;
    f2 = f1*f1;
    z2 = z*z;
    w2 = w*w;
    if (A > (xm*log(f1/x1)
           + (n-m+0.5)*log(z/w)
           + (y-m)*log(w*r/(x1*q))
           + (13680.-(462.-(132.-(99.-140./f2)/f2)/f2)/f2)/f1/166320.
           + (13680.-(462.-(132.-(99.-140./z2)/z2)/z2)/z2)/z/166320.
           + (13680.-(462.-(132.-(99.-140./x2)/x2)/x2)/x2)/x1/166320.
           + (13680.-(462.-(132.-(99.-140./w2)/w2)/w2)/w2)/w/166320.)) {
        goto Step10;
    }

Step60:
    // Undo the p -> 1-p reflection applied during setup.
    if (p > 0.5) {
        y = n - y;
    }

    return y;
}

626 | |

// Inversion by sequential search, ported from NumPy's
// rk_binomial_inversion.  Expected cost is O(n * p); the dispatcher only
// uses it when n * p <= 30.  Setup values are cached in TLS (binoState);
// note the field reuse: state.r holds qn, state.c holds np, and state.m
// holds the search bound.
private int rBinomialInversion(RGen = Random)(int n, double p, ref RGen gen = rndGen) {
    auto state = binoState;
    double q, qn, np, px, U;
    int X, bound;

    if (!(state.has_binomial) ||
        (state.nsave != n) ||
        (state.psave != p)) {
        state.nsave = n;
        state.psave = p;
        state.has_binomial = 1;
        state.q = q = 1.0 - p;
        state.r = qn = exp(n * log(q));    // qn = P(X == 0).
        state.c = np = n*p;
        // Cap the search at mean + 10 standard deviations.
        state.m = bound = cast(int) min(n, np + 10.0*sqrt(np*q + 1));
    } else {
        q = state.q;
        qn = state.r;
        np = state.c;
        bound = cast(int) state.m;
    }
    X = 0;
    px = qn;                               // px = P(X == current X).
    U = uniform(0.0L, 1.0L, gen);
    while (U > px) {
        X++;
        if (X > bound) {
            // Ran past the bound (extremely unlikely); restart the search.
            X = 0;
            px = qn;
            U = uniform(0.0L, 1.0L, gen);
        } else {
            U -= px;
            // pmf recurrence: P(X) = P(X-1) * (n-X+1) * p / (X * q).
            px = ((n-X+1) * p * px)/(X*q);
        }
    }
    return X;
}

664 | |

665 | /// |

int rBinomial(RGen = Random)(int n, double p, ref RGen gen = rndGen) {
    dstatsEnforce(n >= 0, "n must be >= 0 for binomial distribution.");
    dstatsEnforce(p >= 0 && p <= 1, "p must be between 0, 1 for binomial distribution.");

    // Dispatch on expected count: sequential inversion is cheap when the
    // mean is small; otherwise use BTPE.  For p > 0.5, sample the
    // complementary success probability and reflect the result.
    if (p <= 0.5) {
        return (p*n <= 30.0)
            ? rBinomialInversion(n, p, gen)
            : rBinomialBtpe(n, p, gen);
    }

    immutable q = 1.0 - p;
    return (q*n <= 30.0)
        ? n - rBinomialInversion(n, q, gen)
        : n - rBinomialBtpe(n, q, gen);
}

685 | |

unittest {
    // Non-deterministic sanity check of binomial summary statistics.
    void testBinom(int n, double p) {
        int[] observ = new int[100_000];
        foreach(ref elem; observ)
            elem = rBinomial(n, p);
        writeln("100k samples from binom.(", n, ", ", p, "):");
        writeln("\tMean Expected: ", n * p,
            " Observed: ", mean(observ));
        writeln("\tMedian Expected: ", n * p, " Observed: ", median(observ));
        writeln("\tStdev Expected: ", sqrt(n * p * (1 - p)),
            " Observed: ", stdev(observ));
        writeln("\tKurtosis Expected: ", (1 - 6 * p * (1 - p)) / (n * p * (1 - p)),
            " Observed: ", kurtosis(observ));
        writeln("\tSkewness Expected: ", (1 - 2 * p) / (sqrt(n * p * (1 - p))),
            " Observed: ", skewness(observ));
        delete observ;  // Note: delete is deprecated in newer D releases.
    }

    // Large n exercises BTPE; small n exercises inversion.  Both use
    // p > 0.5 to also cover the complement/reflection path.
    testBinom(1000, 0.6);
    testBinom(3, 0.7);
}

707 | |

// Frohne's HYP algorithm, adapted (via NumPy's rk_hypergeometric_hyp)
// from Ivan Frohne's rv.py.  Cost grows with the sample size, so the
// dispatcher (rHypergeometric) only uses it for sample <= 10.
private int hypergeoHyp(RGen = Random)(int good, int bad, int sample, ref RGen gen = rndGen) {
    int Z = void;
    double U = void;

    int d1 = bad + good - sample;            // Items left undrawn.
    double d2 = cast(double)min(bad, good);  // Size of the smaller class.

    double Y = d2;
    int K = sample;
    while (Y > 0.0) {
        U = uniform(0.0L, 1.0L, gen);
        // Randomized rounding: decrement Y with probability ~ Y/(d1 + K).
        Y -= cast(int)floor(U + Y/(d1 + K));
        K--;
        if (K == 0) break;
    }
    Z = cast(int)(d2 - Y);
    // The recurrence tracks the smaller class; flip if 'good' is larger.
    if (good > bad) Z = sample - Z;
    return Z;
}

727 | |

728 | private enum double D1 = 1.7155277699214135; |

729 | private enum double D2 = 0.8989161620588988; |

// HRUA algorithm (ratio-of-uniforms with squeeze tests), ported from
// NumPy's rk_hypergeometric_hrua and incorporating Ivan Frohne's rv.py
// corrections.  O(1) expected cost; used when sample > 10.  D1 and D2
// (module-level enums above) are the standard ratio-of-uniforms hat
// constants.
private int hypergeoHrua(RGen = Random)(int good, int bad, int sample, ref RGen gen = rndGen) {
    int Z = void;
    double T = void, W = void, X = void, Y = void;

    int mingoodbad = min(good, bad);
    int popsize = good + bad;
    int maxgoodbad = max(good, bad);
    // Sample the smaller of (sample, complement); fixed up at the end.
    int m = min(sample, popsize - sample);
    double d4 = (cast(double)mingoodbad) / popsize;
    double d5 = 1.0 - d4;
    double d6 = m*d4 + 0.5;                                  // Approximate mode.
    double d7 = sqrt((popsize - m) * sample * d4 *d5 / (popsize-1) + 0.5);
    double d8 = D1*d7 + D2;                                  // Hat width.
    int d9 = cast(int)floor(cast(double)((m+1)*(mingoodbad+1))/(popsize+2));  // Exact mode.
    double d10 = (lgamma(d9+1) + lgamma(mingoodbad-d9+1) + lgamma(m-d9+1) +
        lgamma(maxgoodbad-m+d9+1));                          // -log pmf at the mode (up to a constant).
    double d11 = min(min(m, mingoodbad)+1.0, floor(d6+16*d7));  // Support cutoff.
    /* 16 for 16-decimal-digit precision in D1 and D2 */

    while (true) {
        X = uniform(0.0L, 1.0L, gen);
        Y = uniform(0.0L, 1.0L, gen);
        W = d6 + d8*(Y- 0.5)/X;                              // Candidate value.

        /* fast rejection: */
        if ((W < 0.0) || (W >= d11)) continue;

        Z = cast(int)floor(W);
        T = d10 - (lgamma(Z+1) + lgamma(mingoodbad-Z+1) + lgamma(m-Z+1) +
            lgamma(maxgoodbad-m+Z+1));                       // Log acceptance ratio.

        /* fast acceptance: */
        if ((X*(4.0-X)-3.0) <= T) break;

        /* fast rejection: */
        if (X*(X-T) >= 1) continue;

        if (2.0*log(X) <= T) break; /* acceptance */
    }

    /* this is a correction to HRUA* by Ivan Frohne in rv.py */
    if (good > bad) Z = m - Z;

    /* another fix from rv.py to allow sample to exceed popsize/2 */
    if (m < sample) Z = good - Z;

    return Z;
}

778 | |

779 | /// |

int rHypergeometric(RGen = Random)(int n1, int n2, int n, ref RGen gen = rndGen) {
    dstatsEnforce(n <= n1 + n2, "n must be <= n1 + n2 for hypergeometric distribution.");
    dstatsEnforce(n1 >= 0 && n2 >= 0 && n >= 0,
        "n, n1, n2 must be >= 0 for hypergeometric distribution.");

    // n1 = "good" items, n2 = "bad" items, n = sample size.  Small
    // samples use Frohne's HYP sequential method; larger ones use the
    // O(1)-expected-cost HRUA rejection sampler.
    return (n > 10)
        ? hypergeoHrua(n1, n2, n, gen)
        : hypergeoHyp(n1, n2, n, gen);
}

794 | |

unittest {
    // Non-deterministic sanity check of hypergeometric samples against
    // the CDF and closed-form stdev/skewness.

    // Theoretical standard deviation of hypergeometric(n1, n2, n).
    static double hyperStdev(int n1, int n2, int n) {
        return sqrt(cast(double) n * (cast(double) n1 / (n1 + n2))
            * (1 - cast(double) n1 / (n1 + n2)) * (n1 + n2 - n) / (n1 + n2 - 1));
    }

    // Theoretical skewness of hypergeometric(n1, n2, n).
    static double hyperSkew(double n1, double n2, double n) {
        double N = n1 + n2;
        alias n1 m;
        double numer = (N - 2 * m) * sqrt(N - 1) * (N - 2 * n);
        double denom = sqrt(n * m * (N - m) * (N - n)) * (N - 2);
        return numer / denom;
    }

    void testHyper(int n1, int n2, int n) {
        int[] observ = new int[100_000];
        foreach(ref elem; observ)
            elem = rHypergeometric(n1, n2, n);
        auto ksRes = ksTest(observ, parametrize!(hypergeometricCDF)(n1, n2, n));
        writeln("100k samples from hypergeom.(", n1, ", ", n2, ", ", n, "):");
        writeln("\tMean Expected: ", n * cast(double) n1 / (n1 + n2),
            " Observed: ", mean(observ));
        writeln("\tMedian Expected: ?? Observed: ", median(observ));
        writeln("\tStdev Expected: ", hyperStdev(n1, n2, n),
            " Observed: ", stdev(observ));
        writeln("\tKurtosis Expected: ?? Observed: ", kurtosis(observ));
        writeln("\tSkewness Expected: ", hyperSkew(n1, n2, n), " Observed: ", skewness(observ));
        delete observ;  // Note: delete is deprecated in newer D releases.
    }

    // Small sample exercises hypergeoHyp; large sample exercises hypergeoHrua.
    testHyper(4, 5, 2);
    testHyper(120, 105, 70);
}

829 | |

// Sequential CDF search: walk the geometric pmf until the running CDF
// exceeds a single uniform draw.  Efficient when p is large.
private int rGeomSearch(RGen = Random)(double p, ref RGen gen = rndGen) {
    immutable q = 1.0 - p;
    immutable double target = uniform(0.0L, 1.0L, gen);
    double pmf = p;  // P(X == result) for the current candidate.
    double cdf = p;  // P(X <= result).
    int result = 1;
    while (target > cdf) {
        pmf *= q;
        cdf += pmf;
        result++;
    }
    return result;
}

842 | |

// Inversion method: X = ceil(log(1 - U) / log(1 - p)) for U ~ U[0, 1).
// Constant cost, preferred when p is small.
private int rGeomInvers(RGen = Random)(double p, ref RGen gen = rndGen) {
    immutable u = uniform(0.0L, 1.0L, gen);
    return cast(int)ceil(log(1.0-u)/log(1.0-p));
}

846 | |

int rGeometric(RGen = Random)(double p, ref RGen gen = rndGen) {
    dstatsEnforce(p >= 0 && p <= 1, "p must be between 0, 1 for geometric distribution.");

    // For large p the expected value is small, so sequential search is
    // cheap; for small p, inversion has constant cost.
    enum double searchCutoff = 0.333333333333333333333333;
    return (p >= searchCutoff)
        ? rGeomSearch(p, gen)
        : rGeomInvers(p, gen);
}

856 | |

unittest {

    // Draws 100k geometric samples and prints observed vs. closed-form stats.
    void testGeom(double p) {
        auto samples = new int[100_000];
        for (size_t i = 0; i < samples.length; i++) {
            samples[i] = rGeometric(p);
        }
        writeln("100k samples from geometric.(", p, "):");
        writeln("\tMean Expected: ", 1 / p,
            " Observed: ", mean(samples));
        writeln("\tMedian Expected: ", ceil(-log(2) / log(1 - p)),
            " Observed: ", median(samples));
        writeln("\tStdev Expected: ", sqrt((1 - p) / (p * p)),
            " Observed: ", stdev(samples));
        writeln("\tKurtosis Expected: ", 6 + (p * p) / (1 - p),
            " Observed: ", kurtosis(samples));
        writeln("\tSkewness Expected: ", (2 - p) / sqrt(1 - p),
            " Observed: ", skewness(samples));
        delete samples;
    }

    testGeom(0.1);
    testGeom(0.74);

}

881 | |

///
int rNegBinom(RGen = Random)(double n, double p, ref RGen gen = rndGen) {
    dstatsEnforce(n >= 0, "n must be >= 0 for negative binomial distribution.");
    dstatsEnforce(p >= 0 && p <= 1,
        "p must be between 0, 1 for negative binomial distribution.");

    // Gamma-Poisson mixture:  a Poisson deviate whose mean is itself
    // gamma(n) distributed, scaled by (1-p)/p, is negative binomial.
    double lambda = stdGamma(n, gen) * ((1 - p) / p);
    return rPoisson(lambda, gen);
}

892 | |

unittest {
    // Fix: gen was seeded with unpredictableSeed but never passed to
    // rNegBinom, so sampling silently used the default rndGen.  It is now
    // threaded through the call.
    Random gen;
    gen.seed(unpredictableSeed);
    double p = 0.3L;
    int n = 30;
    int[] observ = new int[100_000];
    foreach(ref elem; observ)
        elem = rNegBinom(n, p, gen);
    writeln("100k samples from neg. binom.(", n,", ", p, "):");
    writeln("\tMean Expected: ", n * (1 - p) / p,
        " Observed: ", mean(observ));
    writeln("\tMedian Expected: ?? Observed: ", median(observ));
    writeln("\tStdev Expected: ", sqrt(n * (1 - p) / (p * p)),
        " Observed: ", stdev(observ));
    writeln("\tKurtosis Expected: ", (6 - p * (6 - p)) / (n * (1 - p)),
        " Observed: ", kurtosis(observ));
    writeln("\tSkewness Expected: ", (2 - p) / sqrt(n * (1 - p)),
        " Observed: ", skewness(observ));
    delete observ;
}

913 | |

///
double rLaplace(RGen = Random)(double mu = 0, double b = 1, ref RGen gen = rndGen) {
    dstatsEnforce(b > 0, "b must be > 0 for Laplace distribution.");

    // Inversion method:  push a uniform deviate through the inverse CDF.
    double u = uniform(0.0L, 1.0L, gen);
    return invLaplaceCDF(u, mu, b);
}

921 | |

unittest {
    // Fix: gen was seeded with unpredictableSeed but never passed to
    // rLaplace, so sampling silently used the default rndGen.  It is now
    // threaded through the call (mu = 0, b = 1 are the former defaults).
    Random gen;
    gen.seed(unpredictableSeed);
    double[] observ = new double[100_000];
    foreach(ref elem; observ)
        elem = rLaplace(0, 1, gen);
    auto ksRes = ksTest(observ, parametrize!(laplaceCDF)(0.0L, 1.0L));
    writeln("100k samples from Laplace(0, 1): K-S P-val: ", ksRes.p);
    writeln("\tMean Expected: 0 Observed: ", mean(observ));
    writeln("\tMedian Expected: 0 Observed: ", median(observ));
    writeln("\tStdev Expected: 1.414 Observed: ", stdev(observ));
    writeln("\tKurtosis Expected: 3 Observed: ", kurtosis(observ));
    writeln("\tSkewness Expected: 0 Observed: ", skewness(observ));
    delete observ;
}

937 | |

///
double rExponential(RGen = Random)(double lambda, ref RGen gen = rndGen) {
    dstatsEnforce(lambda > 0, "lambda must be > 0 for exponential distribution.");

    double p = uniform(0.0L, 1.0L, gen);
    // uniform's default bounds are [0, 1):  p may be exactly 0 but never 1.
    // -log(1 - p) is therefore always finite, whereas the old -log(p) could
    // return +infinity when p == 0.  (This matches NumPy's
    // rk_standard_exponential.)
    return -log(1.0 - p) / lambda;
}

945 | |

unittest {
    auto samples = new double[100_000];
    for (size_t i = 0; i < samples.length; i++) {
        samples[i] = rExponential(2.0L);
    }
    // Exponential(lambda) is gamma with shape 1 and rate lambda.
    auto ks = ksTest(samples, parametrize!(gammaCDF)(2, 1));
    writeln("100k samples from exponential(2): K-S P-val: ", ks.p);
    writeln("\tMean Expected: 0.5 Observed: ", mean(samples));
    writeln("\tMedian Expected: 0.3465 Observed: ", median(samples));
    writeln("\tStdev Expected: 0.5 Observed: ", stdev(samples));
    writeln("\tKurtosis Expected: 6 Observed: ", kurtosis(samples));
    writeln("\tSkewness Expected: 2 Observed: ", skewness(samples));
    delete samples;
}

959 | |

/* Samples from the standard gamma distribution (scale == 1) with the given
 * shape parameter.  Three regimes:
 *
 *   shape == 1:  plain standard exponential.
 *   shape < 1:   rejection method combining a uniform and an exponential
 *                deviate (as in NumPy's rk_standard_gamma).
 *   shape > 1:   Marsaglia & Tsang (2000) squeeze/rejection method driven
 *                by a transformed normal deviate.
 */
private double stdGamma(RGen = Random)(double shape, ref RGen gen) {
    double b = void, c = void;
    double U = void, V = void, X = void, Y = void;

    if (shape == 1.0) {
        return rExponential(1.0, gen);
    } else if (shape < 1.0) {
        for (;;) {
            U = uniform(0.0L, 1.0, gen);
            V = rExponential(1.0, gen);
            if (U <= 1.0 - shape) {
                // Candidate from the power-function part of the density.
                X = pow(U, 1.0/shape);
                if (X <= V) {
                    return X;
                }
            } else {
                // Candidate from the exponential tail.
                Y = -log((1-U)/shape);
                X = pow(1.0 - shape + shape*Y, 1./shape);
                if (X <= (V + Y)) {
                    return X;
                }
            }
        }
    } else {
        // Marsaglia-Tsang constants:  d = shape - 1/3, c = 1/sqrt(9d).
        b = shape - 1./3.;
        c = 1./sqrt(9*b);
        for (;;) {
            // Draw a normal deviate and form V = (1 + c*X)^3, rejecting
            // the (rare) non-positive values of 1 + c*X.
            do {
                X = rNorm(0.0L, 1.0L, gen);
                V = 1.0 + c*X;
            } while (V <= 0.0);

            V = V*V*V;
            U = uniform(0.0L, 1.0L, gen);
            // Cheap squeeze test first; exact log-density test as fallback.
            if (U < 1.0 - 0.0331*(X*X)*(X*X)) return (b*V);
            if (log(U) < 0.5*X*X + b*(1. - V + log(V))) return (b*V);
        }
    }
}

999 | |

///
double rGamma(RGen = Random)(double a, double b, ref RGen gen = rndGen) {
    dstatsEnforce(a > 0, "a must be > 0 for gamma distribution.");
    dstatsEnforce(b > 0, "b must be > 0 for gamma distribution.");

    // b acts as the shape parameter, a as the rate (inverse scale):
    // a standard gamma(b) deviate divided by a.
    double standard = stdGamma(b, gen);
    return standard / a;
}

1007 | |

unittest {
    auto samples = new double[100_000];
    for (size_t i = 0; i < samples.length; i++) {
        samples[i] = rGamma(2.0L, 3.0L);
    }
    auto ks = ksTest(samples, parametrize!(gammaCDF)(2, 3));
    writeln("100k samples from gamma(2, 3): K-S P-val: ", ks.p);
    writeln("\tMean Expected: 1.5 Observed: ", mean(samples));
    writeln("\tMedian Expected: ?? Observed: ", median(samples));
    writeln("\tStdev Expected: 0.866 Observed: ", stdev(samples));
    writeln("\tKurtosis Expected: 2 Observed: ", kurtosis(samples));
    writeln("\tSkewness Expected: 1.15 Observed: ", skewness(samples));
    delete samples;
}

1021 | |

///
double rBeta(RGen = Random)(double a, double b, ref RGen gen = rndGen) {
    dstatsEnforce(a > 0, "a must be > 0 for beta distribution.");
    dstatsEnforce(b > 0, "b must be > 0 for beta distribution.");

    if ((a <= 1.0) && (b <= 1.0)) {
        double U, V, X, Y;
        /* Use Johnk's algorithm */

        while (1) {
            U = uniform(0.0L, 1.0L, gen);
            V = uniform(0.0L, 1.0L, gen);
            X = pow(U, 1.0/a);
            Y = pow(V, 1.0/b);

            // Reject X + Y == 0 (both uniforms exactly zero), which would
            // otherwise pass the <= 1.0 test and return 0/0 == NaN.
            if ((X + Y) <= 1.0 && (X + Y) > 0.0) {
                return X / (X + Y);
            }
        }
    } else {
        // Ratio of two standard gamma deviates:
        // gamma(a) / (gamma(a) + gamma(b)) is beta(a, b).
        double Ga = stdGamma(a, gen);
        double Gb = stdGamma(b, gen);
        return Ga/(Ga + Gb);
    }
    assert(0);  // Unreachable: both branches return.
}

1050 | |

unittest {
    double delegate(double) paramBeta(double a, double b) {
        double parametrizedBeta(double x) {
            return betaIncomplete(a, b, x);
        }
        // Fix: this line was corrupted to "¶metrizedBeta" by HTML-entity
        // mangling of "&para...".
        return &parametrizedBeta;
    }

    // Closed-form stdev of beta(a, b).
    static double betaStdev(double a, double b) {
        return sqrt(a * b / ((a + b) * (a + b) * (a + b + 1)));
    }

    // Closed-form skewness of beta(a, b).
    static double betaSkew(double a, double b) {
        auto numer = 2 * (b - a) * sqrt(a + b + 1);
        auto denom = (a + b + 2) * sqrt(a * b);
        return numer / denom;
    }

    // Closed-form excess kurtosis of beta(a, b).
    static double betaKurtosis(double a, double b) {
        double numer = a * a * a - a * a * (2 * b - 1) + b * b * (b + 1) - 2 * a * b * (b + 2);
        double denom = a * b * (a + b + 2) * (a + b + 3);
        return 6 * numer / denom;
    }

    void testBeta(double a, double b) {
        double[] observ = new double[100_000];
        foreach(ref elem; observ)
            elem = rBeta(a, b);
        auto ksRes = ksTest(observ, paramBeta(a, b));
        auto summ = summary(observ);
        writeln("100k samples from beta(", a, ", ", b, "): K-S P-val: ", ksRes.p);
        writeln("\tMean Expected: ", a / (a + b), " Observed: ", summ.mean);
        writeln("\tMedian Expected: ?? Observed: ", median(observ));
        writeln("\tStdev Expected: ", betaStdev(a, b), " Observed: ", summ.stdev);
        writeln("\tKurtosis Expected: ", betaKurtosis(a, b), " Observed: ", summ.kurtosis);
        writeln("\tSkewness Expected: ", betaSkew(a, b), " Observed: ", summ.skewness);
        delete observ;
    }

    testBeta(0.5, 0.7);   // exercises the Johnk branch (a, b <= 1)
    testBeta(5, 3);       // exercises the gamma-ratio branch
}

1093 | |

///
double rLogistic(RGen = Random)(double loc, double scale, ref RGen gen = rndGen) {
    dstatsEnforce(scale > 0, "scale must be > 0 for logistic distribution.");

    // uniform's default bounds are [0, 1), so U == 0 is possible and would
    // send log(U / (1 - U)) to -infinity.  Reject it.
    double U;
    do {
        U = uniform(0.0L, 1.0L, gen);
    } while (U == 0.0);
    return loc + scale * log(U/(1.0 - U));
}

1101 | |

unittest {
    double[] observ = new double[100_000];
    foreach(ref elem; observ)
        elem = rLogistic(2.0L, 3.0L);
    auto ksRes = ksTest(observ, parametrize!(logisticCDF)(2, 3));
    writeln("100k samples from logistic(2, 3): K-S P-val: ", ksRes.p);
    writeln("\tMean Expected: 2 Observed: ", mean(observ));
    writeln("\tMedian Expected: 2 Observed: ", median(observ));
    // Fix: PI * PI * 3 is the VARIANCE of logistic(loc=2, scale=3)
    // (scale^2 * pi^2 / 3 == 3 * pi^2); the expected stdev is its sqrt.
    writeln("\tStdev Expected: ", sqrt(PI * PI * 3), " Observed: ", stdev(observ));
    writeln("\tKurtosis Expected: 1.2 Observed: ", kurtosis(observ));
    writeln("\tSkewness Expected: 0 Observed: ", skewness(observ));
    delete observ;
}

1115 | |

///
double rLogNorm(RGen = Random)(double mu, double sigma, ref RGen gen = rndGen) {
    dstatsEnforce(sigma > 0, "sigma must be > 0 for log-normal distribution.");

    // exp of a normal(mu, sigma) deviate is by definition log-normal.
    double normalDeviate = rNorm(mu, sigma, gen);
    return exp(normalDeviate);
}

1122 | |

unittest {
    auto samples = randArray!rLogNorm(100_000, -2, 1);
    auto ks = ksTest(samples, paramFunctor!(logNormalCDF)(-2, 1));

    auto stats = summary(samples);
    writeln("100k samples from log-normal(-2, 1): K-S P-val: ", ks.p);
    writeln("\tMean Expected: ", exp(-1.5), " Observed: ", stats.mean);
    writeln("\tMedian Expected: ", exp(-2.0L), " Observed: ", median(samples));
    writeln("\tStdev Expected: ", sqrt((exp(1.) - 1) * exp(-4.0L + 1)),
        " Observed: ", stats.stdev);
    writeln("\tKurtosis Expected: ?? Observed: ", stats.kurtosis);
    writeln("\tSkewness Expected: ", (exp(1.) + 2) * sqrt(exp(1.) - 1),
        " Observed: ", stats.skewness);
}

1137 | |

///
double rWeibull(RGen = Random)(double shape, double scale = 1, ref RGen gen = rndGen) {
    dstatsEnforce(shape > 0, "shape must be > 0 for weibull distribution.");
    dstatsEnforce(scale > 0, "scale must be > 0 for weibull distribution.");

    // A standard exponential deviate raised to 1/shape is standard Weibull;
    // multiply by scale to stretch it.
    double e = rExponential(1, gen);
    return scale * pow(e, 1. / shape);
}

1145 | |

unittest {
    auto samples = new double[100_000];
    for (size_t i = 0; i < samples.length; i++) {
        samples[i] = rWeibull(2.0L, 3.0L);
    }
    auto ks = ksTest(samples, parametrize!(weibullCDF)(2.0, 3.0));
    writeln("100k samples from weibull(2, 3): K-S P-val: ", ks.p);
    delete samples;
}

1154 | |

///
double rWald(RGen = Random)(double mu, double lambda, ref RGen gen = rndGen) {
    dstatsEnforce(mu > 0, "mu must be > 0 for Wald distribution.");
    dstatsEnforce(lambda > 0, "lambda must be > 0 for Wald distribution.");

    // Transformation method (cf. Michael, Schucany & Haas):  square a
    // normal deviate, solve for one root of the resulting quadratic, then
    // choose between the two roots with probability mu / (mu + X).
    double halfRatio = mu / (2 * lambda);
    double Y = rNorm(0, 1, gen);
    Y = mu * Y * Y;
    double X = mu + halfRatio * (Y - sqrt(4 * lambda * Y + Y * Y));
    double U = uniform(0.0L, 1.0L, gen);
    return (U <= mu / (mu + X)) ? X : (mu * mu / X);
}

1176 | |

unittest {
    auto samples = randArray!rWald(100_000, 4, 7);
    auto ks = ksTest(samples, parametrize!(waldCDF)(4, 7));

    auto stats = summary(samples);
    writeln("100k samples from wald(4, 7): K-S P-val: ", ks.p);
    writeln("\tMean Expected: ", 4, " Observed: ", stats.mean);
    writeln("\tMedian Expected: ?? Observed: ", median(samples));
    writeln("\tStdev Expected: ", sqrt(64.0 / 7), " Observed: ", stats.stdev);
    writeln("\tKurtosis Expected: ", 15.0 * 4 / 7, " Observed: ", stats.kurtosis);
    writeln("\tSkewness Expected: ", 3 * sqrt(4.0 / 7), " Observed: ", stats.skewness);
}

1189 | |

///
double rRayleigh(RGen = Random)(double mode, ref RGen gen = rndGen) {
    dstatsEnforce(mode > 0, "mode must be > 0 for Rayleigh distribution.");

    // Inversion:  the Rayleigh CDF is 1 - exp(-x^2 / (2 * mode^2)).
    // 1 - u stays in (0, 1] for u in [0, 1), so the log is always finite.
    double u = uniform(0.0L, 1.0L, gen);
    return mode * sqrt(-2.0 * log(1.0 - u));
}

1196 | |

unittest {
    auto samples = randArray!rRayleigh(100_000, 3);
    auto ks = ksTest(samples, parametrize!(rayleighCDF)(3));
    writeln("100k samples from rayleigh(3): K-S P-val: ", ks.p);
}

**Note:**See TracBrowser for help on using the browser.