| 1 |
/**This module contains a small but growing library for performing kernel |
|---|
| 2 |
* density estimation. |
|---|
| 3 |
* |
|---|
| 4 |
* Author: David Simcha |
|---|
| 5 |
*/ |
|---|
| 6 |
/* |
|---|
| 7 |
* License: |
|---|
| 8 |
* Boost Software License - Version 1.0 - August 17th, 2003 |
|---|
| 9 |
* |
|---|
| 10 |
* Permission is hereby granted, free of charge, to any person or organization |
|---|
| 11 |
* obtaining a copy of the software and accompanying documentation covered by |
|---|
| 12 |
* this license (the "Software") to use, reproduce, display, distribute, |
|---|
| 13 |
* execute, and transmit the Software, and to prepare derivative works of the |
|---|
| 14 |
* Software, and to permit third-parties to whom the Software is furnished to |
|---|
| 15 |
* do so, all subject to the following: |
|---|
| 16 |
* |
|---|
| 17 |
* The copyright notices in the Software and this entire statement, including |
|---|
| 18 |
* the above license grant, this restriction and the following disclaimer, |
|---|
| 19 |
* must be included in all copies of the Software, in whole or in part, and |
|---|
| 20 |
* all derivative works of the Software, unless such copies or derivative |
|---|
| 21 |
* works are solely in the form of machine-executable object code generated by |
|---|
| 22 |
* a source language processor. |
|---|
| 23 |
* |
|---|
| 24 |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|---|
| 25 |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|---|
| 26 |
* FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT |
|---|
| 27 |
* SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE |
|---|
| 28 |
* FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, |
|---|
| 29 |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
|---|
| 30 |
* DEALINGS IN THE SOFTWARE. |
|---|
| 31 |
*/ |
|---|
| 32 |
module dstats.kerneldensity; |
|---|
| 33 |
|
|---|
| 34 |
import std.conv, std.math, std.algorithm, std.exception, std.traits, std.range, |
|---|
| 35 |
std.array, std.typetuple, dstats.distrib; |
|---|
| 36 |
|
|---|
| 37 |
import dstats.alloc, dstats.base, dstats.summary; |
|---|
| 38 |
|
|---|
| 39 |
version(unittest) { |
|---|
| 40 |
|
|---|
| 41 |
import dstats.random, std.stdio; |
|---|
| 42 |
|
|---|
| 43 |
void main() {} |
|---|
| 44 |
} |
|---|
| 45 |
|
|---|
| 46 |
/**Estimates densities in the 1-dimensional case. The 1-D case is special |
|---|
| 47 |
* enough to be treated as a special case, since it's very common and enables |
|---|
| 48 |
* some significant optimizations that are otherwise not feasible. |
|---|
| 49 |
* |
|---|
| 50 |
* Under the hood, this works by binning the data into a large number of bins |
|---|
| 51 |
* (currently 1,000), convolving it with the kernel function to smooth it, and |
|---|
| 52 |
* then using linear interpolation to evaluate the density estimates. This |
|---|
| 53 |
* will produce results that are different from the textbook definition of |
|---|
| 54 |
* kernel density estimation, but to an extent that's negligible in most cases. |
|---|
| 55 |
* It also means that constructing this object is relatively expensive, but |
|---|
| 56 |
* evaluating a density estimate can be done in O(1) time complexity afterwords. |
|---|
| 57 |
*/ |
|---|
| 58 |
class KernelDensity1D { |
|---|
| 59 |
private: |
|---|
| 60 |
immutable double[] bins; |
|---|
| 61 |
immutable double[] cumulative; |
|---|
| 62 |
immutable double minElem; |
|---|
| 63 |
immutable double maxElem; |
|---|
| 64 |
immutable double diffNeg1Nbin; |
|---|
| 65 |
|
|---|
| 66 |
|
|---|
| 67 |
this(immutable double[] bins, immutable double[] cumulative, |
|---|
| 68 |
double minElem, double maxElem) { |
|---|
| 69 |
this.bins = bins; |
|---|
| 70 |
this.cumulative = cumulative; |
|---|
| 71 |
this.minElem = minElem; |
|---|
| 72 |
this.maxElem = maxElem; |
|---|
| 73 |
this.diffNeg1Nbin = bins.length / (maxElem - minElem); |
|---|
| 74 |
} |
|---|
| 75 |
|
|---|
| 76 |
private static double findEdgeBuffer(C)(C kernel) { |
|---|
| 77 |
// Search for the approx. point where the kernel's density is 0.001 * |
|---|
| 78 |
// what it is at zero. |
|---|
| 79 |
immutable zeroVal = kernel(0); |
|---|
| 80 |
double ret = 1; |
|---|
| 81 |
|
|---|
| 82 |
double factor = 4; |
|---|
| 83 |
double kernelVal; |
|---|
| 84 |
|
|---|
| 85 |
do { |
|---|
| 86 |
while(kernel(ret) / zeroVal > 1e-3) { |
|---|
| 87 |
ret *= factor; |
|---|
| 88 |
} |
|---|
| 89 |
|
|---|
| 90 |
factor = (factor - 1) / 2 + 1; |
|---|
| 91 |
while(kernel(ret) / zeroVal < 1e-4) { |
|---|
| 92 |
ret /= factor; |
|---|
| 93 |
} |
|---|
| 94 |
|
|---|
| 95 |
kernelVal = kernel(ret) / zeroVal; |
|---|
| 96 |
} while((kernelVal > 1e-3 || kernelVal < 1e-4) && factor > 1); |
|---|
| 97 |
|
|---|
| 98 |
return ret; |
|---|
| 99 |
} |
|---|
| 100 |
|
|---|
| 101 |
public: |
|---|
| 102 |
/**Construct a kernel density estimation object from a callable object. |
|---|
| 103 |
* R must be a range of numeric types. C must be a kernel function, |
|---|
| 104 |
* delegate, or class or struct with overloaded opCall. The kernel |
|---|
| 105 |
* function is assumed to be symmetric about zero, to take its maximum |
|---|
| 106 |
* value at zero and to be unimodal. |
|---|
| 107 |
* |
|---|
| 108 |
* edgeBuffer determines how much space below and above the smallest and |
|---|
| 109 |
* largest observed value will be allocated when doing the binning. |
|---|
| 110 |
* Values less than reduce!min(range) - edgeBuffer or greater than |
|---|
| 111 |
* reduce!max(range) + edgeBuffer will be assigned a density of zero. |
|---|
| 112 |
* If this value is left at its default, it will be set to a value at which |
|---|
| 113 |
* the kernel is somewhere between 1e-3 and 1e-4 times its value at zero. |
|---|
| 114 |
* |
|---|
| 115 |
* The bandwidth of the kernel is indirectly selected by parametrizing the |
|---|
| 116 |
* kernel function. |
|---|
| 117 |
* |
|---|
| 118 |
* Examples: |
|---|
| 119 |
* --- |
|---|
| 120 |
* auto randNums = randArray!rNorm(1_000, 0, 1); |
|---|
| 121 |
* auto kernel = parametrize!normalPDF(0, 0.01); |
|---|
| 122 |
* auto density = KernelDensity1D(kernel, randNums); |
|---|
| 123 |
* writeln(normalPDF(1, 0, 1), " ", density(1)). // Should be about the same. |
|---|
| 124 |
* --- |
|---|
| 125 |
*/ |
|---|
| 126 |
static KernelDensity1D fromCallable(C, R) |
|---|
| 127 |
(scope C kernel, R range, double edgeBuffer = double.nan) |
|---|
| 128 |
if(isForwardRange!R && is(typeof(kernel(2.0)) : double)) { |
|---|
| 129 |
enum nBin = 1000; |
|---|
| 130 |
mixin(newFrame); |
|---|
| 131 |
|
|---|
| 132 |
uint N = 0; |
|---|
| 133 |
double minElem = double.infinity; |
|---|
| 134 |
double maxElem = -double.infinity; |
|---|
| 135 |
foreach(elem; range) { |
|---|
| 136 |
minElem = min(minElem, elem); |
|---|
| 137 |
maxElem = max(maxElem, elem); |
|---|
| 138 |
N++; |
|---|
| 139 |
} |
|---|
| 140 |
|
|---|
| 141 |
if(isNaN(edgeBuffer)) { |
|---|
| 142 |
edgeBuffer = findEdgeBuffer(kernel); |
|---|
| 143 |
} |
|---|
| 144 |
minElem -= edgeBuffer; |
|---|
| 145 |
maxElem += edgeBuffer; |
|---|
| 146 |
|
|---|
| 147 |
// Using ints here because they convert faster to floats than uints do. |
|---|
| 148 |
auto binsRaw = newStack!int(nBin); |
|---|
| 149 |
binsRaw[] = 0; |
|---|
| 150 |
|
|---|
| 151 |
foreach(elemRaw; range) { |
|---|
| 152 |
double elem = elemRaw - minElem; |
|---|
| 153 |
elem /= (maxElem - minElem); |
|---|
| 154 |
elem *= nBin; |
|---|
| 155 |
auto bin = to!uint(elem); |
|---|
| 156 |
if(bin == nBin) { |
|---|
| 157 |
bin--; |
|---|
| 158 |
} |
|---|
| 159 |
|
|---|
| 160 |
binsRaw[bin]++; |
|---|
| 161 |
} |
|---|
| 162 |
|
|---|
| 163 |
// Convolve the binned data with our kernel. Since N is fairly small |
|---|
| 164 |
// we'll use a simple O(N^2) algorithm. According to my measurements, |
|---|
| 165 |
// this is actually comparable in speed to using an FFT (and a lot |
|---|
| 166 |
// simplier and more space efficient) because: |
|---|
| 167 |
// |
|---|
| 168 |
// 1. We can take advantage of kernel symmetry. |
|---|
| 169 |
// |
|---|
| 170 |
// 2. We can take advantage of the sparsity of binsRaw. (We don't |
|---|
| 171 |
// need to convolve the zero count bins.) |
|---|
| 172 |
// |
|---|
| 173 |
// 3. We don't need to do any zero padding to get a non-cyclic |
|---|
| 174 |
// convolution. |
|---|
| 175 |
// |
|---|
| 176 |
// 4. We don't need to convolve the tails of the kernel function, |
|---|
| 177 |
// where the contribution to the final density estimate would be |
|---|
| 178 |
// negligible. |
|---|
| 179 |
auto binsCooked = newVoid!double(nBin); |
|---|
| 180 |
binsCooked[] = 0; |
|---|
| 181 |
|
|---|
| 182 |
auto kernelPoints = newStack!double(nBin); |
|---|
| 183 |
immutable stepSize = (maxElem - minElem) / nBin; |
|---|
| 184 |
|
|---|
| 185 |
kernelPoints[0] = kernel(0); |
|---|
| 186 |
immutable stopAt = kernelPoints[0] * 1e-10; |
|---|
| 187 |
foreach(ptrdiff_t i; 1..kernelPoints.length) { |
|---|
| 188 |
kernelPoints[i] = kernel(stepSize * i); |
|---|
| 189 |
|
|---|
| 190 |
// Don't bother convolving stuff that contributes negligibly. |
|---|
| 191 |
if(kernelPoints[i] < stopAt) { |
|---|
| 192 |
kernelPoints = kernelPoints[0..i]; |
|---|
| 193 |
break; |
|---|
| 194 |
} |
|---|
| 195 |
} |
|---|
| 196 |
|
|---|
| 197 |
foreach(i, count; binsRaw) if(count > 0) { |
|---|
| 198 |
binsCooked[i] += kernelPoints[0] * count; |
|---|
| 199 |
|
|---|
| 200 |
foreach(offset; 1..min(kernelPoints.length, max(i + 1, nBin - i))) { |
|---|
| 201 |
immutable kernelVal = kernelPoints[offset]; |
|---|
| 202 |
|
|---|
| 203 |
if(i >= offset) { |
|---|
| 204 |
binsCooked[i - offset] += kernelVal * count; |
|---|
| 205 |
} |
|---|
| 206 |
|
|---|
| 207 |
if(i + offset < nBin) { |
|---|
| 208 |
binsCooked[i + offset] += kernelVal * count; |
|---|
| 209 |
} |
|---|
| 210 |
} |
|---|
| 211 |
} |
|---|
| 212 |
|
|---|
| 213 |
binsCooked[] /= sum(binsCooked); |
|---|
| 214 |
binsCooked[] *= nBin / (maxElem - minElem); // Make it a density. |
|---|
| 215 |
|
|---|
| 216 |
auto cumulative = newVoid!double(nBin); |
|---|
| 217 |
cumulative[0] = binsCooked[0]; |
|---|
| 218 |
foreach(i; 1..nBin) { |
|---|
| 219 |
cumulative[i] = cumulative[i - 1] + binsCooked[i]; |
|---|
| 220 |
} |
|---|
| 221 |
cumulative[] /= cumulative[$ - 1]; |
|---|
| 222 |
|
|---|
| 223 |
return new typeof(this)( |
|---|
| 224 |
assumeUnique(binsCooked), assumeUnique(cumulative), |
|---|
| 225 |
minElem, maxElem); |
|---|
| 226 |
} |
|---|
| 227 |
|
|---|
| 228 |
/**Construct a kernel density estimator from an alias.*/ |
|---|
| 229 |
static KernelDensity1D fromAlias(alias kernel, R) |
|---|
| 230 |
(R range, double edgeBuffer = double.nan) |
|---|
| 231 |
if(isForwardRange!R && is(typeof(kernel(2.0)) : double)) { |
|---|
| 232 |
static double kernelFun(double x) { |
|---|
| 233 |
return kernel(x); |
|---|
| 234 |
} |
|---|
| 235 |
|
|---|
| 236 |
return fromCallable(&kernelFun, range, edgeBuffer); |
|---|
| 237 |
} |
|---|
| 238 |
|
|---|
| 239 |
/**Construct a kernel density estimator using the default kernel, which is |
|---|
| 240 |
* a Gaussian kernel with the Scott bandwidth. |
|---|
| 241 |
*/ |
|---|
| 242 |
static KernelDensity1D fromDefaultKernel(R) |
|---|
| 243 |
(R range, double edgeBuffer = double.nan) |
|---|
| 244 |
if(isForwardRange!R && is(ElementType!R : double)) { |
|---|
| 245 |
immutable bandwidth = scottBandwidth(range.save); |
|---|
| 246 |
|
|---|
| 247 |
double kernel(double x) { |
|---|
| 248 |
return normalPDF(x, 0, bandwidth); |
|---|
| 249 |
} |
|---|
| 250 |
|
|---|
| 251 |
return fromCallable(&kernel, range, edgeBuffer); |
|---|
| 252 |
} |
|---|
| 253 |
|
|---|
| 254 |
/**Compute the probability density at a given point.*/ |
|---|
| 255 |
double opCall(double x) const { |
|---|
| 256 |
if(x < minElem || x > maxElem) { |
|---|
| 257 |
return 0; |
|---|
| 258 |
} |
|---|
| 259 |
|
|---|
| 260 |
x -= minElem; |
|---|
| 261 |
x *= diffNeg1Nbin; |
|---|
| 262 |
|
|---|
| 263 |
immutable fract = x - floor(x); |
|---|
| 264 |
immutable upper = to!size_t(ceil(x)); |
|---|
| 265 |
immutable lower = to!size_t(floor(x)); |
|---|
| 266 |
|
|---|
| 267 |
if(upper == bins.length) { |
|---|
| 268 |
return bins[$ - 1]; |
|---|
| 269 |
} |
|---|
| 270 |
|
|---|
| 271 |
immutable ret = fract * bins[upper] + (1 - fract) * bins[lower]; |
|---|
| 272 |
return max(0, ret); // Compensate for roundoff |
|---|
| 273 |
} |
|---|
| 274 |
|
|---|
| 275 |
/**Compute the cumulative density, i.e. the integral from -infinity to x.*/ |
|---|
| 276 |
double cdf(double x) const { |
|---|
| 277 |
if(x <= minElem) { |
|---|
| 278 |
return 0; |
|---|
| 279 |
} else if(x >= maxElem) { |
|---|
| 280 |
return 1; |
|---|
| 281 |
} |
|---|
| 282 |
|
|---|
| 283 |
x -= minElem; |
|---|
| 284 |
x *= diffNeg1Nbin; |
|---|
| 285 |
|
|---|
| 286 |
immutable fract = x - floor(x); |
|---|
| 287 |
immutable upper = to!size_t(ceil(x)); |
|---|
| 288 |
immutable lower = to!size_t(floor(x)); |
|---|
| 289 |
|
|---|
| 290 |
if(upper == cumulative.length) { |
|---|
| 291 |
return 1; |
|---|
| 292 |
} |
|---|
| 293 |
|
|---|
| 294 |
return fract * cumulative[upper] + (1 - fract) * cumulative[lower]; |
|---|
| 295 |
} |
|---|
| 296 |
|
|---|
| 297 |
/**Compute the cumulative density from the rhs, i.e. the integral from |
|---|
| 298 |
* x to infinity. |
|---|
| 299 |
*/ |
|---|
| 300 |
double cdfr(double x) const { |
|---|
| 301 |
// Here, we can get away with just returning 1 - cdf b/c |
|---|
| 302 |
// there are inaccuracies several orders of magnitude bigger than |
|---|
| 303 |
// the rounding error. |
|---|
| 304 |
return 1.0 - cdf(x); |
|---|
| 305 |
} |
|---|
| 306 |
} |
|---|
| 307 |
|
|---|
| 308 |
unittest { |
|---|
| 309 |
auto kde = KernelDensity1D.fromCallable(parametrize!normalPDF(0, 1), [0]); |
|---|
| 310 |
assert(approxEqual(kde(1), normalPDF(1))); |
|---|
| 311 |
assert(approxEqual(kde.cdf(1), normalCDF(1))); |
|---|
| 312 |
assert(approxEqual(kde.cdfr(1), normalCDFR(1))); |
|---|
| 313 |
|
|---|
| 314 |
// This is purely to see if fromAlias works. |
|---|
| 315 |
auto cosKde = KernelDensity1D.fromAlias!cos([0], 1); |
|---|
| 316 |
|
|---|
| 317 |
// Make sure fromDefaultKernel at least instantiates. |
|---|
| 318 |
auto defaultKde = KernelDensity1D.fromDefaultKernel([1, 2, 3]); |
|---|
| 319 |
} |
|---|
| 320 |
|
|---|
| 321 |
/**Uses Scott's Rule to select the bandwidth of the Gaussian kernel density |
|---|
| 322 |
* estimator. This is 1.06 * min(stdev(data), interquartileRange(data) / 1.34) |
|---|
| 323 |
* N ^^ -0.2. R must be a forward range of numeric types. |
|---|
| 324 |
* |
|---|
| 325 |
* Examples: |
|---|
| 326 |
* --- |
|---|
| 327 |
* immutable bandwidth = scottBandwidth(data); |
|---|
| 328 |
* auto kernel = parametrize!normalPDF(0, bandwidth); |
|---|
| 329 |
* auto kde = KernelDensity1D(data, kernel); |
|---|
| 330 |
* --- |
|---|
| 331 |
* |
|---|
| 332 |
* References: |
|---|
| 333 |
* Scott, D. W. (1992) Multivariate Density Estimation: Theory, Practice, |
|---|
| 334 |
* and Visualization. Wiley. |
|---|
| 335 |
*/ |
|---|
| 336 |
double scottBandwidth(R)(R data) |
|---|
| 337 |
if(isForwardRange!R && is(ElementType!R : double)) { |
|---|
| 338 |
|
|---|
| 339 |
immutable summary = meanStdev(data.save); |
|---|
| 340 |
immutable interquartile = interquantileRange(data.save, 0.25) / 1.34; |
|---|
| 341 |
immutable sigmaHat = min(summary.stdev, interquartile); |
|---|
| 342 |
|
|---|
| 343 |
return 1.06 * sigmaHat * (summary.N ^^ -0.2); |
|---|
| 344 |
} |
|---|
| 345 |
|
|---|
| 346 |
unittest { |
|---|
| 347 |
// Values from R. |
|---|
| 348 |
assert(approxEqual(scottBandwidth([1,2,3,4,5]), 1.14666)); |
|---|
| 349 |
assert(approxEqual(scottBandwidth([1,2,2,2,2,8,8,8,8]), 2.242446)); |
|---|
| 350 |
} |
|---|
| 351 |
|
|---|
| 352 |
/**Construct an N-dimensional kernel density estimator. This is done using |
|---|
| 353 |
* the textbook definition of kernel density estimation, since the binning |
|---|
| 354 |
* and convolving method used in the 1-D case would rapidly become |
|---|
| 355 |
* unfeasible w.r.t. memory usage as dimensionality increased. |
|---|
| 356 |
* |
|---|
| 357 |
* Eventually, a 2-D estimator might be added as another special case, but |
|---|
| 358 |
* beyond 2-D, bin-and-convolute clearly isn't feasible. |
|---|
| 359 |
* |
|---|
| 360 |
* This class can be used for 1-D estimation instead of KernelDensity1D, and |
|---|
| 361 |
* will work properly. This is useful if: |
|---|
| 362 |
* |
|---|
| 363 |
* 1. You can't accept even the slightest deviation from the results that the |
|---|
| 364 |
* textbook definition of kernel density estimation would produce. |
|---|
| 365 |
* |
|---|
| 366 |
* 2. You are only going to evaluate at a few points and want to avoid the |
|---|
| 367 |
* up-front cost of the convolution used in the 1-D case. |
|---|
| 368 |
* |
|---|
| 369 |
* 3. You're using some weird kernel that doesn't meet the assumptions |
|---|
| 370 |
* required for KernelDensity1D. |
|---|
| 371 |
*/ |
|---|
| 372 |
class KernelDensity { |
|---|
| 373 |
private immutable double[][] points; |
|---|
| 374 |
private double delegate(double[]...) kernel; |
|---|
| 375 |
|
|---|
| 376 |
private this(immutable double[][] points) { |
|---|
| 377 |
this.points = points; |
|---|
| 378 |
} |
|---|
| 379 |
|
|---|
| 380 |
/**Returns the number of dimensions in the estimator.*/ |
|---|
| 381 |
uint nDimensions() const @property { |
|---|
| 382 |
// More than uint.max dimensions is absolutely implausible. |
|---|
| 383 |
assert(points.length <= uint.max); |
|---|
| 384 |
return cast(uint) points.length; |
|---|
| 385 |
} |
|---|
| 386 |
|
|---|
| 387 |
/**Construct a kernel density estimator from a kernel provided as a callable |
|---|
| 388 |
* object (such as a function pointer, delegate, or class with overloaded |
|---|
| 389 |
* opCall). R must be either a range of ranges, multiple ranges passed in |
|---|
| 390 |
* as variadic arguments, or a single range for the 1D case. Each range |
|---|
| 391 |
* represents the values of one variable in the joint distribution. |
|---|
| 392 |
* kernel must accept either an array of doubles or the same number of |
|---|
| 393 |
* arguments as the number of dimensions, and must return a floating point |
|---|
| 394 |
* number. |
|---|
| 395 |
* |
|---|
| 396 |
* Examples: |
|---|
| 397 |
* --- |
|---|
| 398 |
* // Create an estimate of the density of the joint distribution of |
|---|
| 399 |
* // hours sleep and programming skill. |
|---|
| 400 |
* auto programmingSkill = [8,6,7,5,3,0,9]; |
|---|
| 401 |
* auto hoursSleep = [3,6,2,4,3,5,8]; |
|---|
| 402 |
* |
|---|
| 403 |
* // Make a 2D Gaussian kernel function with bandwidth 0.5 in both |
|---|
| 404 |
* // dimensions and covariance zero. |
|---|
| 405 |
* static double myKernel(double x1, double x2) { |
|---|
| 406 |
* return normalPDF(x1, 0, 0.5) * normalPDF(x2, 0, 0.5); |
|---|
| 407 |
* } |
|---|
| 408 |
* |
|---|
| 409 |
* auto estimator = KernelDensity.fromCallable |
|---|
| 410 |
* (&myKernel, programmingSkill, hoursSleep); |
|---|
| 411 |
* |
|---|
| 412 |
* // Estimate the density at programming skill 1, 2 hours sleep. |
|---|
| 413 |
* auto density = estimator(1, 2); |
|---|
| 414 |
* --- |
|---|
| 415 |
*/ |
|---|
| 416 |
static KernelDensity fromCallable(C, R...)(C kernel, R ranges) |
|---|
| 417 |
if(allSatisfy!(isInputRange, R)) { |
|---|
| 418 |
auto kernelWrapped = wrapToArrayVariadic(kernel); |
|---|
| 419 |
|
|---|
| 420 |
static if(R.length == 1 && isInputRange!(typeof(ranges[0].front))) { |
|---|
| 421 |
alias ranges[0] data; |
|---|
| 422 |
} else { |
|---|
| 423 |
alias ranges data; |
|---|
| 424 |
} |
|---|
| 425 |
|
|---|
| 426 |
double[][] points; |
|---|
| 427 |
foreach(range; data) { |
|---|
| 428 |
double[] asDoubles; |
|---|
| 429 |
|
|---|
| 430 |
static if(dstats.base.hasLength!(typeof(range))) { |
|---|
| 431 |
asDoubles = newVoid!double(range.length); |
|---|
| 432 |
|
|---|
| 433 |
size_t i = 0; |
|---|
| 434 |
foreach(elem; range) { |
|---|
| 435 |
asDoubles[i++] = elem; |
|---|
| 436 |
} |
|---|
| 437 |
} else { |
|---|
| 438 |
auto app = appender(&asDoubles); |
|---|
| 439 |
foreach(elem; range) { |
|---|
| 440 |
app.put(elem); |
|---|
| 441 |
} |
|---|
| 442 |
} |
|---|
| 443 |
|
|---|
| 444 |
points ~= asDoubles; |
|---|
| 445 |
} |
|---|
| 446 |
|
|---|
| 447 |
dstatsEnforce(points.length, |
|---|
| 448 |
"Can't construct a zero dimensional kernel density estimator."); |
|---|
| 449 |
|
|---|
| 450 |
foreach(arr; points[1..$]) { |
|---|
| 451 |
dstatsEnforce(arr.length == points[0].length, |
|---|
| 452 |
"All ranges must be the same length to construct a KernelDensity."); |
|---|
| 453 |
} |
|---|
| 454 |
|
|---|
| 455 |
auto ret = new KernelDensity(assumeUnique(points)); |
|---|
| 456 |
ret.kernel = kernelWrapped; |
|---|
| 457 |
|
|---|
| 458 |
return ret; |
|---|
| 459 |
} |
|---|
| 460 |
|
|---|
| 461 |
/**Estimate the density at the point given by x. The variables in X are |
|---|
| 462 |
* provided in the same order as the ranges were provided for estimation. |
|---|
| 463 |
*/ |
|---|
| 464 |
double opCall(double[] x...) const { |
|---|
| 465 |
dstatsEnforce(x.length == points.length, |
|---|
| 466 |
"Dimension mismatch when evaluating kernel density."); |
|---|
| 467 |
double sum = 0; |
|---|
| 468 |
|
|---|
| 469 |
mixin(newFrame); |
|---|
| 470 |
auto dataPoint = newStack!double(points.length); |
|---|
| 471 |
foreach(i; 0..points[0].length) { |
|---|
| 472 |
foreach(j; 0..points.length) { |
|---|
| 473 |
dataPoint[j] = x[j] - points[j][i]; |
|---|
| 474 |
} |
|---|
| 475 |
|
|---|
| 476 |
sum += kernel(dataPoint); |
|---|
| 477 |
} |
|---|
| 478 |
|
|---|
| 479 |
sum /= points[0].length; |
|---|
| 480 |
return sum; |
|---|
| 481 |
} |
|---|
| 482 |
} |
|---|
| 483 |
|
|---|
| 484 |
unittest { |
|---|
| 485 |
auto data = randArray!rNorm(100, 0, 1); |
|---|
| 486 |
auto kernel = parametrize!normalPDF(0, scottBandwidth(data)); |
|---|
| 487 |
auto kde = KernelDensity.fromCallable(kernel, data); |
|---|
| 488 |
auto kde1 = KernelDensity1D.fromCallable(kernel, data); |
|---|
| 489 |
foreach(i; 0..5) { |
|---|
| 490 |
assert(abs(kde(i) - kde1(i)) < 0.01); |
|---|
| 491 |
} |
|---|
| 492 |
|
|---|
| 493 |
// Make sure example compiles. |
|---|
| 494 |
auto programmingSkill = [8,6,7,5,3,0,9]; |
|---|
| 495 |
auto hoursSleep = [3,6,2,4,3,5,8]; |
|---|
| 496 |
|
|---|
| 497 |
// Make a 2D Gaussian kernel function with bandwidth 0.5 in both |
|---|
| 498 |
// dimensions and covariance zero. |
|---|
| 499 |
static double myKernel(double x1, double x2) { |
|---|
| 500 |
return normalPDF(x1, 0, 0.5) * normalPDF(x2, 0, 0.5); |
|---|
| 501 |
} |
|---|
| 502 |
|
|---|
| 503 |
auto estimator = KernelDensity.fromCallable |
|---|
| 504 |
(&myKernel, programmingSkill, hoursSleep); |
|---|
| 505 |
|
|---|
| 506 |
// Estimate the density at programming skill 1, 2 hours sleep. |
|---|
| 507 |
auto density = estimator(1, 2); |
|---|
| 508 |
|
|---|
| 509 |
// Test instantiating from functor. |
|---|
| 510 |
auto foo = KernelDensity.fromCallable(estimator, hoursSleep); |
|---|
| 511 |
} |
|---|
| 512 |
|
|---|
| 513 |
|
|---|
| 514 |
private: |
|---|
| 515 |
|
|---|
| 516 |
double delegate(double[]...) wrapToArrayVariadic(C)(C callable) { |
|---|
| 517 |
static if(is(C == delegate) || isFunctionPointer!C) { |
|---|
| 518 |
alias callable fun; |
|---|
| 519 |
} else { // It's a functor. |
|---|
| 520 |
alias callable.opCall fun; |
|---|
| 521 |
} |
|---|
| 522 |
|
|---|
| 523 |
alias ParameterTypeTuple!fun params; |
|---|
| 524 |
static if(params.length == 1 && is(params[0] == double[])) { |
|---|
| 525 |
// Already in the right form. |
|---|
| 526 |
static if(is(C == delegate) && is(ReturnType!C == double)) { |
|---|
| 527 |
return callable; |
|---|
| 528 |
} else static if(is(ReturnType!(callable.opCall) == double)) { |
|---|
| 529 |
return &callable.opCall; |
|---|
| 530 |
} else { // Need to forward. |
|---|
| 531 |
double forward(double[] args...) { |
|---|
| 532 |
return fun(args); |
|---|
| 533 |
} |
|---|
| 534 |
|
|---|
| 535 |
return &forward; |
|---|
| 536 |
} |
|---|
| 537 |
} else { // Need to convert to single arguments and forward. |
|---|
| 538 |
static assert(allSatisfy!(isFloatingPoint, params)); |
|---|
| 539 |
|
|---|
| 540 |
double doCall(double[] args...) { |
|---|
| 541 |
assert(args.length == params.length); |
|---|
| 542 |
mixin("return fun(" ~ makeCallList(params.length) ~ ");"); |
|---|
| 543 |
} |
|---|
| 544 |
|
|---|
| 545 |
return &doCall; |
|---|
| 546 |
} |
|---|
| 547 |
} |
|---|
| 548 |
|
|---|
| 549 |
// CTFE function for forwarding elements of an array as single function |
|---|
| 550 |
// arguments. |
|---|
| 551 |
string makeCallList(uint N) { |
|---|
| 552 |
string ret; |
|---|
| 553 |
foreach(i; 0..N - 1) { |
|---|
| 554 |
ret ~= "args[" ~ to!string(i) ~ "], "; |
|---|
| 555 |
} |
|---|
| 556 |
|
|---|
| 557 |
ret ~= "args[" ~ to!string(N - 1) ~ "]"; |
|---|
| 558 |
return ret; |
|---|
| 559 |
} |
|---|