| 1 |
/**A module for performing linear regression. This module has an unusual |
|---|
| 2 |
* interface, as it is range-based instead of matrix based. Values for |
|---|
| 3 |
* independent variables are provided as either a tuple or a range of ranges. |
|---|
| 4 |
* This means that one can use, for example, map, to fit high order models and |
|---|
| 5 |
* lazily evaluate certain values. (For details, see examples below.) |
|---|
| 6 |
* |
|---|
| 7 |
* Author: David Simcha*/ |
|---|
| 8 |
/* |
|---|
| 9 |
* License: |
|---|
| 10 |
* Boost Software License - Version 1.0 - August 17th, 2003 |
|---|
| 11 |
* |
|---|
| 12 |
* Permission is hereby granted, free of charge, to any person or organization |
|---|
| 13 |
* obtaining a copy of the software and accompanying documentation covered by |
|---|
| 14 |
* this license (the "Software") to use, reproduce, display, distribute, |
|---|
| 15 |
* execute, and transmit the Software, and to prepare derivative works of the |
|---|
| 16 |
* Software, and to permit third-parties to whom the Software is furnished to |
|---|
| 17 |
* do so, all subject to the following: |
|---|
| 18 |
* |
|---|
| 19 |
* The copyright notices in the Software and this entire statement, including |
|---|
| 20 |
* the above license grant, this restriction and the following disclaimer, |
|---|
| 21 |
* must be included in all copies of the Software, in whole or in part, and |
|---|
| 22 |
* all derivative works of the Software, unless such copies or derivative |
|---|
| 23 |
* works are solely in the form of machine-executable object code generated by |
|---|
| 24 |
* a source language processor. |
|---|
| 25 |
* |
|---|
| 26 |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|---|
| 27 |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|---|
| 28 |
* FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT |
|---|
| 29 |
* SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE |
|---|
| 30 |
* FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, |
|---|
| 31 |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
|---|
| 32 |
* DEALINGS IN THE SOFTWARE. |
|---|
| 33 |
*/ |
|---|
| 34 |
module dstats.regress; |
|---|
| 35 |
|
|---|
| 36 |
import std.math, std.algorithm, std.traits, std.array, std.traits, std.exception, |
|---|
| 37 |
std.typetuple, std.typecons, std.numeric; |
|---|
| 38 |
|
|---|
| 39 |
import dstats.alloc, std.range, std.conv, dstats.distrib, dstats.cor, |
|---|
| 40 |
dstats.base, dstats.summary, dstats.sort; |
|---|
| 41 |
|
|---|
| 42 |
version(unittest) { |
|---|
| 43 |
import std.stdio, dstats.random, std.functional; |
|---|
| 44 |
void main(){} |
|---|
| 45 |
} |
|---|
| 46 |
|
|---|
| 47 |
/// |
|---|
| 48 |
struct PowMap(ExpType, T) |
|---|
| 49 |
if(isForwardRange!(T)) { |
|---|
| 50 |
Unqual!T range; |
|---|
| 51 |
Unqual!ExpType exponent; |
|---|
| 52 |
double cache; |
|---|
| 53 |
|
|---|
| 54 |
this(T range, ExpType exponent) { |
|---|
| 55 |
this.range = range; |
|---|
| 56 |
this.exponent = exponent; |
|---|
| 57 |
|
|---|
| 58 |
static if(isIntegral!ExpType) { |
|---|
| 59 |
cache = pow(cast(double) range.front, exponent); |
|---|
| 60 |
} else { |
|---|
| 61 |
cache = pow(cast(ExpType) range.front, exponent); |
|---|
| 62 |
} |
|---|
| 63 |
} |
|---|
| 64 |
|
|---|
| 65 |
@property double front() const pure nothrow { |
|---|
| 66 |
return cache; |
|---|
| 67 |
} |
|---|
| 68 |
|
|---|
| 69 |
void popFront() { |
|---|
| 70 |
range.popFront; |
|---|
| 71 |
if(!range.empty) { |
|---|
| 72 |
cache = pow(cast(double) range.front, exponent); |
|---|
| 73 |
} |
|---|
| 74 |
} |
|---|
| 75 |
|
|---|
| 76 |
@property typeof(this) save() { |
|---|
| 77 |
return this; |
|---|
| 78 |
} |
|---|
| 79 |
|
|---|
| 80 |
@property bool empty() { |
|---|
| 81 |
return range.empty; |
|---|
| 82 |
} |
|---|
| 83 |
} |
|---|
| 84 |
|
|---|
| 85 |
/**Maps a forward range to a power determined at runtime. ExpType is the type |
|---|
| 86 |
* of the exponent. Using an int is faster than using a double, but obviously |
|---|
| 87 |
* less flexible.*/ |
|---|
| 88 |
PowMap!(ExpType, T) powMap(ExpType, T)(T range, ExpType exponent) { |
|---|
| 89 |
alias PowMap!(ExpType, T) RT; |
|---|
| 90 |
return RT(range, exponent); |
|---|
| 91 |
} |
|---|
| 92 |
|
|---|
| 93 |
// Very ad-hoc, does a bunch of matrix ops for linearRegress and |
|---|
| 94 |
// linearRegressBeta. Specifically, computes xTx and xTy. |
|---|
| 95 |
// Written specifically to be efficient in the context used here. |
|---|
| 96 |
private void rangeMatrixMulTrans(U, T...) |
|---|
| 97 |
(ref double[] xTy, out double[][] xTx, U y, ref T matIn) { |
|---|
| 98 |
static if(isArray!(T[0]) && |
|---|
| 99 |
isInputRange!(typeof(matIn[0][0])) && matIn.length == 1) { |
|---|
| 100 |
alias matIn[0] mat; |
|---|
| 101 |
} else { |
|---|
| 102 |
alias matIn mat; |
|---|
| 103 |
} |
|---|
| 104 |
|
|---|
| 105 |
bool someEmpty() { |
|---|
| 106 |
if(y.empty) { |
|---|
| 107 |
return true; |
|---|
| 108 |
} |
|---|
| 109 |
foreach(range; mat) { |
|---|
| 110 |
if(range.empty) { |
|---|
| 111 |
return true; |
|---|
| 112 |
} |
|---|
| 113 |
} |
|---|
| 114 |
return false; |
|---|
| 115 |
} |
|---|
| 116 |
|
|---|
| 117 |
void popAll() { |
|---|
| 118 |
foreach(ti, range; mat) { |
|---|
| 119 |
mat[ti].popFront; |
|---|
| 120 |
} |
|---|
| 121 |
y.popFront; |
|---|
| 122 |
} |
|---|
| 123 |
|
|---|
| 124 |
xTy[] = 0; |
|---|
| 125 |
|
|---|
| 126 |
xTx = newStack!(double[])(mat.length); |
|---|
| 127 |
foreach(ref elem; xTx) { |
|---|
| 128 |
elem = newStack!double(mat.length); |
|---|
| 129 |
} |
|---|
| 130 |
|
|---|
| 131 |
foreach(row; xTx) { |
|---|
| 132 |
row[] = 0; |
|---|
| 133 |
} |
|---|
| 134 |
|
|---|
| 135 |
mixin(newFrame); |
|---|
| 136 |
auto deltas = newStack!double(mat.length); |
|---|
| 137 |
|
|---|
| 138 |
// Using an algorithm similar to the one for Pearson cor to improve |
|---|
| 139 |
// numerical stability. Calculate means and covariances, then |
|---|
| 140 |
// combine them: Sum of squares = mean1 * N * mean2 + N * cov. |
|---|
| 141 |
double k = 0; |
|---|
| 142 |
double[] means = newStack!(double)(mat.length); |
|---|
| 143 |
means[] = 0; |
|---|
| 144 |
double yMean = 0; |
|---|
| 145 |
|
|---|
| 146 |
while(!someEmpty) { |
|---|
| 147 |
foreach(i, elem; mat) { |
|---|
| 148 |
deltas[i] = cast(double) elem.front; |
|---|
| 149 |
} |
|---|
| 150 |
|
|---|
| 151 |
immutable kMinus1 = k; |
|---|
| 152 |
immutable kNeg1 = 1 / ++k; |
|---|
| 153 |
deltas[] -= means[]; |
|---|
| 154 |
means[] += deltas[] * kNeg1; |
|---|
| 155 |
|
|---|
| 156 |
immutable yDelta = cast(double) y.front - yMean; |
|---|
| 157 |
yMean += yDelta * kNeg1; |
|---|
| 158 |
|
|---|
| 159 |
foreach(i, delta1; deltas) { |
|---|
| 160 |
xTy[i] += kMinus1 * delta1 * kNeg1 * yDelta; |
|---|
| 161 |
xTx[i][i] += kMinus1 * delta1 * kNeg1 * delta1; |
|---|
| 162 |
auto xTxi = xTx[i]; |
|---|
| 163 |
|
|---|
| 164 |
foreach(j, delta2; deltas[0..i]) { |
|---|
| 165 |
xTxi[j] += kMinus1 * delta1 * kNeg1 * delta2; |
|---|
| 166 |
} |
|---|
| 167 |
} |
|---|
| 168 |
popAll(); |
|---|
| 169 |
} |
|---|
| 170 |
|
|---|
| 171 |
// k is n now that we're done looping over the data. |
|---|
| 172 |
alias k n; |
|---|
| 173 |
|
|---|
| 174 |
// mat now consists of covariance * n. Divide by n and add mean1 * mean2 |
|---|
| 175 |
// to get sum of products. |
|---|
| 176 |
foreach(i; 0..xTx.length) foreach(j; 0..i + 1) { |
|---|
| 177 |
xTx[i][j] /= n; |
|---|
| 178 |
xTx[i][j] += means[i] * means[j]; |
|---|
| 179 |
} |
|---|
| 180 |
|
|---|
| 181 |
// Similarly for the xTy vector |
|---|
| 182 |
foreach(i, ref elem; xTy) { |
|---|
| 183 |
xTy[i] /= n; |
|---|
| 184 |
elem += yMean * means[i]; |
|---|
| 185 |
} |
|---|
| 186 |
symmetrize(xTx); |
|---|
| 187 |
} |
|---|
| 188 |
|
|---|
| 189 |
// Copies values from lower triangle to upper triangle. |
|---|
| 190 |
private void symmetrize(double[][] mat) pure nothrow { |
|---|
| 191 |
foreach(i; 1..mat.length) { |
|---|
| 192 |
foreach(j; 0..i) { |
|---|
| 193 |
mat[j][i] = mat[i][j]; |
|---|
| 194 |
} |
|---|
| 195 |
} |
|---|
| 196 |
} |
|---|
| 197 |
|
|---|
| 198 |
// Uses Gauss-Jordan elim. w/ row pivoting to invert from. Stores the results |
|---|
| 199 |
// in to and leaves from in an undefined state. |
|---|
| 200 |
package void invert(double[][] from, double[][] to) { |
|---|
| 201 |
// Normalize. |
|---|
| 202 |
foreach(i, row; from) { |
|---|
| 203 |
double absMax = 1.0 / reduce!(max)(map!(abs)(row[0..from.length])); |
|---|
| 204 |
row[] *= absMax; |
|---|
| 205 |
to[i][] = 0; |
|---|
| 206 |
to[i][i] = absMax; |
|---|
| 207 |
} |
|---|
| 208 |
|
|---|
| 209 |
foreach(col; 0..from.length) { |
|---|
| 210 |
size_t bestRow; |
|---|
| 211 |
double biggest = 0; |
|---|
| 212 |
foreach(row; col..from.length) { |
|---|
| 213 |
if(abs(from[row][col]) > biggest) { |
|---|
| 214 |
bestRow = row; |
|---|
| 215 |
biggest = abs(from[row][col]); |
|---|
| 216 |
} |
|---|
| 217 |
} |
|---|
| 218 |
|
|---|
| 219 |
swap(from[col], from[bestRow]); |
|---|
| 220 |
swap(to[col], to[bestRow]); |
|---|
| 221 |
immutable pivotFactor = from[col][col]; |
|---|
| 222 |
|
|---|
| 223 |
foreach(row; 0..from.length) if(row != col) { |
|---|
| 224 |
immutable ratio = from[row][col] / pivotFactor; |
|---|
| 225 |
|
|---|
| 226 |
// If you're ever looking to optimize this code, good luck. The |
|---|
| 227 |
// bottleneck is almost ENTIRELY this one line: |
|---|
| 228 |
from[row][] -= from[col][] * ratio; |
|---|
| 229 |
to[row][] -= to[col][] * ratio; |
|---|
| 230 |
} |
|---|
| 231 |
} |
|---|
| 232 |
|
|---|
| 233 |
foreach(i; 0..from.length) { |
|---|
| 234 |
immutable diagVal = from[i][i]; |
|---|
| 235 |
from[i][] /= diagVal; |
|---|
| 236 |
to[i][] /= diagVal; |
|---|
| 237 |
} |
|---|
| 238 |
} |
|---|
| 239 |
|
|---|
| 240 |
unittest { |
|---|
| 241 |
auto mat = [[1.0, 2, 3], [4.0, 7, 6], [7.0, 8, 9]]; |
|---|
| 242 |
auto toMat = [new double[3], new double[3], new double[3]]; |
|---|
| 243 |
invert(mat, toMat); |
|---|
| 244 |
assert(approxEqual(toMat[0], [-0.625, -0.25, 0.375])); |
|---|
| 245 |
assert(approxEqual(toMat[1], [-0.25, 0.5, -0.25])); |
|---|
| 246 |
assert(approxEqual(toMat[2], [0.708333, -0.25, 0.041667])); |
|---|
| 247 |
} |
|---|
| 248 |
|
|---|
| 249 |
// Solve a system of linear equations mat * b = vec for b. The result is |
|---|
| 250 |
// stored in vec, and mat is left in an undefined state. Uses Gaussian |
|---|
| 251 |
// elimination w/ row pivoting. Roughly 4x faster than using inversion to |
|---|
| 252 |
// solve the system, and uses roughly half the memory. |
|---|
| 253 |
private void solve(double[][] mat, double[] vec) { |
|---|
| 254 |
// Normalize. |
|---|
| 255 |
foreach(i, row; mat) { |
|---|
| 256 |
double absMax = 1.0 / reduce!(max)(map!(abs)(row[0..mat.length])); |
|---|
| 257 |
row[] *= absMax; |
|---|
| 258 |
vec[i] *= absMax; |
|---|
| 259 |
} |
|---|
| 260 |
|
|---|
| 261 |
foreach(col; 0..mat.length - 1) { |
|---|
| 262 |
size_t bestRow; |
|---|
| 263 |
double biggest = 0; |
|---|
| 264 |
foreach(row; col..mat.length) { |
|---|
| 265 |
if(abs(mat[row][col]) > biggest) { |
|---|
| 266 |
bestRow = row; |
|---|
| 267 |
biggest = abs(mat[row][col]); |
|---|
| 268 |
} |
|---|
| 269 |
} |
|---|
| 270 |
|
|---|
| 271 |
swap(mat[col], mat[bestRow]); |
|---|
| 272 |
swap(vec[col], vec[bestRow]); |
|---|
| 273 |
immutable pivotFactor = mat[col][col]; |
|---|
| 274 |
|
|---|
| 275 |
foreach(row; col + 1..mat.length) { |
|---|
| 276 |
immutable ratio = mat[row][col] / pivotFactor; |
|---|
| 277 |
|
|---|
| 278 |
// If you're ever looking to optimize this code, good luck. The |
|---|
| 279 |
// bottleneck is almost ENTIRELY this one line: |
|---|
| 280 |
mat[row][col..$] -= mat[col][col..$] * ratio; |
|---|
| 281 |
vec[row] -= vec[col] * ratio; |
|---|
| 282 |
} |
|---|
| 283 |
} |
|---|
| 284 |
|
|---|
| 285 |
foreach(i; 0..mat.length) { |
|---|
| 286 |
double diagVal = mat[i][i]; |
|---|
| 287 |
mat[i][] /= diagVal; |
|---|
| 288 |
vec[i] /= diagVal; |
|---|
| 289 |
} |
|---|
| 290 |
|
|---|
| 291 |
// Do back substitution. |
|---|
| 292 |
for(size_t row = mat.length - 1; row != size_t.max; row--) { |
|---|
| 293 |
auto v1 = vec[row + 1..$]; |
|---|
| 294 |
auto v2 = mat[row][$ - v1.length..$]; |
|---|
| 295 |
vec[row] -= dotProduct(v1, v2); |
|---|
| 296 |
} |
|---|
| 297 |
} |
|---|
| 298 |
|
|---|
| 299 |
unittest { |
|---|
| 300 |
auto mat = [[2.0, 1, -1], [-3.0, -1, 2], [-2.0, 1, 2]]; |
|---|
| 301 |
auto vec = [8.0, -11, -3]; |
|---|
| 302 |
solve(mat, vec); |
|---|
| 303 |
assert(approxEqual(vec, [2, 3, -1])); |
|---|
| 304 |
|
|---|
| 305 |
auto mat2 = [[1.0, 2, 3], [4.0, 7, 6], [7.0, 8, 9]]; |
|---|
| 306 |
auto vec2 = [8.0, 6, 7]; |
|---|
| 307 |
solve(mat2, vec2); |
|---|
| 308 |
assert(approxEqual(vec2, [-3.875, -0.75, 4.45833])); |
|---|
| 309 |
} |
|---|
| 310 |
|
|---|
| 311 |
// Cholesky decomposition functions adapted from Don Clugston's MathExtra |
|---|
| 312 |
// lib, used w/ permission. |
|---|
| 313 |
private void choleskyDecompose(double[][] a, double[] diag) { |
|---|
| 314 |
immutable N = diag.length; |
|---|
| 315 |
|
|---|
| 316 |
foreach(i; 0..N) { |
|---|
| 317 |
const ai = a[i]; |
|---|
| 318 |
double sum = ai[i]; |
|---|
| 319 |
|
|---|
| 320 |
for(sizediff_t k = i - 1; k >= 0; --k) { |
|---|
| 321 |
sum -= ai[k] * ai[k]; |
|---|
| 322 |
} |
|---|
| 323 |
|
|---|
| 324 |
if (sum > 0.0) { |
|---|
| 325 |
diag[i] = sqrt(sum); |
|---|
| 326 |
|
|---|
| 327 |
foreach(j; i + 1..N) { |
|---|
| 328 |
sum = ai[j] - dotProduct(ai[0..i], a[j][0..i]); |
|---|
| 329 |
a[j][i] = sum / diag[i]; |
|---|
| 330 |
} |
|---|
| 331 |
} else { |
|---|
| 332 |
// not positive definite (could be caused by rounding errors) |
|---|
| 333 |
diag[i] = 0; |
|---|
| 334 |
// make this whole row zero so they have no further effect |
|---|
| 335 |
foreach(j; i + 1..N) a[j][i] = 0; |
|---|
| 336 |
} |
|---|
| 337 |
} |
|---|
| 338 |
} |
|---|
| 339 |
|
|---|
| 340 |
private void choleskySolve(double[][] a, double[] diag, double[] b, double[] x) { |
|---|
| 341 |
immutable N = x.length; |
|---|
| 342 |
|
|---|
| 343 |
foreach(i; 0..N) { |
|---|
| 344 |
const ai = a[i]; |
|---|
| 345 |
|
|---|
| 346 |
if(diag[i] > 0) { |
|---|
| 347 |
double sum = b[i]; |
|---|
| 348 |
sum -= dotProduct(ai[0..i], x[0..i]); |
|---|
| 349 |
x[i] = sum / diag[i]; |
|---|
| 350 |
} else x[i] = 0; // skip pos definite rows |
|---|
| 351 |
} |
|---|
| 352 |
|
|---|
| 353 |
for(sizediff_t i = N - 1; i >= 0; --i) { |
|---|
| 354 |
if (diag[i] > 0) { |
|---|
| 355 |
double sum = x[i]; |
|---|
| 356 |
for(sizediff_t k = i + 1; k < N; ++k) sum -= a[k][i] * x[k]; |
|---|
| 357 |
x[i] = sum / diag[i]; |
|---|
| 358 |
} else x[i] = 0; // skip pos definite rows |
|---|
| 359 |
} |
|---|
| 360 |
// Convert failed columns in solution to NANs if required. |
|---|
| 361 |
foreach(i; 0..N) { |
|---|
| 362 |
if(diag[i] !> 0) x[i] = double.nan; |
|---|
| 363 |
} |
|---|
| 364 |
} |
|---|
| 365 |
|
|---|
| 366 |
private void choleskySolve(double[][] a, double[] b, double[] x) { |
|---|
| 367 |
mixin(newFrame); |
|---|
| 368 |
auto diag = newStack!double(x.length); |
|---|
| 369 |
choleskyDecompose(a, diag); |
|---|
| 370 |
choleskySolve(a, diag, b, x); |
|---|
| 371 |
} |
|---|
| 372 |
|
|---|
| 373 |
/**Struct that holds the results of a linear regression. It's a plain old |
|---|
| 374 |
* data struct.*/ |
|---|
| 375 |
struct RegressRes { |
|---|
| 376 |
/**The coefficients, one for each range in X. These will be in the order |
|---|
| 377 |
* that the X ranges were passed in.*/ |
|---|
| 378 |
double[] betas; |
|---|
| 379 |
|
|---|
| 380 |
/**The standard error terms of the X ranges passed in.*/ |
|---|
| 381 |
double[] stdErr; |
|---|
| 382 |
|
|---|
| 383 |
/**The lower confidence bounds of the beta terms, at the confidence level |
|---|
| 384 |
* specificied. (Default 0.95).*/ |
|---|
| 385 |
double[] lowerBound; |
|---|
| 386 |
|
|---|
| 387 |
/**The upper confidence bounds of the beta terms, at the confidence level |
|---|
| 388 |
* specificied. (Default 0.95).*/ |
|---|
| 389 |
double[] upperBound; |
|---|
| 390 |
|
|---|
| 391 |
/**The P-value for the alternative that the corresponding beta value is |
|---|
| 392 |
* different from zero against the null that it is equal to zero.*/ |
|---|
| 393 |
double[] p; |
|---|
| 394 |
|
|---|
| 395 |
/**The coefficient of determination.*/ |
|---|
| 396 |
double R2; |
|---|
| 397 |
|
|---|
| 398 |
/**The adjusted coefficient of determination.*/ |
|---|
| 399 |
double adjustedR2; |
|---|
| 400 |
|
|---|
| 401 |
/**The root mean square of the residuals.*/ |
|---|
| 402 |
double residualError; |
|---|
| 403 |
|
|---|
| 404 |
/**The P-value for the model as a whole. Based on an F-statistic. The |
|---|
| 405 |
* null here is that the model has no predictive value, the alternative |
|---|
| 406 |
* is that it does.*/ |
|---|
| 407 |
double overallP; |
|---|
| 408 |
|
|---|
| 409 |
// Just used internally. |
|---|
| 410 |
private static string arrStr(T)(T arr) { |
|---|
| 411 |
return text(arr)[1..$ - 1]; |
|---|
| 412 |
} |
|---|
| 413 |
|
|---|
| 414 |
/**Print out the results in the default format.*/ |
|---|
| 415 |
string toString() { |
|---|
| 416 |
return "Betas: " ~ arrStr(betas) ~ "\nLower Conf. Int.: " ~ |
|---|
| 417 |
arrStr(lowerBound) ~ "\nUpper Conf. Int.: " ~ arrStr(upperBound) ~ |
|---|
| 418 |
"\nStd. Err: " ~ arrStr(stdErr) ~ "\nP Values: " ~ arrStr(p) ~ |
|---|
| 419 |
"\nR^2: " ~ text(R2) ~ |
|---|
| 420 |
"\nAdjusted R^2: " ~ text(adjustedR2) ~ |
|---|
| 421 |
"\nStd. Residual Error: " ~ text(residualError) |
|---|
| 422 |
~ "\nOverall P: " ~ text(overallP); |
|---|
| 423 |
} |
|---|
| 424 |
} |
|---|
| 425 |
|
|---|
| 426 |
/**Forward Range for holding the residuals from a regression analysis.*/ |
|---|
| 427 |
struct Residuals(F, U, T...) { |
|---|
| 428 |
static if(T.length == 1 && isForwardRange!(typeof(T[0].front()))) { |
|---|
| 429 |
alias T[0] R; |
|---|
| 430 |
alias typeof(array(R.init)) XType; |
|---|
| 431 |
enum bool needDup = true; |
|---|
| 432 |
} else { |
|---|
| 433 |
alias T R; |
|---|
| 434 |
alias staticMap!(Unqual, R) XType; |
|---|
| 435 |
enum bool needDup = false; |
|---|
| 436 |
} |
|---|
| 437 |
|
|---|
| 438 |
Unqual!U Y; |
|---|
| 439 |
XType X; |
|---|
| 440 |
F[] betas; |
|---|
| 441 |
double residual; |
|---|
| 442 |
bool _empty; |
|---|
| 443 |
|
|---|
| 444 |
void nextResidual() { |
|---|
| 445 |
double sum = 0; |
|---|
| 446 |
size_t i = 0; |
|---|
| 447 |
foreach(elem; X) { |
|---|
| 448 |
double frnt = elem.front; |
|---|
| 449 |
sum += frnt * betas[i]; |
|---|
| 450 |
i++; |
|---|
| 451 |
} |
|---|
| 452 |
residual = Y.front - sum; |
|---|
| 453 |
} |
|---|
| 454 |
|
|---|
| 455 |
this(F[] betas, U Y, R X) { |
|---|
| 456 |
static if(is(typeof(X.length))) { |
|---|
| 457 |
dstatsEnforce(X.length == betas.length, |
|---|
| 458 |
"Betas and X must have same length for residuals."); |
|---|
| 459 |
} else { |
|---|
| 460 |
dstatsEnforce(walkLength(X) == betas.length, |
|---|
| 461 |
"Betas and X must have same length for residuals."); |
|---|
| 462 |
} |
|---|
| 463 |
|
|---|
| 464 |
static if(needDup) { |
|---|
| 465 |
this.X = array(X); |
|---|
| 466 |
} else { |
|---|
| 467 |
this.X = X; |
|---|
| 468 |
} |
|---|
| 469 |
|
|---|
| 470 |
foreach(i, elem; this.X) { |
|---|
| 471 |
static if(isForwardRange!(typeof(elem))) { |
|---|
| 472 |
this.X[i] = this.X[i].save; |
|---|
| 473 |
} |
|---|
| 474 |
} |
|---|
| 475 |
|
|---|
| 476 |
this.Y = Y; |
|---|
| 477 |
this.betas = betas; |
|---|
| 478 |
if(Y.empty) { |
|---|
| 479 |
_empty = true; |
|---|
| 480 |
return; |
|---|
| 481 |
} |
|---|
| 482 |
foreach(elem; X) { |
|---|
| 483 |
if(elem.empty) { |
|---|
| 484 |
_empty = true; |
|---|
| 485 |
return; |
|---|
| 486 |
} |
|---|
| 487 |
} |
|---|
| 488 |
nextResidual; |
|---|
| 489 |
} |
|---|
| 490 |
|
|---|
| 491 |
@property double front() const pure nothrow { |
|---|
| 492 |
return residual; |
|---|
| 493 |
} |
|---|
| 494 |
|
|---|
| 495 |
void popFront() { |
|---|
| 496 |
Y.popFront; |
|---|
| 497 |
if(Y.empty) { |
|---|
| 498 |
_empty = true; |
|---|
| 499 |
return; |
|---|
| 500 |
} |
|---|
| 501 |
foreach(ti, elem; X) { |
|---|
| 502 |
X[ti].popFront; |
|---|
| 503 |
if(X[ti].empty) { |
|---|
| 504 |
_empty = true; |
|---|
| 505 |
return; |
|---|
| 506 |
} |
|---|
| 507 |
} |
|---|
| 508 |
nextResidual; |
|---|
| 509 |
} |
|---|
| 510 |
|
|---|
| 511 |
@property bool empty() const pure nothrow { |
|---|
| 512 |
return _empty; |
|---|
| 513 |
} |
|---|
| 514 |
|
|---|
| 515 |
@property typeof(this) save() { |
|---|
| 516 |
auto ret = this; |
|---|
| 517 |
ret.Y = ret.Y.save; |
|---|
| 518 |
foreach(ti, xElem; ret.X) { |
|---|
| 519 |
ret.X[ti] = ret.X[ti].save; |
|---|
| 520 |
} |
|---|
| 521 |
|
|---|
| 522 |
return ret; |
|---|
| 523 |
} |
|---|
| 524 |
} |
|---|
| 525 |
|
|---|
| 526 |
/**Given the beta coefficients from a linear regression, and X and Y values, |
|---|
| 527 |
* returns a range that lazily computes the residuals. |
|---|
| 528 |
*/ |
|---|
| 529 |
Residuals!(F, U, T) residuals(F, U, T...)(F[] betas, U Y, T X) |
|---|
| 530 |
if(isFloatingPoint!F && isForwardRange!U && allSatisfy!(isForwardRange, T)) { |
|---|
| 531 |
alias Residuals!(F, U, T) RT; |
|---|
| 532 |
return RT(betas, Y, X); |
|---|
| 533 |
} |
|---|
| 534 |
|
|---|
| 535 |
// Compiles summary statistics while iterating, to allow ridge regression over |
|---|
| 536 |
// input ranges. |
|---|
| 537 |
private struct SummaryIter(R) { |
|---|
| 538 |
R range; |
|---|
| 539 |
MeanSD summ; |
|---|
| 540 |
|
|---|
| 541 |
this(R range) { |
|---|
| 542 |
this.range = range; |
|---|
| 543 |
} |
|---|
| 544 |
|
|---|
| 545 |
double front() @property { |
|---|
| 546 |
return range.front; |
|---|
| 547 |
} |
|---|
| 548 |
|
|---|
| 549 |
void popFront() { |
|---|
| 550 |
summ.put(range.front); |
|---|
| 551 |
range.popFront(); |
|---|
| 552 |
} |
|---|
| 553 |
|
|---|
| 554 |
bool empty() @property { |
|---|
| 555 |
return range.empty; |
|---|
| 556 |
} |
|---|
| 557 |
|
|---|
| 558 |
double mse() @property const pure nothrow { return summ.mse; } |
|---|
| 559 |
|
|---|
| 560 |
double N() @property const pure nothrow { return summ.N; } |
|---|
| 561 |
} |
|---|
| 562 |
|
|---|
| 563 |
private template SummaryType(R) { |
|---|
| 564 |
alias SummaryIter!R SummaryType; |
|---|
| 565 |
} |
|---|
| 566 |
|
|---|
| 567 |
/** |
|---|
| 568 |
Perform a linear regression and return just the beta values. The advantages |
|---|
| 569 |
to just returning the beta values are that it's faster and that each range |
|---|
| 570 |
needs to be iterated over only once, and thus can be just an input range. |
|---|
| 571 |
The beta values are returned such that the smallest index corresponds to |
|---|
| 572 |
the leftmost element of X. X can be either a tuple or a range of input |
|---|
| 573 |
ranges. Y must be an input range. |
|---|
| 574 |
|
|---|
| 575 |
If, after all X variables are passed in, a numeric type is passed as the last |
|---|
| 576 |
parameter, this is treated as a ridge parameter and ridge regression is |
|---|
| 577 |
performed. Ridge regression is a form of regression that penalizes the L2 norm |
|---|
| 578 |
of the beta vector and therefore results in more parsimonious models. |
|---|
| 579 |
However, it makes statistical inference such as that supported by |
|---|
| 580 |
linearRegress() difficult to impossible. Therefore, linearRegress() doesn't |
|---|
| 581 |
support ridges. |
|---|
| 582 |
|
|---|
| 583 |
If no ridge parameter is passed, or equivalently if the ridge parameter is |
|---|
| 584 |
zero, then ordinary least squares regression is performed. |
|---|
| 585 |
|
|---|
| 586 |
Notes: The X ranges are traversed in lockstep, but the traversal is stopped |
|---|
| 587 |
at the end of the shortest one. Therefore, using infinite ranges is safe. |
|---|
| 588 |
For example, using repeat(1) to get an intercept term works. |
|---|
| 589 |
|
|---|
| 590 |
References: |
|---|
| 591 |
|
|---|
| 592 |
http://www.mathworks.com/help/toolbox/stats/ridge.html |
|---|
| 593 |
|
|---|
| 594 |
Venables, W. N. & Ripley, B. D. (2002) Modern Applied Statistics with S. |
|---|
| 595 |
Fourth Edition. Springer, New York. ISBN 0-387-95457-0 |
|---|
| 596 |
(This is the citation for the MASS R package.) |
|---|
| 597 |
|
|---|
| 598 |
Examples: |
|---|
| 599 |
--- |
|---|
| 600 |
int[] nBeers = [8,6,7,5,3,0,9]; |
|---|
| 601 |
int[] nCoffees = [3,6,2,4,3,6,8]; |
|---|
| 602 |
int[] musicVolume = [3,1,4,1,5,9,2]; |
|---|
| 603 |
int[] programmingSkill = [2,7,1,8,2,8,1]; |
|---|
| 604 |
double[] betas = linearRegressBeta(programmingSkill, repeat(1), nBeers, nCoffees, |
|---|
| 605 |
musicVolume, map!"a * a"(musicVolume)); |
|---|
| 606 |
|
|---|
| 607 |
// Now throw in a ridge parameter of 2.5. |
|---|
| 608 |
double[] ridgeBetas = linearRegressBeta(programmingSkill, repeat(1), nBeers, |
|---|
| 609 |
nCoffees, musicVolume, map!"a * a"(musicVolume), 2.5); |
|---|
| 610 |
--- |
|---|
| 611 |
*/ |
|---|
| 612 |
double[] linearRegressBeta(U, T...)(U Y, T XIn) |
|---|
| 613 |
if(doubleInput!(U)) { |
|---|
| 614 |
double[] dummy; |
|---|
| 615 |
return linearRegressBetaBuf!(U, T)(dummy, Y, XIn); |
|---|
| 616 |
} |
|---|
| 617 |
|
|---|
| 618 |
/** |
|---|
| 619 |
Same as linearRegressBeta, but allows the user to specify a buffer for |
|---|
| 620 |
the beta terms. If the buffer is too short, a new one is allocated. |
|---|
| 621 |
Otherwise, the results are returned in the user-provided buffer. |
|---|
| 622 |
*/ |
|---|
| 623 |
double[] linearRegressBetaBuf(U, TRidge...)(double[] buf, U Y, TRidge XRidge) |
|---|
| 624 |
if(doubleInput!(U)) { |
|---|
| 625 |
mixin(newFrame); |
|---|
| 626 |
|
|---|
| 627 |
static if(isFloatingPoint!(TRidge[$ - 1]) || isIntegral!(TRidge[$ - 1])) { |
|---|
| 628 |
// ridge param. |
|---|
| 629 |
alias XRidge[$ - 1] ridgeParam; |
|---|
| 630 |
alias TRidge[0..$ - 1] T; |
|---|
| 631 |
alias XRidge[0..$ - 1] XIn; |
|---|
| 632 |
enum bool ridge = true; |
|---|
| 633 |
dstatsEnforce(ridgeParam >= 0, |
|---|
| 634 |
"Cannot do ridge regerssion with ridge param <= 0."); |
|---|
| 635 |
|
|---|
| 636 |
static SummaryIter!R summaryIter(R)(R range) { |
|---|
| 637 |
return typeof(return)(range); |
|---|
| 638 |
} |
|---|
| 639 |
|
|---|
| 640 |
} else { |
|---|
| 641 |
enum bool ridge = false; |
|---|
| 642 |
enum ridgeParam = 0; |
|---|
| 643 |
alias TRidge T; |
|---|
| 644 |
alias XRidge XIn; |
|---|
| 645 |
|
|---|
| 646 |
static R summaryIter(R)(R range) { |
|---|
| 647 |
return range; |
|---|
| 648 |
} |
|---|
| 649 |
} |
|---|
| 650 |
|
|---|
| 651 |
static if(isArray!(T[0]) && isInputRange!(typeof(XIn[0][0])) && |
|---|
| 652 |
T.length == 1) { |
|---|
| 653 |
auto X = tempdup(map!(summaryIter)(XIn[0])); |
|---|
| 654 |
alias typeof(X[0]) E; |
|---|
| 655 |
} else { |
|---|
| 656 |
static if(ridge) { |
|---|
| 657 |
alias staticMap!(SummaryType, T) XType; |
|---|
| 658 |
XType X; |
|---|
| 659 |
|
|---|
| 660 |
foreach(ti, elem; XIn) { |
|---|
| 661 |
X[ti] = summaryIter(elem); |
|---|
| 662 |
} |
|---|
| 663 |
} else { |
|---|
| 664 |
alias XIn X; |
|---|
| 665 |
} |
|---|
| 666 |
} |
|---|
| 667 |
|
|---|
| 668 |
double[][] xTx; |
|---|
| 669 |
double[] xTy = newStack!double(X.length); |
|---|
| 670 |
double[] ret; |
|---|
| 671 |
if(buf.length < X.length) { |
|---|
| 672 |
ret = new double[X.length]; |
|---|
| 673 |
} else { |
|---|
| 674 |
ret = buf[0..X.length]; |
|---|
| 675 |
} |
|---|
| 676 |
|
|---|
| 677 |
rangeMatrixMulTrans(xTy, xTx, Y, X); |
|---|
| 678 |
|
|---|
| 679 |
static if(ridge) { |
|---|
| 680 |
if(ridgeParam > 0) { |
|---|
| 681 |
foreach(i, range; X) { |
|---|
| 682 |
// The whole matrix is scaled by N, as well as the xTy vector, |
|---|
| 683 |
// so scale the ridge param similarly. |
|---|
| 684 |
xTx[i][i] += ridgeParam * range.mse / range.N; |
|---|
| 685 |
} |
|---|
| 686 |
} |
|---|
| 687 |
} |
|---|
| 688 |
|
|---|
| 689 |
choleskySolve(xTx, xTy, ret); |
|---|
| 690 |
return ret; |
|---|
| 691 |
} |
|---|
| 692 |
|
|---|
| 693 |
/** |
|---|
| 694 |
Perform a linear regression as in linearRegressBeta, but return a |
|---|
| 695 |
RegressRes with useful stuff for statistical inference. If the last element |
|---|
| 696 |
of input is a real, this is used to specify the confidence intervals to |
|---|
| 697 |
be calculated. Otherwise, the default of 0.95 is used. The rest of input |
|---|
| 698 |
should be the elements of X. |
|---|
| 699 |
|
|---|
| 700 |
When using this function, which provides several useful statistics useful |
|---|
| 701 |
for inference, each range must be traversed twice. This means: |
|---|
| 702 |
|
|---|
| 703 |
1. They have to be forward ranges, not input ranges. |
|---|
| 704 |
|
|---|
| 705 |
2. If you have a large amount of data and you're mapping it to some |
|---|
| 706 |
expensive function, you may want to do this eagerly instead of lazily. |
|---|
| 707 |
|
|---|
| 708 |
Notes: |
|---|
| 709 |
|
|---|
| 710 |
The X ranges are traversed in lockstep, but the traversal is stopped |
|---|
| 711 |
at the end of the shortest one. Therefore, using infinite ranges is safe. |
|---|
| 712 |
For example, using repeat(1) to get an intercept term works. |
|---|
| 713 |
|
|---|
| 714 |
If the confidence interval specified is exactly 0, this is treated as a |
|---|
| 715 |
special case and confidence interval calculation is skipped. This can speed |
|---|
| 716 |
things up significantly and therefore can be useful in monte carlo and possibly |
|---|
| 717 |
data mining contexts. |
|---|
| 718 |
|
|---|
| 719 |
Bugs: The statistical tests performed in this function assume that an |
|---|
| 720 |
intercept term is included in your regression model. If no intercept term |
|---|
| 721 |
is included, the P-values, confidence intervals and adjusted R^2 values |
|---|
| 722 |
calculated by this function will be wrong. |
|---|
| 723 |
|
|---|
| 724 |
Examples: |
|---|
| 725 |
--- |
|---|
| 726 |
int[] nBeers = [8,6,7,5,3,0,9]; |
|---|
| 727 |
int[] nCoffees = [3,6,2,4,3,6,8]; |
|---|
| 728 |
int[] musicVolume = [3,1,4,1,5,9,2]; |
|---|
| 729 |
int[] programmingSkill = [2,7,1,8,2,8,1]; |
|---|
| 730 |
|
|---|
| 731 |
// Using default confidence interval: |
|---|
| 732 |
auto results = linearRegress(programmingSkill, repeat(1), nBeers, nCoffees, |
|---|
| 733 |
musicVolume, map!"a * a"(musicVolume)); |
|---|
| 734 |
|
|---|
| 735 |
// Using user-specified confidence interval: |
|---|
| 736 |
auto results = linearRegress(programmingSkill, repeat(1), nBeers, nCoffees, |
|---|
| 737 |
musicVolume, map!"a * a"(musicVolume), 0.8675309); |
|---|
| 738 |
--- |
|---|
| 739 |
*/ |
|---|
| 740 |
RegressRes linearRegress(U, TC...)(U Y, TC input) { |
|---|
| 741 |
static if(is(TC[$ - 1] : double)) { |
|---|
| 742 |
double confLvl = input[$ - 1]; |
|---|
| 743 |
enforceConfidence(confLvl); |
|---|
| 744 |
alias TC[0..$ - 1] T; |
|---|
| 745 |
alias input[0..$ - 1] XIn; |
|---|
| 746 |
} else { |
|---|
| 747 |
double confLvl = 0.95; // Default; |
|---|
| 748 |
alias TC T; |
|---|
| 749 |
alias input XIn; |
|---|
| 750 |
} |
|---|
| 751 |
|
|---|
| 752 |
mixin(newFrame); |
|---|
| 753 |
static if(isForwardRange!(T[0]) && isForwardRange!(typeof(XIn[0].front())) && |
|---|
| 754 |
T.length == 1) { |
|---|
| 755 |
|
|---|
| 756 |
enum bool arrayX = true; |
|---|
| 757 |
alias typeof(XIn[0].front) E; |
|---|
| 758 |
E[] X = tempdup(XIn[0]); |
|---|
| 759 |
} else static if(allSatisfy!(isForwardRange, T)) { |
|---|
| 760 |
enum bool arrayX = false; |
|---|
| 761 |
alias XIn X; |
|---|
| 762 |
} else { |
|---|
| 763 |
static assert(0, "Linear regression can only be performed with " ~ |
|---|
| 764 |
"tuples of forward ranges or ranges of forward ranges."); |
|---|
| 765 |
} |
|---|
| 766 |
|
|---|
| 767 |
double[][] xTx = newStack!(double[])(X.length), |
|---|
| 768 |
xTxNeg1 = newStack!(double[])(X.length); |
|---|
| 769 |
|
|---|
| 770 |
foreach(i; 0..X.length) { |
|---|
| 771 |
xTx[i] = newStack!double(X.length); |
|---|
| 772 |
} |
|---|
| 773 |
|
|---|
| 774 |
double[] xTy = newStack!double(X.length); |
|---|
| 775 |
|
|---|
| 776 |
foreach(i; 0..X.length) { |
|---|
| 777 |
xTxNeg1[i] = newStack!double(X.length); |
|---|
| 778 |
} |
|---|
| 779 |
|
|---|
| 780 |
static if(arrayX) { |
|---|
| 781 |
auto xSaved = X.tempdup; |
|---|
| 782 |
foreach(ref elem; xSaved) { |
|---|
| 783 |
elem = elem.save; |
|---|
| 784 |
} |
|---|
| 785 |
} else { |
|---|
| 786 |
auto xSaved = X; |
|---|
| 787 |
foreach(ti, Type; X) { |
|---|
| 788 |
xSaved[ti] = X[ti].save; |
|---|
| 789 |
} |
|---|
| 790 |
} |
|---|
| 791 |
|
|---|
| 792 |
rangeMatrixMulTrans(xTy, xTx, Y.save, X); |
|---|
| 793 |
invert(xTx, xTxNeg1); |
|---|
| 794 |
double[] betas = new double[X.length]; |
|---|
| 795 |
foreach(i; 0..betas.length) { |
|---|
| 796 |
betas[i] = 0; |
|---|
| 797 |
foreach(j; 0..betas.length) { |
|---|
| 798 |
betas[i] += xTxNeg1[i][j] * xTy[j]; |
|---|
| 799 |
} |
|---|
| 800 |
} |
|---|
| 801 |
|
|---|
| 802 |
X = xSaved; |
|---|
| 803 |
auto residuals = residuals(betas, Y, X); |
|---|
| 804 |
double S = 0; |
|---|
| 805 |
ulong n = 0; |
|---|
| 806 |
PearsonCor R2Calc; |
|---|
| 807 |
for(; !residuals.empty; residuals.popFront) { |
|---|
| 808 |
double residual = residuals.front; |
|---|
| 809 |
S += residual * residual; |
|---|
| 810 |
double Yfront = residuals.Y.front; |
|---|
| 811 |
double predicted = Yfront - residual; |
|---|
| 812 |
R2Calc.put(predicted, Yfront); |
|---|
| 813 |
n++; |
|---|
| 814 |
} |
|---|
| 815 |
immutable ulong df = n - X.length; |
|---|
| 816 |
immutable double R2 = R2Calc.cor ^^ 2; |
|---|
| 817 |
immutable double adjustedR2 = 1.0L - (1.0L - R2) * ((n - 1.0L) / df); |
|---|
| 818 |
|
|---|
| 819 |
immutable double sigma2 = S / (n - X.length); |
|---|
| 820 |
|
|---|
| 821 |
double[] stdErr = new double[betas.length]; |
|---|
| 822 |
foreach(i, ref elem; stdErr) { |
|---|
| 823 |
elem = sqrt( S * xTxNeg1[i][i] / df / n); |
|---|
| 824 |
} |
|---|
| 825 |
|
|---|
| 826 |
double[] lowerBound, upperBound; |
|---|
| 827 |
if(confLvl == 0) { |
|---|
| 828 |
// Then we're going to skip the computation to save time. (See below.) |
|---|
| 829 |
lowerBound = betas; |
|---|
| 830 |
upperBound = betas; |
|---|
| 831 |
} else { |
|---|
| 832 |
lowerBound = new double[betas.length]; |
|---|
| 833 |
upperBound = new double[betas.length]; |
|---|
| 834 |
} |
|---|
| 835 |
auto p = new double[betas.length]; |
|---|
| 836 |
|
|---|
| 837 |
foreach(i, beta; betas) { |
|---|
| 838 |
try { |
|---|
| 839 |
p[i] = 2 * min(studentsTCDF(beta / stdErr[i], df), |
|---|
| 840 |
studentsTCDFR(beta / stdErr[i], df)); |
|---|
| 841 |
} catch(DstatsArgumentException) { |
|---|
| 842 |
// Leave it as a NaN. |
|---|
| 843 |
} |
|---|
| 844 |
|
|---|
| 845 |
if(confLvl > 0) { |
|---|
| 846 |
// Skip confidence level computation if level is zero, to save |
|---|
| 847 |
// computation time. This is important in monte carlo and possibly |
|---|
| 848 |
// data mining contexts. |
|---|
| 849 |
try { |
|---|
| 850 |
double delta = invStudentsTCDF(0.5 * (1 - confLvl), df) * |
|---|
| 851 |
stdErr[i]; |
|---|
| 852 |
upperBound[i] = beta - delta; |
|---|
| 853 |
lowerBound[i] = beta + delta; |
|---|
| 854 |
} catch(DstatsArgumentException) { |
|---|
| 855 |
// Leave confidence bounds as NaNs. |
|---|
| 856 |
} |
|---|
| 857 |
} |
|---|
| 858 |
} |
|---|
| 859 |
|
|---|
| 860 |
double F = (R2 / (X.length - 1)) / ((1 - R2) / (n - X.length)); |
|---|
| 861 |
double overallP; |
|---|
| 862 |
try { |
|---|
| 863 |
overallP = fisherCDFR(F, X.length - 1, n - X.length); |
|---|
| 864 |
} catch(DstatsArgumentException) { |
|---|
| 865 |
// Leave it as a NaN. |
|---|
| 866 |
} |
|---|
| 867 |
|
|---|
| 868 |
return RegressRes(betas, stdErr, lowerBound, upperBound, p, R2, |
|---|
| 869 |
adjustedR2, sqrt(sigma2), overallP); |
|---|
| 870 |
} |
|---|
| 871 |
|
|---|
| 872 |
|
|---|
| 873 |
/**Struct returned by polyFit.*/ |
|---|
| 874 |
struct PolyFitRes(T) { |
|---|
| 875 |
|
|---|
| 876 |
/**The array of PowMap ranges created by polyFit.*/ |
|---|
| 877 |
T X; |
|---|
| 878 |
|
|---|
| 879 |
/**The rest of the results. This is alias this'd.*/ |
|---|
| 880 |
RegressRes regressRes; |
|---|
| 881 |
alias regressRes this; |
|---|
| 882 |
} |
|---|
| 883 |
|
|---|
| 884 |
/**Convenience function that takes a forward range X and a forward range Y, |
|---|
| 885 |
* creates an array of PowMap structs for integer powers from 0 through N, |
|---|
| 886 |
* and calls linearRegressBeta. |
|---|
| 887 |
* |
|---|
| 888 |
* Returns: An array of doubles. The index of each element corresponds to |
|---|
| 889 |
* the exponent. For example, the X<sup>2</sup> term will have an index of |
|---|
| 890 |
* 2. |
|---|
| 891 |
*/ |
|---|
| 892 |
double[] polyFitBeta(T, U)(U Y, T X, uint N, double ridge = 0) { |
|---|
| 893 |
double[] dummy; |
|---|
| 894 |
return polyFitBetaBuf!(T, U)(dummy, Y, X, N); |
|---|
| 895 |
} |
|---|
| 896 |
|
|---|
| 897 |
/**Same as polyFitBeta, but allows the caller to provide an explicit buffer |
|---|
| 898 |
* to return the coefficients in. If it's too short, a new one will be |
|---|
| 899 |
* allocated. Otherwise, results will be returned in the user-provided buffer. |
|---|
| 900 |
*/ |
|---|
| 901 |
double[] polyFitBetaBuf(T, U)(double[] buf, U Y, T X, uint N, double ridge = 0) { |
|---|
| 902 |
mixin(newFrame); |
|---|
| 903 |
auto pows = newStack!(PowMap!(uint, T))(N + 1); |
|---|
| 904 |
foreach(exponent; 0..N + 1) { |
|---|
| 905 |
pows[exponent] = powMap(X, exponent); |
|---|
| 906 |
} |
|---|
| 907 |
|
|---|
| 908 |
if(ridge == 0) { |
|---|
| 909 |
return linearRegressBetaBuf(buf, Y, pows); |
|---|
| 910 |
} else { |
|---|
| 911 |
return linearRegressBetaBuf(buf, Y, pows, ridge); |
|---|
| 912 |
} |
|---|
| 913 |
} |
|---|
| 914 |
|
|---|
| 915 |
/**Convenience function that takes a forward range X and a forward range Y, |
|---|
| 916 |
* creates an array of PowMap structs for integer powers 0 through N, |
|---|
| 917 |
* and calls linearRegress. |
|---|
| 918 |
* |
|---|
| 919 |
* Returns: A PolyFitRes containing the array of PowMap structs created and |
|---|
| 920 |
* a RegressRes. The PolyFitRes is alias this'd to the RegressRes.*/ |
|---|
| 921 |
PolyFitRes!(PowMap!(uint, T)[]) |
|---|
| 922 |
polyFit(T, U)(U Y, T X, uint N, double confInt = 0.95) { |
|---|
| 923 |
enforceConfidence(confInt); |
|---|
| 924 |
auto pows = new PowMap!(uint, T)[N + 1]; |
|---|
| 925 |
foreach(exponent; 0..N + 1) { |
|---|
| 926 |
pows[exponent] = powMap(X, exponent); |
|---|
| 927 |
} |
|---|
| 928 |
alias PolyFitRes!(typeof(pows)) RT; |
|---|
| 929 |
RT ret; |
|---|
| 930 |
ret.X = pows; |
|---|
| 931 |
ret.regressRes = linearRegress(Y, pows, confInt); |
|---|
| 932 |
return ret; |
|---|
| 933 |
} |
|---|
| 934 |
|
|---|
| 935 |
unittest { |
|---|
| 936 |
// These are a bunch of values gleaned from various examples on the Web. |
|---|
| 937 |
double[] heights = [1.47,1.5,1.52,1.55,1.57,1.60,1.63,1.65,1.68,1.7,1.73,1.75, |
|---|
| 938 |
1.78,1.8,1.83]; |
|---|
| 939 |
double[] weights = [52.21,53.12,54.48,55.84,57.2,58.57,59.93,61.29,63.11,64.47, |
|---|
| 940 |
66.28,68.1,69.92,72.19,74.46]; |
|---|
| 941 |
float[] diseaseSev = [1.9, 3.1, 3.3, 4.8, 5.3, 6.1, 6.4, 7.6, 9.8, 12.4]; |
|---|
| 942 |
ubyte[] temperature = [2,1,5,5,20,20,23,10,30,25]; |
|---|
| 943 |
|
|---|
| 944 |
// Values from R. |
|---|
| 945 |
auto res1 = polyFit(diseaseSev, temperature, 1); |
|---|
| 946 |
assert(approxEqual(res1.betas[0], 2.6623)); |
|---|
| 947 |
assert(approxEqual(res1.betas[1], 0.2417)); |
|---|
| 948 |
assert(approxEqual(res1.stdErr[0], 1.1008)); |
|---|
| 949 |
assert(approxEqual(res1.stdErr[1], 0.0635)); |
|---|
| 950 |
assert(approxEqual(res1.p[0], 0.0419)); |
|---|
| 951 |
assert(approxEqual(res1.p[1], 0.0052)); |
|---|
| 952 |
assert(approxEqual(res1.R2, 0.644)); |
|---|
| 953 |
assert(approxEqual(res1.adjustedR2, 0.6001)); |
|---|
| 954 |
assert(approxEqual(res1.residualError, 2.03)); |
|---|
| 955 |
assert(approxEqual(res1.overallP, 0.00518)); |
|---|
| 956 |
|
|---|
| 957 |
|
|---|
| 958 |
auto res2 = polyFit(weights, heights, 2); |
|---|
| 959 |
assert(approxEqual(res2.betas[0], 128.813)); |
|---|
| 960 |
assert(approxEqual(res2.betas[1], -143.162)); |
|---|
| 961 |
assert(approxEqual(res2.betas[2], 61.960)); |
|---|
| 962 |
|
|---|
| 963 |
assert(approxEqual(res2.stdErr[0], 16.308)); |
|---|
| 964 |
assert(approxEqual(res2.stdErr[1], 19.833)); |
|---|
| 965 |
assert(approxEqual(res2.stdErr[2], 6.008)); |
|---|
| 966 |
|
|---|
| 967 |
assert(approxEqual(res2.p[0], 4.28e-6)); |
|---|
| 968 |
assert(approxEqual(res2.p[1], 1.06e-5)); |
|---|
| 969 |
assert(approxEqual(res2.p[2], 2.57e-7)); |
|---|
| 970 |
|
|---|
| 971 |
assert(approxEqual(res2.R2, 0.9989, 0.0001)); |
|---|
| 972 |
assert(approxEqual(res2.adjustedR2, 0.9987, 0.0001)); |
|---|
| 973 |
|
|---|
| 974 |
assert(approxEqual(res2.lowerBound[0], 92.9, 0.01)); |
|---|
| 975 |
assert(approxEqual(res2.lowerBound[1], -186.8, 0.01)); |
|---|
| 976 |
assert(approxEqual(res2.lowerBound[2], 48.7, 0.01)); |
|---|
| 977 |
assert(approxEqual(res2.upperBound[0], 164.7, 0.01)); |
|---|
| 978 |
assert(approxEqual(res2.upperBound[1], -99.5, 0.01)); |
|---|
| 979 |
assert(approxEqual(res2.upperBound[2], 75.2, 0.01)); |
|---|
| 980 |
|
|---|
| 981 |
auto res3 = linearRegress(weights, repeat(1), heights, map!"a * a"(heights)); |
|---|
| 982 |
assert(res2.betas == res3.betas); |
|---|
| 983 |
|
|---|
| 984 |
double[2] beta1Buf; |
|---|
| 985 |
auto beta1 = linearRegressBetaBuf |
|---|
| 986 |
(beta1Buf[], diseaseSev, repeat(1), temperature); |
|---|
| 987 |
assert(beta1Buf.ptr == beta1.ptr); |
|---|
| 988 |
assert(beta1Buf[] == beta1[]); |
|---|
| 989 |
assert(approxEqual(beta1, res1.betas)); |
|---|
| 990 |
auto beta2 = polyFitBeta(weights, heights, 2); |
|---|
| 991 |
assert(approxEqual(beta2, res2.betas)); |
|---|
| 992 |
|
|---|
| 993 |
auto res4 = linearRegress(weights, repeat(1), heights); |
|---|
| 994 |
assert(approxEqual(res4.p, 3.604e-14)); |
|---|
| 995 |
assert(approxEqual(res4.betas, [-39.062, 61.272])); |
|---|
| 996 |
assert(approxEqual(res4.p, [6.05e-9, 3.60e-14])); |
|---|
| 997 |
assert(approxEqual(res4.R2, 0.9892)); |
|---|
| 998 |
assert(approxEqual(res4.adjustedR2, 0.9884)); |
|---|
| 999 |
assert(approxEqual(res4.residualError, 0.7591)); |
|---|
| 1000 |
assert(approxEqual(res4.lowerBound, [-45.40912, 57.43554])); |
|---|
| 1001 |
assert(approxEqual(res4.upperBound, [-32.71479, 65.10883])); |
|---|
| 1002 |
|
|---|
| 1003 |
// Test residuals. |
|---|
| 1004 |
assert(approxEqual(residuals(res4.betas, weights, repeat(1), heights), |
|---|
| 1005 |
[1.20184170, 0.27367611, 0.40823237, -0.06993322, 0.06462305, |
|---|
| 1006 |
-0.40354255, -0.88170814, -0.74715188, -0.76531747, -0.63076120, |
|---|
| 1007 |
-0.65892680, -0.06437053, -0.08253613, 0.96202014, 1.39385455])); |
|---|
| 1008 |
|
|---|
| 1009 |
// Test nonzero ridge parameters. |
|---|
| 1010 |
// Values from R's MASS package. |
|---|
| 1011 |
auto a = [1, 2, 3, 4, 5, 6, 7]; |
|---|
| 1012 |
auto b = [8, 6, 7, 5, 3, 0, 9]; |
|---|
| 1013 |
auto c = [2, 7, 1, 8, 2, 8, 1]; |
|---|
| 1014 |
|
|---|
| 1015 |
// With a ridge param. of zero, ridge regression reduces to regular |
|---|
| 1016 |
// OLS regression. |
|---|
| 1017 |
assert(approxEqual(linearRegressBeta(a, repeat(1), b, c, 0), |
|---|
| 1018 |
linearRegressBeta(a, repeat(1), b, c))); |
|---|
| 1019 |
|
|---|
| 1020 |
// Test the ridge regression. Values from R MASS package. |
|---|
| 1021 |
auto ridge1 = linearRegressBeta(a, repeat(1), b, c, 1); |
|---|
| 1022 |
auto ridge2 = linearRegressBeta(a, repeat(1), b, c, 2); |
|---|
| 1023 |
auto ridge3 = linearRegressBeta(c, [[1,1,1,1,1,1,1], a, b], 10); |
|---|
| 1024 |
assert(approxEqual(ridge1, [6.0357757, -0.2729671, -0.1337131])); |
|---|
| 1025 |
assert(approxEqual(ridge2, [5.62367784, -0.22449854, -0.09775174])); |
|---|
| 1026 |
assert(approxEqual(ridge3, [5.82653624, -0.05197246, -0.27185592 ])); |
|---|
| 1027 |
} |
|---|
| 1028 |
|
|---|
| 1029 |
private MeanSD[] calculateSummaries(X...)(X xIn) { |
|---|
| 1030 |
// This is slightly wasteful because it sticks this shallow dup in |
|---|
| 1031 |
// an unfreeable pos on TempAlloc. |
|---|
| 1032 |
static if(X.length == 1 && isRoR!(X[0])) { |
|---|
| 1033 |
auto ret = newStack!MeanSD(xIn[0].length); |
|---|
| 1034 |
mixin(newFrame); |
|---|
| 1035 |
auto x = tempdup(xIn[0]); |
|---|
| 1036 |
|
|---|
| 1037 |
foreach(ref range; x) { |
|---|
| 1038 |
range = range.save; |
|---|
| 1039 |
} |
|---|
| 1040 |
} else { |
|---|
| 1041 |
auto ret = newStack!MeanSD(xIn.length); |
|---|
| 1042 |
alias xIn x; |
|---|
| 1043 |
|
|---|
| 1044 |
foreach(ti, R; X) { |
|---|
| 1045 |
x[ti] = x[ti].save; |
|---|
| 1046 |
} |
|---|
| 1047 |
} |
|---|
| 1048 |
|
|---|
| 1049 |
ret[] = MeanSD.init; |
|---|
| 1050 |
|
|---|
| 1051 |
bool someEmpty() { |
|---|
| 1052 |
foreach(range; x) { |
|---|
| 1053 |
if(range.empty) return true; |
|---|
| 1054 |
} |
|---|
| 1055 |
|
|---|
| 1056 |
return false; |
|---|
| 1057 |
} |
|---|
| 1058 |
|
|---|
| 1059 |
void popAll() { |
|---|
| 1060 |
foreach(ti, elem; x) { |
|---|
| 1061 |
x[ti].popFront(); |
|---|
| 1062 |
} |
|---|
| 1063 |
} |
|---|
| 1064 |
|
|---|
| 1065 |
while(!someEmpty) { |
|---|
| 1066 |
foreach(i, range; x) { |
|---|
| 1067 |
ret[i].put(range.front); |
|---|
| 1068 |
} |
|---|
| 1069 |
popAll(); |
|---|
| 1070 |
} |
|---|
| 1071 |
|
|---|
| 1072 |
return ret; |
|---|
| 1073 |
} |
|---|
| 1074 |
|
|---|
| 1075 |
private double softThresh(double z, double gamma) { |
|---|
| 1076 |
if(gamma >= abs(z)) { |
|---|
| 1077 |
return 0; |
|---|
| 1078 |
} else if(z > 0) { |
|---|
| 1079 |
return z - gamma; |
|---|
| 1080 |
} else { |
|---|
| 1081 |
return z + gamma; |
|---|
| 1082 |
} |
|---|
| 1083 |
} |
|---|
| 1084 |
|
|---|
| 1085 |
private struct PreprocessedData { |
|---|
| 1086 |
MeanSD[] xSumm; |
|---|
| 1087 |
MeanSD ySumm; |
|---|
| 1088 |
double[] y; |
|---|
| 1089 |
double[][] x; |
|---|
| 1090 |
} |
|---|
| 1091 |
|
|---|
| 1092 |
private PreprocessedData preprocessStandardize(Y, X...) |
|---|
| 1093 |
(Y yIn, X xIn) { |
|---|
| 1094 |
static if(X.length == 1 && isRoR!(X[0])) { |
|---|
| 1095 |
auto xRaw = tempdup(xIn[0]); |
|---|
| 1096 |
} else { |
|---|
| 1097 |
alias xIn xRaw; |
|---|
| 1098 |
} |
|---|
| 1099 |
|
|---|
| 1100 |
auto summaries = calculateSummaries(xRaw); |
|---|
| 1101 |
immutable minLen = to!size_t( |
|---|
| 1102 |
reduce!min( |
|---|
| 1103 |
map!"a.N"(summaries) |
|---|
| 1104 |
) |
|---|
| 1105 |
); |
|---|
| 1106 |
|
|---|
| 1107 |
auto x = newStack!(double[])(summaries.length); |
|---|
| 1108 |
foreach(i, range; xRaw) { |
|---|
| 1109 |
x[i] = tempdup(map!(to!double)(take(range, minLen))); |
|---|
| 1110 |
x[i][] -= summaries[i].mean; |
|---|
| 1111 |
x[i][] /= sqrt(summaries[i].mse); |
|---|
| 1112 |
} |
|---|
| 1113 |
|
|---|
| 1114 |
double[] y; |
|---|
| 1115 |
MeanSD ySumm; |
|---|
| 1116 |
if(yIn.length) { |
|---|
| 1117 |
y = tempdup(map!(to!double)(take(yIn, minLen))); |
|---|
| 1118 |
ySumm = meanStdev(y); |
|---|
| 1119 |
y[] -= ySumm.mean; |
|---|
| 1120 |
} |
|---|
| 1121 |
|
|---|
| 1122 |
return PreprocessedData(summaries, ySumm, y, x); |
|---|
| 1123 |
} |
|---|
| 1124 |
|
|---|
| 1125 |
/** |
|---|
| 1126 |
Performs lasso (L1) and/or ridge (L2) penalized linear regression. Due to the |
|---|
| 1127 |
way the data is standardized, no intercept term should be included in x |
|---|
| 1128 |
(unlike linearRegress and linearRegressBeta). The intercept coefficient is |
|---|
| 1129 |
implicitly included and returned in the first element of the returned array. |
|---|
| 1130 |
Usage is otherwise identical. |
|---|
| 1131 |
|
|---|
| 1132 |
Note: Setting lasso equal to zero is equivalent to performing ridge regression. |
|---|
| 1133 |
This can also be done with linearRegressBeta. However, the |
|---|
| 1134 |
linearRegressBeta algorithm is optimized for memory efficiency and |
|---|
| 1135 |
large samples. This algorithm is optimized for large feature sets. |
|---|
| 1136 |
|
|---|
| 1137 |
Returns: The beta coefficients for the regression model. |
|---|
| 1138 |
|
|---|
| 1139 |
References: |
|---|
| 1140 |
|
|---|
| 1141 |
Friedman J, et al Pathwise coordinate optimization. Ann. Appl. Stat. |
|---|
| 1142 |
2007;2:302-332. |
|---|
| 1143 |
|
|---|
| 1144 |
Goeman, J. J., L1 penalized estimation in the Cox proportional hazards model. |
|---|
| 1145 |
Biometrical Journal 52(1), 70{84. |
|---|
| 1146 |
|
|---|
| 1147 |
Eilers, P., Boer, J., Van Ommen, G., Van Houwelingen, H. 2001 Classification of |
|---|
| 1148 |
microarray data with penalized logistic regression. Proceedings of SPIE. |
|---|
| 1149 |
Progress in Biomedical Optics and Images vol. 4266, pp. 187-198 |
|---|
| 1150 |
*/ |
|---|
| 1151 |
double[] linearRegressPenalized(Y, X...) |
|---|
| 1152 |
(Y yIn, X xIn, double lasso, double ridge) { |
|---|
| 1153 |
mixin(newFrame); |
|---|
| 1154 |
|
|---|
| 1155 |
auto preproc = preprocessStandardize(yIn, xIn); |
|---|
| 1156 |
|
|---|
| 1157 |
auto summaries = preproc.xSumm; |
|---|
| 1158 |
auto ySumm = preproc.ySumm; |
|---|
| 1159 |
auto x = preproc.x; |
|---|
| 1160 |
auto y = preproc.y; |
|---|
| 1161 |
|
|---|
| 1162 |
auto betasFull = new double[x.length + 1]; |
|---|
| 1163 |
betasFull[] = 0; |
|---|
| 1164 |
auto betas = betasFull[1..$]; |
|---|
| 1165 |
|
|---|
| 1166 |
if(lasso > 0) { |
|---|
| 1167 |
coordDescent(y, x, betas, lasso, ridge, null); |
|---|
| 1168 |
} else if(y.length > x.length) { |
|---|
| 1169 |
// Correct for different )#*$# scaling conventions. |
|---|
| 1170 |
foreach(i, feature; x) { |
|---|
| 1171 |
feature[] /= sqrt(summaries[i].mse); |
|---|
| 1172 |
} |
|---|
| 1173 |
|
|---|
| 1174 |
linearRegressBetaBuf(betas, y, x, ridge); |
|---|
| 1175 |
|
|---|
| 1176 |
// More correction for diff. scaling factors. |
|---|
| 1177 |
foreach(i, ref b; betas) { |
|---|
| 1178 |
b /= sqrt(summaries[i].mse); |
|---|
| 1179 |
} |
|---|
| 1180 |
|
|---|
| 1181 |
} else { |
|---|
| 1182 |
ridgeLargeP(y, x, ridge, betas, null); |
|---|
| 1183 |
} |
|---|
| 1184 |
|
|---|
| 1185 |
foreach(i, ref elem; betas) { |
|---|
| 1186 |
elem /= sqrt(summaries[i].mse); |
|---|
| 1187 |
} |
|---|
| 1188 |
|
|---|
| 1189 |
betasFull[0] = ySumm.mean; |
|---|
| 1190 |
foreach(i, beta; betas) { |
|---|
| 1191 |
betasFull[0] -= beta * summaries[i].mean; |
|---|
| 1192 |
} |
|---|
| 1193 |
|
|---|
| 1194 |
return betasFull; |
|---|
| 1195 |
} |
|---|
| 1196 |
|
|---|
| 1197 |
private void coordDescent |
|---|
| 1198 |
(double[] y, double[][] x, double[] betas, double lasso, double ridge, double[] w) { |
|---|
| 1199 |
mixin(newFrame); |
|---|
| 1200 |
|
|---|
| 1201 |
auto predictions = newStack!double(y.length); |
|---|
| 1202 |
predictions[] = 0; |
|---|
| 1203 |
|
|---|
| 1204 |
void makePredictions() { |
|---|
| 1205 |
foreach(j, beta; betas) { |
|---|
| 1206 |
predictions[] += x[j][] * beta; |
|---|
| 1207 |
} |
|---|
| 1208 |
} |
|---|
| 1209 |
|
|---|
| 1210 |
if(reduce!max(0.0, map!abs(betas)) > 0) { |
|---|
| 1211 |
makePredictions(); |
|---|
| 1212 |
} |
|---|
| 1213 |
|
|---|
| 1214 |
auto residuals = newStack!double(y.length); |
|---|
| 1215 |
|
|---|
| 1216 |
uint iter = 0; |
|---|
| 1217 |
enum maxIter = 10_000; |
|---|
| 1218 |
enum relEpsilon = 1e-5; |
|---|
| 1219 |
enum absEpsilon = 1e-10; |
|---|
| 1220 |
immutable n = cast(double) y.length; |
|---|
| 1221 |
auto perm = tempdup(iota(0U, x.length)); |
|---|
| 1222 |
|
|---|
| 1223 |
auto weightDots = newStack!double(x.length); |
|---|
| 1224 |
if(w.length == 0) { |
|---|
| 1225 |
ridge /= n; |
|---|
| 1226 |
lasso /= n; |
|---|
| 1227 |
weightDots[] = 1; |
|---|
| 1228 |
} else { |
|---|
| 1229 |
foreach(j, col; x) { |
|---|
| 1230 |
weightDots[j] = dotProduct(w, map!"a * a"(col)); |
|---|
| 1231 |
} |
|---|
| 1232 |
} |
|---|
| 1233 |
|
|---|
| 1234 |
double doIter(double[] betas, double[][] x, double mul) { |
|---|
| 1235 |
// stderr.writeln("ITER: ", betas, '\t', predictions); |
|---|
| 1236 |
double maxRelError = 0; |
|---|
| 1237 |
foreach(j, ref b; betas) { |
|---|
| 1238 |
if(b == 0) { |
|---|
| 1239 |
residuals[] = (-predictions[] + y[]); |
|---|
| 1240 |
} else { |
|---|
| 1241 |
residuals[] = (-predictions[] + x[j][] * b + y[]); |
|---|
| 1242 |
predictions[] -= x[j][] * b; |
|---|
| 1243 |
} |
|---|
| 1244 |
|
|---|
| 1245 |
double z; |
|---|
| 1246 |
if(w.length) { |
|---|
| 1247 |
z = 0; |
|---|
| 1248 |
foreach(i, weight; w) { |
|---|
| 1249 |
z += weight * x[j][i] * residuals[i]; |
|---|
| 1250 |
} |
|---|
| 1251 |
} else { |
|---|
| 1252 |
residuals[] /= n; |
|---|
| 1253 |
z = dotProduct(residuals, x[j]); |
|---|
| 1254 |
} |
|---|
| 1255 |
|
|---|
| 1256 |
auto newB = softThresh(z, lasso * mul) / |
|---|
| 1257 |
(weightDots[j] + ridge * mul); |
|---|
| 1258 |
|
|---|
| 1259 |
immutable absErr = abs(b - newB); |
|---|
| 1260 |
immutable err = abs(b - newB) / max(abs(b), abs(newB)); |
|---|
| 1261 |
|
|---|
| 1262 |
if(absErr > absEpsilon) { |
|---|
| 1263 |
maxRelError = max(maxRelError, err); |
|---|
| 1264 |
} |
|---|
| 1265 |
|
|---|
| 1266 |
b = newB; |
|---|
| 1267 |
if(b != 0) { |
|---|
| 1268 |
predictions[] += x[j][] * b; |
|---|
| 1269 |
} |
|---|
| 1270 |
} |
|---|
| 1271 |
|
|---|
| 1272 |
return maxRelError; |
|---|
| 1273 |
} |
|---|
| 1274 |
|
|---|
| 1275 |
void toConvergence(double mul) { |
|---|
| 1276 |
double maxRelErr = doIter(betas, x, mul); |
|---|
| 1277 |
iter++; |
|---|
| 1278 |
if(maxRelErr < relEpsilon) return; |
|---|
| 1279 |
|
|---|
| 1280 |
static bool absGreater(double x, double y) { return abs(x) > abs(y); } |
|---|
| 1281 |
|
|---|
| 1282 |
while(iter < maxIter) { |
|---|
| 1283 |
size_t split = 0; |
|---|
| 1284 |
while(split < betas.length && abs(betas[split]) > 0) { |
|---|
| 1285 |
split++; |
|---|
| 1286 |
} |
|---|
| 1287 |
|
|---|
| 1288 |
try { |
|---|
| 1289 |
qsort!absGreater(betas, x, perm, weightDots); |
|---|
| 1290 |
} catch(SortException) { |
|---|
| 1291 |
betas[] = double.nan; |
|---|
| 1292 |
break; |
|---|
| 1293 |
} |
|---|
| 1294 |
|
|---|
| 1295 |
maxRelErr = double.infinity; |
|---|
| 1296 |
for(; !(maxRelErr < relEpsilon) && split < betas.length |
|---|
| 1297 |
&& iter < maxIter; iter++) { |
|---|
| 1298 |
maxRelErr = doIter(betas[0..split], x[0..split], mul); |
|---|
| 1299 |
} |
|---|
| 1300 |
|
|---|
| 1301 |
|
|---|
| 1302 |
maxRelErr = doIter(betas, x, mul); |
|---|
| 1303 |
iter++; |
|---|
| 1304 |
if(maxRelErr < relEpsilon) break; |
|---|
| 1305 |
} |
|---|
| 1306 |
|
|---|
| 1307 |
//stderr.writefln("Converged in %s iterations for %s mult.", iter, mul); |
|---|
| 1308 |
} |
|---|
| 1309 |
|
|---|
| 1310 |
toConvergence(1); |
|---|
| 1311 |
try { |
|---|
| 1312 |
qsort(perm, x, betas); |
|---|
| 1313 |
} catch(SortException) { |
|---|
| 1314 |
return; |
|---|
| 1315 |
} |
|---|
| 1316 |
} |
|---|
| 1317 |
|
|---|
| 1318 |
// Compute(X X')' = C. |
|---|
| 1319 |
double[][] makeC(double[][] x) { |
|---|
| 1320 |
if(x.length == 0) return null; |
|---|
| 1321 |
immutable n = x[0].length; |
|---|
| 1322 |
auto c = newStack!(double[])(n); |
|---|
| 1323 |
|
|---|
| 1324 |
// Only need lower half. |
|---|
| 1325 |
foreach(i, ref elem; c) { |
|---|
| 1326 |
elem = newStack!double(i + 1); |
|---|
| 1327 |
elem[] = 0; |
|---|
| 1328 |
} |
|---|
| 1329 |
|
|---|
| 1330 |
foreach(col; x) { |
|---|
| 1331 |
foreach(i; 0..n) { |
|---|
| 1332 |
foreach(j; i..n) { |
|---|
| 1333 |
c[j][i] += col[i] * col[j]; |
|---|
| 1334 |
} |
|---|
| 1335 |
} |
|---|
| 1336 |
} |
|---|
| 1337 |
|
|---|
| 1338 |
return c; |
|---|
| 1339 |
} |
|---|
| 1340 |
|
|---|
| 1341 |
private double[][] doCTWC(double[][] c, double[] w = null) { |
|---|
| 1342 |
auto ret = newStack!(double[])(c.length); |
|---|
| 1343 |
foreach(i, ref elem; ret) elem = newStack!double(c.length); |
|---|
| 1344 |
|
|---|
| 1345 |
double getElem(size_t i, size_t j) { |
|---|
| 1346 |
return (i >= j) ? c[i][j] : c[j][i]; |
|---|
| 1347 |
} |
|---|
| 1348 |
|
|---|
| 1349 |
foreach(i; 0..c.length) foreach(j; 0..i + 1) { |
|---|
| 1350 |
ret[i][j] = 0; |
|---|
| 1351 |
|
|---|
| 1352 |
foreach(k; 0..c.length) { |
|---|
| 1353 |
auto toAdd = getElem(i, k) * getElem(j, k); |
|---|
| 1354 |
if(w.length) toAdd *= w[k]; |
|---|
| 1355 |
ret[i][j] += toAdd; |
|---|
| 1356 |
} |
|---|
| 1357 |
} |
|---|
| 1358 |
|
|---|
| 1359 |
symmetrize(ret); |
|---|
| 1360 |
return ret; |
|---|
| 1361 |
} |
|---|
| 1362 |
|
|---|
| 1363 |
// An implementation of ridge regression for large dimension. |
|---|
| 1364 |
private void ridgeLargeP |
|---|
| 1365 |
(double[] yIn, double[][] x, double lambdaIn, double[] betas, double[] w = null) { |
|---|
| 1366 |
{ |
|---|
| 1367 |
mixin(newFrame); |
|---|
| 1368 |
auto y = tempdup(yIn); |
|---|
| 1369 |
auto c = makeC(x); |
|---|
| 1370 |
|
|---|
| 1371 |
// This scaling preserves numerical stability when the distrib. of |
|---|
| 1372 |
// x has a long tail. |
|---|
| 1373 |
auto maxElem = reduce!max(0.0, map!abs(joiner(c))); |
|---|
| 1374 |
foreach(col; c) col[] /= maxElem; |
|---|
| 1375 |
y[] /= (maxElem); |
|---|
| 1376 |
auto lambda = lambdaIn / maxElem; |
|---|
| 1377 |
auto cwc = doCTWC(c, w); |
|---|
| 1378 |
|
|---|
| 1379 |
foreach(i, row; c) foreach(j, elem; row) { |
|---|
| 1380 |
cwc[j][i] += lambda * elem; |
|---|
| 1381 |
if(i != j) cwc[i][j] += lambda * elem; |
|---|
| 1382 |
} |
|---|
| 1383 |
|
|---|
| 1384 |
// Multiply c' * y. c is symmetric, so it doesn't matter that I'm really |
|---|
| 1385 |
// multiplying the transpose. |
|---|
| 1386 |
if(w.length) y[] *= w[]; |
|---|
| 1387 |
auto cTy = newStack!double(y.length); |
|---|
| 1388 |
cTy[] = 0; |
|---|
| 1389 |
|
|---|
| 1390 |
foreach(i; 0..c.length) foreach(j; 0..c.length) { |
|---|
| 1391 |
if(i >= j) { |
|---|
| 1392 |
cTy[j] += y[i] * c[i][j]; |
|---|
| 1393 |
} else { |
|---|
| 1394 |
cTy[j] += y[i] * c[j][i]; |
|---|
| 1395 |
} |
|---|
| 1396 |
} |
|---|
| 1397 |
|
|---|
| 1398 |
solve(cwc, cTy); |
|---|
| 1399 |
|
|---|
| 1400 |
// Multiply X' * csi to get answer. |
|---|
| 1401 |
betas[] = 0; |
|---|
| 1402 |
foreach(i, col; x) { |
|---|
| 1403 |
betas[i] = dotProduct(cTy, col); |
|---|
| 1404 |
} |
|---|
| 1405 |
} |
|---|
| 1406 |
|
|---|
| 1407 |
static bool notFinite(double num) { return !isFinite(num); } |
|---|
| 1408 |
|
|---|
| 1409 |
// In a few pathological cases this algo ends up singular to machine |
|---|
| 1410 |
// precision, and NaNs or infs up in betas. Fall back to solving the |
|---|
| 1411 |
// normal equations directly. |
|---|
| 1412 |
if(filter!notFinite(betas).empty) return; |
|---|
| 1413 |
version(unittest) stderr.writeln("SINGULAR"); |
|---|
| 1414 |
ridgeFallback(yIn, x, lambdaIn, betas, w); |
|---|
| 1415 |
} |
|---|
| 1416 |
|
|---|
| 1417 |
// Used if we end up singular in ridgeLargeP. |
|---|
| 1418 |
private void ridgeFallback |
|---|
| 1419 |
(double[] yIn, double[][] x, double lambda, double[] betas, double[] w) { |
|---|
| 1420 |
mixin(newFrame); |
|---|
| 1421 |
|
|---|
| 1422 |
// Compute xTx or xT * w * x depending on whether w is null. |
|---|
| 1423 |
auto xTx = newStack!(double[])(x.length); |
|---|
| 1424 |
foreach(ref row; xTx) row = newStack!double(x.length); |
|---|
| 1425 |
|
|---|
| 1426 |
foreach(i, xi; x) foreach(j, xj; x[0..i + 1]) { |
|---|
| 1427 |
xTx[i][j] = 0; |
|---|
| 1428 |
foreach(k; 0..xi.length) { |
|---|
| 1429 |
immutable weight = (w.length) ? w[k] : 1.0; |
|---|
| 1430 |
xTx[i][j] += xi[k] * weight * xj[k]; |
|---|
| 1431 |
} |
|---|
| 1432 |
} |
|---|
| 1433 |
|
|---|
| 1434 |
symmetrize(xTx); |
|---|
| 1435 |
|
|---|
| 1436 |
double[] y; |
|---|
| 1437 |
if(w.length) { |
|---|
| 1438 |
y = tempdup(yIn); |
|---|
| 1439 |
y[] *= w[]; |
|---|
| 1440 |
} else { |
|---|
| 1441 |
y = yIn; |
|---|
| 1442 |
} |
|---|
| 1443 |
|
|---|
| 1444 |
auto xTy = newStack!double(x.length); |
|---|
| 1445 |
foreach(i, ref val; xTy) { |
|---|
| 1446 |
val = dotProduct(y, x[i]); |
|---|
| 1447 |
} |
|---|
| 1448 |
|
|---|
| 1449 |
// Add in the penalty. |
|---|
| 1450 |
foreach(i; 0..xTx.length) { |
|---|
| 1451 |
xTx[i][i] += lambda; |
|---|
| 1452 |
} |
|---|
| 1453 |
|
|---|
| 1454 |
choleskySolve(xTx, xTy, betas); |
|---|
| 1455 |
} |
|---|
| 1456 |
|
|---|
| 1457 |
unittest { |
|---|
| 1458 |
// Test ridge regression. We have three impls for all kinds of diff. |
|---|
| 1459 |
// scenarios. See if they all agree. Note that the ridiculously small but |
|---|
| 1460 |
// nonzero lasso param is to force the use of the coord descent algo. |
|---|
| 1461 |
auto y = new double[12]; |
|---|
| 1462 |
auto x = new double[][16]; |
|---|
| 1463 |
foreach(ref elem; x) elem = new double[12]; |
|---|
| 1464 |
x[0][] = 1; |
|---|
| 1465 |
auto gen = Random(31415); // For random but repeatable results. |
|---|
| 1466 |
|
|---|
| 1467 |
foreach(iter; 0..1000) { |
|---|
| 1468 |
foreach(col; x[1..$]) foreach(ref elem; col) elem = rNorm(0, 1, gen); |
|---|
| 1469 |
foreach(ref elem; y) elem = rNorm(0, 1, gen); |
|---|
| 1470 |
immutable ridge = uniform(0.1, 10.0, gen); |
|---|
| 1471 |
|
|---|
| 1472 |
auto normalEq = linearRegressBeta(y, x, ridge); |
|---|
| 1473 |
auto coordDescent = linearRegressPenalized( |
|---|
| 1474 |
y, x[1..$], double.min_normal, ridge); |
|---|
| 1475 |
auto linalgTrick = linearRegressPenalized(y, x[1..$], 0, ridge); |
|---|
| 1476 |
|
|---|
| 1477 |
// Every once in a blue moon coordinate descent doesn't converge that |
|---|
| 1478 |
// well. These small errors are of no practical significance, hence |
|---|
| 1479 |
// the wide tolerance. However, if the direct normal equations |
|---|
| 1480 |
// and linalg trick don't agree extremely closely, then something's |
|---|
| 1481 |
// fundamentally wrong. |
|---|
| 1482 |
assert(approxEqual(normalEq, coordDescent, 0.02, 1e-4), text( |
|---|
| 1483 |
normalEq, coordDescent)); |
|---|
| 1484 |
assert(approxEqual(linalgTrick, coordDescent, 0.02, 1e-4), text( |
|---|
| 1485 |
linalgTrick, coordDescent)); |
|---|
| 1486 |
assert(approxEqual(normalEq, linalgTrick, 1e-6, 1e-8), text( |
|---|
| 1487 |
normalEq, linalgTrick)); |
|---|
| 1488 |
} |
|---|
| 1489 |
|
|---|
| 1490 |
// Make sure fallback and largeP algo get same answers. |
|---|
| 1491 |
auto fb = new double[x.length]; |
|---|
| 1492 |
auto lp = new double[x.length]; |
|---|
| 1493 |
ridgeFallback(y, x, 1, fb, null); |
|---|
| 1494 |
ridgeLargeP(y, x, 1, lp); |
|---|
| 1495 |
assert(approxEqual(fb, lp)); |
|---|
| 1496 |
|
|---|
| 1497 |
// With weights. |
|---|
| 1498 |
auto w = randArray!uniform(y.length, 0.1, 1, gen); |
|---|
| 1499 |
ridgeFallback(y, x, 1, fb, w); |
|---|
| 1500 |
ridgeLargeP(y, x, 1, lp, w); |
|---|
| 1501 |
assert(approxEqual(fb, lp)); |
|---|
| 1502 |
|
|---|
| 1503 |
// Test stuff that's got some lasso in it. Values from R's Penalized |
|---|
| 1504 |
// package. |
|---|
| 1505 |
y = [1.0, 2.0, 3, 4, 5, 6, 7]; |
|---|
| 1506 |
x = [[8.0, 6, 7, 5, 3, 0, 9], |
|---|
| 1507 |
[3.0, 6, 2, 4, 3, 6, 8], |
|---|
| 1508 |
[3.0, 1, 4, 1, 5, 9, 2], |
|---|
| 1509 |
[2.0, 7, 1, 8, 2, 8, 1]]; |
|---|
| 1510 |
|
|---|
| 1511 |
assert(approxEqual(linearRegressPenalized(y, x, 1, 0), |
|---|
| 1512 |
[4.16316, -0.3603197, 0.6308278, 0, -0.2633263])); |
|---|
| 1513 |
assert(approxEqual(linearRegressPenalized(y, x, 1, 3), |
|---|
| 1514 |
[2.519590, -0.09116883, 0.38067757, 0.13122413, -0.05637939])); |
|---|
| 1515 |
assert(approxEqual(linearRegressPenalized(y, x, 2, 0.1), |
|---|
| 1516 |
[1.247235, 0, 0.4440735, 0.2023602, 0])); |
|---|
| 1517 |
assert(approxEqual(linearRegressPenalized(y, x, 5, 7), |
|---|
| 1518 |
[3.453787, 0, 0.10968736, 0.01253992, 0])); |
|---|
| 1519 |
} |
|---|
| 1520 |
|
|---|
| 1521 |
/** |
|---|
| 1522 |
Computes a logistic regression using a maximum likelihood estimator |
|---|
| 1523 |
and returns the beta coefficients. This is a generalized linear model with |
|---|
| 1524 |
the link function f(XB) = 1 / (1 + exp(XB)). This is generally used to model |
|---|
| 1525 |
the probability that a binary Y variable is 1 given a set of X variables. |
|---|
| 1526 |
|
|---|
| 1527 |
For the purpose of this function, Y variables are interpreted as Booleans, |
|---|
| 1528 |
regardless of their type. X may be either a range of ranges or a tuple of |
|---|
| 1529 |
ranges. However, note that unlike in linearRegress, they are copied to an |
|---|
| 1530 |
array if they are not random access ranges. Note that each value is accessed |
|---|
| 1531 |
several times, so if your range is a map to something expensive, you may |
|---|
| 1532 |
want to evaluate it eagerly. |
|---|
| 1533 |
|
|---|
| 1534 |
If the last parameter passed in is a numeric value instead of a range, |
|---|
| 1535 |
it is interpreted as a ridge parameter and ridge regression is performed. This |
|---|
| 1536 |
penalizes the L2 norm of the beta vector (in a scaled space) and results |
|---|
| 1537 |
in more parsimonious models. It limits the usefulness of inference techniques |
|---|
| 1538 |
(p-values, confidence intervals), however, and is therefore not offered |
|---|
| 1539 |
in logisticRegres(). |
|---|
| 1540 |
|
|---|
| 1541 |
If no ridge parameter is passed, or equivalenty if the ridge parameter is |
|---|
| 1542 |
zero, then ordinary maximum likelihood regression is performed. |
|---|
| 1543 |
|
|---|
| 1544 |
Note that, while this implementation of ridge regression was tested against |
|---|
| 1545 |
the R Design Package implementation, it uses slightly different conventions |
|---|
| 1546 |
that make the results not comparable without transformation. dstats uses a |
|---|
| 1547 |
biased estimate of the variance to scale the beta vector penalties, while |
|---|
| 1548 |
Design uses an unbiased estimate. Furthermore, Design penalizes by 1/2 of the |
|---|
| 1549 |
L2 norm, whereas dstats penalizes by the L2 norm. Therefore, if n is the |
|---|
| 1550 |
sample size, and lambda is the penalty used with dstats, the proper penalty |
|---|
| 1551 |
to use in Design to get the same results is 2 * (n - 1) * lambda / n. |
|---|
| 1552 |
|
|---|
| 1553 |
Also note that, as in linearRegress, repeat(1) can be used for the intercept |
|---|
| 1554 |
term. |
|---|
| 1555 |
|
|---|
| 1556 |
Returns: The beta coefficients for the regression model. |
|---|
| 1557 |
|
|---|
| 1558 |
References: |
|---|
| 1559 |
|
|---|
| 1560 |
http://en.wikipedia.org/wiki/Logistic_regression |
|---|
| 1561 |
|
|---|
| 1562 |
http://socserv.mcmaster.ca/jfox/Courses/UCLA/logistic-regression-notes.pdf |
|---|
| 1563 |
|
|---|
| 1564 |
S. Le Cessie and J. C. Van Houwelingen. Ridge Estimators in Logistic |
|---|
| 1565 |
Regression. Journal of the Royal Statistical Society. Series C |
|---|
| 1566 |
(Applied Statistics), Vol. 41, No. 1(1992), pp. 191-201 |
|---|
| 1567 |
|
|---|
| 1568 |
Frank E Harrell Jr (2009). Design: Design Package. R package version 2.3-0. |
|---|
| 1569 |
http://CRAN.R-project.org/package=Design |
|---|
| 1570 |
*/ |
|---|
| 1571 |
double[] logisticRegressBeta(T, U...)(T yIn, U xRidge) { |
|---|
| 1572 |
static if(isFloatingPoint!(U[$ - 1]) || isIntegral!(U[$ - 1])) { |
|---|
| 1573 |
alias xRidge[$ - 1] ridge; |
|---|
| 1574 |
alias xRidge[0..$ - 1] xIn; |
|---|
| 1575 |
} else { |
|---|
| 1576 |
enum double ridge = 0.0; |
|---|
| 1577 |
alias xRidge xIn; |
|---|
| 1578 |
} |
|---|
| 1579 |
|
|---|
| 1580 |
return logisticRegressImpl(false, ridge, yIn, xIn).betas; |
|---|
| 1581 |
} |
|---|
| 1582 |
|
|---|
| 1583 |
/** |
|---|
| 1584 |
Plain old data struct to hold the results of a logistic regression. |
|---|
| 1585 |
*/ |
|---|
| 1586 |
struct LogisticRes { |
|---|
| 1587 |
/**The coefficients, one for each range in X. These will be in the order |
|---|
| 1588 |
* that the X ranges were passed in.*/ |
|---|
| 1589 |
double[] betas; |
|---|
| 1590 |
|
|---|
| 1591 |
/**The standard error terms of the X ranges passed in.*/ |
|---|
| 1592 |
double[] stdErr; |
|---|
| 1593 |
|
|---|
| 1594 |
/** |
|---|
| 1595 |
The Wald lower confidence bounds of the beta terms, at the confidence level |
|---|
| 1596 |
specificied. (Default 0.95).*/ |
|---|
| 1597 |
double[] lowerBound; |
|---|
| 1598 |
|
|---|
| 1599 |
/** |
|---|
| 1600 |
The Wald upper confidence bounds of the beta terms, at the confidence level |
|---|
| 1601 |
specificied. (Default 0.95).*/ |
|---|
| 1602 |
double[] upperBound; |
|---|
| 1603 |
|
|---|
| 1604 |
/** |
|---|
| 1605 |
The P-value for the alternative that the corresponding beta value is |
|---|
| 1606 |
different from zero against the null that it is equal to zero. These |
|---|
| 1607 |
are calculated using the Wald Test.*/ |
|---|
| 1608 |
double[] p; |
|---|
| 1609 |
|
|---|
| 1610 |
/** |
|---|
| 1611 |
The log likelihood for the null model. |
|---|
| 1612 |
*/ |
|---|
| 1613 |
double nullLogLikelihood; |
|---|
| 1614 |
|
|---|
| 1615 |
/** |
|---|
| 1616 |
The log likelihood for the model fit. |
|---|
| 1617 |
*/ |
|---|
| 1618 |
double logLikelihood; |
|---|
| 1619 |
|
|---|
| 1620 |
/** |
|---|
| 1621 |
Akaike Information Criterion, which is a complexity-penalized goodness- |
|---|
| 1622 |
of-fit score, equal to 2 * k - 2 log(L) where L is the log likelihood and |
|---|
| 1623 |
k is the number of parameters. |
|---|
| 1624 |
*/ |
|---|
| 1625 |
double aic() const pure nothrow @property @safe { |
|---|
| 1626 |
return 2 * (betas.length - logLikelihood); |
|---|
| 1627 |
} |
|---|
| 1628 |
|
|---|
| 1629 |
/** |
|---|
| 1630 |
The P-value for the model as a whole, based on the likelihood ratio test. |
|---|
| 1631 |
The null here is that the model has no predictive value, the alternative |
|---|
| 1632 |
is that it does have predictive value.*/ |
|---|
| 1633 |
double overallP; |
|---|
| 1634 |
|
|---|
| 1635 |
// Just used internally. |
|---|
| 1636 |
private static string arrStr(T)(T arr) { |
|---|
| 1637 |
return text(arr)[1..$ - 1]; |
|---|
| 1638 |
} |
|---|
| 1639 |
|
|---|
| 1640 |
/**Print out the results in the default format.*/ |
|---|
| 1641 |
string toString() { |
|---|
| 1642 |
return "Betas: " ~ arrStr(betas) ~ "\nLower Conf. Int.: " ~ |
|---|
| 1643 |
arrStr(lowerBound) ~ "\nUpper Conf. Int.: " ~ arrStr(upperBound) ~ |
|---|
| 1644 |
"\nStd. Err: " ~ arrStr(stdErr) ~ "\nP Values: " ~ arrStr(p) ~ |
|---|
| 1645 |
"\nNull Log Likelihood: " ~ text(nullLogLikelihood) ~ |
|---|
| 1646 |
"\nLog Likelihood: " ~ text(logLikelihood) ~ |
|---|
| 1647 |
"\nAIC: " ~ text(aic) ~ |
|---|
| 1648 |
"\nOverall P: " ~ text(overallP); |
|---|
| 1649 |
} |
|---|
| 1650 |
} |
|---|
| 1651 |
|
|---|
| 1652 |
/** |
|---|
| 1653 |
Similar to logisticRegressBeta, but returns a LogisticRes with useful stuff for |
|---|
| 1654 |
statistical inference. If the last element of input is a floating point |
|---|
| 1655 |
number instead of a range, it is used to specify the confidence interval |
|---|
| 1656 |
calculated. Otherwise, the default of 0.95 is used. |
|---|
| 1657 |
|
|---|
| 1658 |
References: |
|---|
| 1659 |
|
|---|
| 1660 |
http://en.wikipedia.org/wiki/Wald_test |
|---|
| 1661 |
http://en.wikipedia.org/wiki/Akaike_information_criterion |
|---|
| 1662 |
*/ |
|---|
| 1663 |
LogisticRes logisticRegress(T, V...)(T yIn, V input) { |
|---|
| 1664 |
return logisticRegressImpl!(T, V)(true, 0, yIn, input); |
|---|
| 1665 |
} |
|---|
| 1666 |
|
|---|
| 1667 |
unittest { |
|---|
| 1668 |
// Values from R. Confidence intervals from confint.default(). |
|---|
| 1669 |
// R doesn't automatically calculate likelihood ratio P-value, and reports |
|---|
| 1670 |
// deviations instead of log likelihoods. Deviations are just |
|---|
| 1671 |
// -2 * likelihood. |
|---|
| 1672 |
alias approxEqual ae; // Save typing. |
|---|
| 1673 |
|
|---|
| 1674 |
// Start with the basics, with X as a ror. |
|---|
| 1675 |
auto y1 = [1, 0, 0, 0, 1, 0, 0]; |
|---|
| 1676 |
auto x1 = [[1.0, 1 ,1 ,1 ,1 ,1 ,1], |
|---|
| 1677 |
[8.0, 6, 7, 5, 3, 0, 9]]; |
|---|
| 1678 |
auto res1 = logisticRegress(y1, x1); |
|---|
| 1679 |
assert(ae(res1.betas[0], -0.98273)); |
|---|
| 1680 |
assert(ae(res1.betas[1], 0.01219)); |
|---|
| 1681 |
assert(ae(res1.stdErr[0], 1.80803)); |
|---|
| 1682 |
assert(ae(res1.stdErr[1], 0.29291)); |
|---|
| 1683 |
assert(ae(res1.p[0], 0.587)); |
|---|
| 1684 |
assert(ae(res1.p[1], 0.967)); |
|---|
| 1685 |
assert(ae(res1.aic, 12.374)); |
|---|
| 1686 |
assert(ae(res1.logLikelihood, -0.5 * 8.3758)); |
|---|
| 1687 |
assert(ae(res1.nullLogLikelihood, -0.5 * 8.3740)); |
|---|
| 1688 |
assert(ae(res1.lowerBound[0], -4.5264052)); |
|---|
| 1689 |
assert(ae(res1.lowerBound[1], -0.5618933)); |
|---|
| 1690 |
assert(ae(res1.upperBound[0], 2.560939)); |
|---|
| 1691 |
assert(ae(res1.upperBound[1], 0.586275)); |
|---|
| 1692 |
|
|---|
| 1693 |
// Use tuple. |
|---|
| 1694 |
auto y2 = [1,0,1,1,0,1,0,0,0,1,0,1]; |
|---|
| 1695 |
auto x2_1 = [3,1,4,1,5,9,2,6,5,3,5,8]; |
|---|
| 1696 |
auto x2_2 = [2,7,1,8,2,8,1,8,2,8,4,5]; |
|---|
| 1697 |
auto res2 = logisticRegress(y2, repeat(1), x2_1, x2_2); |
|---|
| 1698 |
assert(ae(res2.betas[0], -1.1875)); |
|---|
| 1699 |
assert(ae(res2.betas[1], 0.1021)); |
|---|
| 1700 |
assert(ae(res2.betas[2], 0.1603)); |
|---|
| 1701 |
assert(ae(res2.stdErr[0], 1.5430)); |
|---|
| 1702 |
assert(ae(res2.stdErr[1], 0.2507)); |
|---|
| 1703 |
assert(ae(res2.stdErr[2], 0.2108)); |
|---|
| 1704 |
assert(ae(res2.p[0], 0.442)); |
|---|
| 1705 |
assert(ae(res2.p[1], 0.684)); |
|---|
| 1706 |
assert(ae(res2.p[2], 0.447)); |
|---|
| 1707 |
assert(ae(res2.aic, 21.81)); |
|---|
| 1708 |
assert(ae(res2.nullLogLikelihood, -0.5 * 16.636)); |
|---|
| 1709 |
assert(ae(res2.logLikelihood, -0.5 * 15.810)); |
|---|
| 1710 |
assert(ae(res2.lowerBound[0], -4.2116584)); |
|---|
| 1711 |
assert(ae(res2.lowerBound[1], -0.3892603)); |
|---|
| 1712 |
assert(ae(res2.lowerBound[2], -0.2528110)); |
|---|
| 1713 |
assert(ae(res2.upperBound[0], 1.8366823)); |
|---|
| 1714 |
assert(ae(res2.upperBound[1], 0.5934631)); |
|---|
| 1715 |
assert(ae(res2.upperBound[2], 0.5733693)); |
|---|
| 1716 |
|
|---|
| 1717 |
auto x2Intercept = [1,1,1,1,1,1,1,1,1,1,1,1]; |
|---|
| 1718 |
auto res2a = logisticRegress(y2, |
|---|
| 1719 |
filter!"a.length"([x2Intercept, x2_1, x2_2])); |
|---|
| 1720 |
foreach(ti, elem; res2a.tupleof) { |
|---|
| 1721 |
assert(ae(elem, res2.tupleof[ti])); |
|---|
| 1722 |
} |
|---|
| 1723 |
|
|---|
| 1724 |
// Use a huge range of values to test numerical stability. |
|---|
| 1725 |
|
|---|
| 1726 |
// The filter is to make y3 a non-random access range. |
|---|
| 1727 |
auto y3 = filter!"a < 2"([1,1,1,1,0,0,0,0]); |
|---|
| 1728 |
auto x3_1 = filter!"a > 0"([1, 1e10, 2, 2e10, 3, 3e15, 4, 4e7]); |
|---|
| 1729 |
auto x3_2 = [1e8, 1e6, 1e7, 1e5, 1e3, 1e0, 1e9, 1e11]; |
|---|
| 1730 |
auto x3_3 = [-5e12, 5e2, 6e5, 4e3, -999999, -666, -3e10, -2e10]; |
|---|
| 1731 |
auto res3 = logisticRegress(y3, repeat(1), x3_1, x3_2, x3_3, 0.99); |
|---|
| 1732 |
assert(ae(res3.betas[0], 1.115e0)); |
|---|
| 1733 |
assert(ae(res3.betas[1], -4.674e-15)); |
|---|
| 1734 |
assert(ae(res3.betas[2], -7.026e-9)); |
|---|
| 1735 |
assert(ae(res3.betas[3], -2.109e-12)); |
|---|
| 1736 |
assert(ae(res3.stdErr[0], 1.158)); |
|---|
| 1737 |
assert(ae(res3.stdErr[1], 2.098e-13)); |
|---|
| 1738 |
assert(ae(res3.stdErr[2], 1.878e-8)); |
|---|
| 1739 |
assert(ae(res3.stdErr[3], 4.789e-11)); |
|---|
| 1740 |
assert(ae(res3.p[0], 0.336)); |
|---|
| 1741 |
assert(ae(res3.p[1], 0.982)); |
|---|
| 1742 |
assert(ae(res3.p[2], 0.708)); |
|---|
| 1743 |
assert(ae(res3.p[3], 0.965)); |
|---|
| 1744 |
assert(ae(res3.aic, 12.544)); |
|---|
| 1745 |
assert(ae(res3.nullLogLikelihood, -0.5 * 11.0904)); |
|---|
| 1746 |
assert(ae(res3.logLikelihood, -0.5 * 4.5442)); |
|---|
| 1747 |
// Not testing confidence intervals b/c they'd be so buried in numerical |
|---|
| 1748 |
// fuzz. |
|---|
| 1749 |
|
|---|
| 1750 |
|
|---|
| 1751 |
// Test with a just plain huge dataset that R chokes for several minutes |
|---|
| 1752 |
// on. If you think this unittest is slow, try getting the reference |
|---|
| 1753 |
// values from R. |
|---|
| 1754 |
auto y4 = chain( |
|---|
| 1755 |
take(cycle([0,0,0,0,1]), 500_000), |
|---|
| 1756 |
take(cycle([1,1,1,1,0]), 500_000)); |
|---|
| 1757 |
auto x4_1 = iota(0, 1_000_000); |
|---|
| 1758 |
auto x4_2 = array(map!(compose!(exp, "a / 1_000_000.0"))(x4_1)); |
|---|
| 1759 |
auto x4_3 = take(cycle([1,2,3,4,5]), 1_000_000); |
|---|
| 1760 |
auto x4_4 = take(cycle([8,6,7,5,3,0,9]), 1_000_000); |
|---|
| 1761 |
auto res4 = logisticRegress(y4, repeat(1), x4_1, x4_2, x4_3, x4_4, 0.99); |
|---|
| 1762 |
assert(ae(res4.betas[0], -1.574)); |
|---|
| 1763 |
assert(ae(res4.betas[1], 5.625e-6)); |
|---|
| 1764 |
assert(ae(res4.betas[2], -7.282e-1)); |
|---|
| 1765 |
assert(ae(res4.betas[3], -4.381e-6)); |
|---|
| 1766 |
assert(ae(res4.betas[4], -8.343e-6)); |
|---|
| 1767 |
assert(ae(res4.stdErr[0], 3.693e-2)); |
|---|
| 1768 |
assert(ae(res4.stdErr[1], 7.188e-8)); |
|---|
| 1769 |
assert(ae(res4.stdErr[2], 4.208e-2)); |
|---|
| 1770 |
assert(ae(res4.stdErr[3], 1.658e-3)); |
|---|
| 1771 |
assert(ae(res4.stdErr[4], 8.164e-4)); |
|---|
| 1772 |
assert(ae(res4.p[0], 0)); |
|---|
| 1773 |
assert(ae(res4.p[1], 0)); |
|---|
| 1774 |
assert(ae(res4.p[2], 0)); |
|---|
| 1775 |
assert(ae(res4.p[3], 0.998)); |
|---|
| 1776 |
assert(ae(res4.p[4], 0.992)); |
|---|
| 1777 |
assert(ae(res4.aic, 1089339)); |
|---|
| 1778 |
assert(ae(res4.nullLogLikelihood, -0.5 * 1386294)); |
|---|
| 1779 |
assert(ae(res4.logLikelihood, -0.5 * 1089329)); |
|---|
| 1780 |
assert(ae(res4.lowerBound[0], -1.668899)); |
|---|
| 1781 |
assert(ae(res4.lowerBound[1], 5.439787e-6)); |
|---|
| 1782 |
assert(ae(res4.lowerBound[2], -0.8366273)); |
|---|
| 1783 |
assert(ae(res4.lowerBound[3], -4.27406e-3)); |
|---|
| 1784 |
assert(ae(res4.lowerBound[4], -2.111240e-3)); |
|---|
| 1785 |
assert(ae(res4.upperBound[0], -1.478623)); |
|---|
| 1786 |
assert(ae(res4.upperBound[1], 5.810089e-6)); |
|---|
| 1787 |
assert(ae(res4.upperBound[2], -6.198418e-1)); |
|---|
| 1788 |
assert(ae(res4.upperBound[3], 4.265302e-3)); |
|---|
| 1789 |
assert(ae(res4.upperBound[4], 2.084554e-3)); |
|---|
| 1790 |
|
|---|
| 1791 |
// Test ridge stuff. |
|---|
| 1792 |
auto ridge2 = logisticRegressBeta(y2, repeat(1), x2_1, x2_2, 3); |
|---|
| 1793 |
assert(ae(ridge2[0], -0.40279319)); |
|---|
| 1794 |
assert(ae(ridge2[1], 0.03575638)); |
|---|
| 1795 |
assert(ae(ridge2[2], 0.05313875)); |
|---|
| 1796 |
|
|---|
| 1797 |
auto ridge2_2 = logisticRegressBeta(y2, repeat(1), x2_1, x2_2, 2); |
|---|
| 1798 |
assert(ae(ridge2_2[0], -0.51411490)); |
|---|
| 1799 |
assert(ae(ridge2_2[1], 0.04536590)); |
|---|
| 1800 |
assert(ae(ridge2_2[2], 0.06809964)); |
|---|
| 1801 |
} |
|---|
| 1802 |
|
|---|
| 1803 |
/// The logistic function used in logistic regression. |
|---|
| 1804 |
double logistic(double xb) pure nothrow @safe { |
|---|
| 1805 |
return 1.0 / (1 + exp(-xb)); |
|---|
| 1806 |
} |
|---|
| 1807 |
|
|---|
| 1808 |
/** |
|---|
| 1809 |
Performs lasso (L1) and/or ridge (L2) penalized logistic regression. Due to the |
|---|
| 1810 |
way the data is standardized, no intercept term should be included in x |
|---|
| 1811 |
(unlike logisticRegress and logisticRegressBeta). The intercept coefficient is |
|---|
| 1812 |
implicitly included and returned in the first element of the returned array. |
|---|
| 1813 |
Usage is otherwise identical. |
|---|
| 1814 |
|
|---|
| 1815 |
Note: Setting lasso equal to zero is equivalent to performing ridge regression. |
|---|
| 1816 |
This can also be done with logisticRegressBeta. However, the |
|---|
| 1817 |
logisticRegressBeta algorithm is optimized for memory efficiency and |
|---|
| 1818 |
large samples. This algorithm is optimized for large feature sets. |
|---|
| 1819 |
|
|---|
| 1820 |
Returns: The beta coefficients for the regression model. |
|---|
| 1821 |
|
|---|
| 1822 |
References: |
|---|
| 1823 |
|
|---|
| 1824 |
Friedman J, et al Pathwise coordinate optimization. Ann. Appl. Stat. |
|---|
| 1825 |
2007;2:302-332. |
|---|
| 1826 |
|
|---|
| 1827 |
Goeman, J. J., L1 penalized estimation in the Cox proportional hazards model. |
|---|
| 1828 |
Biometrical Journal 52(1), 70{84. |
|---|
| 1829 |
|
|---|
| 1830 |
Eilers, P., Boer, J., Van Ommen, G., Van Houwelingen, H. 2001 Classification of |
|---|
| 1831 |
microarray data with penalized logistic regression. Proceedings of SPIE. |
|---|
| 1832 |
Progress in Biomedical Optics and Images vol. 4266, pp. 187-198 |
|---|
| 1833 |
*/ |
|---|
| 1834 |
double[] logisticRegressPenalized(Y, X...) |
|---|
| 1835 |
(Y yIn, X xIn, double lasso, double ridge) { |
|---|
| 1836 |
mixin(newFrame); |
|---|
| 1837 |
|
|---|
| 1838 |
static assert(!isInfinite!Y, "Can't do regression with infinite # of Y's."); |
|---|
| 1839 |
static if(isRandomAccessRange!Y) { |
|---|
| 1840 |
alias yIn y; |
|---|
| 1841 |
} else { |
|---|
| 1842 |
auto y = toBools(yIn); |
|---|
| 1843 |
} |
|---|
| 1844 |
|
|---|
| 1845 |
static if(X.length == 1 && isRoR!X) { |
|---|
| 1846 |
enum bool tupleMode = false; |
|---|
| 1847 |
static if(isForwardRange!X) { |
|---|
| 1848 |
auto x = toRandomAccessRoR(y.length, xIn); |
|---|
| 1849 |
} else { |
|---|
| 1850 |
auto x = toRandomAccessRoR(y.length, tempdup(xIn)); |
|---|
| 1851 |
} |
|---|
| 1852 |
} else { |
|---|
| 1853 |
enum bool tupleMode = true; |
|---|
| 1854 |
auto x = toRandomAccessTuple(xIn).expand; |
|---|
| 1855 |
} |
|---|
| 1856 |
|
|---|
| 1857 |
auto betas = new double[x.length + 1]; |
|---|
| 1858 |
if(y.length >= x.length && lasso == 0) { |
|---|
| 1859 |
// Add intercept term. |
|---|
| 1860 |
static if(tupleMode) { |
|---|
| 1861 |
doMLENewton(betas, (double[]).init, ridge, y, repeat(1), x); |
|---|
| 1862 |
} else static if(is(x == double[][])) { |
|---|
| 1863 |
auto xInt = newStack!(double[])(betas.length); |
|---|
| 1864 |
xInt[1..$] = x[]; |
|---|
| 1865 |
xInt[0] = tempdup(replicate(1.0, y.length)); |
|---|
| 1866 |
doMLENewton(betas, (double[]).init, ridge, y, xInt); |
|---|
| 1867 |
} else { |
|---|
| 1868 |
// No choice but to dup the whole thing. |
|---|
| 1869 |
auto xInt = newStack!(double[])(betas.length); |
|---|
| 1870 |
xInt[0] = tempdup(replicate(1.0, y.length)); |
|---|
| 1871 |
|
|---|
| 1872 |
foreach(i, ref arr; xInt[1..$]) { |
|---|
| 1873 |
arr = tempdup( |
|---|
| 1874 |
map!(to!double)(take(x[i], y.length)) |
|---|
| 1875 |
); |
|---|
| 1876 |
} |
|---|
| 1877 |
|
|---|
| 1878 |
doMLENewton(betas, (double[]).init, ridge, y, xInt); |
|---|
| 1879 |
} |
|---|
| 1880 |
|
|---|
| 1881 |
} else { |
|---|
| 1882 |
logisticRegressPenalizedImpl(betas, lasso, ridge, y, x); |
|---|
| 1883 |
} |
|---|
| 1884 |
|
|---|
| 1885 |
return betas; |
|---|
| 1886 |
} |
|---|
| 1887 |
|
|---|
| 1888 |
unittest { |
|---|
| 1889 |
// Test ridge regression. We have three impls for all kinds of diff. |
|---|
| 1890 |
// scenarios. See if they all agree. Note that the ridiculously small but |
|---|
| 1891 |
// nonzero lasso param is to force the use of the coord descent algo. |
|---|
| 1892 |
auto y = new bool[12]; |
|---|
| 1893 |
auto x = new double[][16]; |
|---|
| 1894 |
foreach(ref elem; x) elem = new double[12]; |
|---|
| 1895 |
x[0][] = 1; |
|---|
| 1896 |
auto gen = Random(31415); // For random but repeatable results. |
|---|
| 1897 |
|
|---|
| 1898 |
foreach(iter; 0..1000) { |
|---|
| 1899 |
foreach(col; x[1..$]) foreach(ref elem; col) elem = rNorm(0, 1, gen); |
|---|
| 1900 |
|
|---|
| 1901 |
// Nothing will converge if y is all true's or all false's. |
|---|
| 1902 |
size_t trueCount; |
|---|
| 1903 |
do { |
|---|
| 1904 |
foreach(ref elem; y) elem = cast(bool) rBernoulli(0.5, gen); |
|---|
| 1905 |
trueCount = count!"a"(y); |
|---|
| 1906 |
} while(trueCount == 0 || trueCount == y.length); |
|---|
| 1907 |
|
|---|
| 1908 |
immutable ridge = uniform(0.1, 10.0, gen); |
|---|
| 1909 |
|
|---|
| 1910 |
auto normalEq = logisticRegressBeta(y, x, ridge); |
|---|
| 1911 |
auto coordDescent = logisticRegressPenalized( |
|---|
| 1912 |
y, x[1..$], double.min_normal, ridge); |
|---|
| 1913 |
auto linalgTrick = logisticRegressPenalized(y, x[1..$], 0, ridge); |
|---|
| 1914 |
|
|---|
| 1915 |
// Every once in a blue moon coordinate descent doesn't converge that |
|---|
| 1916 |
// well. These small errors are of no practical significance, hence |
|---|
| 1917 |
// the wide tolerance. However, if the direct normal equations |
|---|
| 1918 |
// and linalg trick don't agree extremely closely, then something's |
|---|
| 1919 |
// fundamentally wrong. |
|---|
| 1920 |
assert(approxEqual(normalEq, coordDescent, 0.02, 1e-4), text( |
|---|
| 1921 |
normalEq, coordDescent)); |
|---|
| 1922 |
assert(approxEqual(linalgTrick, coordDescent, 0.02, 1e-4), text( |
|---|
| 1923 |
linalgTrick, coordDescent)); |
|---|
| 1924 |
assert(approxEqual(normalEq, linalgTrick, 1e-6, 1e-8), text( |
|---|
| 1925 |
normalEq, linalgTrick)); |
|---|
| 1926 |
} |
|---|
| 1927 |
|
|---|
| 1928 |
assert(approxEqual(logisticRegressBeta(y, x[0], x[1], x[2]), |
|---|
| 1929 |
logisticRegressPenalized(y, x[1], x[2], 0, 0))); |
|---|
| 1930 |
assert(approxEqual(logisticRegressBeta(y, [x[0], x[1], x[2]]), |
|---|
| 1931 |
logisticRegressPenalized(y, [x[1], x[2]], 0, 0))); |
|---|
| 1932 |
assert(approxEqual(logisticRegressBeta(y, [x[0], x[1], x[2]]), |
|---|
| 1933 |
logisticRegressPenalized(y, |
|---|
| 1934 |
[to!(float[])(x[1]), to!(float[])(x[2])], 0, 0))); |
|---|
| 1935 |
|
|---|
| 1936 |
// Make sure the adding intercept stuff is right for the Newton path. |
|---|
| 1937 |
//assert(logisticRegressBeta(x[0], x[1], x[2]) == |
|---|
| 1938 |
|
|---|
| 1939 |
// Test stuff that's got some lasso in it. Values from R's Penalized |
|---|
| 1940 |
// package. |
|---|
| 1941 |
y = [1, 0, 0, 1, 1, 1, 0]; |
|---|
| 1942 |
x = [[8.0, 6, 7, 5, 3, 0, 9], |
|---|
| 1943 |
[3.0, 6, 2, 4, 3, 6, 8], |
|---|
| 1944 |
[3.0, 1, 4, 1, 5, 9, 2], |
|---|
| 1945 |
[2.0, 7, 1, 8, 2, 8, 1]]; |
|---|
| 1946 |
|
|---|
| 1947 |
// Values from R's Penalized package. Note that it uses a convention for |
|---|
| 1948 |
// the ridge parameter such that Penalized ridge = 2 * dstats ridge. |
|---|
| 1949 |
assert(approxEqual(logisticRegressPenalized(y, x, 1, 0), |
|---|
| 1950 |
[1.642080, -0.22086515, -0.02587546, 0.00000000, 0.00000000 ])); |
|---|
| 1951 |
assert(approxEqual(logisticRegressPenalized(y, x, 1, 3), |
|---|
| 1952 |
[0.5153373, -0.04278257, -0.00888014, 0.01316831, 0.00000000])); |
|---|
| 1953 |
assert(approxEqual(logisticRegressPenalized(y, x, 2, 0.1), |
|---|
| 1954 |
[0.2876821, 0, 0., 0., 0])); |
|---|
| 1955 |
assert(approxEqual(logisticRegressPenalized(y, x, 1.2, 7), |
|---|
| 1956 |
[0.367613 , -0.017227631, 0.000000000, 0.003875104, 0.000000000])); |
|---|
| 1957 |
} |
|---|
| 1958 |
|
|---|
| 1959 |
// Scheduled for deprecation. This was a terrble name choice. |
|---|
| 1960 |
alias logistic inverseLogit; |
|---|
| 1961 |
|
|---|
| 1962 |
private: |
|---|
| 1963 |
double absMax(double a, double b) { |
|---|
| 1964 |
return max(abs(a), abs(b)); |
|---|
| 1965 |
} |
|---|
| 1966 |
|
|---|
| 1967 |
LogisticRes logisticRegressImpl(T, V...) |
|---|
| 1968 |
(bool inference, double ridge, T yIn, V input) { |
|---|
| 1969 |
mixin(newFrame); |
|---|
| 1970 |
|
|---|
| 1971 |
static if(isFloatingPoint!(V[$ - 1])) { |
|---|
| 1972 |
alias input[$ - 1] conf; |
|---|
| 1973 |
alias V[0..$ - 1] U; |
|---|
| 1974 |
alias input[0..$ - 1] xIn; |
|---|
| 1975 |
enforceConfidence(conf); |
|---|
| 1976 |
} else { |
|---|
| 1977 |
alias V U; |
|---|
| 1978 |
alias input xIn; |
|---|
| 1979 |
enum conf = 0.95; |
|---|
| 1980 |
} |
|---|
| 1981 |
|
|---|
| 1982 |
static assert(!isInfinite!T, "Can't do regression with infinite # of Y's."); |
|---|
| 1983 |
static if(isRandomAccessRange!T) { |
|---|
| 1984 |
alias yIn y; |
|---|
| 1985 |
} else { |
|---|
| 1986 |
auto y = toBools(yIn); |
|---|
| 1987 |
} |
|---|
| 1988 |
|
|---|
| 1989 |
static if(U.length == 1 && isRoR!U) { |
|---|
| 1990 |
static if(isForwardRange!U) { |
|---|
| 1991 |
auto x = toRandomAccessRoR(y.length, xIn); |
|---|
| 1992 |
} else { |
|---|
| 1993 |
auto x = toRandomAccessRoR(y.length, tempdup(xIn)); |
|---|
| 1994 |
} |
|---|
| 1995 |
} else { |
|---|
| 1996 |
auto x = toRandomAccessTuple(xIn).expand; |
|---|
| 1997 |
} |
|---|
| 1998 |
|
|---|
| 1999 |
typeof(return) ret; |
|---|
| 2000 |
ret.betas.length = x.length; |
|---|
| 2001 |
if(inference) ret.stdErr.length = x.length; |
|---|
| 2002 |
ret.logLikelihood = doMLENewton(ret.betas, ret.stdErr, ridge, y, x); |
|---|
| 2003 |
|
|---|
| 2004 |
if(!inference) return ret; |
|---|
| 2005 |
|
|---|
| 2006 |
static bool hasNaNs(R)(R range) { |
|---|
| 2007 |
return !filter!isNaN(range).empty; |
|---|
| 2008 |
} |
|---|
| 2009 |
|
|---|
| 2010 |
if(isNaN(ret.logLikelihood) || hasNaNs(ret.betas) || hasNaNs(ret.stdErr)) { |
|---|
| 2011 |
// Then we didn't converge or our data was defective. |
|---|
| 2012 |
return ret; |
|---|
| 2013 |
} |
|---|
| 2014 |
|
|---|
| 2015 |
ret.nullLogLikelihood = .priorLikelihood(y); |
|---|
| 2016 |
double lratio = ret.logLikelihood - ret.nullLogLikelihood; |
|---|
| 2017 |
|
|---|
| 2018 |
// Compensate for numerical fuzz. |
|---|
| 2019 |
if(lratio < 0 && lratio > -1e-5) lratio = 0; |
|---|
| 2020 |
if(lratio > 0) { |
|---|
| 2021 |
ret.overallP = chiSquareCDFR(2 * lratio, x.length - 1); |
|---|
| 2022 |
} |
|---|
| 2023 |
|
|---|
| 2024 |
ret.p.length = x.length; |
|---|
| 2025 |
ret.lowerBound.length = x.length; |
|---|
| 2026 |
ret.upperBound.length = x.length; |
|---|
| 2027 |
immutable nDev = -invNormalCDF((1 - conf) / 2); |
|---|
| 2028 |
foreach(i; 0..x.length) { |
|---|
| 2029 |
ret.p[i] = 2 * normalCDF(-abs(ret.betas[i]) / ret.stdErr[i]); |
|---|
| 2030 |
ret.lowerBound[i] = ret.betas[i] - nDev * ret.stdErr[i]; |
|---|
| 2031 |
ret.upperBound[i] = ret.betas[i] + nDev * ret.stdErr[i]; |
|---|
| 2032 |
} |
|---|
| 2033 |
|
|---|
| 2034 |
return ret; |
|---|
| 2035 |
} |
|---|
| 2036 |
|
|---|
| 2037 |
private void logisticRegressPenalizedImpl(Y, X...) |
|---|
| 2038 |
(double[] betas, double lasso, double ridge, Y y, X xIn) { |
|---|
| 2039 |
static if(isRoR!(X[0]) && X.length == 1) { |
|---|
| 2040 |
alias xIn[0] x; |
|---|
| 2041 |
} else { |
|---|
| 2042 |
alias xIn x; |
|---|
| 2043 |
} |
|---|
| 2044 |
|
|---|
| 2045 |
auto ps = newStack!double(y.length); |
|---|
| 2046 |
betas[] = 0; |
|---|
| 2047 |
|
|---|
| 2048 |
mixin(newFrame); |
|---|
| 2049 |
auto betasRaw = tempdup(betas[1..$]); |
|---|
| 2050 |
|
|---|
| 2051 |
double oldLikelihood = -double.infinity; |
|---|
| 2052 |
double oldPenalty2 = double.infinity; |
|---|
| 2053 |
double oldPenalty1 = double.infinity; |
|---|
| 2054 |
enum eps = 1e-6; |
|---|
| 2055 |
enum maxIter = 1000; |
|---|
| 2056 |
|
|---|
| 2057 |
auto weights = newStack!double(y.length); |
|---|
| 2058 |
auto z = newStack!double(y.length); |
|---|
| 2059 |
auto xMeans = newStack!double(x.length); |
|---|
| 2060 |
auto xSds = newStack!double(x.length); |
|---|
| 2061 |
double zMean = 0; |
|---|
| 2062 |
auto xCenterScale = newStack!(double[])(x.length); |
|---|
| 2063 |
foreach(ref col; xCenterScale) col = newStack!double(y.length); |
|---|
| 2064 |
|
|---|
| 2065 |
// Puts x in xCenterScale, with weighted mean subtracted and weighted |
|---|
| 2066 |
// biased stdev divided. Also standardizes z similarly. |
|---|
| 2067 |
// |
|---|
| 2068 |
// Returns: true if successful, false if weightSum is so small that the |
|---|
| 2069 |
// algorithm has converged for all practical purposes. |
|---|
| 2070 |
bool doCenterScale() { |
|---|
| 2071 |
immutable weightSum = sum(weights); |
|---|
| 2072 |
if(weightSum < eps) return false; |
|---|
| 2073 |
|
|---|
| 2074 |
xMeans[] = 0; |
|---|
| 2075 |
zMean = 0; |
|---|
| 2076 |
xSds[] = 0; |
|---|
| 2077 |
|
|---|
| 2078 |
// Do copying. |
|---|
| 2079 |
foreach(i, col; x) { |
|---|
| 2080 |
copy(take(col, y.length), xCenterScale[i]); |
|---|
| 2081 |
} |
|---|
| 2082 |
|
|---|
| 2083 |
// Compute weighted means. |
|---|
| 2084 |
foreach(j, col; xCenterScale) { |
|---|
| 2085 |
foreach(i, w; weights) { |
|---|
| 2086 |
xMeans[j] += w * col[i]; |
|---|
| 2087 |
} |
|---|
| 2088 |
} |
|---|
| 2089 |
|
|---|
| 2090 |
foreach(i, w; weights) { |
|---|
| 2091 |
zMean += w * z[i]; |
|---|
| 2092 |
} |
|---|
| 2093 |
|
|---|
| 2094 |
xMeans[] /= weightSum; |
|---|
| 2095 |
zMean /= weightSum; |
|---|
| 2096 |
z[] -= zMean; |
|---|
| 2097 |
foreach(i, col; xCenterScale) col[] -= xMeans[i]; |
|---|
| 2098 |
|
|---|
| 2099 |
// Compute biased stdevs. |
|---|
| 2100 |
foreach(j, ref sd; xSds) { |
|---|
| 2101 |
sd = sqrt(meanStdev(xCenterScale[j]).mse); |
|---|
| 2102 |
} |
|---|
| 2103 |
|
|---|
| 2104 |
foreach(i, col; xCenterScale) col[] /= xSds[i]; |
|---|
| 2105 |
return true; |
|---|
| 2106 |
} |
|---|
| 2107 |
|
|---|
| 2108 |
// Rescales the beta coefficients to undo the effects of standardizing. |
|---|
| 2109 |
void rescaleBetas() { |
|---|
| 2110 |
betas[0] = zMean; |
|---|
| 2111 |
foreach(i, b; betasRaw) { |
|---|
| 2112 |
betas[i + 1] = b / xSds[i]; |
|---|
| 2113 |
betas[0] -= betas[i + 1] * xMeans[i]; |
|---|
| 2114 |
} |
|---|
| 2115 |
} |
|---|
| 2116 |
|
|---|
| 2117 |
foreach(iter; 0..maxIter) { |
|---|
| 2118 |
evalPs(betas[0], ps, betas[1..$], x); |
|---|
| 2119 |
immutable lh = logLikelihood(ps, y); |
|---|
| 2120 |
immutable penalty2 = ridge * reduce!"a + b * b"(0.0, betas); |
|---|
| 2121 |
immutable penalty1 = lasso * reduce!"a + (b < 0) ? -b : b"(0.0, betas); |
|---|
| 2122 |
|
|---|
| 2123 |
if((lh - oldLikelihood) / (absMax(lh, oldLikelihood) + 0.1) < eps |
|---|
| 2124 |
&& (oldPenalty2 - penalty2) / (absMax(penalty2, oldPenalty2) + 0.1) < eps |
|---|
| 2125 |
&& (oldPenalty1 - penalty1) / (absMax(penalty1, oldPenalty1) + 0.1) < eps) { |
|---|
| 2126 |
return; |
|---|
| 2127 |
} else if(isNaN(lh) || isNaN(penalty2) || isNaN(penalty1)) { |
|---|
| 2128 |
betas[] = double.nan; |
|---|
| 2129 |
return; |
|---|
| 2130 |
} |
|---|
| 2131 |
|
|---|
| 2132 |
oldPenalty2 = penalty2; |
|---|
| 2133 |
oldPenalty1 = penalty1; |
|---|
| 2134 |
oldLikelihood = lh; |
|---|
| 2135 |
|
|---|
| 2136 |
foreach(i, ref w; weights) { |
|---|
| 2137 |
w = (ps[i] * (1 - ps[i])); |
|---|
| 2138 |
} |
|---|
| 2139 |
|
|---|
| 2140 |
z[] = betas[0]; |
|---|
| 2141 |
foreach(i, col; x) { |
|---|
| 2142 |
static if(is(typeof(col) : const(double)[])) { |
|---|
| 2143 |
z[] += col[] * betas[i + 1]; |
|---|
| 2144 |
} else { |
|---|
| 2145 |
foreach(j, ref elem; z) { |
|---|
| 2146 |
elem += col[j] + betas[i + 1]; |
|---|
| 2147 |
} |
|---|
| 2148 |
} |
|---|
| 2149 |
} |
|---|
| 2150 |
|
|---|
| 2151 |
foreach(i, w; weights) { |
|---|
| 2152 |
if(w == 0){ |
|---|
| 2153 |
z[i] = 0; |
|---|
| 2154 |
} else { |
|---|
| 2155 |
immutable double yi = (y[i] == 0) ? 0.0 : 1.0; |
|---|
| 2156 |
z[i] += (yi - ps[i]) / w; |
|---|
| 2157 |
} |
|---|
| 2158 |
} |
|---|
| 2159 |
|
|---|
| 2160 |
immutable centerScaleRes = doCenterScale(); |
|---|
| 2161 |
|
|---|
| 2162 |
// If this is false then weightSum is so small that all probabilities |
|---|
| 2163 |
// are for all practical purposes either 0 or 1. We can declare |
|---|
| 2164 |
// convergence and go home. |
|---|
| 2165 |
if(!centerScaleRes) return; |
|---|
| 2166 |
|
|---|
| 2167 |
if(lasso > 0) { |
|---|
| 2168 |
// Correct for different conventions in defining ridge params |
|---|
| 2169 |
// so all functions get the same answer. |
|---|
| 2170 |
immutable ridgeCorrected = ridge * 2.0; |
|---|
| 2171 |
coordDescent(z, xCenterScale, betasRaw, |
|---|
| 2172 |
lasso, ridgeCorrected, weights); |
|---|
| 2173 |
} else { |
|---|
| 2174 |
// Correct for different conventions in defining ridge params |
|---|
| 2175 |
// so all functions get the same answer. |
|---|
| 2176 |
immutable ridgeCorrected = ridge * 2.0; |
|---|
| 2177 |
ridgeLargeP(z, xCenterScale, ridgeCorrected, betasRaw, weights); |
|---|
| 2178 |
} |
|---|
| 2179 |
|
|---|
| 2180 |
rescaleBetas(); |
|---|
| 2181 |
} |
|---|
| 2182 |
|
|---|
| 2183 |
immutable lh = logLikelihood(ps, y); |
|---|
| 2184 |
immutable penalty2 = ridge * reduce!"a + b * b"(0.0, betas); |
|---|
| 2185 |
immutable penalty1 = lasso * reduce!"a + (b < 0) ? -b : b"(0.0, betas); |
|---|
| 2186 |
|
|---|
| 2187 |
if((lh - oldLikelihood) / (absMax(lh, oldLikelihood) + 0.1) < eps |
|---|
| 2188 |
&& (oldPenalty2 - penalty2) / (absMax(penalty2, oldPenalty2) + 0.1) < eps |
|---|
| 2189 |
&& (oldPenalty1 - penalty1) / (absMax(penalty1, oldPenalty1) + 0.1) < eps) { |
|---|
| 2190 |
return; |
|---|
| 2191 |
} else { |
|---|
| 2192 |
// If we got here, we haven't converged. Return NaNs instead of bogus |
|---|
| 2193 |
// values. |
|---|
| 2194
|
|---|