root/trunk/mathextra/leastsqr.d

Revision 76, 5.8 kB (checked in by Don Clugston, 3 years ago)

* Folded in DMD 0.158 changes.
* Changed fcis() to expi().
* Fixes to leastsqr.

Line 
1 // Cholesky decomposition
2 module mathextra.leastsqr;
3 import std.math;
4
5 debug private import std.stdio;
6
7 unittest {
8     debug writefln("--- UnitTest: " __FILE__ " ---");
9 } 
10
11
12 /**
13  *  Perform a multiple linear regression (least squares fit).
14  *
15  *
16   Given a linear equation with unknown coefficients a[],
17      a[0]*f[0](i) + a[1]*f[1](i) + ... = yfunc(i)
18   this function uses a Cholesky decomposition to find the values
19   of a[] which minimize the
20   sum of squares, defined as:
21  
22   sumsq = $(BIGSUM i) $(POWER ( y[i] - $(BIGSUM j) a[j] * f[j](i) ), 2)
23  
24   The f[]() and yfunc() functions must be linearly independent.
25  
26  
27   If the Cholesky decomposition is not positive definite,
28   the corresponding a[] entry will be nan; this indicates that
29   there was insufficient information to determine that value.
30  *
31  * Params:
32  *  N       Number of coefficients.
33  *  numpoints Number of points in yfunc and xfunc.
34  *  f       Array of delegates. f[j](i) is the i'th data point for coefficient j.
35  *  yfunc   yfunc(i) is the i'th data point
36  *
37  * Returns:
38  *          The sum of squares, sumsq. The coefficients are returned
39  *          in solution.
40  */
41 template leastSquaresFit(int N)
42 {
43     real leastSquaresFit(real solution[N], int numpoints, real delegate (int) yfunc, real delegate (int) [N] f ...) {
44         return Regression!(N).fitCholesky(solution, numpoints, yfunc, f);
45     }
46 }
47
48 template Regression(int N) {
49
50 /*
51   Given a positive-definite matrix A,
52  constructs its cholesky decomposition,
53  A = L. L<sup>T</sup>
54  
55  Only the upper triangular part of A is required.
56  eg A[2][3].
57  Returns result in lower triangle of A. Diagonals
58  are returned in diag.
59 */
60 // diag[0] will be 0 for any row which is not positive definite.
61 // The corresponding row & column in the matrix will also be all zeros.
62 void CholeskyDecompose(real a[N][N], real diag[N])
63 {
64     for (int i=0; i<N; ++i) {
65         real sum = a[i][i];
66         for (int k=i-1; k>=0; --k) {
67             sum -= a[i][k] * a[i][k];
68         }
69         if (sum >0.0) {
70             diag[i]=sqrt(sum);
71            
72             for (int j=i+1; j<N; ++j) {
73                 sum=a[i][j];
74                 for (int k=i-1; k>=0; --k) {
75                     sum -= a[i][k] * a[j][k];
76                 }
77                 a[j][i]=sum/diag[i];
78             }
79         } else {
80             // not positive definite (could be caused by rounding errors)
81             diag[i] = 0;
82             // make this whole row zero so they have no further effect
83             for (int j=i+1; j<N; ++j) a[j][i] = 0;
84         }
85     }
86 }
87
88 void CholeskySolve(real a[N][N], real diag[N], real b[N], real x[N])
89 {
90     for (int i=0; i<N; ++i) {
91         if (diag[i]>0)  {
92             real sum=b[i];
93             for (int k=i-1; k>=0; k--) sum-=a[i][k]*x[k];
94             x[i] = sum/diag[i];
95         } else x[i] = 0; // skip pos definite rows
96     }
97     for (int i=N-1; i>=0; --i) {
98         if (diag[i]>0) {
99             real sum=x[i];
100             for (int k=i+1; k<N; ++k) sum-=a[k][i]*x[k];
101             x[i] = sum/diag[i];
102         } else x[i]=0; // skip pos definite rows
103     }
104     // Convert failed columns in solution to NANs if required.
105     for (int i=0; i<N; ++i) {
106         if (diag[i]!>0) x[i]=real.nan;
107     }
108 }
109
110 // The x and y values are calculated from indices.
111 // If for any n, the functions return nan, that point will be ignored.
112 real fitCholesky(real solution[N], int numpoints, real delegate (int) yfunc, real delegate (int) [N] f ...)
113 {
114     real [N][N] m;
115     real [N] b;
116     // Clear the matrices
117     for (int i=0; i<N; ++i) {
118         m[i][i..$] = 0;
119     }
120     b[0..$] = 0.0;
121    
122     real sumsq_y = 0;
123
124     for (int q=0; q<numpoints; ++q) {
125         real [N] v;
126         real y = yfunc(q);
127         real anynans = y;
128         for (int i=0; i<N; ++i) {           
129             v[i] = f[i](q);
130             anynans *= v[i];
131         }
132         if (isnan(anynans)) continue;
133         sumsq_y += y*y;
134         for (int i=0; i<N; ++i) {
135             b[i] += y * v[i];
136             for (int j=i; j<N; ++j) {
137                  m[i][j] += v[i]*v[j];
138             }
139         }
140     }
141     real diag[N];
142     CholeskyDecompose(m, diag);
143     CholeskySolve(m, diag, b, solution);
144    
145     real sumsq=0;
146     for (int i=0; i<N; ++i) {
147         if (isnan(solution[i])) continue;
148         for (int j=i+1; j<N; ++j) {
149             if (isnan(solution[j])) continue;
150             sumsq += m[i][j]  * solution[i]*solution[j];
151         }
152     }
153     sumsq*=2; // account for the lower triangular part
154     for (int i=0; i<N; ++i) {
155         if (isnan(solution[i])) continue;
156         sumsq += m[i][i]*solution[i]*solution[i];
157         sumsq -= 2 * b[i] * solution[i];
158     }
159     return sumsq + sumsq_y;
160  }
161  
162 /+
163 // returns the sum of squares. Used only for unit tests.
164 real evaluateFit(real solution[N], int numpoints, real delegate (int) yfunc, real delegate (int) [N] xfunc ...)
165 {
166     static real sqr(real x) { return x*x; }
167       
168     real sumsq=0;
169     for (int i=0; i<numpoints; ++i) {
170         real a=0;
171         for (int j=0; j<N; ++j) {
172             if (!isnan(solution[j]))  a += solution[j] * xfunc[j](i);
173         }
174         sumsq += sqr( a - yfunc(i) );
175     }
176     return sumsq;
177 }
178 +/
179
180
181 } // end Regression
182
183
184 unittest {
185     real x[];
186     real y[];
187     x.length = 500;
188     y.length = 500;
189     real intercept = 580;
190    for (int i=0; i<x.length; ++i) {
191      x[i] = i*12.82344 + 458743.3;
192      y[i] = x[i]*x[i]*PI + intercept;
193    }
194
195    // fit y = a[0]x^2 + a[1]*1
196    
197    real getx(int n) { return x[n]*x[n]; }
198    real gety(int n) { return y[n]; }
199    
200    real [2] a;
201    
202    real delegate (int)[] delegs = void;
203    leastSquaresFit!(2)(a, x.length, &gety, &getx, delegate real (int n) { return 1; });
204  
205   assert(feqrel(a[0],PI)>=double.mant_dig-5);
206   assert(fabs(a[1]-intercept) < 0.002);
207 }
Note: See TracBrowser for help on using the browser.