| 1 |
/**Relatively low-level primitives on which to build higher-level math/stat |
|---|
| 2 |
* functionality. Some are used internally, some are just things that may be |
|---|
| 3 |
* useful to users of this library. This module is starting to take on the |
|---|
| 4 |
* appearance of a small utility library. |
|---|
| 5 |
* |
|---|
| 6 |
* Note: In several functions in this module that return arrays, the last |
|---|
| 7 |
* parameter is an optional buffer for storing the return value. If this |
|---|
| 8 |
* parameter is ommitted or the buffer is not large enough, one will be |
|---|
| 9 |
* allocated on the GC heap. |
|---|
| 10 |
* |
|---|
| 11 |
* Author: David Simcha*/ |
|---|
| 12 |
/* |
|---|
| 13 |
* License: |
|---|
| 14 |
* Boost Software License - Version 1.0 - August 17th, 2003 |
|---|
| 15 |
* |
|---|
| 16 |
* Permission is hereby granted, free of charge, to any person or organization |
|---|
| 17 |
* obtaining a copy of the software and accompanying documentation covered by |
|---|
| 18 |
* this license (the "Software") to use, reproduce, display, distribute, |
|---|
| 19 |
* execute, and transmit the Software, and to prepare derivative works of the |
|---|
| 20 |
* Software, and to permit third-parties to whom the Software is furnished to |
|---|
| 21 |
* do so, all subject to the following: |
|---|
| 22 |
** The copyright notices in the Software and this entire statement, including |
|---|
| 23 |
* the above license grant, this restriction and the following disclaimer, |
|---|
| 24 |
* must be included in all copies of the Software, in whole or in part, and |
|---|
| 25 |
* all derivative works of the Software, unless such copies or derivative |
|---|
| 26 |
* works are solely in the form of machine-executable object code generated by |
|---|
| 27 |
* a source language processor. |
|---|
| 28 |
* |
|---|
| 29 |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|---|
| 30 |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|---|
| 31 |
* FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT |
|---|
| 32 |
* SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE |
|---|
| 33 |
* FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, |
|---|
| 34 |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
|---|
| 35 |
* DEALINGS IN THE SOFTWARE. |
|---|
| 36 |
*/ |
|---|
| 37 |
module dstats.base; |
|---|
| 38 |
|
|---|
| 39 |
import std.math, std.traits, std.typecons, std.algorithm, std.range, |
|---|
| 40 |
std.exception, std.conv, std.functional, std.typetuple; |
|---|
| 41 |
|
|---|
| 42 |
import dstats.alloc, dstats.sort; |
|---|
| 43 |
|
|---|
| 44 |
import std.string : strip; |
|---|
| 45 |
|
|---|
| 46 |
immutable double[] logFactorialTable; |
|---|
| 47 |
|
|---|
| 48 |
private enum size_t staticFacTableLen = 10_000; |
|---|
| 49 |
|
|---|
| 50 |
shared static this() { |
|---|
| 51 |
// Allocating on heap instead of static data segment to avoid |
|---|
| 52 |
// false pointer GC issues. |
|---|
| 53 |
double[] sfTemp = new double[staticFacTableLen]; |
|---|
| 54 |
sfTemp[0] = 0; |
|---|
| 55 |
for(uint i = 1; i < staticFacTableLen; i++) { |
|---|
| 56 |
sfTemp[i] = sfTemp[i - 1] + log(i); |
|---|
| 57 |
} |
|---|
| 58 |
logFactorialTable = assumeUnique(sfTemp); |
|---|
| 59 |
} |
|---|
| 60 |
|
|---|
| 61 |
version(unittest) { |
|---|
| 62 |
import std.stdio, std.random, std.file; |
|---|
| 63 |
|
|---|
| 64 |
void main (){} |
|---|
| 65 |
} |
|---|
| 66 |
|
|---|
| 67 |
class DstatsArgumentException : Exception { |
|---|
| 68 |
this(string msg, string file, int line) { |
|---|
| 69 |
super(msg, file, line); |
|---|
| 70 |
} |
|---|
| 71 |
} |
|---|
| 72 |
|
|---|
| 73 |
T dstatsEnforce(T, string file = __FILE__, int line = __LINE__) |
|---|
| 74 |
(T value, lazy const(char)[] msg = null) { |
|---|
| 75 |
if(!value) { |
|---|
| 76 |
const(char)[] lazyMsg = msg; |
|---|
| 77 |
auto exceptMsg = (lazyMsg !is null) ? lazyMsg.idup : "Invalid argument."; |
|---|
| 78 |
throw new DstatsArgumentException(exceptMsg, file, line); |
|---|
| 79 |
} |
|---|
| 80 |
|
|---|
| 81 |
return value; |
|---|
| 82 |
} |
|---|
| 83 |
|
|---|
| 84 |
package void enforceConfidence |
|---|
| 85 |
(string file = __FILE__, int line = __LINE__)(double conf) { |
|---|
| 86 |
dstatsEnforce!(bool, file, line)(conf >= 0 && conf <= 1, |
|---|
| 87 |
text("Confidence intervals must be between 0 and 1, not ", conf, ".")); |
|---|
| 88 |
} |
|---|
| 89 |
|
|---|
| 90 |
/** Tests whether T is an input range whose elements can be implicitly |
|---|
| 91 |
* converted to doubles.*/ |
|---|
| 92 |
template doubleInput(T) { |
|---|
| 93 |
enum doubleInput = isInputRange!(T) && is(ElementType!(T) : double); |
|---|
| 94 |
} |
|---|
| 95 |
|
|---|
| 96 |
// See Bugzilla 2873. This can be removed once that's fixed. |
|---|
| 97 |
template hasLength(R) { |
|---|
| 98 |
enum bool hasLength = is(typeof(R.init.length) : ulong) || |
|---|
| 99 |
is(typeof(R.init.length()) : ulong); |
|---|
| 100 |
} |
|---|
| 101 |
|
|---|
| 102 |
// isIterable was added to SVN versions of Phobos, but not to released ones yet. |
|---|
| 103 |
static if(!__traits(compiles, std.traits.isIterable!(uint))) { |
|---|
| 104 |
template isIterable(T) |
|---|
| 105 |
{ |
|---|
| 106 |
static if (is(typeof({foreach(elem; T.init) {}}))) { |
|---|
| 107 |
enum bool isIterable = true; |
|---|
| 108 |
} else { |
|---|
| 109 |
enum bool isIterable = false; |
|---|
| 110 |
} |
|---|
| 111 |
} |
|---|
| 112 |
} |
|---|
| 113 |
|
|---|
| 114 |
unittest { |
|---|
| 115 |
struct Foo { // For testing opApply. |
|---|
| 116 |
|
|---|
| 117 |
int opApply(int delegate(ref uint) dg) { assert(0); } |
|---|
| 118 |
} |
|---|
| 119 |
|
|---|
| 120 |
static assert(isIterable!(uint[])); |
|---|
| 121 |
static assert(!isIterable!(uint)); |
|---|
| 122 |
static assert(isIterable!(Foo)); |
|---|
| 123 |
static assert(isIterable!(uint[string])); |
|---|
| 124 |
|
|---|
| 125 |
auto c = chain((uint[]).init, (uint[]).init); |
|---|
| 126 |
static assert(isIterable!(typeof(c))); |
|---|
| 127 |
} |
|---|
| 128 |
|
|---|
| 129 |
/**Determine the iterable type of any iterable object, regardless of whether |
|---|
| 130 |
* it uses ranges, opApply, etc. This is typeof(elem) if one does |
|---|
| 131 |
* foreach(elem; T.init) {}.*/ |
|---|
| 132 |
template IterType(T) { |
|---|
| 133 |
alias ReturnType!(typeof( |
|---|
| 134 |
{ |
|---|
| 135 |
foreach(elem; T.init) { |
|---|
| 136 |
return elem; |
|---|
| 137 |
} |
|---|
| 138 |
assert(0); |
|---|
| 139 |
})) IterType; |
|---|
| 140 |
} |
|---|
| 141 |
|
|---|
| 142 |
unittest { |
|---|
| 143 |
struct Foo { // For testing opApply. |
|---|
| 144 |
// For testing. |
|---|
| 145 |
|
|---|
| 146 |
int opApply(int delegate(ref uint) dg) { assert(0); } |
|---|
| 147 |
} |
|---|
| 148 |
|
|---|
| 149 |
static assert(is(IterType!(uint[]) == uint)); |
|---|
| 150 |
static assert(is(IterType!(Foo) == uint)); |
|---|
| 151 |
static assert(is(IterType!(uint[string]) == uint)); |
|---|
| 152 |
|
|---|
| 153 |
auto c = chain((uint[]).init, (uint[]).init); |
|---|
| 154 |
static assert(is(IterType!(typeof(c)) == uint)); |
|---|
| 155 |
} |
|---|
| 156 |
|
|---|
| 157 |
/**Tests whether T is iterable and has elements of a type implicitly |
|---|
| 158 |
* convertible to double.*/ |
|---|
| 159 |
template doubleIterable(T) { |
|---|
| 160 |
static if(!isIterable!T) { |
|---|
| 161 |
enum doubleIterable = false; |
|---|
| 162 |
} else { |
|---|
| 163 |
enum doubleIterable = is(IterType!(T) : double); |
|---|
| 164 |
} |
|---|
| 165 |
} |
|---|
| 166 |
|
|---|
| 167 |
/**Writes the contents of an input range to an output range. |
|---|
| 168 |
* |
|---|
| 169 |
* Returns: The output range.*/ |
|---|
| 170 |
O mate(I, O)(I input, O output) |
|---|
| 171 |
if(isInputRange!(I) && isOutputRange!(O, ElementType!(I))) { |
|---|
| 172 |
foreach(elem; input) { |
|---|
| 173 |
output.put(elem); |
|---|
| 174 |
} |
|---|
| 175 |
return output; |
|---|
| 176 |
} |
|---|
| 177 |
|
|---|
| 178 |
/**Given a tuple possibly containing forward ranges, returns a tuple where |
|---|
| 179 |
* save() has been called on all forward ranges. |
|---|
| 180 |
*/ |
|---|
| 181 |
Tuple!T saveAll(T...)(T args) { |
|---|
| 182 |
Tuple!T ret; |
|---|
| 183 |
|
|---|
| 184 |
foreach(ti, elem; args) { |
|---|
| 185 |
static if(isForwardRange!(typeof(elem))) { |
|---|
| 186 |
ret.field[ti] = elem.save; |
|---|
| 187 |
} else { |
|---|
| 188 |
ret.field[ti] = elem; |
|---|
| 189 |
} |
|---|
| 190 |
} |
|---|
| 191 |
|
|---|
| 192 |
return ret; |
|---|
| 193 |
} |
|---|
| 194 |
|
|---|
| 195 |
/**Bins data into nbin equal width bins, indexed from |
|---|
| 196 |
* 0 to nbin - 1, with 0 being the smallest bin, etc. |
|---|
| 197 |
* The values returned are the counts for each bin. Returns results on the GC |
|---|
| 198 |
* heap by default, but uses TempAlloc stack if alloc == Alloc.STACK. |
|---|
| 199 |
* |
|---|
| 200 |
* Works with any forward range with elements implicitly convertible to double.*/ |
|---|
| 201 |
Ret[] binCounts(Ret = uint, T)(T data, uint nbin, Ret[] buf = null) |
|---|
| 202 |
if(isForwardRange!(T) && doubleInput!(T)) { |
|---|
| 203 |
dstatsEnforce(nbin > 0, "Cannot bin data into zero bins."); |
|---|
| 204 |
|
|---|
| 205 |
alias Unqual!(ElementType!(T)) E; |
|---|
| 206 |
E min = data.front, max = data.front; |
|---|
| 207 |
foreach(elem; data) { |
|---|
| 208 |
if(elem > max) |
|---|
| 209 |
max = elem; |
|---|
| 210 |
else if(elem < min) |
|---|
| 211 |
min = elem; |
|---|
| 212 |
} |
|---|
| 213 |
E range = max - min; |
|---|
| 214 |
|
|---|
| 215 |
Ret[] bins; |
|---|
| 216 |
if(buf.length < nbin) { |
|---|
| 217 |
bins = new Ret[nbin]; |
|---|
| 218 |
} else { |
|---|
| 219 |
bins = buf[0..nbin]; |
|---|
| 220 |
bins[] = 0; |
|---|
| 221 |
} |
|---|
| 222 |
|
|---|
| 223 |
foreach(elem; data) { |
|---|
| 224 |
// Using the truncation as a feature. |
|---|
| 225 |
uint whichBin = cast(uint) ((elem - min) * nbin / range); |
|---|
| 226 |
|
|---|
| 227 |
// Handle edge case by putting largest item in largest bin. |
|---|
| 228 |
if(whichBin == nbin) whichBin--; |
|---|
| 229 |
|
|---|
| 230 |
bins[whichBin]++; |
|---|
| 231 |
} |
|---|
| 232 |
|
|---|
| 233 |
return bins; |
|---|
| 234 |
} |
|---|
| 235 |
|
|---|
| 236 |
unittest { |
|---|
| 237 |
double[] data = [0.0, .01, .03, .05, .11, .31, .51, .71, .89, 1]; |
|---|
| 238 |
auto res = binCounts(data, 10); |
|---|
| 239 |
assert(res == [4U, 1, 0, 1, 0, 1, 0, 1, 1, 1]); |
|---|
| 240 |
|
|---|
| 241 |
auto buf = new uint[10]; |
|---|
| 242 |
foreach(ref elem; buf) { |
|---|
| 243 |
elem = uniform(0, 34534); |
|---|
| 244 |
} |
|---|
| 245 |
|
|---|
| 246 |
res = binCounts(data, 10, buf); |
|---|
| 247 |
assert(res == [4U, 1, 0, 1, 0, 1, 0, 1, 1, 1]); |
|---|
| 248 |
} |
|---|
| 249 |
|
|---|
| 250 |
/**Bins data into nbin equal width bins, indexed from |
|---|
| 251 |
* 0 to nbin - 1, with 0 being the smallest bin, etc. |
|---|
| 252 |
* The values returned are the bin index for each element. |
|---|
| 253 |
* |
|---|
| 254 |
* Default return type is ubyte, because in the dstats.infotheory, |
|---|
| 255 |
* entropy() and related functions specialize on ubytes, and become |
|---|
| 256 |
* substandially faster. However, if you're using more than 255 bins, |
|---|
| 257 |
* you'll have to provide a different return type as a template parameter.*/ |
|---|
| 258 |
Ret[] bin(Ret = ubyte, T)(T data, uint nbin, Ret[] buf = null) |
|---|
| 259 |
if(isForwardRange!(T) && doubleInput!(T) && isIntegral!(Ret)) { |
|---|
| 260 |
dstatsEnforce(nbin > 0, "Cannot bin data into zero bins."); |
|---|
| 261 |
dstatsEnforce(nbin <= (cast(uint) Ret.max + 1), "Cannot bin into " ~ |
|---|
| 262 |
to!string(nbin) ~ " bins and store the results in a " ~ |
|---|
| 263 |
Ret.stringof ~ "."); |
|---|
| 264 |
alias ElementType!(T) E; |
|---|
| 265 |
Unqual!(E) min = data.front, max = data.front; |
|---|
| 266 |
|
|---|
| 267 |
auto dminmax = data; |
|---|
| 268 |
dminmax.popFront; |
|---|
| 269 |
foreach(elem; dminmax) { |
|---|
| 270 |
if(elem > max) |
|---|
| 271 |
max = elem; |
|---|
| 272 |
else if(elem < min) |
|---|
| 273 |
min = elem; |
|---|
| 274 |
} |
|---|
| 275 |
E range = max - min; |
|---|
| 276 |
Ret[] bins; |
|---|
| 277 |
if(buf.length < data.length) { |
|---|
| 278 |
bins = newVoid!(Ret)(data.length); |
|---|
| 279 |
} else { |
|---|
| 280 |
bins = buf[0..data.length]; |
|---|
| 281 |
} |
|---|
| 282 |
|
|---|
| 283 |
foreach(i, elem; data) { |
|---|
| 284 |
// Using the truncation as a feature. |
|---|
| 285 |
uint whichBin = cast(uint) ((elem - min) * nbin / range); |
|---|
| 286 |
|
|---|
| 287 |
// Handle edge case by putting largest item in largest bin. |
|---|
| 288 |
if(whichBin == nbin) |
|---|
| 289 |
whichBin--; |
|---|
| 290 |
|
|---|
| 291 |
bins[i] = cast(Ret) whichBin; |
|---|
| 292 |
} |
|---|
| 293 |
|
|---|
| 294 |
return bins; |
|---|
| 295 |
} |
|---|
| 296 |
|
|---|
| 297 |
unittest { |
|---|
| 298 |
mixin(newFrame); |
|---|
| 299 |
double[] data = [0.0, .01, .03, .05, .11, .31, .51, .71, .89, 1]; |
|---|
| 300 |
auto res = bin(data, 10); |
|---|
| 301 |
assert(res == to!(ubyte[])([0, 0, 0, 0, 1, 3, 5, 7, 8, 9])); |
|---|
| 302 |
|
|---|
| 303 |
auto buf = new ubyte[20]; |
|---|
| 304 |
foreach(ref elem; buf) { |
|---|
| 305 |
elem = cast(ubyte) uniform(0U, 255); |
|---|
| 306 |
} |
|---|
| 307 |
|
|---|
| 308 |
res = bin(data, 10, buf); |
|---|
| 309 |
assert(res == to!(ubyte[])([0, 0, 0, 0, 1, 3, 5, 7, 8, 9])); |
|---|
| 310 |
|
|---|
| 311 |
// Make sure this throws: |
|---|
| 312 |
try { |
|---|
| 313 |
auto foo = bin( seq(0U, 1_000U), 512); |
|---|
| 314 |
assert(0); |
|---|
| 315 |
} catch(Exception e) { |
|---|
| 316 |
// It's supposed to throw. |
|---|
| 317 |
} |
|---|
| 318 |
} |
|---|
| 319 |
|
|---|
| 320 |
/**Bins data into nbin equal frequency bins, indexed from |
|---|
| 321 |
* 0 to nbin - 1, with 0 being the smallest bin, etc. |
|---|
| 322 |
* The values returned are the bin index for each element. |
|---|
| 323 |
* |
|---|
| 324 |
* Default return type is ubyte, because in the dstats.infotheory, |
|---|
| 325 |
* entropy() and related functions specialize on ubytes, and become |
|---|
| 326 |
* substandially faster. However, if you're using more than 256 bins, |
|---|
| 327 |
* you'll have to provide a different return type as a template parameter.*/ |
|---|
| 328 |
Ret[] frqBin(Ret = ubyte, T)(T data, uint nbin, Ret[] buf = null) |
|---|
| 329 |
if(doubleInput!(T) && isForwardRange!(T) && hasLength!(T) && isIntegral!(Ret)) { |
|---|
| 330 |
dstatsEnforce(nbin > 0, "Cannot bin data into zero bins."); |
|---|
| 331 |
dstatsEnforce(nbin <= data.length, |
|---|
| 332 |
"Cannot equal frequency bin data into more than data.length bins."); |
|---|
| 333 |
dstatsEnforce(nbin <= (cast(ulong) Ret.max) + 1UL, "Cannot bin into " ~ |
|---|
| 334 |
to!string(nbin) ~ " bins and store the results in a " ~ |
|---|
| 335 |
Ret.stringof ~ "."); |
|---|
| 336 |
|
|---|
| 337 |
Ret[] result; |
|---|
| 338 |
if(buf.length < data.length) { |
|---|
| 339 |
result = newVoid!(Ret)(data.length); |
|---|
| 340 |
} else { |
|---|
| 341 |
result = buf[0..data.length]; |
|---|
| 342 |
} |
|---|
| 343 |
|
|---|
| 344 |
auto perm = newStack!(size_t)(data.length); |
|---|
| 345 |
scope(exit) TempAlloc.free; |
|---|
| 346 |
|
|---|
| 347 |
foreach(i, ref e; perm) { |
|---|
| 348 |
e = i; |
|---|
| 349 |
} |
|---|
| 350 |
|
|---|
| 351 |
static if(isRandomAccessRange!(T)) { |
|---|
| 352 |
bool compare(size_t lhs, size_t rhs) { |
|---|
| 353 |
return data[lhs] < data[rhs]; |
|---|
| 354 |
} |
|---|
| 355 |
|
|---|
| 356 |
qsort!compare(perm); |
|---|
| 357 |
} else { |
|---|
| 358 |
auto dd = tempdup(data); |
|---|
| 359 |
qsort(dd, perm); |
|---|
| 360 |
TempAlloc.free; |
|---|
| 361 |
} |
|---|
| 362 |
|
|---|
| 363 |
auto rem = data.length % nbin; |
|---|
| 364 |
Ret bin = 0; |
|---|
| 365 |
auto i = 0, frq = data.length / nbin; |
|---|
| 366 |
while(i < data.length) { |
|---|
| 367 |
foreach(j; 0..(bin < rem) ? frq + 1 : frq) { |
|---|
| 368 |
result[perm[i++]] = bin; |
|---|
| 369 |
} |
|---|
| 370 |
bin++; |
|---|
| 371 |
} |
|---|
| 372 |
return result; |
|---|
| 373 |
} |
|---|
| 374 |
|
|---|
| 375 |
unittest { |
|---|
| 376 |
double[] data = [5U, 1, 3, 8, 30, 10, 7]; |
|---|
| 377 |
auto res = frqBin(data, 3); |
|---|
| 378 |
assert(res == to!(ubyte[])([0, 0, 0, 1, 2, 2, 1])); |
|---|
| 379 |
data = [3, 1, 4, 1, 5, 9, 2, 6, 5]; |
|---|
| 380 |
|
|---|
| 381 |
auto buf = new ubyte[32]; |
|---|
| 382 |
foreach(i, ref elem; buf) { |
|---|
| 383 |
elem = cast(ubyte) i; |
|---|
| 384 |
} |
|---|
| 385 |
|
|---|
| 386 |
res = frqBin(data, 4, buf); |
|---|
| 387 |
assert(res == to!(ubyte[])([1, 0, 1, 0, 2, 3, 0, 3, 2])); |
|---|
| 388 |
data = [3U, 1, 4, 1, 5, 9, 2, 6, 5, 3, 4, 8, 9, 7, 9, 2]; |
|---|
| 389 |
res = frqBin(data, 4); |
|---|
| 390 |
assert(res == to!(ubyte[])([1, 0, 1, 0, 2, 3, 0, 2, 2, 1, 1, 3, 3, 2, 3, 0])); |
|---|
| 391 |
|
|---|
| 392 |
// Make sure this throws: |
|---|
| 393 |
try { |
|---|
| 394 |
auto foo = frqBin( seq(0U, 1_000U), 512); |
|---|
| 395 |
assert(0); |
|---|
| 396 |
} catch(Exception e) { |
|---|
| 397 |
// It's supposed to throw. |
|---|
| 398 |
} |
|---|
| 399 |
} |
|---|
| 400 |
|
|---|
| 401 |
/**Generates a sequence from [start..end] by increment. Includes start, |
|---|
| 402 |
* excludes end. Does so eagerly as an array. |
|---|
| 403 |
* |
|---|
| 404 |
* Examples: |
|---|
| 405 |
* --- |
|---|
| 406 |
* auto s = seq(0, 5); |
|---|
| 407 |
* assert(s == [0, 1, 2, 3, 4]); |
|---|
| 408 |
* --- |
|---|
| 409 |
*/ |
|---|
| 410 |
CommonType!(T, U)[] seq(T, U, V = uint)(T start, U end, V increment = 1U) { |
|---|
| 411 |
dstatsEnforce(increment > 0, "Cannot have a seq increment <= 0."); |
|---|
| 412 |
dstatsEnforce(end >= start, "End must be >= start in seq."); |
|---|
| 413 |
|
|---|
| 414 |
alias CommonType!(T, U) R; |
|---|
| 415 |
auto output = newVoid!(R)(cast(size_t) ((end - start) / increment)); |
|---|
| 416 |
|
|---|
| 417 |
size_t count = 0; |
|---|
| 418 |
for(T i = start; i < end; i += increment) { |
|---|
| 419 |
output[count++] = i; |
|---|
| 420 |
} |
|---|
| 421 |
return output; |
|---|
| 422 |
} |
|---|
| 423 |
|
|---|
| 424 |
unittest { |
|---|
| 425 |
auto s = seq(0, 5); |
|---|
| 426 |
assert(s == [0, 1, 2, 3, 4]); |
|---|
| 427 |
} |
|---|
| 428 |
|
|---|
| 429 |
/**Given an input array, outputs an array containing the rank from |
|---|
| 430 |
* [1, input.length] corresponding to each element. Ties are dealt with by |
|---|
| 431 |
* averaging. This function does not reorder the input range. |
|---|
| 432 |
* Return type is float[] by default, but if you are sure you have no ties, |
|---|
| 433 |
* ints can be used for efficiency (in which case ties will not be averaged), |
|---|
| 434 |
* and if you need more precision when averaging ties, you can use double or |
|---|
| 435 |
* real. |
|---|
| 436 |
* |
|---|
| 437 |
* Works with any input range. |
|---|
| 438 |
* |
|---|
| 439 |
* Examples: |
|---|
| 440 |
* --- |
|---|
| 441 |
* uint[] test = [3, 5, 3, 1, 2]; |
|---|
| 442 |
* assert(rank!("a < b", float)(test) == [3.5f, 5f, 3.5f, 1f, 2f]); |
|---|
| 443 |
* assert(test == [3U, 5, 3, 1, 2]); |
|---|
| 444 |
* ---*/ |
|---|
| 445 |
Ret[] rank(alias compFun = "a < b", Ret = double, T)(T input, Ret[] buf = null) |
|---|
| 446 |
if(isInputRange!(T) && is(typeof(input.front < input.front))) { |
|---|
| 447 |
static if(!isRandomAccessRange!(T) || !hasLength!(T)) { |
|---|
| 448 |
mixin(newFrame); |
|---|
| 449 |
return rankSort!(compFun, Ret)( tempdup(input), buf); |
|---|
| 450 |
} else { |
|---|
| 451 |
|
|---|
| 452 |
/* It's faster to duplicate the input and then use rankSort on the |
|---|
| 453 |
* duplicate, but more space efficient to create an index array. Check |
|---|
| 454 |
* TempAlloc to see whether we can fit everything we need on the current |
|---|
| 455 |
* frame. If yes, use the faster algorithm since we're effectively |
|---|
| 456 |
* not saving any space by using the space-efficient algorithm. |
|---|
| 457 |
* If no, use the more space-efficient algorithm. |
|---|
| 458 |
*/ |
|---|
| 459 |
auto bytesNeeded = input.length * (ElementType!(T).sizeof + size_t.sizeof); |
|---|
| 460 |
if(bytesNeeded < TempAlloc.slack) { |
|---|
| 461 |
mixin(newFrame); |
|---|
| 462 |
return rankSort!(compFun, Ret)( tempdup(input), buf); |
|---|
| 463 |
} else { |
|---|
| 464 |
return rankUsingIndex!(compFun, Ret)(input, buf); |
|---|
| 465 |
} |
|---|
| 466 |
} |
|---|
| 467 |
|
|---|
| 468 |
assert(0); |
|---|
| 469 |
} |
|---|
| 470 |
|
|---|
| 471 |
/* Space-efficient but slow way of computing ranks. The extra indirection |
|---|
| 472 |
* caused by creating the index array wreaks havock with CPU caches, especially |
|---|
| 473 |
* for large arrays that don't fit entirely in cache. |
|---|
| 474 |
*/ |
|---|
| 475 |
private Ret[] rankUsingIndex(alias compFun, Ret, T)(T input, Ret[] buf) { |
|---|
| 476 |
mixin(newFrame); |
|---|
| 477 |
size_t[] indices = newStack!size_t(input.length); |
|---|
| 478 |
foreach(i, ref elem; indices) { |
|---|
| 479 |
elem = i; |
|---|
| 480 |
} |
|---|
| 481 |
|
|---|
| 482 |
bool compare(size_t lhs, size_t rhs) { |
|---|
| 483 |
alias binaryFun!(compFun) innerComp; |
|---|
| 484 |
return innerComp(input[lhs], input[rhs]); |
|---|
| 485 |
} |
|---|
| 486 |
|
|---|
| 487 |
dstats.sort.qsort!compare(indices); |
|---|
| 488 |
|
|---|
| 489 |
Ret[] ret; |
|---|
| 490 |
if(buf.length < indices.length) { |
|---|
| 491 |
ret = newVoid!Ret(indices.length); |
|---|
| 492 |
} else { |
|---|
| 493 |
ret = buf[0..indices.length]; |
|---|
| 494 |
} |
|---|
| 495 |
|
|---|
| 496 |
foreach(i, index; indices) { |
|---|
| 497 |
ret[index] = i + 1; |
|---|
| 498 |
} |
|---|
| 499 |
|
|---|
| 500 |
auto myIndexed = Indexed!(T)(input, indices); |
|---|
| 501 |
|
|---|
| 502 |
static if(!isIntegral!Ret) { |
|---|
| 503 |
averageTies(myIndexed, ret, indices); |
|---|
| 504 |
} |
|---|
| 505 |
|
|---|
| 506 |
return ret; |
|---|
| 507 |
} |
|---|
| 508 |
|
|---|
| 509 |
private struct Indexed(T) { |
|---|
| 510 |
T someRange; |
|---|
| 511 |
size_t[] indices; |
|---|
| 512 |
|
|---|
| 513 |
ElementType!T opIndex(size_t index) { |
|---|
| 514 |
return someRange[indices[index]]; |
|---|
| 515 |
} |
|---|
| 516 |
|
|---|
| 517 |
@property size_t length() { |
|---|
| 518 |
return indices.length; |
|---|
| 519 |
} |
|---|
| 520 |
} |
|---|
| 521 |
|
|---|
| 522 |
/**Same as rank(), but also sorts the input range. |
|---|
| 523 |
* The array returned will still be identical to that returned by rank(), i.e. |
|---|
| 524 |
* the rank of each element will correspond to the ranks of the elements in the |
|---|
| 525 |
* input array before sorting. |
|---|
| 526 |
* |
|---|
| 527 |
* Examples: |
|---|
| 528 |
* --- |
|---|
| 529 |
* uint[] test = [3, 5, 3, 1, 2]; |
|---|
| 530 |
* assert(rankSort(test) == [3.5, 5, 3.5, 1.0, 2.0]); |
|---|
| 531 |
* assert(test == [1U, 2, 3, 4, 5]); |
|---|
| 532 |
* --- |
|---|
| 533 |
*/ |
|---|
| 534 |
Ret[] rankSort(alias compFun = "a < b", Ret = double, T)(T input, Ret[] buf = null) |
|---|
| 535 |
if(isRandomAccessRange!(T) && hasLength!(T) && is(typeof(input.front < input.front))) { |
|---|
| 536 |
mixin(newFrame); |
|---|
| 537 |
Ret[] ranks; |
|---|
| 538 |
if(buf.length < input.length) { |
|---|
| 539 |
ranks = newVoid!(Ret)(input.length); |
|---|
| 540 |
} else { |
|---|
| 541 |
ranks = buf[0..input.length]; |
|---|
| 542 |
} |
|---|
| 543 |
|
|---|
| 544 |
size_t[] perms = newStack!(size_t)(input.length); |
|---|
| 545 |
foreach(i, ref p; perms) { |
|---|
| 546 |
p = i; |
|---|
| 547 |
} |
|---|
| 548 |
|
|---|
| 549 |
dstats.sort.qsort!compFun(input, perms); |
|---|
| 550 |
foreach(i; 0..perms.length) { |
|---|
| 551 |
ranks[perms[i]] = i + 1; |
|---|
| 552 |
} |
|---|
| 553 |
|
|---|
| 554 |
static if(!isIntegral!Ret) { |
|---|
| 555 |
averageTies(input, ranks, perms); |
|---|
| 556 |
} |
|---|
| 557 |
|
|---|
| 558 |
return ranks; |
|---|
| 559 |
} |
|---|
| 560 |
|
|---|
| 561 |
unittest { |
|---|
| 562 |
uint[] test = [3, 5, 3, 1, 2]; |
|---|
| 563 |
|
|---|
| 564 |
float[] dummy; |
|---|
| 565 |
assert(rankUsingIndex!("a < b", float)(test, dummy) == [3.5f, 5f, 3.5f, 1f, 2f]); |
|---|
| 566 |
assert(test == [3U, 5, 3, 1, 2]); |
|---|
| 567 |
|
|---|
| 568 |
double[] dummy2; |
|---|
| 569 |
assert(rankUsingIndex!("a < b", double)(test, dummy2) == [3.5, 5, 3.5, 1, 2]); |
|---|
| 570 |
assert(rankSort(test) == [3.5, 5.0, 3.5, 1.0, 2.0]); |
|---|
| 571 |
assert(test == [1U,2,3,3,5]); |
|---|
| 572 |
|
|---|
| 573 |
uint[] test2 = [3,3,1,2]; |
|---|
| 574 |
assert(rank(test2) == [3.5,3.5,1,2]); |
|---|
| 575 |
} |
|---|
| 576 |
|
|---|
| 577 |
private void averageTies(T, U)(T sortedInput, U[] ranks, size_t[] perms) |
|---|
| 578 |
in { |
|---|
| 579 |
assert(sortedInput.length == ranks.length); |
|---|
| 580 |
assert(ranks.length == perms.length); |
|---|
| 581 |
} body { |
|---|
| 582 |
size_t tieCount = 1; |
|---|
| 583 |
foreach(i; 1..ranks.length) { |
|---|
| 584 |
if(sortedInput[i] == sortedInput[i - 1]) { |
|---|
| 585 |
tieCount++; |
|---|
| 586 |
} else if(tieCount > 1) { |
|---|
| 587 |
double avg = 0; |
|---|
| 588 |
immutable increment = 1.0 / tieCount; |
|---|
| 589 |
|
|---|
| 590 |
foreach(perm; perms[i - tieCount..i]) { |
|---|
| 591 |
avg += ranks[perm] * increment; |
|---|
| 592 |
} |
|---|
| 593 |
|
|---|
| 594 |
foreach(perm; perms[i - tieCount..i]) { |
|---|
| 595 |
ranks[perm] = avg; |
|---|
| 596 |
} |
|---|
| 597 |
tieCount = 1; |
|---|
| 598 |
} |
|---|
| 599 |
} |
|---|
| 600 |
|
|---|
| 601 |
if(tieCount > 1) { // Handle the end. |
|---|
| 602 |
double avg = 0; |
|---|
| 603 |
immutable increment = 1.0 / tieCount; |
|---|
| 604 |
|
|---|
| 605 |
foreach(perm; perms[perms.length - tieCount..$]) { |
|---|
| 606 |
avg += ranks[perm] * increment; |
|---|
| 607 |
} |
|---|
| 608 |
|
|---|
| 609 |
foreach(perm; perms[perms.length - tieCount..$]) { |
|---|
| 610 |
ranks[perm] = avg; |
|---|
| 611 |
} |
|---|
| 612 |
} |
|---|
| 613 |
} |
|---|
| 614 |
|
|---|
| 615 |
/**Returns an AA of counts of every element in input. Works w/ any iterable. |
|---|
| 616 |
* |
|---|
| 617 |
* Examples: |
|---|
| 618 |
* --- |
|---|
| 619 |
* int[] foo = [1,2,3,1,2,4]; |
|---|
| 620 |
* uint[int] frq = frequency(foo); |
|---|
| 621 |
* assert(frq.length == 4); |
|---|
| 622 |
* assert(frq[1] == 2); |
|---|
| 623 |
* assert(frq[4] == 1); |
|---|
| 624 |
* ---*/ |
|---|
| 625 |
uint[IterType!(T)] frequency(T)(T input) |
|---|
| 626 |
if(isIterable!(T)) { |
|---|
| 627 |
typeof(return) output; |
|---|
| 628 |
foreach(i; input) { |
|---|
| 629 |
output[i]++; |
|---|
| 630 |
} |
|---|
| 631 |
return output; |
|---|
| 632 |
} |
|---|
| 633 |
|
|---|
| 634 |
unittest { |
|---|
| 635 |
int[] foo = [1,2,3,1,2,4]; |
|---|
| 636 |
uint[int] frq = frequency(foo); |
|---|
| 637 |
assert(frq.length == 4); |
|---|
| 638 |
assert(frq[1] == 2); |
|---|
| 639 |
assert(frq[4] == 1); |
|---|
| 640 |
} |
|---|
| 641 |
|
|---|
| 642 |
/**Given a range of values and a range of categories, separates values |
|---|
| 643 |
* by category. This function also guarantees that the order within each |
|---|
| 644 |
* category will be maintained. |
|---|
| 645 |
* |
|---|
| 646 |
* Note: While the general convention of this library is to try to avoid |
|---|
| 647 |
* heap allocations whenever possible so that multithreaded code scales well and |
|---|
| 648 |
* false pointers aren't an issue, this function allocates like crazy |
|---|
| 649 |
* because there's basically no other way to implement it. Don't use it in |
|---|
| 650 |
* performance-critical multithreaded code. |
|---|
| 651 |
* |
|---|
| 652 |
* Examples: |
|---|
| 653 |
* --- |
|---|
| 654 |
* uint[] values = [1,2,3,4,5,6,7,8]; |
|---|
| 655 |
* bool[] categories = [false, false, false, false, true, true, true, true]; |
|---|
| 656 |
* auto separated = byCategory(values, categories); |
|---|
| 657 |
* auto tResult = studentsTTest(separated.values); |
|---|
| 658 |
* --- |
|---|
| 659 |
*/ |
|---|
| 660 |
ElementType!(V)[][ElementType!(C)] byCategory(V, C)(V values, C categories) |
|---|
| 661 |
if(isInputRange!(V) && isInputRange!(C) && !is(ElementType!C == bool)) { |
|---|
| 662 |
alias ElementType!(V) EV; |
|---|
| 663 |
alias ElementType!(C) EC; |
|---|
| 664 |
|
|---|
| 665 |
Appender!(EV[], EV)[EC] aa; |
|---|
| 666 |
while(!values.empty && !categories.empty) { |
|---|
| 667 |
scope(exit) { |
|---|
| 668 |
values.popFront(); |
|---|
| 669 |
categories.popFront(); |
|---|
| 670 |
} |
|---|
| 671 |
|
|---|
| 672 |
auto category = categories.front; |
|---|
| 673 |
auto ptr = category in aa; |
|---|
| 674 |
if(ptr is null) { |
|---|
| 675 |
aa[category] = typeof(aa[category]).init; |
|---|
| 676 |
ptr = category in aa; |
|---|
| 677 |
} |
|---|
| 678 |
|
|---|
| 679 |
ptr.put(values.front); |
|---|
| 680 |
} |
|---|
| 681 |
|
|---|
| 682 |
EV[][EC] ret; |
|---|
| 683 |
foreach(k, v; aa) { |
|---|
| 684 |
ret[k] = v.data; |
|---|
| 685 |
} |
|---|
| 686 |
|
|---|
| 687 |
return ret; |
|---|
| 688 |
} |
|---|
| 689 |
|
|---|
| 690 |
/**Special case implementation for when ElementType!C is boolean.*/ |
|---|
| 691 |
ElementType!(V)[][2] byCategory(V, C)(V values, C categories) |
|---|
| 692 |
if(isInputRange!(V) && isInputRange!(C) && is(ElementType!C == bool)) { |
|---|
| 693 |
typeof(return) ret; |
|---|
| 694 |
|
|---|
| 695 |
static if(hasLength!V || hasLength!C) { |
|---|
| 696 |
// Preallocate. |
|---|
| 697 |
size_t zeroIndex, oneIndex; |
|---|
| 698 |
|
|---|
| 699 |
static if(!hasLength!V) { |
|---|
| 700 |
ret[0].length = categories.length; |
|---|
| 701 |
ret[1].length = values.length; |
|---|
| 702 |
} else static if(!hasLength!C) { |
|---|
| 703 |
ret[0].length = values.length; |
|---|
| 704 |
ret[1].length = values.length; |
|---|
| 705 |
} else { |
|---|
| 706 |
immutable len = min(categories.length, values.length); |
|---|
| 707 |
ret[0].length = len; |
|---|
| 708 |
ret[1].length = len; |
|---|
| 709 |
} |
|---|
| 710 |
|
|---|
| 711 |
while(!values.empty && !categories.empty) { |
|---|
| 712 |
scope(exit) { |
|---|
| 713 |
values.popFront(); |
|---|
| 714 |
categories.popFront(); |
|---|
| 715 |
} |
|---|
| 716 |
|
|---|
| 717 |
auto category = categories.front; |
|---|
| 718 |
if(category) { |
|---|
| 719 |
ret[1][oneIndex++] = values.front; |
|---|
| 720 |
} else { |
|---|
| 721 |
ret[0][zeroIndex++] = values.front; |
|---|
| 722 |
} |
|---|
| 723 |
} |
|---|
| 724 |
|
|---|
| 725 |
ret[0] = ret[0][0..zeroIndex]; |
|---|
| 726 |
ret[1] = ret[1][0..oneIndex]; |
|---|
| 727 |
} else { |
|---|
| 728 |
auto app0 = appender!(ElementType!(V)[])(); |
|---|
| 729 |
auto app1 = appender!(ElementType!(V)[])(); |
|---|
| 730 |
|
|---|
| 731 |
while(!values.empty && !categories.empty) { |
|---|
| 732 |
scope(exit) { |
|---|
| 733 |
values.popFront(); |
|---|
| 734 |
categories.popFront(); |
|---|
| 735 |
} |
|---|
| 736 |
|
|---|
| 737 |
auto category = categories.front; |
|---|
| 738 |
if(category) { |
|---|
| 739 |
app1.put(values.front); |
|---|
| 740 |
} else { |
|---|
| 741 |
app0.put(values.front); |
|---|
| 742 |
} |
|---|
| 743 |
} |
|---|
| 744 |
|
|---|
| 745 |
ret[0] = app0.data; |
|---|
| 746 |
ret[1] = app1.data; |
|---|
| 747 |
} |
|---|
| 748 |
|
|---|
| 749 |
return ret; |
|---|
| 750 |
} |
|---|
| 751 |
|
|---|
| 752 |
unittest { |
|---|
| 753 |
int[] nums = [1,2,3,4,5,6,7,8,9]; |
|---|
| 754 |
int[] categories = [0,1,2,0,1,2,0,1,2]; |
|---|
| 755 |
|
|---|
| 756 |
// The filter is just to prevent having a length. |
|---|
| 757 |
auto categories2 = filter!"a == a"(map!"a % 2 == 0"(nums)); |
|---|
| 758 |
|
|---|
| 759 |
auto result = byCategory(nums, categories); |
|---|
| 760 |
assert(result[0] == [1,4,7]); |
|---|
| 761 |
assert(result[1] == [2,5,8]); |
|---|
| 762 |
assert(result[2] == [3,6,9]); |
|---|
| 763 |
|
|---|
| 764 |
auto res2 = byCategory(filter!"a == a"(nums), categories2); |
|---|
| 765 |
assert(res2[0] == [1,3,5,7,9]); |
|---|
| 766 |
assert(res2[1] == [2,4,6,8]); |
|---|
| 767 |
|
|---|
| 768 |
auto res3 = byCategory(nums, |
|---|
| 769 |
[false, true, false, true, false, true, false, true, false]); |
|---|
| 770 |
assert(res2 == res3); |
|---|
| 771 |
} |
|---|
| 772 |
|
|---|
| 773 |
/**Finds the area under the ROC curve (a curve with sensitivity on the Y-axis |
|---|
| 774 |
* and 1 - specificity on the X-axis). This is a useful metric for |
|---|
| 775 |
* determining how well a test statistic discriminates between two classes. |
|---|
| 776 |
* The following assumptions are made in this implementation: |
|---|
| 777 |
* |
|---|
| 778 |
* 1. For some cutoff value c and test statistic T, your decision rule is of |
|---|
| 779 |
* the form "Class A if T > c, Class B if T < c". |
|---|
| 780 |
* |
|---|
| 781 |
* 2. In the case of ties, i.e. if class A and class B both have an identical |
|---|
| 782 |
* value, linear interpolation is used. This is because changing the |
|---|
| 783 |
* value of c infinitesimally will change both sensitivity and specificity |
|---|
| 784 |
* in these cases. |
|---|
| 785 |
*/ |
|---|
| 786 |
double auroc(R1, R2)(R1 classATs, R2 classBTs) |
|---|
| 787 |
if(isNumeric!(ElementType!R1) && isNumeric!(ElementType!R2)) { |
|---|
| 788 |
mixin(newFrame); |
|---|
| 789 |
auto classA = tempdup(classATs); |
|---|
| 790 |
auto classB = tempdup(classBTs); |
|---|
| 791 |
qsort(classA); |
|---|
| 792 |
qsort(classB); |
|---|
| 793 |
|
|---|
| 794 |
// Start cutoff at -infinity, such that we get everything in class A, i.e. |
|---|
| 795 |
// perfect specificity, zero sensitivity. We arbitrarily define class B |
|---|
| 796 |
// as our "positive" class. |
|---|
| 797 |
double tp = 0, tn = classA.length, fp = 0, fn = classB.length; |
|---|
| 798 |
double[2] lastPoint = 0; |
|---|
| 799 |
|
|---|
| 800 |
Unqual!(CommonType!(ElementType!R1, ElementType!R2)) currentVal; |
|---|
| 801 |
|
|---|
| 802 |
ElementType!R1 popA() { |
|---|
| 803 |
tn--; |
|---|
| 804 |
fp++; |
|---|
| 805 |
auto ret = classA.front; |
|---|
| 806 |
classA.popFront(); |
|---|
| 807 |
return ret; |
|---|
| 808 |
} |
|---|
| 809 |
|
|---|
| 810 |
ElementType!R2 popB() { |
|---|
| 811 |
fn--; |
|---|
| 812 |
tp++; |
|---|
| 813 |
auto ret = classB.front; |
|---|
| 814 |
classB.popFront(); |
|---|
| 815 |
return ret; |
|---|
| 816 |
} |
|---|
| 817 |
|
|---|
| 818 |
double area = 0; |
|---|
| 819 |
while(!classA.empty && !classB.empty) { |
|---|
| 820 |
if(classA.front < classB.front) { |
|---|
| 821 |
currentVal = popA(); |
|---|
| 822 |
} else { |
|---|
| 823 |
currentVal = popB(); |
|---|
| 824 |
} |
|---|
| 825 |
|
|---|
| 826 |
// Handle ties. |
|---|
| 827 |
while(!classA.empty && classA.front == currentVal) { |
|---|
| 828 |
popA(); |
|---|
| 829 |
} |
|---|
| 830 |
|
|---|
| 831 |
while(!classB.empty && classB.front == currentVal) { |
|---|
| 832 |
popB(); |
|---|
| 833 |
} |
|---|
| 834 |
|
|---|
| 835 |
double[2] curPoint; |
|---|
| 836 |
curPoint[0] = 1.0 - tn / (fp + tn); |
|---|
| 837 |
curPoint[1] = tp / (tp + fn); |
|---|
| 838 |
|
|---|
| 839 |
immutable xDist = curPoint[0] - lastPoint[0]; |
|---|
| 840 |
area += xDist * lastPoint[1]; // Rectangular part. |
|---|
| 841 |
area += xDist * 0.5 * (curPoint[1] - lastPoint[1]); // Triangular part. |
|---|
| 842 |
lastPoint[] = curPoint[]; |
|---|
| 843 |
} |
|---|
| 844 |
|
|---|
| 845 |
if(classA.length > 0 && classB.length == 0) { |
|---|
| 846 |
// Then we already have a sensitivity of 1, move straight to the right |
|---|
| 847 |
// to the point (1, 1). |
|---|
| 848 |
|
|---|
| 849 |
immutable xDist = 1 - lastPoint[0]; |
|---|
| 850 |
area += xDist * lastPoint[1]; // Rectangular part. |
|---|
| 851 |
area += xDist * 0.5 * (1 - lastPoint[1]); // Triangular part. |
|---|
| 852 |
} |
|---|
| 853 |
|
|---|
| 854 |
return area; |
|---|
| 855 |
} |
|---|
| 856 |
|
|---|
| 857 |
unittest { |
|---|
| 858 |
// Values worked out by hand on paper. If you don't believe me, work |
|---|
| 859 |
// them out yourself. |
|---|
| 860 |
assert(auroc([4,5,6], [1,2,3]) == 1); |
|---|
| 861 |
assert(approxEqual(auroc([8,6,7,5,3,0,9], [3,6,2,4,3,6]), 0.6904762)); |
|---|
| 862 |
assert(approxEqual(auroc([2,7,1,8,2,8,1,8], [3,1,4,1,5,9,2,6]), 0.546875)); |
|---|
| 863 |
} |
|---|
| 864 |
|
|---|
| 865 |
/// |
|---|
| 866 |
T sign(T)(T num) pure nothrow if(is(typeof(num < 0))) { |
|---|
| 867 |
if (num > 0) return 1; |
|---|
| 868 |
if (num < 0) return -1; |
|---|
| 869 |
return 0; |
|---|
| 870 |
} |
|---|
| 871 |
|
|---|
| 872 |
unittest { |
|---|
| 873 |
assert(sign(3.14159265)==1); |
|---|
| 874 |
assert(sign(-3)==-1); |
|---|
| 875 |
assert(sign(-2.7182818)==-1); |
|---|
| 876 |
} |
|---|
| 877 |
|
|---|
| 878 |
/// |
|---|
| 879 |
/*Values up to 9,999 are pre-calculated and stored in |
|---|
| 880 |
* an immutable global array, for performance. After this point, the gamma |
|---|
| 881 |
* function is used, because caching would take up too much memory, and if |
|---|
| 882 |
* done lazily, would cause threading issues.*/ |
|---|
| 883 |
double logFactorial(ulong n) { |
|---|
| 884 |
//Input is uint, can't be less than 0, no need to check. |
|---|
| 885 |
if(n < staticFacTableLen) { |
|---|
| 886 |
return logFactorialTable[cast(size_t) n]; |
|---|
| 887 |
} else return lgamma(n + 1); |
|---|
| 888 |
} |
|---|
| 889 |
|
|---|
| 890 |
unittest { |
|---|
| 891 |
// Cache branch. |
|---|
| 892 |
assert(cast(uint) round(exp(logFactorial(4)))==24); |
|---|
| 893 |
assert(cast(uint) round(exp(logFactorial(5)))==120); |
|---|
| 894 |
assert(cast(uint) round(exp(logFactorial(6)))==720); |
|---|
| 895 |
assert(cast(uint) round(exp(logFactorial(7)))==5040); |
|---|
| 896 |
assert(cast(uint) round(exp(logFactorial(3)))==6); |
|---|
| 897 |
// Gamma branch. |
|---|
| 898 |
assert(approxEqual(logFactorial(12000), 1.007175584216837e5, 1e-14)); |
|---|
| 899 |
assert(approxEqual(logFactorial(14000), 1.196610688711534e5, 1e-14)); |
|---|
| 900 |
} |
|---|
| 901 |
|
|---|
| 902 |
///Log of (n choose k). |
|---|
| 903 |
double logNcomb(ulong n, ulong k) |
|---|
| 904 |
in { |
|---|
| 905 |
assert(k <= n); |
|---|
| 906 |
} body { |
|---|
| 907 |
if(n < k) return -double.infinity; |
|---|
| 908 |
//Extra parentheses increase numerical accuracy. |
|---|
| 909 |
return logFactorial(n) - (logFactorial(k) + logFactorial(n - k)); |
|---|
| 910 |
} |
|---|
| 911 |
|
|---|
| 912 |
unittest { |
|---|
| 913 |
assert(cast(uint) round(exp(logNcomb(4,2)))==6); |
|---|
| 914 |
assert(cast(uint) round(exp(logNcomb(30,8)))==5852925); |
|---|
| 915 |
assert(cast(uint) round(exp(logNcomb(28,5)))==98280); |
|---|
| 916 |
} |
|---|
| 917 |
|
|---|
| 918 |
/**Controls whether Perm and Comb duplicate their buffer on each iteration and |
|---|
| 919 |
* return the copy, or recycle it and return an alias of it. |
|---|
| 920 |
* You want to choose recycle if each permutation/combination |
|---|
| 921 |
* will only be needed within the scope of the foreach statement. If they |
|---|
| 922 |
* may escape this scope, you want to choose dup. The default is dup, |
|---|
| 923 |
* because it's safer, but recycle can avoid lots of unnecessary GC activity. |
|---|
| 924 |
*/ |
|---|
| 925 |
enum Buffer { |
|---|
| 926 |
|
|---|
| 927 |
/// |
|---|
| 928 |
dup, |
|---|
| 929 |
|
|---|
| 930 |
/// |
|---|
| 931 |
recycle, |
|---|
| 932 |
|
|---|
| 933 |
DUP = dup, |
|---|
| 934 |
|
|---|
| 935 |
RECYCLE = recycle |
|---|
| 936 |
} |
|---|
| 937 |
|
|---|
| 938 |
static if(size_t.sizeof == 4) { |
|---|
| 939 |
private enum MAX_PERM_LEN = 12; |
|---|
| 940 |
} else { |
|---|
| 941 |
private enum MAX_PERM_LEN = 20; |
|---|
| 942 |
} |
|---|
| 943 |
|
|---|
| 944 |
/**A struct that generates all possible permutations of a sequence. |
|---|
| 945 |
* |
|---|
| 946 |
* Note: Permutations are output in undefined order. |
|---|
| 947 |
* |
|---|
| 948 |
* Bugs: Only supports iterating over up to size_t.max permutations. |
|---|
| 949 |
* This means the max permutation length is 12 on 32-bit machines, or 20 |
|---|
| 950 |
* on 64-bit. This was a conscious tradeoff to allow this range to have a |
|---|
| 951 |
* length of type size_t, since iterating over such huge permutation spaces |
|---|
| 952 |
* would be insanely slow anyhow. |
|---|
| 953 |
* |
|---|
| 954 |
* Examples: |
|---|
| 955 |
* --- |
|---|
| 956 |
* double[][] res; |
|---|
| 957 |
* auto perm = Perm!(double)([1.0, 2.0, 3.0][]); |
|---|
| 958 |
* foreach(p; perm) { |
|---|
| 959 |
* res ~= p; |
|---|
| 960 |
* } |
|---|
| 961 |
* |
|---|
| 962 |
* sort(res); |
|---|
| 963 |
* assert(res.canFindSorted([1.0, 2.0, 3.0])); |
|---|
| 964 |
* assert(res.canFindSorted([1.0, 3.0, 2.0])); |
|---|
| 965 |
* assert(res.canFindSorted([2.0, 1.0, 3.0])); |
|---|
| 966 |
* assert(res.canFindSorted([2.0, 3.0, 1.0])); |
|---|
| 967 |
* assert(res.canFindSorted([3.0, 1.0, 2.0])); |
|---|
| 968 |
* assert(res.canFindSorted([3.0, 2.0, 1.0])); |
|---|
| 969 |
* assert(res.length == 6); |
|---|
| 970 |
* --- |
|---|
| 971 |
*/ |
|---|
| 972 |
struct Perm(Buffer bufType = Buffer.dup, T) { |
|---|
| 973 |
private: |
|---|
| 974 |
|
|---|
| 975 |
// Optimization: Since we know this thing can't get too big (there's |
|---|
| 976 |
// an dstatsEnforce statement for it in the c'tor), just use arrays of the max |
|---|
| 977 |
// possible size for stuff and store them inline, if it's all just bytes. |
|---|
| 978 |
static if(T.sizeof == 1) { |
|---|
| 979 |
T[MAX_PERM_LEN] perm; |
|---|
| 980 |
} else { |
|---|
| 981 |
T* perm; |
|---|
| 982 |
} |
|---|
| 983 |
|
|---|
| 984 |
// The length of this range. |
|---|
| 985 |
size_t nPerms; |
|---|
| 986 |
|
|---|
| 987 |
ubyte[MAX_PERM_LEN] Is; |
|---|
| 988 |
ubyte currentIndex; |
|---|
| 989 |
|
|---|
| 990 |
// The length of these arrays. Stored once to minimize overhead. |
|---|
| 991 |
ubyte len; |
|---|
| 992 |
|
|---|
| 993 |
static if(bufType == Buffer.dup) { |
|---|
| 994 |
alias T[] PermArray; |
|---|
| 995 |
} else { |
|---|
| 996 |
alias const(T)[] PermArray; |
|---|
| 997 |
} |
|---|
| 998 |
|
|---|
| 999 |
public: |
|---|
| 1000 |
/**Generate permutations from an input range. |
|---|
| 1001 |
* Create a duplicate of this sequence |
|---|
| 1002 |
* so that the original sequence is not modified.*/ |
|---|
| 1003 |
this(U)(U input) |
|---|
| 1004 |
if(isForwardRange!(U)) { |
|---|
| 1005 |
|
|---|
| 1006 |
static if(ElementType!(U).sizeof > 1) { |
|---|
| 1007 |
auto arr = toArray(input); |
|---|
| 1008 |
dstatsEnforce(arr.length <= MAX_PERM_LEN, text( |
|---|
| 1009 |
"Can't iterate permutations of an array this long. (Max length: ", |
|---|
| 1010 |
MAX_PERM_LEN, ")")); |
|---|
| 1011 |
len = cast(ubyte) arr.length; |
|---|
| 1012 |
perm = arr.ptr; |
|---|
| 1013 |
} else { |
|---|
| 1014 |
foreach(elem; input) { |
|---|
| 1015 |
dstatsEnforce(len < MAX_PERM_LEN, text( |
|---|
| 1016 |
"Can't iterate permutations of an array this long. (Max length: ", |
|---|
| 1017 |
MAX_PERM_LEN, ")")); |
|---|
| 1018 |
|
|---|
| 1019 |
perm[len++] = elem; |
|---|
| 1020 |
} |
|---|
| 1021 |
} |
|---|
| 1022 |
|
|---|
| 1023 |
popFront(); |
|---|
| 1024 |
|
|---|
| 1025 |
nPerms = 1; |
|---|
| 1026 |
for(size_t i = 2; i <= len; i++) { |
|---|
| 1027 |
nPerms *= i; |
|---|
| 1028 |
} |
|---|
| 1029 |
} |
|---|
| 1030 |
|
|---|
| 1031 |
/**Note: PermArray is just an alias to either T[] or const(T)[], |
|---|
| 1032 |
* depending on whether bufType == Buf.dup or Buf.recycle. |
|---|
| 1033 |
*/ |
|---|
| 1034 |
@property PermArray front() { |
|---|
| 1035 |
static if(bufType == Buffer.dup) { |
|---|
| 1036 |
return perm[0..len].dup; |
|---|
| 1037 |
} else { |
|---|
| 1038 |
return perm[0..len]; |
|---|
| 1039 |
} |
|---|
| 1040 |
} |
|---|
| 1041 |
|
|---|
| 1042 |
/**Get the next permutation in the sequence.*/ |
|---|
| 1043 |
void popFront() { |
|---|
| 1044 |
if(len == 0) { |
|---|
| 1045 |
nPerms--; |
|---|
| 1046 |
return; |
|---|
| 1047 |
} |
|---|
| 1048 |
if(currentIndex == len - 1) { |
|---|
| 1049 |
currentIndex--; |
|---|
| 1050 |
nPerms--; |
|---|
| 1051 |
return; |
|---|
| 1052 |
} |
|---|
| 1053 |
|
|---|
| 1054 |
uint max = len - currentIndex; |
|---|
| 1055 |
if(Is[currentIndex] == max) { |
|---|
| 1056 |
|
|---|
| 1057 |
if(currentIndex == 0) { |
|---|
| 1058 |
nPerms--; |
|---|
| 1059 |
assert(nPerms == 0, to!string(nPerms)); |
|---|
| 1060 |
return; |
|---|
| 1061 |
} |
|---|
| 1062 |
|
|---|
| 1063 |
Is[currentIndex..len] = 0; |
|---|
| 1064 |
currentIndex--; |
|---|
| 1065 |
return popFront(); |
|---|
| 1066 |
} else { |
|---|
| 1067 |
rotateLeft(perm[currentIndex..len]); |
|---|
| 1068 |
Is[currentIndex]++; |
|---|
| 1069 |
currentIndex++; |
|---|
| 1070 |
return popFront(); |
|---|
| 1071 |
} |
|---|
| 1072 |
} |
|---|
| 1073 |
|
|---|
| 1074 |
/// |
|---|
| 1075 |
@property bool empty() { |
|---|
| 1076 |
return nPerms == 0; |
|---|
| 1077 |
} |
|---|
| 1078 |
|
|---|
| 1079 |
/**The number of permutations left. |
|---|
| 1080 |
*/ |
|---|
| 1081 |
@property size_t length() const pure nothrow { |
|---|
| 1082 |
return nPerms; |
|---|
| 1083 |
} |
|---|
| 1084 |
|
|---|
| 1085 |
/// |
|---|
| 1086 |
@property typeof(this) save() { |
|---|
| 1087 |
auto ret = this; |
|---|
| 1088 |
static if(T.sizeof > 1) { |
|---|
| 1089 |
ret.perm = (ret.perm[0..len].dup).ptr; |
|---|
| 1090 |
} |
|---|
| 1091 |
return ret; |
|---|
| 1092 |
} |
|---|
| 1093 |
} |
|---|
| 1094 |
|
|---|
| 1095 |
private template PermRet(Buffer bufType, T...) { |
|---|
| 1096 |
static if(isForwardRange!(T[0])) { |
|---|
| 1097 |
alias Perm!(bufType, ElementType!(T[0])) PermRet; |
|---|
| 1098 |
} else static if(T.length == 1) { |
|---|
| 1099 |
alias Perm!(bufType, byte) PermRet; |
|---|
| 1100 |
} else alias Perm!(bufType, T[0]) PermRet; |
|---|
| 1101 |
} |
|---|
| 1102 |
|
|---|
| 1103 |
/**Create a Perm struct from a range or of a set of bounds. |
|---|
| 1104 |
* |
|---|
| 1105 |
* Note: PermRet is just a template to figure out what this should return. |
|---|
| 1106 |
* I would use auto if not for bug 2251. |
|---|
| 1107 |
* |
|---|
| 1108 |
* Examples: |
|---|
| 1109 |
* --- |
|---|
| 1110 |
* auto p = perm([1,2,3]); // All permutations of [1,2,3]. |
|---|
| 1111 |
* auto p = perm(5); // All permutations of [0,1,2,3,4]. |
|---|
| 1112 |
* auto p = perm(-1, 2); // All permutations of [-1, 0, 1]. |
|---|
| 1113 |
* --- |
|---|
| 1114 |
*/ |
|---|
| 1115 |
PermRet!(bufType, T) perm(Buffer bufType = Buffer.dup, T...)(T stuff) { |
|---|
| 1116 |
alias typeof(return) rt; |
|---|
| 1117 |
static if(isForwardRange!(T[0])) { |
|---|
| 1118 |
return rt(stuff); |
|---|
| 1119 |
} else static if(T.length == 1) { |
|---|
| 1120 |
static assert(isIntegral!(T[0]), |
|---|
| 1121 |
"If one argument is passed to perm(), it must be an integer."); |
|---|
| 1122 |
|
|---|
| 1123 |
dstatsEnforce(stuff[0] >= 0, "Cannot generate permutations of length < 0."); |
|---|
| 1124 |
dstatsEnforce(stuff[0] <= MAX_PERM_LEN, text( |
|---|
| 1125 |
"Can't iterate permutations of an array of length ", |
|---|
| 1126 |
stuff[0], ". (Max length: ", MAX_PERM_LEN, ")")); |
|---|
| 1127 |
|
|---|
| 1128 |
// Optimization: If we're duplicating the array every time we return |
|---|
| 1129 |
// it, we want it to be as small as possible. Since we know the lower |
|---|
| 1130 |
// bound is zero and the upper bound can't be > byte.max, use bytes |
|---|
| 1131 |
// instead of bigger integer types. |
|---|
| 1132 |
return rt(seq(cast(byte) 0, cast(byte) stuff[0])); |
|---|
| 1133 |
} else { |
|---|
| 1134 |
static assert(stuff.length == 2); |
|---|
| 1135 |
return rt(seq(stuff[0], stuff[1])); |
|---|
| 1136 |
} |
|---|
| 1137 |
} |
|---|
| 1138 |
|
|---|
| 1139 |
unittest { |
|---|
| 1140 |
// Test degenerate case of len == 0; |
|---|
| 1141 |
uint nZero = 0; |
|---|
| 1142 |
foreach(elem; perm(0)) { |
|---|
| 1143 |
assert(elem.length == 0); |
|---|
| 1144 |
nZero++; |
|---|
| 1145 |
} |
|---|
| 1146 |
assert(nZero == 1); |
|---|
| 1147 |
|
|---|
| 1148 |
double[][] res; |
|---|
| 1149 |
auto p1 = perm([1.0, 2.0, 3.0][]); |
|---|
| 1150 |
assert(p1.length == 6); |
|---|
| 1151 |
foreach(p; p1) { |
|---|
| 1152 |
res ~= p; |
|---|
| 1153 |
} |
|---|
| 1154 |
auto sortedRes = sort(res); |
|---|
| 1155 |
assert(sortedRes.canFind([1.0, 2.0, 3.0])); |
|---|
| 1156 |
assert(sortedRes.canFind([1.0, 3.0, 2.0])); |
|---|
| 1157 |
assert(sortedRes.canFind([2.0, 1.0, 3.0])); |
|---|
| 1158 |
assert(sortedRes.canFind([2.0, 3.0, 1.0])); |
|---|
| 1159 |
assert(sortedRes.canFind([3.0, 1.0, 2.0])); |
|---|
| 1160 |
assert(sortedRes.canFind([3.0, 2.0, 1.0])); |
|---|
| 1161 |
assert(res.length == 6); |
|---|
| 1162 |
byte[][] res2; |
|---|
| 1163 |
auto perm2 = perm(3); |
|---|
| 1164 |
foreach(p; perm2) { |
|---|
| 1165 |
res2 ~= p.dup; |
|---|
| 1166 |
} |
|---|
| 1167 |
auto sortedRes2 = sort(res2); |
|---|
| 1168 |
assert(sortedRes2.canFind( to!(byte[])([0, 1, 2]))); |
|---|
| 1169 |
assert(sortedRes2.canFind( to!(byte[])([0, 2, 1]))); |
|---|
| 1170 |
assert(sortedRes2.canFind( to!(byte[])([1, 0, 2]))); |
|---|
| 1171 |
assert(sortedRes2.canFind( to!(byte[])([1, 2, 0]))); |
|---|
| 1172 |
assert(sortedRes2.canFind( to!(byte[])([2, 0, 1]))); |
|---|
| 1173 |
assert(sortedRes2.canFind( to!(byte[])([2, 1, 0]))); |
|---|
| 1174 |
assert(res2.length == 6); |
|---|
| 1175 |
|
|---|
| 1176 |
// Indirect tests: If the elements returned are unique, there are N! of |
|---|
| 1177 |
// them, and they contain what they're supposed to contain, the result is |
|---|
| 1178 |
// correct. |
|---|
| 1179 |
auto perm3 = perm(0U, 6U); |
|---|
| 1180 |
bool[uint[]] table; |
|---|
| 1181 |
foreach(p; perm3) { |
|---|
| 1182 |
table[p.idup] = true; |
|---|
| 1183 |
} |
|---|
| 1184 |
assert(table.length == 720); |
|---|
| 1185 |
foreach(elem, val; table) { |
|---|
| 1186 |
assert(elem.dup.insertionSort == [0U, 1, 2, 3, 4, 5]); |
|---|
| 1187 |
} |
|---|
| 1188 |
auto perm4 = perm(5); |
|---|
| 1189 |
bool[byte[]] table2; |
|---|
| 1190 |
foreach(p; perm4) { |
|---|
| 1191 |
table2[p.idup] = true; |
|---|
| 1192 |
} |
|---|
| 1193 |
assert(table2.length == 120); |
|---|
| 1194 |
foreach(elem, val; table2) { |
|---|
| 1195 |
assert(elem.dup.insertionSort == to!(byte[])([0, 1, 2, 3, 4])); |
|---|
| 1196 |
} |
|---|
| 1197 |
} |
|---|
| 1198 |
|
|---|
| 1199 |
/**Generates every possible combination of r elements of the given sequence, or r |
|---|
| 1200 |
* array indices from zero to N, depending on which ctor is called. Uses |
|---|
| 1201 |
* an input range interface. |
|---|
| 1202 |
* |
|---|
| 1203 |
* Bugs: Only supports iterating over up to size_t.max combinations. |
|---|
| 1204 |
* This was a conscious tradeoff to allow this range to have a |
|---|
| 1205 |
* length of type size_t, since iterating over such huge combination spaces |
|---|
| 1206 |
* would be insanely slow anyhow. |
|---|
| 1207 |
* |
|---|
| 1208 |
* Examples: |
|---|
| 1209 |
* --- |
|---|
| 1210 |
auto comb1 = Comb!(uint)(5, 2); |
|---|
| 1211 |
uint[][] vals; |
|---|
| 1212 |
foreach(c; comb1) { |
|---|
| 1213 |
vals ~= c; |
|---|
| 1214 |
} |
|---|
| 1215 |
sort(vals); |
|---|
| 1216 |
assert(vals.canFindSorted([0u,1].dup)); |
|---|
| 1217 |
assert(vals.canFindSorted([0u,2].dup)); |
|---|
| 1218 |
assert(vals.canFindSorted([0u,3].dup)); |
|---|
| 1219 |
assert(vals.canFindSorted([0u,4].dup)); |
|---|
| 1220 |
assert(vals.canFindSorted([1u,2].dup)); |
|---|
| 1221 |
assert(vals.canFindSorted([1u,3].dup)); |
|---|
| 1222 |
assert(vals.canFindSorted([1u,4].dup)); |
|---|
| 1223 |
assert(vals.canFindSorted([2u,3].dup)); |
|---|
| 1224 |
assert(vals.canFindSorted([2u,4].dup)); |
|---|
| 1225 |
assert(vals.canFindSorted([3u,4].dup)); |
|---|
| 1226 |
assert(vals.length == 10); |
|---|
| 1227 |
--- |
|---|
| 1228 |
*/ |
|---|
| 1229 |
struct Comb(T, Buffer bufType = Buffer.dup) { |
|---|
| 1230 |
private: |
|---|
| 1231 |
int N; |
|---|
| 1232 |
int R; |
|---|
| 1233 |
int diff; |
|---|
| 1234 |
uint* pos; |
|---|
| 1235 |
T* myArray; |
|---|
| 1236 |
T* chosen; |
|---|
| 1237 |
size_t _length; |
|---|
| 1238 |
|
|---|
| 1239 |
static if(bufType == Buffer.dup) { |
|---|
| 1240 |
alias T[] CombArray; |
|---|
| 1241 |
} else { |
|---|
| 1242 |
alias const(T)[] CombArray; |
|---|
| 1243 |
} |
|---|
| 1244 |
|
|---|
| 1245 |
void popFrontNum() { |
|---|
| 1246 |
int index = R - 1; |
|---|
| 1247 |
for(; index != -1 && pos[index] == diff + index; --index) {} |
|---|
| 1248 |
if(index == -1) { |
|---|
| 1249 |
_length--; |
|---|
| 1250 |
return; |
|---|
| 1251 |
} |
|---|
| 1252 |
pos[index]++; |
|---|
| 1253 |
for(uint i = index + 1; i < R; ++i) { |
|---|
| 1254 |
pos[i] = pos[index] + i - index; |
|---|
| 1255 |
} |
|---|
| 1256 |
_length--; |
|---|
| 1257 |
} |
|---|
| 1258 |
|
|---|
| 1259 |
void popFrontArray() { |
|---|
| 1260 |
int index = R - 1; |
|---|
| 1261 |
for(; index != -1 && pos[index] == diff + index; --index) {} |
|---|
| 1262 |
if(index == -1) { |
|---|
| 1263 |
_length--; |
|---|
| 1264 |
return; |
|---|
| 1265 |
} |
|---|
| 1266 |
pos[index]++; |
|---|
| 1267 |
chosen[index] = myArray[pos[index]]; |
|---|
| 1268 |
for(uint i = index + 1; i < R; ++i) { |
|---|
| 1269 |
pos[i] = pos[index] + i - index; |
|---|
| 1270 |
chosen[i] = myArray[pos[i]]; |
|---|
| 1271 |
} |
|---|
| 1272 |
_length--; |
|---|
| 1273 |
} |
|---|
| 1274 |
|
|---|
| 1275 |
void setLen() { |
|---|
| 1276 |
// Used at construction. |
|---|
| 1277 |
auto rLen = exp( logNcomb(N, R)); |
|---|
| 1278 |
dstatsEnforce(rLen < size_t.max, "Too many combinations."); |
|---|
| 1279 |
_length = roundTo!size_t(rLen); |
|---|
| 1280 |
} |
|---|
| 1281 |
|
|---|
| 1282 |
public: |
|---|
| 1283 |
|
|---|
| 1284 |
/**Ctor to generate all possible combinations of array indices for a length r |
|---|
| 1285 |
* array. This is a special-case optimization and is faster than simply |
|---|
| 1286 |
* using the other ctor to generate all length r combinations from |
|---|
| 1287 |
* seq(0, length).*/ |
|---|
| 1288 |
static if(is(T == uint)) { |
|---|
| 1289 |
this(uint n, uint r) |
|---|
| 1290 |
in { |
|---|
| 1291 |
assert(n >= r); |
|---|
| 1292 |
} body { |
|---|
| 1293 |
if(r > 0) { |
|---|
| 1294 |
pos = (seq(0U, r)).ptr; |
|---|
| 1295 |
pos[r - 1]--; |
|---|
| 1296 |
} |
|---|
| 1297 |
N = n; |
|---|
| 1298 |
R = r; |
|---|
| 1299 |
diff = N - R; |
|---|
| 1300 |
popFront(); |
|---|
| 1301 |
setLen(); |
|---|
| 1302 |
} |
|---|
| 1303 |
} |
|---|
| 1304 |
|
|---|
| 1305 |
/**General ctor. array is a sequence from which to generate the |
|---|
| 1306 |
* combinations. r is the length of the combinations to be generated.*/ |
|---|
| 1307 |
this(T[] array, uint r) { |
|---|
| 1308 |
if(r > 0) { |
|---|
| 1309 |
pos = (seq(0U, r)).ptr; |
|---|
| 1310 |
pos[r - 1]--; |
|---|
| 1311 |
} |
|---|
| 1312 |
N = to!uint(array.length); |
|---|
| 1313 |
R = r; |
|---|
| 1314 |
diff = N - R; |
|---|
| 1315 |
auto temp = array.dup; |
|---|
| 1316 |
myArray = temp.ptr; |
|---|
| 1317 |
chosen = (new T[r]).ptr; |
|---|
| 1318 |
foreach(i; 0..r) { |
|---|
| 1319 |
chosen[i] = myArray[pos[i]]; |
|---|
| 1320 |
} |
|---|
| 1321 |
popFront(); |
|---|
| 1322 |
setLen(); |
|---|
| 1323 |
} |
|---|
| 1324 |
|
|---|
| 1325 |
@property CombArray front() { |
|---|
| 1326 |
static if(bufType == Buffer.recycle) { |
|---|
| 1327 |
static if(!is(T == uint)) { |
|---|
| 1328 |
return chosen[0..R].dup; |
|---|
| 1329 |
} else { |
|---|
| 1330 |
return (myArray is null) ? pos[0..R] : chosen[0..R]; |
|---|
| 1331 |
} |
|---|
| 1332 |
} else { |
|---|
| 1333 |
static if(!is(T == uint)) { |
|---|
| 1334 |
return chosen[0..R].dup; |
|---|
| 1335 |
} else { |
|---|
| 1336 |
return (myArray is null) ? pos[0..R].dup : chosen[0..R].dup; |
|---|
| 1337 |
} |
|---|
| 1338 |
} |
|---|
| 1339 |
} |
|---|
| 1340 |
|
|---|
| 1341 |
void popFront() { |
|---|
| 1342 |
return (myArray is null) ? popFrontNum() : popFrontArray(); |
|---|
| 1343 |
} |
|---|
| 1344 |
|
|---|
| 1345 |
/// |
|---|
| 1346 |
@property bool empty() const pure nothrow { |
|---|
| 1347 |
return length == 0; |
|---|
| 1348 |
} |
|---|
| 1349 |
|
|---|
| 1350 |
/// |
|---|
| 1351 |
@property size_t length() const pure nothrow { |
|---|
| 1352 |
return _length; |
|---|
| 1353 |
} |
|---|
| 1354 |
|
|---|
| 1355 |
/// |
|---|
| 1356 |
@property typeof(this) save() { |
|---|
| 1357 |
auto ret = this; |
|---|
| 1358 |
ret.pos = (pos[0..R].dup).ptr; |
|---|
| 1359 |
|
|---|
| 1360 |
if(chosen !is null) { |
|---|
| 1361 |
ret.chosen = (chosen[0..R].dup).ptr; |
|---|
| 1362 |
} |
|---|
| 1363 |
|
|---|
| 1364 |
return ret; |
|---|
| 1365 |
} |
|---|
| 1366 |
} |
|---|
| 1367 |
|
|---|
| 1368 |
private template CombRet(T, Buffer bufType) { |
|---|
| 1369 |
static if(isForwardRange!(T)) { |
|---|
| 1370 |
alias Comb!(Unqual!(ElementType!(T)), bufType) CombRet; |
|---|
| 1371 |
} else static if(isIntegral!T) { |
|---|
| 1372 |
alias Comb!(uint, bufType) CombRet; |
|---|
| 1373 |
} else static assert(0, "comb can only be created with range or uint."); |
|---|
| 1374 |
} |
|---|
| 1375 |
|
|---|
| 1376 |
/**Create a Comb struct from a range or of a set of bounds. |
|---|
| 1377 |
* |
|---|
| 1378 |
* Note: CombRet is just a template to figure out what this should return. |
|---|
| 1379 |
* I would use auto if not for bug 2251. |
|---|
| 1380 |
* |
|---|
| 1381 |
* Examples: |
|---|
| 1382 |
* --- |
|---|
| 1383 |
* auto c1 = comb([1,2,3], 2); // Any two elements from [1,2,3]. |
|---|
| 1384 |
* auto c2 = comb(5, 3); // Any three elements from [0,1,2,3,4]. |
|---|
| 1385 |
* --- |
|---|
| 1386 |
*/ |
|---|
| 1387 |
CombRet!(T, bufType) comb(Buffer bufType = Buffer.dup, T)(T stuff, uint r) { |
|---|
| 1388 |
alias typeof(return) rt; |
|---|
| 1389 |
static if(isForwardRange!(T)) { |
|---|
| 1390 |
return rt(stuff, r); |
|---|
| 1391 |
} else { |
|---|
| 1392 |
static assert(isIntegral!T, "Can only call comb on ints and ranges."); |
|---|
| 1393 |
return rt(cast(uint) stuff, r); |
|---|
| 1394 |
} |
|---|
| 1395 |
} |
|---|
| 1396 |
|
|---|
| 1397 |
unittest { |
|---|
| 1398 |
// Test degenerate case of r == 0. Shouldn't segfault. |
|---|
| 1399 |
uint nZero = 0; |
|---|
| 1400 |
foreach(elem; comb(5, 0)) { |
|---|
| 1401 |
assert(elem.length == 0); |
|---|
| 1402 |
nZero++; |
|---|
| 1403 |
} |
|---|
| 1404 |
assert(nZero == 1); |
|---|
| 1405 |
|
|---|
| 1406 |
nZero = 0; |
|---|
| 1407 |
uint[] foo = [1,2,3,4,5]; |
|---|
| 1408 |
foreach(elem; comb(foo, 0)) { |
|---|
| 1409 |
assert(elem.length == 0); |
|---|
| 1410 |
nZero++; |
|---|
| 1411 |
} |
|---|
| 1412 |
assert(nZero == 1); |
|---|
| 1413 |
|
|---|
| 1414 |
// Test indexing verison first. |
|---|
| 1415 |
auto comb1 = comb(5, 2); |
|---|
| 1416 |
uint[][] vals; |
|---|
| 1417 |
foreach(c; comb1) { |
|---|
| 1418 |
vals ~= c; |
|---|
| 1419 |
} |
|---|
| 1420 |
|
|---|
| 1421 |
auto sortedVals = sort(vals); |
|---|
| 1422 |
assert(sortedVals.canFind([0u,1].dup)); |
|---|
| 1423 |
assert(sortedVals.canFind([0u,2].dup)); |
|---|
| 1424 |
assert(sortedVals.canFind([0u,3].dup)); |
|---|
| 1425 |
assert(sortedVals.canFind([0u,4].dup)); |
|---|
| 1426 |
assert(sortedVals.canFind([1u,2].dup)); |
|---|
| 1427 |
assert(sortedVals.canFind([1u,3].dup)); |
|---|
| 1428 |
assert(sortedVals.canFind([1u,4].dup)); |
|---|
| 1429 |
assert(sortedVals.canFind([2u,3].dup)); |
|---|
| 1430 |
assert(sortedVals.canFind([2u,4].dup)); |
|---|
| 1431 |
assert(sortedVals.canFind([3u,4].dup)); |
|---|
| 1432 |
assert(vals.length == 10); |
|---|
| 1433 |
|
|---|
| 1434 |
// Now, test the array version. |
|---|
| 1435 |
auto comb2 = comb(seq(5U, 10U), 3); |
|---|
| 1436 |
vals = null; |
|---|
| 1437 |
foreach(c; comb2) { |
|---|
| 1438 |
vals ~= c; |
|---|
| 1439 |
} |
|---|
| 1440 |
sortedVals = sort(vals); |
|---|
| 1441 |
assert(sortedVals.canFind([5u, 6, 7].dup)); |
|---|
| 1442 |
assert(sortedVals.canFind([5u, 6, 8].dup)); |
|---|
| 1443 |
assert(sortedVals.canFind([5u, 6, 9].dup)); |
|---|
| 1444 |
assert(sortedVals.canFind([5u, 7, 8].dup)); |
|---|
| 1445 |
assert(sortedVals.canFind([5u, 7, 9].dup)); |
|---|
| 1446 |
assert(sortedVals.canFind([5u, 8, 9].dup)); |
|---|
| 1447 |
assert(sortedVals.canFind([6U, 7, 8].dup)); |
|---|
| 1448 |
assert(sortedVals.canFind([6u, 7, 9].dup)); |
|---|
| 1449 |
assert(sortedVals.canFind([6u, 8, 9].dup)); |
|---|
| 1450 |
assert(sortedVals.canFind([7u, 8, 9].dup)); |
|---|
| 1451 |
assert(vals.length == 10); |
|---|
| 1452 |
|
|---|
| 1453 |
// Now a test of a larger dataset where more subtle bugs could hide. |
|---|
| 1454 |
// If the values returned are unique even after sorting, are composed of |
|---|
| 1455 |
// the correct elements, and there is the right number of them, this thing |
|---|
| 1456 |
// works. |
|---|
| 1457 |
|
|---|
| 1458 |
bool[uint[]] results; // Keep track of how many UNIQUE items we have. |
|---|
| 1459 |
auto comb3 = Comb!(uint)(seq(10U, 22U), 6); |
|---|
| 1460 |
foreach(c; comb3) { |
|---|
| 1461 |
auto dupped = c.dup.sort; |
|---|
| 1462 |
// Make sure all elems are unique and within range. |
|---|
| 1463 |
assert(dupped.length == 6); |
|---|
| 1464 |
assert(dupped[0] > 9 && dupped[0] < 22); |
|---|
| 1465 |
foreach(i; 1..dupped.length) { |
|---|
| 1466 |
// Make sure elements are unique. Remember, the array is sorted. |
|---|
| 1467 |
assert(dupped[i] > dupped[i - 1]); |
|---|
| 1468 |
assert(dupped[i] > 9 && dupped[i] < 22); |
|---|
| 1469 |
} |
|---|
| 1470 |
results[dupped.idup] = true; |
|---|
| 1471 |
} |
|---|
| 1472 |
assert(results.length == 924); // (12 choose 6). |
|---|
| 1473 |
} |
|---|
| 1474 |
|
|---|
| 1475 |
/**Converts a range with arbitrary element types (usually strings) to a |
|---|
| 1476 |
* range of reals lazily. Ignores any elements that could not be successfully |
|---|
| 1477 |
* converted. Useful for creating an input range that can be used with this |
|---|
| 1478 |
* lib out of a File without having to read the whole file into an array first. |
|---|
| 1479 |
* The advantages to this over just using std.algorithm.map are that it's |
|---|
| 1480 |
* less typing and that it ignores non-convertible elements, such as blank |
|---|
| 1481 |
* lines. |
|---|
| 1482 |
* |
|---|
| 1483 |
* If rangeIn is an inputRange, then the result of this function is an input |
|---|
| 1484 |
* range. Otherwise, the result is a forward range. |
|---|
| 1485 |
* |
|---|
| 1486 |
* Note: The reason this struct doesn't have length or random access, |
|---|
| 1487 |
* even if this is supported by rangeIn, is because it has to be able to |
|---|
| 1488 |
* filter out non-convertible elements. |
|---|
| 1489 |
* |
|---|
| 1490 |
* Examples: |
|---|
| 1491 |
* --- |
|---|
| 1492 |
* // Perform a T-test to see whether the mean of the data being input as text |
|---|
| 1493 |
* // from stdin is different from zero. This data might not even fit in memory |
|---|
| 1494 |
* // if it were read in eagerly. |
|---|
| 1495 |
* |
|---|
| 1496 |
* auto myRange = toNumericRange( stdin.byLine() ); |
|---|
| 1497 |
* TestRes result = studentsTTest(myRange); |
|---|
| 1498 |
* writeln(result); |
|---|
| 1499 |
* --- |
|---|
| 1500 |
*/ |
|---|
| 1501 |
ToNumericRange!R toNumericRange(R)(R rangeIn) if(isInputRange!R) { |
|---|
| 1502 |
alias ToNumericRange!R RT; |
|---|
| 1503 |
return RT(rangeIn); |
|---|
| 1504 |
} |
|---|
| 1505 |
|
|---|
| 1506 |
/// |
|---|
| 1507 |
struct ToNumericRange(R) if(isInputRange!R) { |
|---|
| 1508 |
private: |
|---|
| 1509 |
alias ElementType!R E; |
|---|
| 1510 |
R inputRange; |
|---|
| 1511 |
real _front; |
|---|
| 1512 |
|
|---|
| 1513 |
public: |
|---|
| 1514 |
this(R inputRange) { |
|---|
| 1515 |
this.inputRange = inputRange; |
|---|
| 1516 |
try { |
|---|
| 1517 |
_front = to!real(inputRange.front); |
|---|
| 1518 |
} catch(ConvException) { |
|---|
| 1519 |
popFront(); |
|---|
| 1520 |
} |
|---|
| 1521 |
} |
|---|
| 1522 |
|
|---|
| 1523 |
@property real front() { |
|---|
| 1524 |
return _front; |
|---|
| 1525 |
} |
|---|
| 1526 |
|
|---|
| 1527 |
void popFront() { |
|---|
| 1528 |
while(true) { |
|---|
| 1529 |
inputRange.popFront(); |
|---|
| 1530 |
if(inputRange.empty) { |
|---|
| 1531 |
return; |
|---|
| 1532 |
} |
|---|
| 1533 |
auto inFront = inputRange.front; |
|---|
| 1534 |
|
|---|
| 1535 |
// If inFront is some string, strip the whitespace. |
|---|
| 1536 |
static if( is(typeof(strip(inFront)))) { |
|---|
| 1537 |
inFront = strip(inFront); |
|---|
| 1538 |
} |
|---|
| 1539 |
|
|---|
| 1540 |
try { |
|---|
| 1541 |
_front = to!real(inFront); |
|---|
| 1542 |
return; |
|---|
| 1543 |
} catch(ConvException) { |
|---|
| 1544 |
continue; |
|---|
| 1545 |
} |
|---|
| 1546 |
} |
|---|
| 1547 |
} |
|---|
| 1548 |
|
|---|
| 1549 |
@property bool empty() { |
|---|
| 1550 |
return inputRange.empty; |
|---|
| 1551 |
} |
|---|
| 1552 |
} |
|---|
| 1553 |
|
|---|
| 1554 |
unittest { |
|---|
| 1555 |
// Test both with non-convertible element as first element and without. |
|---|
| 1556 |
// This is because non-convertible elements as the first element are |
|---|
| 1557 |
// handled as a special case in the implementation. |
|---|
| 1558 |
string[2] dataArr = ["3.14\n2.71\n8.67\nabracadabra\n362436", |
|---|
| 1559 |
"foobar\n3.14\n2.71\n8.67\nabracadabra\n362436"]; |
|---|
| 1560 |
|
|---|
| 1561 |
foreach(data; dataArr) { |
|---|
| 1562 |
std.file.write("NumericFileTestDeleteMe.txt", data); |
|---|
| 1563 |
scope(exit) std.file.remove("NumericFileTestDeleteMe.txt"); |
|---|
| 1564 |
auto myFile = File("NumericFileTestDeleteMe.txt"); |
|---|
| 1565 |
auto rng = toNumericRange(myFile.byLine()); |
|---|
| 1566 |
assert(approxEqual(rng.front, 3.14)); |
|---|
| 1567 |
rng.popFront; |
|---|
| 1568 |
assert(approxEqual(rng.front, 2.71)); |
|---|
| 1569 |
rng.popFront; |
|---|
| 1570 |
assert(approxEqual(rng.front, 8.67)); |
|---|
| 1571 |
rng.popFront; |
|---|
| 1572 |
assert(approxEqual(rng.front, 362435)); |
|---|
| 1573 |
assert(!rng.empty); |
|---|
| 1574 |
rng.popFront; |
|---|
| 1575 |
assert(rng.empty); |
|---|
| 1576 |
myFile.close(); |
|---|
| 1577 |
} |
|---|
| 1578 |
} |
|---|
| 1579 |
|
|---|
| 1580 |
// Used for ILP optimizations. |
|---|
| 1581 |
package template StaticIota(size_t upTo) { |
|---|
| 1582 |
static if(upTo == 0) { |
|---|
| 1583 |
alias TypeTuple!() StaticIota; |
|---|
| 1584 |
} else { |
|---|
| 1585 |
alias TypeTuple!(StaticIota!(upTo - 1), upTo - 1) StaticIota; |
|---|
| 1586 |
} |
|---|
| 1587 |
} |
|---|
| 1588 |
|
|---|
| 1589 |
// Verify that there are no TempAlloc memory leaks anywhere in the code covered |
|---|
| 1590 |
// by the unittest. This should always be the last unittest of the module. |
|---|
| 1591 |
unittest { |
|---|
| 1592 |
auto TAState = TempAlloc.getState; |
|---|
| 1593 |
assert(TAState.used == 0); |
|---|
| 1594 |
assert(TAState.nblocks < 2); |
|---|
| 1595 |
} |
|---|