root/trunk/base.d

Revision 293, 46.9 kB (checked in by dsimcha, 1 year ago)

Better loop unrolling: Speed up ILP optimized functions ~10%.

Line 
1 /**Relatively low-level primitives on which to build higher-level math/stat
2  * functionality.  Some are used internally, some are just things that may be
3  * useful to users of this library.  This module is starting to take on the
4  * appearance of a small utility library.
5  *
6  * Note:  In several functions in this module that return arrays, the last
7  * parameter is an optional buffer for storing the return value.  If this
8  * parameter is ommitted or the buffer is not large enough, one will be
9  * allocated on the GC heap.
10  *
11  * Author:  David Simcha*/
12  /*
13  * License:
14  * Boost Software License - Version 1.0 - August 17th, 2003
15  *
16  * Permission is hereby granted, free of charge, to any person or organization
17  * obtaining a copy of the software and accompanying documentation covered by
18  * this license (the "Software") to use, reproduce, display, distribute,
19  * execute, and transmit the Software, and to prepare derivative works of the
20  * Software, and to permit third-parties to whom the Software is furnished to
21  * do so, all subject to the following:
22  ** The copyright notices in the Software and this entire statement, including
23  * the above license grant, this restriction and the following disclaimer,
24  * must be included in all copies of the Software, in whole or in part, and
25  * all derivative works of the Software, unless such copies or derivative
26  * works are solely in the form of machine-executable object code generated by
27  * a source language processor.
28  *
29  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31  * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
32  * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
33  * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
34  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
35  * DEALINGS IN THE SOFTWARE.
36  */
37 module dstats.base;
38
39 import std.math, std.traits, std.typecons, std.algorithm, std.range,
40     std.exception, std.conv, std.functional, std.typetuple;
41
42 import dstats.alloc, dstats.sort;
43
44 import std.string : strip;
45
46 immutable double[] logFactorialTable;
47
48 private enum size_t staticFacTableLen = 10_000;
49
50 shared static this() {
51     // Allocating on heap instead of static data segment to avoid
52     // false pointer GC issues.
53     double[] sfTemp = new double[staticFacTableLen];
54     sfTemp[0] = 0;
55     for(uint i = 1; i < staticFacTableLen; i++) {
56         sfTemp[i] = sfTemp[i - 1] + log(i);
57     }
58     logFactorialTable = assumeUnique(sfTemp);
59 }
60
61 version(unittest) {
62     import std.stdio, std.random, std.file;
63
64     void main (){}
65 }
66
67 class DstatsArgumentException : Exception {
68     this(string msg, string file, int line) {
69         super(msg, file, line);
70     }
71 }
72
73 T dstatsEnforce(T, string file = __FILE__, int line = __LINE__)
74 (T value, lazy const(char)[] msg = null) {
75     if(!value) {
76         const(char)[] lazyMsg = msg;
77         auto exceptMsg = (lazyMsg !is null) ? lazyMsg.idup : "Invalid argument.";
78         throw new DstatsArgumentException(exceptMsg, file, line);
79     }
80
81     return value;
82 }
83
84 package void enforceConfidence
85 (string file = __FILE__, int line = __LINE__)(double conf) {
86     dstatsEnforce!(bool, file, line)(conf >= 0 && conf <= 1,
87         text("Confidence intervals must be between 0 and 1, not ", conf, "."));
88 }
89
90 /** Tests whether T is an input range whose elements can be implicitly
91  * converted to doubles.*/
92 template doubleInput(T) {
93     enum doubleInput = isInputRange!(T) && is(ElementType!(T) : double);
94 }
95
96 // See Bugzilla 2873.  This can be removed once that's fixed.
97 template hasLength(R) {
98     enum bool hasLength = is(typeof(R.init.length) : ulong) ||
99                       is(typeof(R.init.length()) : ulong);
100 }
101
102 // isIterable was added to SVN versions of Phobos, but not to released ones yet.
103 static if(!__traits(compiles, std.traits.isIterable!(uint))) {
104     template isIterable(T)
105     {
106         static if (is(typeof({foreach(elem; T.init) {}}))) {
107             enum bool isIterable = true;
108         } else {
109             enum bool isIterable = false;
110         }
111     }
112 }
113
114 unittest {
115     struct Foo {  // For testing opApply.
116
117         int opApply(int delegate(ref uint) dg) { assert(0); }
118     }
119
120     static assert(isIterable!(uint[]));
121     static assert(!isIterable!(uint));
122     static assert(isIterable!(Foo));
123     static assert(isIterable!(uint[string]));
124
125     auto c = chain((uint[]).init, (uint[]).init);
126     static assert(isIterable!(typeof(c)));
127 }
128
129 /**Determine the iterable type of any iterable object, regardless of whether
130  * it uses ranges, opApply, etc.  This is typeof(elem) if one does
131  * foreach(elem; T.init) {}.*/
132 template IterType(T) {
133     alias ReturnType!(typeof(
134         {
135             foreach(elem; T.init) {
136                 return elem;
137             }
138             assert(0);
139         })) IterType;
140 }
141
142 unittest {
143     struct Foo {  // For testing opApply.
144         // For testing.
145
146         int opApply(int delegate(ref uint) dg) { assert(0); }
147     }
148
149     static assert(is(IterType!(uint[]) == uint));
150     static assert(is(IterType!(Foo) == uint));
151     static assert(is(IterType!(uint[string]) == uint));
152
153     auto c = chain((uint[]).init, (uint[]).init);
154     static assert(is(IterType!(typeof(c)) == uint));
155 }
156
157 /**Tests whether T is iterable and has elements of a type implicitly
158  * convertible to double.*/
159 template doubleIterable(T) {
160     static if(!isIterable!T) {
161         enum doubleIterable = false;
162     } else {
163         enum doubleIterable = is(IterType!(T) : double);
164     }
165 }
166
167 /**Writes the contents of an input range to an output range.
168  *
169  * Returns:  The output range.*/
170 O mate(I, O)(I input, O output)
171 if(isInputRange!(I) && isOutputRange!(O, ElementType!(I))) {
172     foreach(elem; input) {
173         output.put(elem);
174     }
175     return output;
176 }
177
178 /**Given a tuple possibly containing forward ranges, returns a tuple where
179  * save() has been called on all forward ranges.
180  */
181 Tuple!T saveAll(T...)(T args) {
182     Tuple!T ret;
183
184     foreach(ti, elem; args) {
185         static if(isForwardRange!(typeof(elem))) {
186             ret.field[ti] = elem.save;
187         } else {
188             ret.field[ti] = elem;
189         }
190     }
191
192     return ret;
193 }
194
195 /**Bins data into nbin equal width bins, indexed from
196  * 0 to nbin - 1, with 0 being the smallest bin, etc.
197  * The values returned are the counts for each bin.  Returns results on the GC
198  * heap by default, but uses TempAlloc stack if alloc == Alloc.STACK.
199  *
200  * Works with any forward range with elements implicitly convertible to double.*/
201 Ret[] binCounts(Ret = uint, T)(T data, uint nbin, Ret[] buf = null)
202 if(isForwardRange!(T) && doubleInput!(T)) {
203     dstatsEnforce(nbin > 0, "Cannot bin data into zero bins.");
204
205     alias Unqual!(ElementType!(T)) E;
206     E min = data.front, max = data.front;
207     foreach(elem; data) {
208         if(elem > max)
209             max = elem;
210         else if(elem < min)
211             min = elem;
212     }
213     E range = max - min;
214
215     Ret[] bins;
216     if(buf.length < nbin) {
217         bins = new Ret[nbin];
218     } else {
219         bins = buf[0..nbin];
220         bins[] = 0;
221     }
222
223     foreach(elem; data) {
224         // Using the truncation as a feature.
225         uint whichBin = cast(uint) ((elem - min) * nbin / range);
226
227         // Handle edge case by putting largest item in largest bin.
228         if(whichBin == nbin) whichBin--;
229
230         bins[whichBin]++;
231     }
232
233     return bins;
234 }
235
236 unittest {
237     double[] data = [0.0, .01, .03, .05, .11, .31, .51, .71, .89, 1];
238     auto res = binCounts(data, 10);
239     assert(res == [4U, 1, 0, 1, 0, 1, 0, 1, 1, 1]);
240
241     auto buf = new uint[10];
242     foreach(ref elem; buf) {
243         elem = uniform(0, 34534);
244     }
245
246     res = binCounts(data, 10, buf);
247     assert(res == [4U, 1, 0, 1, 0, 1, 0, 1, 1, 1]);
248 }
249
250 /**Bins data into nbin equal width bins, indexed from
251  * 0 to nbin - 1, with 0 being the smallest bin, etc.
252  * The values returned are the bin index for each element.
253  *
254  * Default return type is ubyte, because in the dstats.infotheory,
255  * entropy() and related functions specialize on ubytes, and become
256  * substandially faster.  However, if you're using more than 255 bins,
257  * you'll have to provide a different return type as a template parameter.*/
258 Ret[] bin(Ret = ubyte, T)(T data, uint nbin, Ret[] buf = null)
259 if(isForwardRange!(T) && doubleInput!(T) && isIntegral!(Ret)) {
260     dstatsEnforce(nbin > 0, "Cannot bin data into zero bins.");
261     dstatsEnforce(nbin <= (cast(uint) Ret.max + 1), "Cannot bin into " ~
262         to!string(nbin) ~ " bins and store the results in a " ~
263         Ret.stringof ~ ".");
264     alias ElementType!(T) E;
265     Unqual!(E) min = data.front, max = data.front;
266
267     auto dminmax = data;
268     dminmax.popFront;
269     foreach(elem; dminmax) {
270         if(elem > max)
271             max = elem;
272         else if(elem < min)
273             min = elem;
274     }
275     E range = max - min;
276     Ret[] bins;
277     if(buf.length < data.length) {
278         bins = newVoid!(Ret)(data.length);
279     } else {
280         bins = buf[0..data.length];
281     }
282
283     foreach(i, elem; data) {
284         // Using the truncation as a feature.
285         uint whichBin = cast(uint) ((elem - min) * nbin / range);
286
287         // Handle edge case by putting largest item in largest bin.
288         if(whichBin == nbin)
289             whichBin--;
290
291         bins[i] = cast(Ret) whichBin;
292     }
293
294     return bins;
295 }
296
297 unittest {
298     mixin(newFrame);
299     double[] data = [0.0, .01, .03, .05, .11, .31, .51, .71, .89, 1];
300     auto res = bin(data, 10);
301     assert(res == to!(ubyte[])([0, 0, 0, 0, 1, 3, 5, 7, 8, 9]));
302
303     auto buf = new ubyte[20];
304     foreach(ref elem; buf) {
305         elem = cast(ubyte) uniform(0U, 255);
306     }
307
308     res = bin(data, 10, buf);
309     assert(res == to!(ubyte[])([0, 0, 0, 0, 1, 3, 5, 7, 8, 9]));
310
311     // Make sure this throws:
312     try {
313         auto foo = bin( seq(0U, 1_000U), 512);
314         assert(0);
315     } catch(Exception e) {
316         // It's supposed to throw.
317     }
318 }
319
320 /**Bins data into nbin equal frequency bins, indexed from
321  * 0 to nbin - 1, with 0 being the smallest bin, etc.
322  * The values returned are the bin index for each element.
323  *
324  * Default return type is ubyte, because in the dstats.infotheory,
325  * entropy() and related functions specialize on ubytes, and become
326  * substandially faster.  However, if you're using more than 256 bins,
327  * you'll have to provide a different return type as a template parameter.*/
328 Ret[] frqBin(Ret = ubyte, T)(T data, uint nbin, Ret[] buf = null)
329 if(doubleInput!(T) && isForwardRange!(T) && hasLength!(T) && isIntegral!(Ret)) {
330     dstatsEnforce(nbin > 0, "Cannot bin data into zero bins.");
331     dstatsEnforce(nbin <= data.length,
332         "Cannot equal frequency bin data into more than data.length bins.");
333     dstatsEnforce(nbin <= (cast(ulong) Ret.max) + 1UL, "Cannot bin into " ~
334         to!string(nbin) ~ " bins and store the results in a " ~
335         Ret.stringof ~ ".");
336
337     Ret[] result;
338     if(buf.length < data.length) {
339         result = newVoid!(Ret)(data.length);
340     } else {
341         result = buf[0..data.length];
342     }
343
344     auto perm = newStack!(size_t)(data.length);
345     scope(exit) TempAlloc.free;
346
347     foreach(i, ref e; perm) {
348         e = i;
349     }
350
351     static if(isRandomAccessRange!(T)) {
352         bool compare(size_t lhs, size_t rhs) {
353             return data[lhs] < data[rhs];
354         }
355
356         qsort!compare(perm);
357     } else {
358         auto dd = tempdup(data);
359         qsort(dd, perm);
360         TempAlloc.free;
361     }
362
363     auto rem = data.length % nbin;
364     Ret bin = 0;
365     auto i = 0, frq = data.length / nbin;
366     while(i < data.length) {
367         foreach(j; 0..(bin < rem) ? frq + 1 : frq) {
368             result[perm[i++]] = bin;
369         }
370         bin++;
371     }
372     return result;
373 }
374
375 unittest {
376     double[] data = [5U, 1, 3, 8, 30, 10, 7];
377     auto res = frqBin(data, 3);
378     assert(res == to!(ubyte[])([0, 0, 0, 1, 2, 2, 1]));
379     data = [3, 1, 4, 1, 5, 9, 2, 6, 5];
380
381     auto buf = new ubyte[32];
382     foreach(i, ref elem; buf) {
383         elem = cast(ubyte) i;
384     }
385
386     res = frqBin(data, 4, buf);
387     assert(res == to!(ubyte[])([1, 0, 1, 0, 2, 3, 0, 3, 2]));
388     data = [3U, 1, 4, 1, 5, 9, 2, 6, 5, 3, 4, 8, 9, 7, 9, 2];
389     res = frqBin(data, 4);
390     assert(res == to!(ubyte[])([1, 0, 1, 0, 2, 3, 0, 2, 2, 1, 1, 3, 3, 2, 3, 0]));
391
392     // Make sure this throws:
393     try {
394         auto foo = frqBin( seq(0U, 1_000U), 512);
395         assert(0);
396     } catch(Exception e) {
397         // It's supposed to throw.
398     }
399 }
400
401 /**Generates a sequence from [start..end] by increment.  Includes start,
402  * excludes end.  Does so eagerly as an array.
403  *
404  * Examples:
405  * ---
406  * auto s = seq(0, 5);
407  * assert(s == [0, 1, 2, 3, 4]);
408  * ---
409  */
410 CommonType!(T, U)[] seq(T, U, V = uint)(T start, U end, V increment = 1U) {
411     dstatsEnforce(increment > 0, "Cannot have a seq increment <= 0.");
412     dstatsEnforce(end >= start, "End must be >= start in seq.");
413
414     alias CommonType!(T, U) R;
415     auto output = newVoid!(R)(cast(size_t) ((end - start) / increment));
416
417     size_t count = 0;
418     for(T i = start; i < end; i += increment) {
419         output[count++] = i;
420     }
421     return output;
422 }
423
424 unittest {
425     auto s = seq(0, 5);
426     assert(s == [0, 1, 2, 3, 4]);
427 }
428
429 /**Given an input array, outputs an array containing the rank from
430  * [1, input.length] corresponding to each element.  Ties are dealt with by
431  * averaging.  This function does not reorder the input range.
432  * Return type is float[] by default, but if you are sure you have no ties,
433  * ints can be used for efficiency (in which case ties will not be averaged),
434  * and if you need more precision when averaging ties, you can use double or
435  * real.
436  *
437  * Works with any input range.
438  *
439  * Examples:
440  * ---
441  * uint[] test = [3, 5, 3, 1, 2];
442  * assert(rank!("a < b", float)(test) == [3.5f, 5f, 3.5f, 1f, 2f]);
443  * assert(test == [3U, 5, 3, 1, 2]);
444  * ---*/
445 Ret[] rank(alias compFun = "a < b", Ret = double, T)(T input, Ret[] buf = null)
446 if(isInputRange!(T) && is(typeof(input.front < input.front))) {
447     static if(!isRandomAccessRange!(T) || !hasLength!(T)) {
448         mixin(newFrame);
449         return rankSort!(compFun, Ret)( tempdup(input), buf);
450     } else {
451
452         /* It's faster to duplicate the input and then use rankSort on the
453          * duplicate, but more space efficient to create an index array.  Check
454          * TempAlloc to see whether we can fit everything we need on the current
455          * frame.  If yes, use the faster algorithm since we're effectively
456          * not saving any space by using the space-efficient algorithm.
457          * If no, use the more space-efficient algorithm.
458          */
459         auto bytesNeeded = input.length * (ElementType!(T).sizeof + size_t.sizeof);
460         if(bytesNeeded < TempAlloc.slack) {
461             mixin(newFrame);
462             return rankSort!(compFun, Ret)( tempdup(input), buf);
463         } else {
464             return rankUsingIndex!(compFun, Ret)(input, buf);
465         }
466     }
467
468     assert(0);
469 }
470
471 /* Space-efficient but slow way of computing ranks.  The extra indirection
472  * caused by creating the index array wreaks havock with CPU caches, especially
473  * for large arrays that don't fit entirely in cache.
474  */
475 private Ret[] rankUsingIndex(alias compFun, Ret, T)(T input, Ret[] buf) {
476     mixin(newFrame);
477     size_t[] indices = newStack!size_t(input.length);
478     foreach(i, ref elem; indices) {
479         elem = i;
480     }
481
482     bool compare(size_t lhs, size_t rhs) {
483         alias binaryFun!(compFun) innerComp;
484         return innerComp(input[lhs], input[rhs]);
485     }
486
487     dstats.sort.qsort!compare(indices);
488
489     Ret[] ret;
490     if(buf.length < indices.length) {
491         ret = newVoid!Ret(indices.length);
492     } else {
493         ret = buf[0..indices.length];
494     }
495
496     foreach(i, index; indices) {
497         ret[index] = i + 1;
498     }
499
500     auto myIndexed = Indexed!(T)(input, indices);
501
502     static if(!isIntegral!Ret) {
503         averageTies(myIndexed, ret, indices);
504     }
505
506     return ret;
507 }
508
509 private struct Indexed(T) {
510     T someRange;
511     size_t[] indices;
512
513     ElementType!T opIndex(size_t index) {
514         return someRange[indices[index]];
515     }
516
517     @property size_t length() {
518         return indices.length;
519     }
520 }
521
522 /**Same as rank(), but also sorts the input range.
523  * The array returned will still be identical to that returned by rank(), i.e.
524  * the rank of each element will correspond to the ranks of the elements in the
525  * input array before sorting.
526  *
527  * Examples:
528  * ---
529  * uint[] test = [3, 5, 3, 1, 2];
530  * assert(rankSort(test) == [3.5, 5, 3.5, 1.0, 2.0]);
531  * assert(test == [1U, 2, 3, 4, 5]);
532  * ---
533  */
534 Ret[] rankSort(alias compFun = "a < b", Ret = double, T)(T input, Ret[] buf = null)
535 if(isRandomAccessRange!(T) && hasLength!(T) && is(typeof(input.front < input.front))) {
536     mixin(newFrame);
537     Ret[] ranks;
538     if(buf.length < input.length) {
539         ranks = newVoid!(Ret)(input.length);
540     } else {
541         ranks = buf[0..input.length];
542     }
543
544     size_t[] perms = newStack!(size_t)(input.length);
545     foreach(i, ref p; perms) {
546         p = i;
547     }
548
549     dstats.sort.qsort!compFun(input, perms);
550     foreach(i; 0..perms.length)  {
551         ranks[perms[i]] = i + 1;
552     }
553
554     static if(!isIntegral!Ret) {
555         averageTies(input, ranks, perms);
556     }
557
558     return ranks;
559 }
560
561 unittest {
562     uint[] test = [3, 5, 3, 1, 2];
563
564     float[] dummy;
565     assert(rankUsingIndex!("a < b", float)(test, dummy) == [3.5f, 5f, 3.5f, 1f, 2f]);
566     assert(test == [3U, 5, 3, 1, 2]);
567
568     double[] dummy2;
569     assert(rankUsingIndex!("a < b", double)(test, dummy2) == [3.5, 5, 3.5, 1, 2]);
570     assert(rankSort(test) == [3.5, 5.0, 3.5, 1.0, 2.0]);
571     assert(test == [1U,2,3,3,5]);
572
573     uint[] test2 = [3,3,1,2];
574     assert(rank(test2) == [3.5,3.5,1,2]);
575 }
576
577 private void averageTies(T, U)(T sortedInput, U[] ranks, size_t[] perms)
578 in {
579     assert(sortedInput.length == ranks.length);
580     assert(ranks.length == perms.length);
581 } body {
582     size_t tieCount = 1;
583     foreach(i; 1..ranks.length) {
584         if(sortedInput[i] == sortedInput[i - 1]) {
585             tieCount++;
586         } else if(tieCount > 1) {
587             double avg = 0;
588             immutable increment = 1.0 / tieCount;
589
590             foreach(perm; perms[i - tieCount..i]) {
591                 avg += ranks[perm] * increment;
592             }
593
594             foreach(perm; perms[i - tieCount..i]) {
595                 ranks[perm] = avg;
596             }
597             tieCount = 1;
598         }
599     }
600
601     if(tieCount > 1) { // Handle the end.
602         double avg = 0;
603         immutable increment = 1.0 / tieCount;
604
605         foreach(perm; perms[perms.length - tieCount..$]) {
606             avg += ranks[perm] * increment;
607         }
608
609         foreach(perm; perms[perms.length - tieCount..$]) {
610             ranks[perm] = avg;
611         }
612     }
613 }
614
615 /**Returns an AA of counts of every element in input.  Works w/ any iterable.
616  *
617  * Examples:
618  * ---
619  * int[] foo = [1,2,3,1,2,4];
620  * uint[int] frq = frequency(foo);
621  * assert(frq.length == 4);
622  * assert(frq[1] == 2);
623  * assert(frq[4] == 1);
624  * ---*/
625 uint[IterType!(T)] frequency(T)(T input)
626 if(isIterable!(T)) {
627     typeof(return) output;
628     foreach(i; input) {
629         output[i]++;
630     }
631     return output;
632 }
633
634 unittest {
635     int[] foo = [1,2,3,1,2,4];
636     uint[int] frq = frequency(foo);
637     assert(frq.length == 4);
638     assert(frq[1] == 2);
639     assert(frq[4] == 1);
640 }
641
642 /**Given a range of values and a range of categories, separates values
643  * by category.  This function also guarantees that the order within each
644  * category will be maintained.
645  *
646  * Note:  While the general convention of this library is to try to avoid
647  * heap allocations whenever possible so that multithreaded code scales well and
648  * false pointers aren't an issue, this function allocates like crazy
649  * because there's basically no other way to implement it.  Don't use it in
650  * performance-critical multithreaded code.
651  *
652  * Examples:
653  * ---
654  * uint[] values = [1,2,3,4,5,6,7,8];
655  * bool[] categories = [false, false, false, false, true, true, true, true];
656  * auto separated = byCategory(values, categories);
657  * auto tResult = studentsTTest(separated.values);
658  * ---
659  */
660 ElementType!(V)[][ElementType!(C)] byCategory(V, C)(V values, C categories)
661 if(isInputRange!(V) && isInputRange!(C) && !is(ElementType!C == bool)) {
662     alias ElementType!(V) EV;
663     alias ElementType!(C) EC;
664
665     Appender!(EV[], EV)[EC] aa;
666     while(!values.empty && !categories.empty) {
667         scope(exit) {
668             values.popFront();
669             categories.popFront();
670         }
671
672         auto category = categories.front;
673         auto ptr = category in aa;
674         if(ptr is null) {
675             aa[category] = typeof(aa[category]).init;
676             ptr = category in aa;
677         }
678
679         ptr.put(values.front);
680     }
681
682     EV[][EC] ret;
683     foreach(k, v; aa) {
684         ret[k] = v.data;
685     }
686
687     return ret;
688 }
689
690 /**Special case implementation for when ElementType!C is boolean.*/
691 ElementType!(V)[][2] byCategory(V, C)(V values, C categories)
692 if(isInputRange!(V) && isInputRange!(C) && is(ElementType!C == bool)) {
693     typeof(return) ret;
694
695     static if(hasLength!V || hasLength!C) {
696         // Preallocate.
697         size_t zeroIndex, oneIndex;
698
699         static if(!hasLength!V) {
700             ret[0].length = categories.length;
701             ret[1].length = values.length;
702         } else static if(!hasLength!C) {
703             ret[0].length = values.length;
704             ret[1].length = values.length;
705         } else {
706             immutable len = min(categories.length, values.length);
707             ret[0].length = len;
708             ret[1].length = len;
709         }
710
711         while(!values.empty && !categories.empty) {
712             scope(exit) {
713                 values.popFront();
714                 categories.popFront();
715             }
716
717             auto category = categories.front;
718             if(category) {
719                 ret[1][oneIndex++] = values.front;
720             } else {
721                 ret[0][zeroIndex++] = values.front;
722             }
723         }
724
725         ret[0] = ret[0][0..zeroIndex];
726         ret[1] = ret[1][0..oneIndex];
727     } else {
728         auto app0 = appender!(ElementType!(V)[])();
729         auto app1 = appender!(ElementType!(V)[])();
730
731         while(!values.empty && !categories.empty) {
732             scope(exit) {
733                 values.popFront();
734                 categories.popFront();
735             }
736
737             auto category = categories.front;
738             if(category) {
739                 app1.put(values.front);
740             } else {
741                 app0.put(values.front);
742             }
743         }
744
745         ret[0] = app0.data;
746         ret[1] = app1.data;
747     }
748
749     return ret;
750 }
751
752 unittest {
753     int[] nums = [1,2,3,4,5,6,7,8,9];
754     int[] categories = [0,1,2,0,1,2,0,1,2];
755
756     // The filter is just to prevent having a length.
757     auto categories2 = filter!"a == a"(map!"a % 2 == 0"(nums));
758
759     auto result = byCategory(nums, categories);
760     assert(result[0] == [1,4,7]);
761     assert(result[1] == [2,5,8]);
762     assert(result[2] == [3,6,9]);
763
764     auto res2 = byCategory(filter!"a == a"(nums), categories2);
765     assert(res2[0] == [1,3,5,7,9]);
766     assert(res2[1] == [2,4,6,8]);
767
768     auto res3 = byCategory(nums,
769         [false, true, false, true, false, true, false, true, false]);
770     assert(res2 == res3);
771 }
772
773 /**Finds the area under the ROC curve (a curve with sensitivity on the Y-axis
774  * and 1 - specificity on the X-axis).  This is a useful metric for
775  * determining how well a test statistic discriminates between two classes.
776  * The following assumptions are made in this implementation:
777  *
778  * 1.  For some cutoff value c and test statistic T, your decision rule is of
779  *     the form "Class A if T > c, Class B if T < c".
780  *
781  * 2.  In the case of ties, i.e. if class A and class B both have an identical
782  *     value, linear interpolation is used.  This is because changing the
783  *     value of c infinitesimally will change both sensitivity and specificity
784  *     in these cases.
785  */
786 double auroc(R1, R2)(R1 classATs, R2 classBTs)
787 if(isNumeric!(ElementType!R1) && isNumeric!(ElementType!R2)) {
788     mixin(newFrame);
789     auto classA = tempdup(classATs);
790     auto classB = tempdup(classBTs);
791     qsort(classA);
792     qsort(classB);
793
794     // Start cutoff at -infinity, such that we get everything in class A, i.e.
795     // perfect specificity, zero sensitivity.  We arbitrarily define class B
796     // as our "positive" class.
797     double tp = 0, tn = classA.length, fp = 0, fn = classB.length;
798     double[2] lastPoint = 0;
799
800     Unqual!(CommonType!(ElementType!R1, ElementType!R2)) currentVal;
801
802     ElementType!R1 popA() {
803         tn--;
804         fp++;
805         auto ret = classA.front;
806         classA.popFront();
807         return ret;
808     }
809
810     ElementType!R2 popB() {
811         fn--;
812         tp++;
813         auto ret = classB.front;
814         classB.popFront();
815         return ret;
816     }
817
818     double area = 0;
819     while(!classA.empty && !classB.empty) {
820         if(classA.front < classB.front) {
821             currentVal = popA();
822         } else {
823             currentVal = popB();
824         }
825
826         // Handle ties.
827         while(!classA.empty && classA.front == currentVal) {
828             popA();
829         }
830
831         while(!classB.empty && classB.front == currentVal) {
832             popB();
833         }
834
835         double[2] curPoint;
836         curPoint[0] = 1.0 - tn / (fp + tn);
837         curPoint[1] = tp / (tp + fn);
838
839         immutable xDist = curPoint[0] - lastPoint[0];
840         area += xDist * lastPoint[1];  // Rectangular part.
841         area += xDist * 0.5 * (curPoint[1] - lastPoint[1]);  // Triangular part.
842         lastPoint[] = curPoint[];
843     }
844
845     if(classA.length > 0 && classB.length == 0) {
846         // Then we already have a sensitivity of 1, move straight to the right
847         // to the point (1, 1).
848
849         immutable xDist = 1 - lastPoint[0];
850         area += xDist * lastPoint[1];  // Rectangular part.
851         area += xDist * 0.5 * (1 - lastPoint[1]);  // Triangular part.
852     }
853
854     return area;
855 }
856
857 unittest {
858     // Values worked out by hand on paper.  If you don't believe me, work
859     // them out yourself.
860     assert(auroc([4,5,6], [1,2,3]) == 1);
861     assert(approxEqual(auroc([8,6,7,5,3,0,9], [3,6,2,4,3,6]), 0.6904762));
862     assert(approxEqual(auroc([2,7,1,8,2,8,1,8], [3,1,4,1,5,9,2,6]), 0.546875));
863 }
864
865 ///
866 T sign(T)(T num) pure nothrow if(is(typeof(num < 0))) {
867     if (num > 0) return 1;
868     if (num < 0) return -1;
869     return 0;
870 }
871
872 unittest {
873     assert(sign(3.14159265)==1);
874     assert(sign(-3)==-1);
875     assert(sign(-2.7182818)==-1);
876 }
877
878 ///
879 /*Values up to 9,999 are pre-calculated and stored in
880  * an immutable global array, for performance.  After this point, the gamma
881  * function is used, because caching would take up too much memory, and if
882  * done lazily, would cause threading issues.*/
883 double logFactorial(ulong n) {
884     //Input is uint, can't be less than 0, no need to check.
885     if(n < staticFacTableLen) {
886         return logFactorialTable[cast(size_t) n];
887     } else return lgamma(n + 1);
888 }
889
890 unittest {
891     // Cache branch.
892     assert(cast(uint) round(exp(logFactorial(4)))==24);
893     assert(cast(uint) round(exp(logFactorial(5)))==120);
894     assert(cast(uint) round(exp(logFactorial(6)))==720);
895     assert(cast(uint) round(exp(logFactorial(7)))==5040);
896     assert(cast(uint) round(exp(logFactorial(3)))==6);
897     // Gamma branch.
898     assert(approxEqual(logFactorial(12000), 1.007175584216837e5, 1e-14));
899     assert(approxEqual(logFactorial(14000), 1.196610688711534e5, 1e-14));
900 }
901
902 ///Log of (n choose k).
903 double logNcomb(ulong n, ulong k)
904 in {
905     assert(k <= n);
906 } body {
907     if(n < k) return -double.infinity;
908     //Extra parentheses increase numerical accuracy.
909     return logFactorial(n) - (logFactorial(k) + logFactorial(n - k));
910 }
911
912 unittest {
913     assert(cast(uint) round(exp(logNcomb(4,2)))==6);
914     assert(cast(uint) round(exp(logNcomb(30,8)))==5852925);
915     assert(cast(uint) round(exp(logNcomb(28,5)))==98280);
916 }
917
918 /**Controls whether Perm and Comb duplicate their buffer on each iteration and
919  * return the copy, or recycle it and return an alias of it.
920  * You want to choose recycle if each permutation/combination
921  * will only be needed within the scope of the foreach statement.  If they
922  * may escape this scope, you want to choose dup.  The default is dup,
923  * because it's safer, but recycle can avoid lots of unnecessary GC activity.
924  */
925 enum Buffer {
926
927     ///
928     dup,
929
930     ///
931     recycle,
932
933     DUP = dup,
934
935     RECYCLE = recycle
936 }
937
938 static if(size_t.sizeof == 4) {
939     private enum MAX_PERM_LEN = 12;
940 } else {
941     private enum MAX_PERM_LEN = 20;
942 }
943
944 /**A struct that generates all possible permutations of a sequence.
945  *
946  * Note:  Permutations are output in undefined order.
947  *
948  * Bugs:  Only supports iterating over up to size_t.max permutations.
949  * This means the max permutation length is 12 on 32-bit machines, or 20
950  * on 64-bit.  This was a conscious tradeoff to allow this range to have a
951  * length of type size_t, since iterating over such huge permutation spaces
952  * would be insanely slow anyhow.
953  *
954  * Examples:
955  * ---
956  *  double[][] res;
957  *  auto perm = Perm!(double)([1.0, 2.0, 3.0][]);
958  *  foreach(p; perm) {
959  *      res ~= p;
960  *  }
961  *
962  *  sort(res);
963  *  assert(res.canFindSorted([1.0, 2.0, 3.0]));
964  *  assert(res.canFindSorted([1.0, 3.0, 2.0]));
965  *  assert(res.canFindSorted([2.0, 1.0, 3.0]));
966  *  assert(res.canFindSorted([2.0, 3.0, 1.0]));
967  *  assert(res.canFindSorted([3.0, 1.0, 2.0]));
968  *  assert(res.canFindSorted([3.0, 2.0, 1.0]));
969  *  assert(res.length == 6);
970  *  ---
971  */
972 struct Perm(Buffer bufType = Buffer.dup, T) {
973 private:
974
975     // Optimization:  Since we know this thing can't get too big (there's
976     // an dstatsEnforce statement for it in the c'tor), just use arrays of the max
977     // possible size for stuff and store them inline, if it's all just bytes.
978     static if(T.sizeof == 1) {
979         T[MAX_PERM_LEN] perm;
980     } else {
981         T* perm;
982     }
983
984     // The length of this range.
985     size_t nPerms;
986
987     ubyte[MAX_PERM_LEN] Is;
988     ubyte currentIndex;
989
990     // The length of these arrays.  Stored once to minimize overhead.
991     ubyte len;
992
993     static if(bufType == Buffer.dup) {
994         alias T[] PermArray;
995     } else {
996         alias const(T)[] PermArray;
997     }
998
999 public:
1000     /**Generate permutations from an input range.
1001      * Create a duplicate of this sequence
1002      * so that the original sequence is not modified.*/
1003     this(U)(U input)
1004     if(isForwardRange!(U)) {
1005
1006         static if(ElementType!(U).sizeof > 1) {
1007             auto arr = toArray(input);
1008             dstatsEnforce(arr.length <= MAX_PERM_LEN, text(
1009                 "Can't iterate permutations of an array this long.  (Max length:  ",
1010                         MAX_PERM_LEN, ")"));
1011             len = cast(ubyte) arr.length;
1012             perm = arr.ptr;
1013         } else {
1014             foreach(elem; input) {
1015                 dstatsEnforce(len < MAX_PERM_LEN, text(
1016                     "Can't iterate permutations of an array this long.  (Max length:  ",
1017                         MAX_PERM_LEN, ")"));
1018
1019                 perm[len++] = elem;
1020             }
1021         }
1022
1023         popFront();
1024
1025         nPerms = 1;
1026         for(size_t i = 2; i <= len; i++) {
1027             nPerms *= i;
1028         }
1029     }
1030
1031     /**Note:  PermArray is just an alias to either T[] or const(T)[],
1032      * depending on whether bufType == Buf.dup or Buf.recycle.
1033      */
1034     @property PermArray front() {
1035         static if(bufType == Buffer.dup) {
1036             return perm[0..len].dup;
1037         } else {
1038             return perm[0..len];
1039         }
1040     }
1041
1042     /**Get the next permutation in the sequence.*/
1043     void popFront() {
1044         if(len == 0) {
1045             nPerms--;
1046             return;
1047         }
1048         if(currentIndex == len - 1) {
1049             currentIndex--;
1050             nPerms--;
1051             return;
1052         }
1053
1054         uint max = len - currentIndex;
1055         if(Is[currentIndex] == max) {
1056
1057             if(currentIndex == 0) {
1058                 nPerms--;
1059                 assert(nPerms == 0, to!string(nPerms));
1060                 return;
1061             }
1062
1063             Is[currentIndex..len] = 0;
1064             currentIndex--;
1065             return popFront();
1066         } else {
1067             rotateLeft(perm[currentIndex..len]);
1068             Is[currentIndex]++;
1069             currentIndex++;
1070             return popFront();
1071         }
1072     }
1073
1074     ///
1075     @property bool empty() {
1076         return nPerms == 0;
1077     }
1078
1079     /**The number of permutations left.
1080      */
1081     @property size_t length() const pure nothrow {
1082         return nPerms;
1083     }
1084
1085     ///
1086     @property typeof(this) save() {
1087         auto ret = this;
1088         static if(T.sizeof > 1) {
1089             ret.perm = (ret.perm[0..len].dup).ptr;
1090         }
1091         return ret;
1092     }
1093 }
1094
1095 private template PermRet(Buffer bufType, T...) {
1096     static if(isForwardRange!(T[0])) {
1097         alias Perm!(bufType, ElementType!(T[0])) PermRet;
1098     } else static if(T.length == 1) {
1099         alias Perm!(bufType, byte) PermRet;
1100     } else alias Perm!(bufType, T[0]) PermRet;
1101 }
1102
1103 /**Create a Perm struct from a range or of a set of bounds.
1104  *
1105  * Note:  PermRet is just a template to figure out what this should return.
1106  * I would use auto if not for bug 2251.
1107  *
1108  * Examples:
1109  * ---
1110  * auto p = perm([1,2,3]);  // All permutations of [1,2,3].
1111  * auto p = perm(5);  // All permutations of [0,1,2,3,4].
1112  * auto p = perm(-1, 2); // All permutations of [-1, 0, 1].
1113  * ---
1114  */
1115 PermRet!(bufType, T) perm(Buffer bufType = Buffer.dup, T...)(T stuff) {
1116     alias typeof(return) rt;
1117     static if(isForwardRange!(T[0])) {
1118         return rt(stuff);
1119     } else static if(T.length == 1) {
1120         static assert(isIntegral!(T[0]),
1121             "If one argument is passed to perm(), it must be an integer.");
1122
1123         dstatsEnforce(stuff[0] >= 0, "Cannot generate permutations of length < 0.");
1124         dstatsEnforce(stuff[0] <= MAX_PERM_LEN, text(
1125             "Can't iterate permutations of an array of length ",
1126             stuff[0], ".  (Max length:  ", MAX_PERM_LEN, ")"));
1127
1128         // Optimization:  If we're duplicating the array every time we return
1129         // it, we want it to be as small as possible.  Since we know the lower
1130         // bound is zero and the upper bound can't be > byte.max, use bytes
1131         // instead of bigger integer types.
1132         return rt(seq(cast(byte) 0, cast(byte) stuff[0]));
1133     } else {
1134         static assert(stuff.length == 2);
1135         return rt(seq(stuff[0], stuff[1]));
1136     }
1137 }
1138
1139 unittest {
1140     // Test degenerate case of len == 0;
1141     uint nZero = 0;
1142     foreach(elem; perm(0)) {
1143         assert(elem.length == 0);
1144         nZero++;
1145     }
1146     assert(nZero == 1);
1147
1148     double[][] res;
1149     auto p1 = perm([1.0, 2.0, 3.0][]);
1150     assert(p1.length == 6);
1151     foreach(p; p1) {
1152         res ~= p;
1153     }
1154     auto sortedRes = sort(res);
1155     assert(sortedRes.canFind([1.0, 2.0, 3.0]));
1156     assert(sortedRes.canFind([1.0, 3.0, 2.0]));
1157     assert(sortedRes.canFind([2.0, 1.0, 3.0]));
1158     assert(sortedRes.canFind([2.0, 3.0, 1.0]));
1159     assert(sortedRes.canFind([3.0, 1.0, 2.0]));
1160     assert(sortedRes.canFind([3.0, 2.0, 1.0]));
1161     assert(res.length == 6);
1162     byte[][] res2;
1163     auto perm2 = perm(3);
1164     foreach(p; perm2) {
1165         res2 ~= p.dup;
1166     }
1167     auto sortedRes2 = sort(res2);
1168     assert(sortedRes2.canFind( to!(byte[])([0, 1, 2])));
1169     assert(sortedRes2.canFind( to!(byte[])([0, 2, 1])));
1170     assert(sortedRes2.canFind( to!(byte[])([1, 0, 2])));
1171     assert(sortedRes2.canFind( to!(byte[])([1, 2, 0])));
1172     assert(sortedRes2.canFind( to!(byte[])([2, 0, 1])));
1173     assert(sortedRes2.canFind( to!(byte[])([2, 1, 0])));
1174     assert(res2.length == 6);
1175
1176     // Indirect tests:  If the elements returned are unique, there are N! of
1177     // them, and they contain what they're supposed to contain, the result is
1178     // correct.
1179     auto perm3 = perm(0U, 6U);
1180     bool[uint[]] table;
1181     foreach(p; perm3) {
1182         table[p.idup] = true;
1183     }
1184     assert(table.length == 720);
1185     foreach(elem, val; table) {
1186         assert(elem.dup.insertionSort == [0U, 1, 2, 3, 4, 5]);
1187     }
1188     auto perm4 = perm(5);
1189     bool[byte[]] table2;
1190     foreach(p; perm4) {
1191         table2[p.idup] = true;
1192     }
1193     assert(table2.length == 120);
1194     foreach(elem, val; table2) {
1195         assert(elem.dup.insertionSort == to!(byte[])([0, 1, 2, 3, 4]));
1196     }
1197 }
1198
1199 /**Generates every possible combination of r elements of the given sequence, or r
1200  * array indices from zero to N, depending on which ctor is called.  Uses
1201  * an input range interface.
1202  *
1203  * Bugs:  Only supports iterating over up to size_t.max combinations.
1204  * This was a conscious tradeoff to allow this range to have a
1205  * length of type size_t, since iterating over such huge combination spaces
1206  * would be insanely slow anyhow.
1207  *
1208  * Examples:
1209  * ---
1210     auto comb1 = Comb!(uint)(5, 2);
1211     uint[][] vals;
1212     foreach(c; comb1) {
1213         vals ~= c;
1214     }
1215     sort(vals);
1216     assert(vals.canFindSorted([0u,1].dup));
1217     assert(vals.canFindSorted([0u,2].dup));
1218     assert(vals.canFindSorted([0u,3].dup));
1219     assert(vals.canFindSorted([0u,4].dup));
1220     assert(vals.canFindSorted([1u,2].dup));
1221     assert(vals.canFindSorted([1u,3].dup));
1222     assert(vals.canFindSorted([1u,4].dup));
1223     assert(vals.canFindSorted([2u,3].dup));
1224     assert(vals.canFindSorted([2u,4].dup));
1225     assert(vals.canFindSorted([3u,4].dup));
1226     assert(vals.length == 10);
1227     ---
1228  */
1229 struct Comb(T, Buffer bufType = Buffer.dup) {
1230 private:
1231     int N;
1232     int R;
1233     int diff;
1234     uint* pos;
1235     T* myArray;
1236     T* chosen;
1237     size_t _length;
1238
1239     static if(bufType == Buffer.dup) {
1240         alias T[] CombArray;
1241     } else {
1242         alias const(T)[] CombArray;
1243     }
1244
1245     void popFrontNum() {
1246         int index = R - 1;
1247         for(; index != -1 && pos[index] == diff + index; --index) {}
1248         if(index == -1) {
1249             _length--;
1250             return;
1251         }
1252         pos[index]++;
1253         for(uint i = index + 1; i < R; ++i) {
1254             pos[i] = pos[index] + i - index;
1255         }
1256         _length--;
1257     }
1258
1259     void popFrontArray() {
1260         int index = R - 1;
1261         for(; index != -1 && pos[index] == diff + index; --index) {}
1262         if(index == -1) {
1263             _length--;
1264             return;
1265         }
1266         pos[index]++;
1267         chosen[index] = myArray[pos[index]];
1268         for(uint i = index + 1; i < R; ++i) {
1269             pos[i] = pos[index] + i - index;
1270             chosen[i] = myArray[pos[i]];
1271         }
1272         _length--;
1273     }
1274
1275     void setLen() {
1276         // Used at construction.
1277         auto rLen = exp( logNcomb(N, R));
1278         dstatsEnforce(rLen < size_t.max, "Too many combinations.");
1279         _length = roundTo!size_t(rLen);
1280     }
1281
1282 public:
1283
1284     /**Ctor to generate all possible combinations of array indices for a length r
1285      * array.  This is a special-case optimization and is faster than simply
1286      * using the other ctor to generate all length r combinations from
1287      * seq(0, length).*/
1288     static if(is(T == uint)) {
1289         this(uint n, uint r)
1290         in {
1291             assert(n >= r);
1292         } body {
1293             if(r > 0) {
1294                 pos = (seq(0U, r)).ptr;
1295                 pos[r - 1]--;
1296             }
1297             N = n;
1298             R = r;
1299             diff = N - R;
1300             popFront();
1301             setLen();
1302         }
1303     }
1304
1305     /**General ctor.  array is a sequence from which to generate the
1306      * combinations.  r is the length of the combinations to be generated.*/
1307     this(T[] array, uint r) {
1308         if(r > 0) {
1309             pos = (seq(0U, r)).ptr;
1310             pos[r - 1]--;
1311         }
1312         N = to!uint(array.length);
1313         R = r;
1314         diff = N - R;
1315         auto temp = array.dup;
1316         myArray = temp.ptr;
1317         chosen = (new T[r]).ptr;
1318         foreach(i; 0..r) {
1319             chosen[i] = myArray[pos[i]];
1320         }
1321         popFront();
1322         setLen();
1323     }
1324
1325     @property CombArray front() {
1326         static if(bufType == Buffer.recycle) {
1327             static if(!is(T == uint)) {
1328                 return chosen[0..R].dup;
1329             } else {
1330                 return (myArray is null) ? pos[0..R] : chosen[0..R];
1331             }
1332         } else {
1333             static if(!is(T == uint)) {
1334                 return chosen[0..R].dup;
1335             } else {
1336                 return (myArray is null) ? pos[0..R].dup : chosen[0..R].dup;
1337             }
1338         }
1339     }
1340
1341     void popFront() {
1342         return (myArray is null) ? popFrontNum() : popFrontArray();
1343     }
1344
1345     ///
1346     @property bool empty() const pure nothrow {
1347         return length == 0;
1348     }
1349
1350     ///
1351     @property size_t length() const pure nothrow {
1352         return _length;
1353     }
1354
1355     ///
1356     @property typeof(this) save() {
1357         auto ret = this;
1358         ret.pos = (pos[0..R].dup).ptr;
1359
1360         if(chosen !is null) {
1361             ret.chosen = (chosen[0..R].dup).ptr;
1362         }
1363
1364         return ret;
1365     }
1366 }
1367
1368 private template CombRet(T, Buffer bufType) {
1369     static if(isForwardRange!(T)) {
1370         alias Comb!(Unqual!(ElementType!(T)), bufType) CombRet;
1371     } else static if(isIntegral!T) {
1372         alias Comb!(uint, bufType) CombRet;
1373     } else static assert(0, "comb can only be created with range or uint.");
1374 }
1375
1376 /**Create a Comb struct from a range or of a set of bounds.
1377  *
1378  * Note:  CombRet is just a template to figure out what this should return.
1379  * I would use auto if not for bug 2251.
1380  *
1381  * Examples:
1382  * ---
1383  * auto c1 = comb([1,2,3], 2);  // Any two elements from [1,2,3].
1384  * auto c2 = comb(5, 3);  // Any three elements from [0,1,2,3,4].
1385  * ---
1386  */
1387 CombRet!(T, bufType) comb(Buffer bufType = Buffer.dup, T)(T stuff, uint r) {
1388     alias typeof(return) rt;
1389     static if(isForwardRange!(T)) {
1390         return rt(stuff, r);
1391     } else {
1392         static assert(isIntegral!T, "Can only call comb on ints and ranges.");
1393         return rt(cast(uint) stuff, r);
1394     }
1395 }
1396
1397 unittest {
1398     // Test degenerate case of r == 0.  Shouldn't segfault.
1399     uint nZero = 0;
1400     foreach(elem; comb(5, 0)) {
1401         assert(elem.length == 0);
1402         nZero++;
1403     }
1404     assert(nZero == 1);
1405
1406     nZero = 0;
1407     uint[] foo = [1,2,3,4,5];
1408     foreach(elem; comb(foo, 0)) {
1409         assert(elem.length == 0);
1410         nZero++;
1411     }
1412     assert(nZero == 1);
1413
1414     // Test indexing verison first.
1415     auto comb1 = comb(5, 2);
1416     uint[][] vals;
1417     foreach(c; comb1) {
1418         vals ~= c;
1419     }
1420
1421     auto sortedVals = sort(vals);
1422     assert(sortedVals.canFind([0u,1].dup));
1423     assert(sortedVals.canFind([0u,2].dup));
1424     assert(sortedVals.canFind([0u,3].dup));
1425     assert(sortedVals.canFind([0u,4].dup));
1426     assert(sortedVals.canFind([1u,2].dup));
1427     assert(sortedVals.canFind([1u,3].dup));
1428     assert(sortedVals.canFind([1u,4].dup));
1429     assert(sortedVals.canFind([2u,3].dup));
1430     assert(sortedVals.canFind([2u,4].dup));
1431     assert(sortedVals.canFind([3u,4].dup));
1432     assert(vals.length == 10);
1433
1434     // Now, test the array version.
1435     auto comb2 = comb(seq(5U, 10U), 3);
1436     vals = null;
1437     foreach(c; comb2) {
1438         vals ~= c;
1439     }
1440     sortedVals = sort(vals);
1441     assert(sortedVals.canFind([5u, 6, 7].dup));
1442     assert(sortedVals.canFind([5u, 6, 8].dup));
1443     assert(sortedVals.canFind([5u, 6, 9].dup));
1444     assert(sortedVals.canFind([5u, 7, 8].dup));
1445     assert(sortedVals.canFind([5u, 7, 9].dup));
1446     assert(sortedVals.canFind([5u, 8, 9].dup));
1447     assert(sortedVals.canFind([6U, 7, 8].dup));
1448     assert(sortedVals.canFind([6u, 7, 9].dup));
1449     assert(sortedVals.canFind([6u, 8, 9].dup));
1450     assert(sortedVals.canFind([7u, 8, 9].dup));
1451     assert(vals.length == 10);
1452
1453     // Now a test of a larger dataset where more subtle bugs could hide.
1454     // If the values returned are unique even after sorting, are composed of
1455     // the correct elements, and there is the right number of them, this thing
1456     // works.
1457
1458     bool[uint[]] results;  // Keep track of how many UNIQUE items we have.
1459     auto comb3 = Comb!(uint)(seq(10U, 22U), 6);
1460     foreach(c; comb3) {
1461         auto dupped = c.dup.sort;
1462         // Make sure all elems are unique and within range.
1463         assert(dupped.length == 6);
1464         assert(dupped[0] > 9 && dupped[0] < 22);
1465         foreach(i; 1..dupped.length) {
1466             // Make sure elements are unique.  Remember, the array is sorted.
1467             assert(dupped[i] > dupped[i - 1]);
1468             assert(dupped[i] > 9 && dupped[i] < 22);
1469         }
1470         results[dupped.idup] = true;
1471     }
1472     assert(results.length == 924);  // (12 choose 6).
1473 }
1474
1475 /**Converts a range with arbitrary element types (usually strings) to a
1476  * range of reals lazily.  Ignores any elements that could not be successfully
1477  * converted.  Useful for creating an input range that can be used with this
1478  * lib out of a File without having to read the whole file into an array first.
1479  * The advantages to this over just using std.algorithm.map are that it's
1480  * less typing and that it ignores non-convertible elements, such as blank
1481  * lines.
1482  *
1483  * If rangeIn is an inputRange, then the result of this function is an input
1484  * range.  Otherwise, the result is a forward range.
1485  *
1486  * Note:  The reason this struct doesn't have length or random access,
1487  * even if this is supported by rangeIn, is because it has to be able to
1488  * filter out non-convertible elements.
1489  *
1490  * Examples:
1491  * ---
1492  * // Perform a T-test to see whether the mean of the data being input as text
1493  * // from stdin is different from zero.  This data might not even fit in memory
1494  * // if it were read in eagerly.
1495  *
1496  * auto myRange = toNumericRange( stdin.byLine() );
1497  * TestRes result = studentsTTest(myRange);
1498  * writeln(result);
1499  * ---
1500  */
1501 ToNumericRange!R toNumericRange(R)(R rangeIn) if(isInputRange!R) {
1502     alias ToNumericRange!R RT;
1503     return RT(rangeIn);
1504 }
1505
1506 ///
1507 struct ToNumericRange(R) if(isInputRange!R) {
1508 private:
1509     alias ElementType!R E;
1510     R inputRange;
1511     real _front;
1512
1513 public:
1514     this(R inputRange) {
1515         this.inputRange = inputRange;
1516         try {
1517             _front = to!real(inputRange.front);
1518         } catch(ConvException) {
1519             popFront();
1520         }
1521     }
1522
1523     @property real front() {
1524         return _front;
1525     }
1526
1527     void popFront() {
1528         while(true) {
1529             inputRange.popFront();
1530             if(inputRange.empty) {
1531                 return;
1532             }
1533             auto inFront = inputRange.front;
1534
1535             // If inFront is some string, strip the whitespace.
1536             static if( is(typeof(strip(inFront)))) {
1537                 inFront = strip(inFront);
1538             }
1539
1540             try {
1541                 _front = to!real(inFront);
1542                 return;
1543             } catch(ConvException) {
1544                 continue;
1545             }
1546         }
1547     }
1548
1549     @property bool empty() {
1550         return inputRange.empty;
1551     }
1552 }
1553
1554 unittest {
1555     // Test both with non-convertible element as first element and without.
1556     // This is because non-convertible elements as the first element are
1557     // handled as a special case in the implementation.
1558     string[2] dataArr = ["3.14\n2.71\n8.67\nabracadabra\n362436",
1559                  "foobar\n3.14\n2.71\n8.67\nabracadabra\n362436"];
1560
1561     foreach(data; dataArr) {
1562         std.file.write("NumericFileTestDeleteMe.txt", data);
1563         scope(exit) std.file.remove("NumericFileTestDeleteMe.txt");
1564         auto myFile = File("NumericFileTestDeleteMe.txt");
1565         auto rng = toNumericRange(myFile.byLine());
1566         assert(approxEqual(rng.front, 3.14));
1567         rng.popFront;
1568         assert(approxEqual(rng.front, 2.71));
1569         rng.popFront;
1570         assert(approxEqual(rng.front, 8.67));
1571         rng.popFront;
1572         assert(approxEqual(rng.front, 362435));
1573         assert(!rng.empty);
1574         rng.popFront;
1575         assert(rng.empty);
1576         myFile.close();
1577     }
1578 }
1579
1580 // Used for ILP optimizations.
1581 package template StaticIota(size_t upTo) {
1582     static if(upTo == 0) {
1583         alias TypeTuple!() StaticIota;
1584     } else {
1585         alias TypeTuple!(StaticIota!(upTo - 1), upTo - 1) StaticIota;
1586     }
1587 }
1588
1589 // Verify that there are no TempAlloc memory leaks anywhere in the code covered
1590 // by the unittest.  This should always be the last unittest of the module.
1591 unittest {
1592     auto TAState = TempAlloc.getState;
1593     assert(TAState.used == 0);
1594     assert(TAState.nblocks < 2);
1595 }
Note: See TracBrowser for help on using the browser.