Changeset 28

Show
Ignore:
Timestamp:
04/23/09 23:30:02 (3 years ago)
Author:
dsimcha
Message:

Make dstats work with ranges.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/alloc.d

    r23 r28  
    11/**Allocation functions. 
    2  * Author:  David Simcha 
    3  * 
    4  * Copyright (c) 2009, David Simcha 
     2 * 
     3 * Author:  David Simcha*/ 
     4 /* 
     5 * You may use this software under your choice of either of the following 
     6 * licenses.  YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 
     7 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 
     8 * THE BSD LICENSE.  IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 
     9 * TO OBEY THE TERMS OF THE PHOBOS LICENSE.  IF YOU ARE A LAWYER LOOKING FOR 
     10 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 
     11 * GET A LIFE. 
     12 * 
     13 * ---------------------Phobos License: --------------------------------------- 
     14 * 
     15 *  Copyright (C) 2008-2009 by David Simcha. 
     16 * 
     17 *  This software is provided 'as-is', without any express or implied 
     18 *  warranty. In no event will the authors be held liable for any damages 
     19 *  arising from the use of this software. 
     20 * 
     21 *  Permission is granted to anyone to use this software for any purpose, 
     22 *  including commercial applications, and to alter it and redistribute it 
     23 *  freely, in both source and binary form, subject to the following 
     24 *  restrictions: 
     25 * 
     26 *  o  The origin of this software must not be misrepresented; you must not 
     27 *     claim that you wrote the original software. If you use this software 
     28 *     in a product, an acknowledgment in the product documentation would be 
     29 *     appreciated but is not required. 
     30 *  o  Altered source versions must be plainly marked as such, and must not 
     31 *     be misrepresented as being the original software. 
     32 *  o  This notice may not be removed or altered from any source 
     33 *     distribution. 
     34 * 
     35 * --------------------BSD License:  ----------------------------------------- 
     36 * 
     37 * Copyright (c) 2008-2009, David Simcha 
    538 * All rights reserved. 
    639 * 
     
    2861 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
    2962 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
    30  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 
     63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
     64 */ 
    3165 
    3266module dstats.alloc; 
    3367 
    34 import std.traits, core.memory, core.thread, std.c.stdio : stderr; 
     68import std.traits, core.memory, core.thread, std.array, std.range, dstats.base; 
     69static import std.c.stdio; 
    3570 
    3671version(unittest) { 
    37     import std.stdio
     72    import std.stdio, std.conv, dstats.sort
    3873    void main() {} 
    3974} 
     
    5590        const bool isReferenceType = false; 
    5691    } else static if (Types.length == 1) { 
    57         static if (IsType!(Mutable!(Types[0]), bool, byte, ubyte, short, ushort, 
     92        static if (IsType!(Unqual!(Types[0]), bool, byte, ubyte, short, ushort, 
    5893                           int, uint, long, ulong, float, double, real, ifloat, 
    5994                           idouble, ireal, cfloat, cdouble, creal, char, dchar, 
     
    131166} 
    132167 
     168private template Appends(T, U) { 
     169    enum bool Appends = AppendsImpl!(T, U).ret; 
     170} 
     171 
     172private template AppendsImpl(T, U) { 
     173    T[] a; 
     174    U b; 
     175    enum bool ret = is(typeof(a ~= b)); 
     176} 
     177 
    133178///Appends to an array, deleting the old array if it has to be realloced. 
    134179void appendDelOld(T, U)(ref T[] to, U from) 
    135 if (is(Mutable!(T) : Mutable!(U)) || is(Mutable!(T[0]) : Mutable!(U))) { 
     180if(Appends!(T, U)) { 
    136181    auto oldPtr = to.ptr; 
    137182    to ~= from; 
     
    203248        Stack!(Block) inUse; 
    204249        Stack!(void*) freelist; 
    205  
    206         ~this() {  // Blocks are pretty large.  Prevent false ptrs. 
    207             ntFree(lastAlloc.ptr); 
    208             while(nblocks > 1) { 
    209                 ntFree((inUse.pop()).space); 
    210                 nblocks--; 
    211             } 
    212             ntFree(space); 
    213             while(nfree > 0) { 
    214                 ntFree(freelist.pop); 
    215                 nfree--; 
    216             } 
    217         } 
    218250    } 
    219251 
     
    230262 
    231263    static void die() nothrow { 
    232         fprintf(stderr, "TempAlloc error: Out of memory.\0".ptr); 
     264        fprintf(std.c.stdio.stderr, "TempAlloc error: Out of memory.\0".ptr); 
    233265        exit(1); 
    234266    } 
     
    457489        totalLen += array.length; 
    458490    } 
    459     auto ret = newStack!(Mutable!(typeof(T[0][0])))(totalLen); 
     491    auto ret = newStack!(Unqual!(typeof(T[0][0])))(totalLen); 
    460492 
    461493    size_t offset = 0; 
     
    467499} 
    468500 
    469 /**Creates a duplicate of an array on the TempAlloc stack.*/ 
    470 auto tempdup(T)(T[] data) nothrow { 
    471     alias Mutable!(T) U; 
    472     U[] ret = newStack!(U)(data.length); 
    473     ret[] = data[]; 
    474     return ret; 
    475 
    476  
    477 /**Same as tempdup(T[]) but uses stateCopy cached on stack by caller 
    478  * to avoid a thread-local storage lookup.  Strictly a speed hack.*/ 
    479 auto tempdup(T)(T[] data, TempAlloc.State state) nothrow { 
    480     alias Mutable!(T) U; 
    481     U[] ret = newStack!(U)(data.length, state); 
    482     ret[] = data; 
    483     return ret; 
     501void rangeCopy(T, U)(T to, U from) { 
     502    static if(is(typeof(to[] = from[]))) { 
     503        to[] = from[]; 
     504    } else static if(isRandomAccessRange!(T)) { 
     505        size_t i = 0; 
     506        foreach(elem; from) { 
     507            to[i++] = elem; 
     508        } 
     509    } 
     510
     511 
     512/**Creates a duplicate of a range on the TempAlloc stack.  Much faster 
     513 * if the range has a length, but works even if it doesn't.*/ 
     514Unqual!(ElementType!(T))[] tempdup(T)(T data) 
     515if(isInputRange!(T)) { 
     516    alias ElementType!(T) E; 
     517    alias Unqual!(E) U; 
     518    static if(dstats.base.hasLength!(T)) { 
     519        U[] ret = newStack!(U)(data.length); 
     520        rangeCopy(ret, data); 
     521        return ret; 
     522    } else { 
     523        auto state = TempAlloc.getState; 
     524        auto startPtr = TempAlloc(0, state); 
     525        size_t bytesCopied = 0; 
     526 
     527        while(!data.empty) {  // Make sure range interface is being used. 
     528            auto elem = data.front; 
     529            if(state.used + U.sizeof <= TempAlloc.blockSize) { 
     530                data.popFront; 
     531                *(cast(U*) (startPtr + bytesCopied)) = elem; 
     532                bytesCopied += U.sizeof; 
     533                state.used += U.sizeof; 
     534            } else { 
     535                if(bytesCopied + U.sizeof >= TempAlloc.blockSize / 2) { 
     536                    // Then just heap-allocate. 
     537                    U[] result = newVoid!(U)(bytesCopied / U.sizeof); 
     538                    result[] = (cast(U*) startPtr)[0..result.length]; 
     539                    finishCopy(result, data); 
     540                    TempAlloc.free; 
     541                    state.lastAlloc[++state.totalAllocs] = result.ptr; 
     542                    return result; 
     543                } else { 
     544                    U[] oldData = (cast(U*) startPtr)[0..bytesCopied / U.sizeof]; 
     545                    state.used -= bytesCopied; 
     546                    state.totalAllocs--; 
     547                    U[] newArray = newStack!(U)(bytesCopied / U.sizeof + 1, state); 
     548                    newArray[0..oldData.length] = oldData[]; 
     549                    startPtr = state.space; 
     550                    newArray[$ - 1] = elem; 
     551                    bytesCopied += U.sizeof; 
     552                    data.popFront; 
     553                } 
     554            } 
     555        } 
     556        auto rem = bytesCopied % TempAlloc.alignBytes; 
     557        if(rem != 0) { 
     558            auto toAdd = 16 - rem; 
     559            if(state.used + toAdd < TempAlloc.blockSize) { 
     560                state.used += toAdd; 
     561            } else { 
     562                state.used = TempAlloc.blockSize; 
     563            } 
     564        } 
     565        return (cast(U*) startPtr)[0..bytesCopied / U.sizeof]; 
     566    } 
     567
     568 
     569private void finishCopy(T, U)(ref T[] result, U range) { 
     570    auto app = appender(&result); 
     571    foreach(elem; range) { 
     572        app.put(elem); 
     573    } 
     574
     575 
     576unittest { 
     577    // Create quick and dirty finite but lengthless range. 
     578    struct Count { 
     579        uint num; 
     580        uint upTo; 
     581        uint front() { 
     582            return num; 
     583        } 
     584        void popFront() { 
     585            num++; 
     586        } 
     587        bool empty() { 
     588            return num >= upTo; 
     589        } 
     590    } 
     591 
     592    TempAlloc(1024 * 1024 * 3); 
     593    Count count; 
     594    count.upTo = 1024 * 1025; 
     595    auto asArray = tempdup(count); 
     596    foreach(i, elem; asArray) { 
     597        assert(i == elem, to!(string)(i) ~ "\t" ~ to!(string)(elem)); 
     598    } 
     599    assert(asArray.length == 1024 * 1025); 
     600    TempAlloc.free; 
     601    TempAlloc.free; 
     602    while(TempAlloc.getState.freelist.index > 0) { 
     603        TempAlloc.getState.freelist.pop; 
     604    } 
     605    writeln("Passed tempdup unittest."); 
    484606} 
    485607 
     
    495617 * are allocated, due to caching of data stored in thread-local 
    496618 * storage.*/ 
    497 invariant char[] newFrame = 
     619immutable char[] newFrame = 
    498620    "TempAlloc.frameInit; scope(exit) TempAlloc.frameFree;"; 
    499621 
     
    509631         TempAlloc(TempAlloc.alignBytes); 
    510632     } 
    511      assert(TempAlloc.getState.nblocks == 5); 
     633     assert(TempAlloc.getState.nblocks == 5, to!string(TempAlloc.getState.nblocks)); 
    512634     assert(TempAlloc.getState.nfree == 0); 
    513635     foreach(i; 0..nIter) { 
     
    580702        TempAlloc.free; 
    581703    } 
    582     fprintf(stderr, "Passed TempAlloc test.\n\0".ptr); 
    583 
     704    fprintf(std.c.stdio.stderr, "Passed TempAlloc test.\n\0".ptr); 
     705
     706 
     707struct SHNode(K, V) { 
     708    alias SHNode!(K, V) SomeType; 
     709    SomeType* next; 
     710    Unqual!(K) key; 
     711    Unqual!(V) val; 
     712
     713 
     714/**Forward range struct for iterating over the keys or values of a 
     715 * StackHash or StackSet.  The lifetime of this object must not exceed that 
     716 * of the underlying StackHash or StackSet.*/ 
     717struct HashRange(K, S, bool vals = false) { 
     718private: 
     719    S* set; 
     720    size_t index; 
     721    S.Node* next; 
     722    K* frontElem; 
     723    size_t _length; 
     724 
     725    this(S* set) { 
     726        this.set = set; 
     727        if(set.rNext[0] == set.usedSentinel) { 
     728            this.popFront; 
     729        } else { 
     730            static if(vals) { 
     731                frontElem = set.rVals.ptr; 
     732            } else { 
     733                frontElem = set.rKeys.ptr; 
     734            } 
     735            next = set.rNext[0]; 
     736        } 
     737        this._length = set.length; 
     738    } 
     739 
     740public: 
     741    /// 
     742    void popFront() { 
     743        this._length--; 
     744        if(next is null) { 
     745            do { 
     746                index++; 
     747                if(index >= set.rNext.length) { 
     748                    index = size_t.max;  // Sentinel for empty. 
     749                    return; 
     750                } 
     751                next = set.rNext[index]; 
     752            } while(set.rNext[index] == set.usedSentinel); 
     753            static if(vals) { 
     754                frontElem = &(set.rVals[index]); 
     755            } else { 
     756                frontElem = &(set.rKeys[index]); 
     757            } 
     758        } else { 
     759            static if(vals) { 
     760                frontElem = &(next.val); 
     761            } else { 
     762                frontElem = &(next.key); 
     763            } 
     764            next = next.next; 
     765        } 
     766    } 
     767 
     768    /// 
     769    Unqual!(K) front() { 
     770        return *frontElem; 
     771    } 
     772 
     773    /// 
     774    bool empty() { 
     775        return index == size_t.max; 
     776    } 
     777 
     778    /// 
     779    size_t length() { 
     780        return _length; 
     781    } 
     782
     783 
     784/**A hash table that allocates its memory on TempAlloc.  Good for building a 
     785 * temporary hash tables that will not escape the current scope. 
     786 * 
     787 * To avoid TempAlloc memory leaks, use mixin(newFrame). 
     788 * 
     789 * Examples: 
     790 * --- 
     791 * mixin(newFrame);  // To make sure all memory gets freed at end of scope. 
     792 * auto ss = StackHash!(uint)(5); 
     793 * foreach(i; 0..5) { 
     794 *     ss[i]++; 
     795 * } 
     796 * assert(ss[3] == 1); 
     797 * --- 
     798 */ 
     799struct StackHash(K, V) { 
     800private: 
     801    alias SHNode!(K, V) Node; 
     802 
     803    // Using parallel arrays instead of structs to save on alignment overhead: 
     804    Unqual!(K)[] rKeys; 
     805    Unqual!(V)[] rVals; 
     806    Unqual!(Node*)[] rNext; 
     807 
     808    TempAlloc.State TAState; 
     809    TypeInfo keyTI; 
     810    size_t _length; 
     811    Node* usedSentinel; 
     812 
     813    Node* newNode(K key) { 
     814        Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState); 
     815        ret.key =  key; 
     816        ret.val =  V.init; 
     817        ret.next = null; 
     818        return ret; 
     819    } 
     820 
     821    Node* newNode(K key, V val) { 
     822        Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState); 
     823        ret.key =  key; 
     824        ret.val = val; 
     825        ret.next = null; 
     826        return ret; 
     827    } 
     828 
     829    hash_t getHash(K key) { 
     830        static if(is(K : long) && K.sizeof <= hash_t.sizeof) { 
     831            hash_t hash = cast(hash_t) key; 
     832        } else static if(__traits(compiles, key.toHash())) { 
     833            hash_t hash = key.toHash(); 
     834        } else { 
     835            hash_t hash = keyTI.getHash(&key); 
     836        } 
     837        hash %= rNext.length; 
     838        return hash; 
     839    } 
     840 
     841 
     842public: 
     843    /**Due to the nature of TempAlloc, you must specify on object creation 
     844     * the approximate number of elements your table will have.  Too large a 
     845     * number will waste space and incur poor cache performance.  Too low a 
     846     * number will make this struct perform like a linked list.  Generally, 
     847     * if you're building a table from some other range, some fraction of the 
     848     * size of that range is a good guess.*/ 
     849    this(size_t nElem) { 
     850        // Obviously, the caller can never mean zero, because this struct 
     851        // can't work at all with nElem == 0, so assume it's a mistake and fix 
     852        // it here. 
     853        if(nElem == 0) 
     854            nElem++; 
     855        TAState = TempAlloc.getState; 
     856        rKeys = newStack!(K)(nElem, TAState); 
     857        rVals = newStack!(V)(nElem, TAState); 
     858        rNext = newStack!(Node*)(nElem, TAState); 
     859        usedSentinel = cast(Node*) rNext.ptr; 
     860        foreach(ref rKey; rKeys) { 
     861            rKey =  K.init; 
     862        } 
     863        foreach(ref rVal; rVals) { 
     864            rVal = V.init; 
     865        } 
     866        foreach(ref r; rNext) { 
     867            r = usedSentinel; 
     868        } 
     869        keyTI = typeid(K); 
     870    } 
     871 
     872    /**Index an element of the range.  If it does not exist, it will be created 
     873     * and initialized to V.init.*/ 
     874    ref V opIndex(K key) { 
     875        hash_t hash = getHash(key); 
     876 
     877        if(rNext[hash] == usedSentinel) { 
     878            rKeys[hash] =  key; 
     879            rNext[hash] = null; 
     880            _length++; 
     881            return rVals[hash]; 
     882        } else if(rKeys[hash] == key) { 
     883            return rVals[hash]; 
     884        } else {  // Collision.  Start chaining. 
     885            Node** next = &(rNext[hash]); 
     886            while(*next !is null) { 
     887                if((**next).key ==  key) { 
     888                    return (**next).val; 
     889                } 
     890                next = &((**next).next); 
     891            } 
     892            *next = newNode(key); 
     893            _length++; 
     894            return (**next).val; 
     895        } 
     896    } 
     897 
     898    /// 
     899    V opIndexAssign(V val, K key) { 
     900        hash_t hash = getHash(key); 
     901 
     902        if(rNext[hash] == usedSentinel) { 
     903            rKeys[hash] =  key; 
     904            rVals[hash] = val; 
     905            rNext[hash] = null; 
     906            _length++; 
     907            return val; 
     908        } else if(rKeys[hash] ==  key) { 
     909            rVals[hash] = val; 
     910            return val; 
     911        } else {  // Collision.  Start chaining. 
     912            Node** next = &(rNext[hash]); 
     913            while(*next !is null) { 
     914                if((**next).key == key) { 
     915                    (**next).val = val; 
     916                    return val; 
     917                } 
     918                next = &((**next).next); 
     919            } 
     920            _length++; 
     921            *next = newNode(key, val); 
     922            return val; 
     923        } 
     924    } 
     925 
     926    /// 
     927    V* opIn_r(K key) { 
     928        hash_t hash = getHash(key); 
     929 
     930        if(rNext[hash] == usedSentinel) { 
     931            return null; 
     932        } else if(rKeys[hash] == key) { 
     933            return &(rVals[hash]); 
     934        } else {  // Collision.  Start chaining. 
     935            Node* next = rNext[hash]; 
     936            while(next !is null) { 
     937                if(next.key == key) { 
     938                    return &(next.val); 
     939                } 
     940                next = next.next; 
     941            } 
     942            return null; 
     943        } 
     944   } 
     945 
     946    /// 
     947    void remove(K key) { 
     948        hash_t hash = getHash(key); 
     949 
     950        Node** next = &(rNext[hash]); 
     951        if(rNext[hash] == usedSentinel) { 
     952            return; 
     953        } else if(rKeys[hash] == key) { 
     954            _length--; 
     955            if(rNext[hash] is null) { 
     956                rKeys[hash] = K.init; 
     957                rVals[hash] = V.init; 
     958                rNext[hash] = usedSentinel; 
     959                return; 
     960            } else { 
     961                rKeys[hash] = (**next).key; 
     962                rVals[hash] = (**next).val; 
     963                rNext[hash] = (**next).next; 
     964                return; 
     965            } 
     966        } else {  // Collision.  Start chaining. 
     967            while(*next !is null) { 
     968                if((**next).key == key) { 
     969                    _length--; 
     970                    *next = (**next).next; 
     971                    break; 
     972                } 
     973                next = &((**next).next); 
     974            } 
     975            return; 
     976        } 
     977   } 
     978 
     979    /**Returns a forward range to iterate over the keys of this table. 
     980     * The lifetime of the HashRange must not exceed the lifetime of this 
     981     * StackHash.*/ 
     982    HashRange!(K, StackHash!(K, V)) keys() { 
     983        return typeof(return)(&this); 
     984    } 
     985 
     986    /**Returns a forward range to iterate over the values of this table. 
     987     * The lifetime of the HashRange must not exceed the lifetime of this 
     988     * StackHash.*/ 
     989    HashRange!(V, StackHash!(K, V), true) values() { 
     990       return typeof(return)(&this); 
     991    } 
     992 
     993    /// 
     994    size_t length() const { 
     995        return _length; 
     996    } 
     997 
     998    real efficiency() { 
     999       uint used = 0; 
     1000       foreach(root; rNext) { 
     1001           if(root != usedSentinel) { 
     1002               used++; 
     1003           } 
     1004       } 
     1005       return cast(real) used / rNext.length; 
     1006    } 
     1007
     1008 
     1009unittest { 
     1010    alias StackHash!(string, uint) mySh; 
     1011    mixin(newFrame); 
     1012    auto data = mySh(2);  // Make sure we get some collisions. 
     1013    data["foo"] = 1; 
     1014    data["bar"] = 2; 
     1015    data["baz"] = 3; 
     1016    data["waldo"] = 4; 
     1017    assert(!("foobar" in data)); 
     1018    assert(*("foo" in data) == 1); 
     1019    assert(*("bar" in data) == 2); 
     1020    assert(*("baz" in data) == 3); 
     1021    assert(*("waldo" in data) == 4); 
     1022    assert(data["foo"] == 1); 
     1023    assert(data["bar"] == 2); 
     1024    assert(data["baz"] == 3); 
     1025    assert(data["waldo"] == 4); 
     1026    auto myKeys = toArray(data.keys); 
     1027    qsort(myKeys); 
     1028    assert(myKeys == cast(string[]) ["bar", "baz", "foo", "waldo"]); 
     1029    auto myValues = toArray(data.values); 
     1030    qsort(myValues); 
     1031    assert(myValues == [1U, 2, 3, 4]); 
     1032    { 
     1033        auto k = data.keys; 
     1034        auto v = data.values; 
     1035        while(!k.empty) { 
     1036            assert(data[k.front] == v.front); 
     1037            k.popFront; 
     1038            v.popFront; 
     1039        } 
     1040    } 
     1041    foreach(v; data.values) { 
     1042        assert(v > 0 && v < 5); 
     1043    } 
     1044 
     1045    // Test remove. 
     1046 
     1047    alias StackHash!(uint, uint) mySh2; 
     1048    auto foo = mySh2(7); 
     1049    for(uint i = 0; i < 200; i++) { 
     1050        foo[i] = i; 
     1051    } 
     1052    assert(foo.length == 200); 
     1053    for(uint i = 0; i < 200; i += 2) { 
     1054        foo.remove(i); 
     1055    } 
     1056    foreach(i; 20..200) { 
     1057        foo.remove(i); 
     1058    } 
     1059    for(uint i = 0; i < 20; i++) { 
     1060        if(i & 1) { 
     1061            assert(*(i in foo) == i); 
     1062        } else { 
     1063            assert(!(i in foo)); 
     1064        } 
     1065    } 
     1066    auto vals = toArray(foo.values); 
     1067    assert(foo.length == 10); 
     1068    assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]); 
     1069 
     1070    writeln("Passed StackHash test."); 
     1071
     1072 
     1073/**A hash set that allocates its memory on TempAlloc.  Good for building a 
     1074 * temporary set that will not escape the current scope. 
     1075 * 
     1076 * To avoid TempAlloc memory leaks, use mixin(newFrame). 
     1077 * 
     1078 * Examples: 
     1079 * --- 
     1080 * mixin(newFrame);  // To make sure all memory gets freed at end of scope. 
     1081 * auto ss = StackSet!(uint)(5); 
     1082 * foreach(i; 0..5) { 
     1083 *     ss.insert(i); 
     1084 * } 
     1085 * assert(3 in ss); 
     1086 * --- 
     1087 */ 
     1088struct StackSet(K) { 
     1089private: 
     1090    // Choose smallest representation of the data. 
     1091    struct Node1 { 
     1092        Node1* next; 
     1093        K key; 
     1094    } 
     1095 
     1096    struct Node2 { 
     1097        K key; 
     1098        Node2* next; 
     1099    } 
     1100 
     1101    static if(Node1.sizeof < Node2.sizeof) { 
     1102        alias Node1 Node; 
     1103    } else { 
     1104        alias Node2 Node; 
     1105    } 
     1106 
     1107    Unqual!(K)[] rKeys; 
     1108    Node*[] rNext; 
     1109    TempAlloc.State TAState; 
     1110    size_t _length; 
     1111    Node* usedSentinel; 
     1112 
     1113    Node* newNode(K key) { 
     1114        Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState); 
     1115        ret.key = key; 
     1116        ret.next = null; 
     1117        return ret; 
     1118    } 
     1119 
     1120    hash_t getHash(K key) { 
     1121        static if(is(K : long) && K.sizeof <= hash_t.sizeof) { 
     1122            hash_t hash = cast(hash_t) key; 
     1123        } else static if(__traits(compiles, key.toHash())) { 
     1124            hash_t hash = key.toHash(); 
     1125        } else { 
     1126            hash_t hash = typeid(K).getHash(&key); 
     1127        } 
     1128        hash %= rNext.length; 
     1129        return hash; 
     1130    } 
     1131 
     1132public: 
     1133    /**Due to the nature of TempAlloc, you must specify on object creation 
     1134     * the approximate number of elements your set will have.  Too large a 
     1135     * number will waste space and incur poor cache performance.  Too low a 
     1136     * number will make this struct perform like a linked list.  Generally, 
     1137     * if you're building a set from some other range, some fraction of the 
     1138     * size of that range is a good guess.*/ 
     1139    this(size_t nElem) { 
     1140        // Obviously, the caller can never mean zero, because this struct 
     1141        // can't work at all with nElem == 0, so assume it's a mistake and fix 
     1142        // it here. 
     1143        if(nElem == 0) 
     1144            nElem++; 
     1145        TAState = TempAlloc.getState; 
     1146        rNext = newStack!(Node*)(nElem, TAState); 
     1147        rKeys = newStack!(Unqual!(K))(nElem, TAState); 
     1148        usedSentinel = cast(Node*) rNext.ptr; 
     1149        foreach(ref root; rKeys) { 
     1150            root = K.init; 
     1151        } 
     1152        foreach(ref root; rNext) { 
     1153            root = usedSentinel; 
     1154        } 
     1155    } 
     1156 
     1157    /// 
     1158    void insert(K key) { 
     1159        hash_t hash = getHash(key); 
     1160 
     1161        if(rNext[hash] == usedSentinel) { 
     1162            rKeys[hash] = key; 
     1163            rNext[hash] = null; 
     1164            _length++; 
     1165            return; 
     1166        } else if(rKeys[hash] == key) { 
     1167            return; 
     1168        } else {  // Collision.  Start chaining. 
     1169            Node** next = &(rNext[hash]); 
     1170            while(*next !is null) { 
     1171                if((**next).key == key) { 
     1172                    return; 
     1173                } 
     1174                next = &((**next).next); 
     1175            } 
     1176            *next = newNode(key); 
     1177            _length++; 
     1178            return; 
     1179        } 
     1180    } 
     1181 
     1182    /**Returns a forward range of the elements of this struct.  The range's 
     1183     * lifetime must not exceed the lifetime of this object.*/ 
     1184    HashRange!(K, typeof(this)) elems() { 
     1185        auto ret = typeof(return)(&this); 
     1186        return ret; 
     1187    } 
     1188 
     1189    /// 
     1190    bool opIn_r(K key) { 
     1191        hash_t hash = getHash(key); 
     1192 
     1193        if(rNext[hash] == usedSentinel) { 
     1194            return false; 
     1195        } else if(rKeys[hash] == key) { 
     1196            return true; 
     1197        } else {  // Collision.  Start chaining. 
     1198            Node* next = rNext[hash]; 
     1199            while(next !is null) { 
     1200                if(next.key == key) { 
     1201                    return true; 
     1202                } 
     1203                next = next.next; 
     1204            } 
     1205            return false; 
     1206        } 
     1207   } 
     1208 
     1209    /// 
     1210    void remove(K key) { 
     1211        hash_t hash = getHash(key); 
     1212 
     1213        Node** next = &(rNext[hash]); 
     1214        if(rNext[hash] == usedSentinel) { 
     1215            return; 
     1216        } else if(rKeys[hash] == key) { 
     1217            _length--; 
     1218            if(rNext[hash] is null) { 
     1219                rKeys[hash] = K.init; 
     1220                rNext[hash] = usedSentinel; 
     1221                return; 
     1222            } else { 
     1223                rKeys[hash] = (**next).key; 
     1224                rNext[hash] = (**next).next; 
     1225                return; 
     1226            } 
     1227        } else {  // Collision.  Start chaining. 
     1228            while(*next !is null) { 
     1229                if((**next).key == key) { 
     1230                    _length--; 
     1231                    *next = (**next).next; 
     1232                    break; 
     1233                } 
     1234                next = &((**next).next); 
     1235            } 
     1236            return; 
     1237        } 
     1238   } 
     1239 
     1240    /// 
     1241    size_t length() { 
     1242       return _length; 
     1243    } 
     1244
     1245 
     1246unittest { 
     1247    mixin(newFrame); 
     1248    alias StackSet!(uint) mySS; 
     1249    mySS set = mySS(12); 
     1250    foreach(i; 0..20) { 
     1251        set.insert(i); 
     1252    } 
     1253    assert(toArray(set.elems).qsort == seq(0U, 20U)); 
     1254 
     1255    for(uint i = 0; i < 20; i += 2) { 
     1256        set.remove(i); 
     1257    } 
     1258 
     1259    foreach(i; 0..20) { 
     1260        if(i & 1) { 
     1261            assert(i in set); 
     1262        } else { 
     1263            assert(!(i in set)); 
     1264        } 
     1265    } 
     1266    uint[] contents; 
     1267 
     1268    foreach(elem; set.elems) { 
     1269        contents ~= elem; 
     1270    } 
     1271    assert(contents.qsort == [1U,3,5,7,9,11,13,15,17,19]); 
     1272    writeln("Passed StackSet test."); 
     1273
  • trunk/base.d

    r24 r28  
    11/**Relatively low-level primitives on which to build higher-level math/stat 
    22 * functionality.  Some are used internally, some are just things that may be 
    3  * useful to users of this library. 
    4  * 
    5  * Author:  David Simcha 
    6  * 
    7  * Copyright (c) 2009, David Simcha 
     3 * useful to users of this library.  This module is starting to take on the 
     4 * appearance of a small utility library. 
     5 * 
     6 * Author:  David Simcha*/ 
     7 /* 
     8 * You may use this software under your choice of either of the following 
     9 * licenses.  YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 
     10 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 
     11 * THE BSD LICENSE.  IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 
     12 * TO OBEY THE TERMS OF THE PHOBOS LICENSE.  IF YOU ARE A LAWYER LOOKING FOR 
     13 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 
     14 * GET A LIFE. 
     15 * 
     16 * ---------------------Phobos License: --------------------------------------- 
     17 * 
     18 *  Copyright (C) 2008-2009 by David Simcha. 
     19 * 
     20 *  This software is provided 'as-is', without any express or implied 
     21 *  warranty. In no event will the authors be held liable for any damages 
     22 *  arising from the use of this software. 
     23 * 
     24 *  Permission is granted to anyone to use this software for any purpose, 
     25 *  including commercial applications, and to alter it and redistribute it 
     26 *  freely, in both source and binary form, subject to the following 
     27 *  restrictions: 
     28 * 
     29 *  o  The origin of this software must not be misrepresented; you must not 
     30 *     claim that you wrote the original software. If you use this software 
     31 *     in a product, an acknowledgment in the product documentation would be 
     32 *     appreciated but is not required. 
     33 *  o  Altered source versions must be plainly marked as such, and must not 
     34 *     be misrepresented as being the original software. 
     35 *  o  This notice may not be removed or altered from any source 
     36 *     distribution. 
     37 * 
     38 * --------------------BSD License:  ----------------------------------------- 
     39 * 
     40 * Copyright (c) 2008-2009, David Simcha 
    841 * All rights reserved. 
    942 * 
     
    3164 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
    3265 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
    33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 
     66 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
     67 */ 
    3468 
    3569module dstats.base; 
    3670 
    3771public import std.math, std.traits, dstats.gamma, dstats.alloc; 
    38 private import dstats.sort, std.c.stdlib, std.bigint, 
    39                std.functional, std.algorithm
     72private import dstats.sort, std.c.stdlib, std.bigint, std.typecons, 
     73               std.functional, std.algorithm, std.range, std.bitmanip
    4074 
    4175invariant real[] staticLogFacTable; 
     
    4377enum : size_t { 
    4478    staticFacTableLen = 10_000, 
     79} 
     80 
     81// Tests whether T is an input range whose elements can be implicitly 
     82// converted to reals. 
     83template realInput(T) { 
     84    enum realInput = isInputRange!(T) && is(ElementType!(T) : real); 
     85} 
     86 
     87// See Bugzilla 2873.  This can be removed once that's fixed. 
     88template hasLength(R) { 
     89    enum bool hasLength = is(typeof(R.init.length) : ulong) || 
     90                      is(typeof(R.init.length()) : ulong); 
    4591} 
    4692 
     
    78124} 
    79125 
     126/**Converts any range to an array on the GC heap by the most efficient means 
     127 * available.  If it is already an array, duplicates the range.*/ 
     128Unqual!(ElementType!(T))[] toArray(T)(T range) if(isInputRange!(T)) { 
     129    static if(isArray!(T)) { 
     130        return range.dup; 
     131    } else static if(hasLength!(T)) { 
     132        auto ret = newVoid!(Unqual!(ElementType!(T)))(range.length); 
     133        static if(is(typeof(ret[] = range[]))) { 
     134            ret[] = range[]; 
     135        } else { 
     136            size_t pos = 0; 
     137            foreach(elem; range) { 
     138                ret[pos++] = elem; 
     139            } 
     140        } 
     141        return ret; 
     142    } else { 
     143        Unqual!(ElementType!(T))[] ret; 
     144        mate(range, appender(&ret)); 
     145        return ret; 
     146    } 
     147} 
     148 
     149/**Writes the contents of an input range to an output range. 
     150 * 
     151 * Returns:  The output range.*/ 
     152O mate(I, O)(I input, O output) 
     153if(isInputRange!(I) && isOutputRange!(O, ElementType!(I))) { 
     154    foreach(elem; input) { 
     155        output.put(elem); 
     156    } 
     157    return output; 
     158} 
     159 
    80160/**Bins data into nbin equal width bins, indexed from 
    81161 * 0 to nbin - 1, with 0 being the smallest bin, etc. 
    82162 * The values returned are the counts for each bin.  Returns results on the GC 
    83  * heap by default, but uses TempAlloc stack if alloc == Alloc.STACK.*/ 
    84 Ret[] binCounts(Ret = ushort, T)(const T[] data, uint nbin, Alloc alloc = Alloc.HEAP) 
     163 * heap by default, but uses TempAlloc stack if alloc == Alloc.STACK. 
     164 * 
     165 * Works with any forward range with elements implicitly convertible to real.*/ 
     166Ret[] binCounts(Ret = ushort, T)(T data, uint nbin, Alloc alloc = Alloc.HEAP) 
     167if(isForwardRange!(T) && realInput!(T)) 
    85168in { 
    86     assert(data.length > 0); 
    87169    assert(nbin > 0); 
    88170} body { 
    89     T min = data[0], max = data[0]; 
     171    alias Unqual!(ElementType!(T)) E; 
     172    E min = data[0], max = data[0]; 
    90173    foreach(elem; data[1..$]) { 
    91174        if(elem > max) 
     
    94177            min = elem; 
    95178    } 
    96     T range = max - min; 
     179    E range = max - min; 
    97180 
    98181    Ret[] bins; 
     
    130213/**Bins data into nbin equal width bins, indexed from 
    131214 * 0 to nbin - 1, with 0 being the smallest bin, etc. 
    132  * The values returned are the bin index for each element.  Returns on GC 
    133  * heap by default, but TempAlloc stack if alloc == Alloc.STACK.*/ 
    134 Ret[] bin(Ret = ushort, T)(const T[] data, uint nbin, Alloc alloc = Alloc.HEAP) 
     215 * The values returned are the bin index for each element. 
     216 * 
     217 * Returns on GC heap by default, but TempAlloc stack if alloc == Alloc.STACK. 
     218 * Works with any forward range with elements implicitly convertible to real. 
     219 * 
     220 * Default return type is ubyte, because in the dstats.infotheory, 
     221 * entropy() and related functions specialize on ubytes, and become 
     222 * substandially faster.  However, if you're using more than 255 bins, 
     223 * you'll have to provide a different return type as a template parameter.*/ 
     224Ret[] bin(Ret = ubyte, T)(T data, uint nbin, Alloc alloc = Alloc.HEAP) 
     225if(isForwardRange!(T) && realInput!(T) && isIntegral!(Ret)) 
    135226in { 
    136     assert(data.length > 0); 
     227    assert(nbin <= Ret.max + 1); 
    137228    assert(nbin > 0); 
    138229} body { 
    139     Mutable!(T) min = data[0], max = data[0]; 
    140     foreach(elem; data[1..$]) { 
     230    alias ElementType!(T) E; 
     231    Unqual!(E) min = data.front, max = data.front; 
     232    auto dminmax = data; 
     233    dminmax.popFront; 
     234    foreach(elem; dminmax) { 
    141235        if(elem > max) 
    142236            max = elem; 
     
    144238            min = elem; 
    145239    } 
    146     T range = max - min; 
     240    E range = max - min; 
    147241 
    148242    Ret[] bins; 
     
    171265    double[] data = [0.0, .01, .03, .05, .11, .3, .5, .7, .89, 1]; 
    172266    auto res = bin(data, 10); 
    173     assert(res == [cast(ushort) 0, 0, 0, 0, 1, 3, 5, 7, 8, 9]); 
     267    assert(res == [cast(ubyte) 0, 0, 0, 0, 1, 3, 5, 7, 8, 9]); 
    174268    res = bin(data, 10, Alloc.STACK); 
    175     assert(res == [cast(ushort) 0, 0, 0, 0, 1, 3, 5, 7, 8, 9]); 
     269    assert(res == [cast(ubyte) 0, 0, 0, 0, 1, 3, 5, 7, 8, 9]); 
    176270    TempAlloc.free; 
    177271    writeln("Passed bin unittest."); 
     
    180274/**Bins data into nbin equal frequency bins, indexed from 
    181275 * 0 to nbin - 1, with 0 being the smallest bin, etc. 
    182  * The values returned are the bin index for each element.  Returns on GC 
    183  * heap by default, but TempAlloc stack if alloc == Alloc.STACK.*/ 
    184 Ret[] frqBin(Ret = ushort, T)(const T[] data, uint nbin, Alloc alloc = Alloc.HEAP) 
     276 * The values returned are the bin index for each element. 
     277 * 
     278 * Returns on GC heap by default, but TempAlloc stack if alloc == Alloc.STACK. 
     279 * Works with any forward range with elements implicitly convertible to real 
     280 * and a length property. 
     281 * 
     282 * Default return type is ubyte, because in the dstats.infotheory, 
     283 * entropy() and related functions specialize on ubytes, and become 
     284 * substandially faster.  However, if you're using more than 256 bins, 
     285 * you'll have to provide a different return type as a template parameter.*/ 
     286Ret[] frqBin(Ret = ubyte, T)(T data, uint nbin, Alloc alloc = Alloc.HEAP) 
     287if(realInput!(T) && isForwardRange!(T) && hasLength!(T) && isIntegral!(Ret)) 
    185288in { 
    186     assert(data.length > 0); 
    187289    assert(nbin > 0); 
    188290    assert(nbin <= data.length); 
     291    assert(nbin <= Ret.max + 1); 
    189292} body { 
    190293    Ret[] result; 
     
    198301    foreach(i, ref e; perm) 
    199302        e = i; 
    200     auto dd = data.tempdup
     303    auto dd = tempdup(data)
    201304    qsort(dd, perm); 
    202305    TempAlloc.free; 
     
    217320    double[] data = [5U, 1, 3, 8, 30, 10, 7]; 
    218321    auto res = frqBin(data, 3); 
    219     assert(res == [cast(ushort) 0, 0, 0, 1, 2, 2, 1]); 
     322    assert(res == [cast(ubyte) 0, 0, 0, 1, 2, 2, 1]); 
    220323    data = [3, 1, 4, 1, 5, 9, 2, 6, 5]; 
    221324    res = frqBin(data, 4, Alloc.STACK); 
    222     assert(res == [cast(ushort) 1, 0, 1, 0, 2, 3, 0, 3, 2]); 
     325    assert(res == [cast(ubyte) 1, 0, 1, 0, 2, 3, 0, 3, 2]); 
    223326    data = [3U, 1, 4, 1, 5, 9, 2, 6, 5, 3, 4, 8, 9, 7, 9, 2]; 
    224327    res = frqBin(data, 4); 
    225     assert(res == [cast(ushort) 1, 0, 1, 0, 2, 3, 0, 2, 2, 1, 1, 3, 3, 2, 3, 0]); 
     328    assert(res == [cast(ubyte) 1, 0, 1, 0, 2, 3, 0, 2, 2, 1, 1, 3, 3, 2, 3, 0]); 
    226329    TempAlloc.free; 
    227330    writeln("Passed frqBin unittest."); 
     
    229332 
    230333/**Generates a sequence from [start..end] by increment.  Includes start, 
    231  * excludes end. 
     334 * excludes end.  Does so eagerly as an array. 
    232335 * 
    233336 * Examples: 
     
    254357    auto s = seq(0, 5); 
    255358    assert(s == [0, 1, 2, 3, 4]); 
    256     writeln(stderr, "Passed seq test."); 
     359    writeln("Passed seq test."); 
    257360} 
    258361 
    259362/**Given an input array, outputs an array containing the rank from 
    260363 * [1, input.length] corresponding to each element.  Ties are dealt with by 
    261  * averaging.  This function duplicates the input array, and does not reorder 
     364 * averaging.  This function duplicates the input range, and does not reorder 
    262365 * it.  Return type is float[] by default, but if you are sure you have no ties, 
    263366 * ints can be used for efficiency, and if you need more precision when 
    264367 * averaging ties, you can use double or real. 
     368 * 
     369 * Works with any input range. 
    265370 * 
    266371 * Examples: 
     
    271376 * ---*/ 
    272377Ret[] rank(Ret = float, T)(const T[] input) { 
    273     auto iDup = input.tempdup
     378    auto iDup = tempdup(input)
    274379    scope(exit) TempAlloc.free; 
    275380    return rankSort!(Ret)(iDup); 
    276381} 
    277382 
    278 /**Same as rank(), but sorts the input array in ascending order rather than 
     383/**Same as rank(), but sorts the input range in ascending order rather than 
    279384 * duping it and working on a copy.  The array returned will still be 
    280385 * identical to that returned by rank(), i.e. the rank of each element will 
    281386 * correspond to the ranks of the elements in the input array before sorting. 
     387 * 
     388 * Works with any random access range with a length property. 
    282389 * 
    283390 * Examples: 
     
    287394 * assert(test == [1U, 2, 3, 4, 5]); 
    288395 * ---*/ 
    289 Ret[] rankSort(Ret = float, T)(T[] input) { 
     396Ret[] rankSort(Ret = float, T)(T input) 
     397if(isRandomAccessRange!(T)) { 
    290398    Ret[] ranks = newVoid!(Ret)(input.length); 
    291399    rankSort!(Ret, T)(input, ranks); 
     
    294402 
    295403// Speed hack used internally. 
    296 void rankSort(Ret, T)(T[] input, Ret[] ranks) { 
     404void rankSort(Ret, T)(T input, Ret[] ranks) { 
    297405    size_t[] perms = newStack!(size_t)(input.length); 
    298406    scope(exit) TempAlloc.free; 
     
    316424    assert(rankSort(test) == [3.5f, 5f, 3.5f, 1f, 2f]); 
    317425    assert(test == [1U,2,3,3,5]); 
    318     writeln(stderr, "Passed rank test."); 
     426    writeln("Passed rank test."); 
    319427} 
    320428 
    321429// Used internally by rank(). 
    322 void averageTies(T, U)(T[] sortedInput, U[] ranks, uint[] perms) nothrow 
     430void averageTies(T, U)(T sortedInput, U[] ranks, uint[] perms) nothrow 
    323431in { 
    324432    assert(sortedInput.length == ranks.length); 
     
    350458} 
    351459 
    352 /**Returns an AA of counts of every element in input. 
     460/**Returns an AA of counts of every element in input.  Works w/ any input range. 
    353461 * 
    354462 * Examples: 
     
    360468 * assert(frq[4] == 1); 
    361469 * ---*/ 
    362 uint[T] frequency(T)(const T[] input) pure nothrow { 
    363     uint[T] output; 
     470uint[ElementType!(T)] frequency(T)(T input) 
     471if(isInputRange!(T)) { 
     472    typeof(return) output; 
    364473    foreach(i; input) { 
    365474        output[i]++; 
     
    377486} 
    378487 
    379 unittest { 
    380     uint[int] temp=frequency([1,2,2,1,2]); 
    381     assert(temp[1]==2); 
    382     assert(temp[2]==3); 
    383     writefln("Passed frequency unittest."); 
    384 } 
    385  
    386488/// 
    387 int sign(T)(T num) pure nothrow { 
     489T sign(T)(T num) pure nothrow { 
    388490    if (num > 0) return 1; 
    389491    if (num < 0) return -1; 
     
    403505 * function is used, because caching would take up too much memory, and if 
    404506 * done lazily, would cause threading issues.*/ 
    405  real logFactorial(uint n) { 
     507real logFactorial(uint n) { 
    406508    //Input is uint, can't be less than 0, no need to check. 
    407509    if(n < staticFacTableLen) { 
     
    440542} 
    441543 
    442 ///No, nothing this horribly inefficient is used internally. 
    443 BigInt factorial(uint N) { 
    444     BigInt result = 1; 
    445     for(uint i = 2; i <= N; i++) { 
    446         result *= i; 
    447     } 
    448     return result; 
    449 
    450  
    451 unittest { 
    452     assert(factorial(4) == 24); 
    453     assert(factorial(5) == 120); 
    454     assert(factorial(6) == 720); 
    455     assert(factorial(7) == 5040); 
    456     assert(factorial(3) == 6); 
    457     writefln("Passed factorial test."); 
    458 
     544/////No, nothing this horribly inefficient is used internally. 
     545//BigInt factorial(uint N) { 
     546//    BigInt result = 1; 
     547//    for(uint i = 2; i <= N; i++) { 
     548//        result *= i; 
     549//    } 
     550//    return result; 
     551//
     552// 
     553//unittest { 
     554//    assert(factorial(4) == 24); 
     555//    assert(factorial(5) == 120); 
     556//    assert(factorial(6) == 720); 
     557//    assert(factorial(7) == 5040); 
     558//    assert(factorial(3) == 6); 
     559//    writefln("Passed factorial test."); 
     560//
    459561 
    460562/**A struct that generates all possible permutations of a sequence, 
     
    463565 * are references to the internal permutation state.  This is dangerous, but 
    464566 * necessary for performance.  Therefore, you 
    465  * will have to dup them if you expect them not to change. 
     567 * will have to dup them if you expect them not to change.  This is also 
     568 * the rationale for not making this struct an input range. 
    466569 * 
    467570 * Examples: 
     
    488591 
    489592public: 
    490     /**Generate a sequence of seq(0, length) to permute based on. 
    491      * Exists only if T == uint.*/ 
    492     static if(is(T == uint)) { 
    493         this(uint length) { 
    494             perm = (new uint[length]).ptr; 
    495             foreach(i; 0..length) { 
    496                 perm[i] = i; 
    497             } 
    498             Is = (new size_t[length]).ptr; 
    499             len = length; 
    500         } 
    501     } 
    502  
    503     /**Use user-provided sequence.  Creates a duplicate of this sequence 
     593    /**Generate permutations from an input range. 
     594     * Create a duplicate of this sequence 
    504595     * so that the original sequence is not modified.*/ 
    505     this(T[] input) { 
    506         perm = input.dup.ptr; 
    507         Is = (new size_t[input.length]).ptr; 
    508         len = input.length; 
     596    this(U)(U input) 
     597    if(isForwardRange!(U)) { 
     598        auto arr = toArray(input); 
     599        Is = (new size_t[arr.length]).ptr; 
     600        len = arr.length; 
     601        perm = arr.ptr; 
    509602    } 
    510603 
     
    545638} 
    546639 
     640private template PermRet(T...) { 
     641    static if(isForwardRange!(T[0])) { 
     642        alias Perm!(ElementType!(T[0])) PermRet; 
     643    } else static if(T.length == 1) { 
     644        alias Perm!uint PermRet; 
     645    } else alias Perm!(T[0]) PermRet; 
     646} 
     647 
     648/**Create a Perm struct from a range or of a set of bounds. 
     649 * 
     650 * Note:  PermRet is just a template to figure out what this should return. 
     651 * I would use auto if not for bug 2251. 
     652 * 
     653 * Examples: 
     654 * --- 
     655 * auto p = perm([1,2,3]); 
     656 * auto p = perm(5);  // Permutations of integers on range [0, 5]. 
     657 * auto p = perm(-1, 2); // Permutations of integers on range [-1, 2]. 
     658 * --- 
     659 */ 
     660PermRet!(T) perm(T...)(T stuff) { 
     661    alias typeof(return) rt; 
     662    static if(isForwardRange!(T[0])) { 
     663        return rt(stuff); 
     664    } else static if(T.length == 1) { 
     665        static assert(isIntegral!(T[0])); 
     666        return rt(seq(0U, cast(uint) stuff[0])); 
     667    } else { 
     668        return rt(seq(stuff)); 
     669    } 
     670} 
     671 
     672 
    547673unittest { 
    548674    double[][] res; 
    549     alias Perm!(double) PermD; 
    550     auto perm = PermD(cast(double[]) [1.0, 2.0, 3.0]); 
    551     foreach(p; perm) { 
     675    auto p1 = perm(cast(double[]) [1.0, 2.0, 3.0]); 
     676    foreach(p; p1) { 
    552677        res ~= p.dup; 
    553678    } 
    554     assert(res.canFind([1.0, 2.0, 3.0])); 
    555     assert(res.canFind([1.0, 3.0, 2.0])); 
    556     assert(res.canFind([2.0, 1.0, 3.0])); 
    557     assert(res.canFind([2.0, 3.0, 1.0])); 
    558     assert(res.canFind([3.0, 1.0, 2.0])); 
    559     assert(res.canFind([3.0, 2.0, 1.0])); 
     679    sort(res); 
     680    assert(res.canFindSorted([1.0, 2.0, 3.0])); 
     681    assert(res.canFindSorted([1.0, 3.0, 2.0])); 
     682    assert(res.canFindSorted([2.0, 1.0, 3.0])); 
     683    assert(res.canFindSorted([2.0, 3.0, 1.0])); 
     684    assert(res.canFindSorted([3.0, 1.0, 2.0])); 
     685    assert(res.canFindSorted([3.0, 2.0, 1.0])); 
    560686    assert(res.length == 6); 
    561687    uint[][] res2; 
    562     alias Perm!(uint) PermU; 
    563     auto perm2 = PermU(3); 
     688    auto perm2 = perm(3); 
    564689    foreach(p; perm2) { 
    565690        res2 ~= p.dup; 
    566691    } 
    567     assert(res2.canFind([0u, 1, 2])); 
    568     assert(res2.canFind([0u, 2, 1])); 
    569     assert(res2.canFind([1u, 0, 2])); 
    570     assert(res2.canFind([1u, 2, 0])); 
    571     assert(res2.canFind([2u, 0, 1])); 
    572     assert(res2.canFind([2u, 1, 0])); 
     692    sort(res2); 
     693    assert(res2.canFindSorted([0u, 1, 2])); 
     694    assert(res2.canFindSorted([0u, 2, 1])); 
     695    assert(res2.canFindSorted([1u, 0, 2])); 
     696    assert(res2.canFindSorted([1u, 2, 0])); 
     697    assert(res2.canFindSorted([2u, 0, 1])); 
     698    assert(res2.canFindSorted([2u, 1, 0])); 
    573699    assert(res2.length == 6); 
    574700 
     
    576702    // them, and they contain what they're supposed to contain, the result is 
    577703    // correct. 
    578     auto perm3 = PermU(6); 
     704    auto perm3 = perm(6); 
    579705    bool[uint[]] table; 
    580706    foreach(p; perm3) { 
     
    585711        assert(elem.dup.insertionSort == [0U, 1, 2, 3, 4, 5]); 
    586712    } 
    587     auto perm4 = PermU(5); 
     713    auto perm4 = perm(5); 
    588714    bool[uint[]] table2; 
    589715    foreach(p; perm4) { 
     
    594720        assert(elem.dup.insertionSort == [0U, 1, 2, 3, 4]); 
    595721    } 
    596     writeln(stderr, "Passed Perm test."); 
     722    writeln("Passed Perm test."); 
     723
     724 
     725private template CombRet(T) { 
     726    static if(isForwardRange!(T)) { 
     727        alias Comb!(Unqual!(ElementType!(T))) CombRet; 
     728    } else static if(is(T : uint)) { 
     729        alias Comb!uint CombRet; 
     730    } else static assert(0, "comb can only be created with range or uint."); 
     731
     732 
     733/**Create a Comb struct from a range or of a set of bounds. 
     734 * 
     735 * Note:  CombRet is just a template to figure out what this should return. 
     736 * I would use auto if not for bug 2251. 
     737 * 
     738 * Examples: 
     739 * --- 
     740 * auto p = comb([1,2,3]); 
     741 * auto p = comb(5);  // Permutations of integers on range [0, 5]. 
     742 * --- 
     743 */ 
     744CombRet!(T) comb(T)(T stuff, uint r) { 
     745    alias typeof(return) rt; 
     746    static if(isForwardRange!(T)) { 
     747        return rt(stuff, r); 
     748    } else { 
     749        return rt(seq(0U, cast(uint) stuff), r); 
     750    } 
    597751} 
    598752 
     
    602756 * are const references to the internal state of the Comb object.  This is 
    603757 * dangerous but necessary for performance.  If you want to save them past the 
    604  * next  iteration, you'll have to dup them yourself. 
     758 * next  iteration, you'll have to dup them yourself.  This is also the 
     759 * rationale for not making this struct an input range. 
     760 * 
    605761 * Examples: 
    606762 * --- 
     
    719875unittest { 
    720876    // Test indexing verison first. 
    721     auto comb1 = Comb!(uint)(5, 2); 
     877    auto comb1 = comb(5, 2); 
    722878    uint[][] vals; 
    723879    foreach(c; comb1) { 
    724880        vals ~= c.dup; 
    725881    } 
    726     assert(vals.canFind([0u,1].dup)); 
    727     assert(vals.canFind([0u,2].dup)); 
    728     assert(vals.canFind([0u,3].dup)); 
    729     assert(vals.canFind([0u,4].dup)); 
    730     assert(vals.canFind([1u,2].dup)); 
    731     assert(vals.canFind([1u,3].dup)); 
    732     assert(vals.canFind([1u,4].dup)); 
    733     assert(vals.canFind([2u,3].dup)); 
    734     assert(vals.canFind([2u,4].dup)); 
    735     assert(vals.canFind([3u,4].dup)); 
     882    sort(vals); 
     883    assert(vals.canFindSorted([0u,1].dup)); 
     884    assert(vals.canFindSorted([0u,2].dup)); 
     885    assert(vals.canFindSorted([0u,3].dup)); 
     886    assert(vals.canFindSorted([0u,4].dup)); 
     887    assert(vals.canFindSorted([1u,2].dup)); 
     888    assert(vals.canFindSorted([1u,3].dup)); 
     889    assert(vals.canFindSorted([1u,4].dup)); 
     890    assert(vals.canFindSorted([2u,3].dup)); 
     891    assert(vals.canFindSorted([2u,4].dup)); 
     892    assert(vals.canFindSorted([3u,4].dup)); 
    736893    assert(vals.length == 10); 
    737894 
    738895    // Now, test the array version. 
    739     auto comb2 = Comb!(uint)(seq(5U, 10U), 3); 
     896    auto comb2 = comb(seq(5U, 10U), 3); 
    740897    vals = null; 
    741898    foreach(c; comb2) { 
    742899        vals ~= c.dup; 
    743900    } 
    744     assert(vals.canFind([5u, 6, 7].dup)); 
    745     assert(vals.canFind([5u, 6, 8].dup)); 
    746     assert(vals.canFind([5u, 6, 9].dup)); 
    747     assert(vals.canFind([5u, 7, 8].dup)); 
    748     assert(vals.canFind([5u, 7, 9].dup)); 
    749     assert(vals.canFind([5u, 8, 9].dup)); 
    750     assert(vals.canFind([6U, 7, 8].dup)); 
    751     assert(vals.canFind([6u, 7, 9].dup)); 
    752     assert(vals.canFind([6u, 8, 9].dup)); 
    753     assert(vals.canFind([7u, 8, 9].dup)); 
     901    sort(vals); 
     902    assert(vals.canFindSorted([5u, 6, 7].dup)); 
     903    assert(vals.canFindSorted([5u, 6, 8].dup)); 
     904    assert(vals.canFindSorted([5u, 6, 9].dup)); 
     905    assert(vals.canFindSorted([5u, 7, 8].dup)); 
     906    assert(vals.canFindSorted([5u, 7, 9].dup)); 
     907    assert(vals.canFindSorted([5u, 8, 9].dup)); 
     908    assert(vals.canFindSorted([6U, 7, 8].dup)); 
     909    assert(vals.canFindSorted([6u, 7, 9].dup)); 
     910    assert(vals.canFindSorted([6u, 8, 9].dup)); 
     911    assert(vals.canFindSorted([7u, 8, 9].dup)); 
    754912    assert(vals.length == 10); 
    755913 
     
    777935} 
    778936 
    779 // This should be encapsulated within StackHash.  Workaround for bug 1629. 
    780 struct SHNode(K, V) { 
    781     alias SHNode!(K, V) SomeType; 
    782     SomeType* next; 
    783     Mutable!(K) key; 
    784     Mutable!(V) val; 
    785 } 
    786  
    787 /* A hash table that uses TempAlloc to allocate space.  Useful for building 
    788  * a quick symbol table in a performance-critical function that can't be 
    789  * performing tons of heap allocations.  Intentionally lacking ddoc because 
    790  * the design or even existence of this is still likely to change.*/ 
    791 struct StackHash(K, V) { 
    792 private: 
    793     alias SHNode!(K, V) Node; 
    794     Node[] roots; 
    795     TempAlloc.State TAState; 
    796     TypeInfo keyTI; 
    797     size_t _length; 
    798     Node* usedSentinel; 
    799  
    800     Node* newNode(K key) { 
    801         Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState); 
    802         ret.key =  key; 
    803         ret.val =  V.init; 
    804         ret.next = null; 
    805         return ret; 
    806     } 
    807  
    808     Node* newNode(K key, V val) { 
    809         Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState); 
    810         ret.key =  key; 
    811         ret.val = val; 
    812         ret.next = null; 
    813         return ret; 
    814     } 
    815  
    816     hash_t getHash(K key) { 
    817         static if(is(K : long) && K.sizeof <= hash_t.sizeof) { 
    818             hash_t hash = cast(hash_t) key; 
    819         } else static if(__traits(compiles, key.toHash())) { 
    820             hash_t hash = key.toHash(); 
    821         } else { 
    822             hash_t hash = keyTI.getHash(&key); 
    823         } 
    824         hash %= roots.length; 
    825         return hash; 
    826     } 
    827  
    828  
    829 public: 
    830     this(size_t nElem) { 
    831         // Obviously, the caller can never mean zero, because this struct 
    832         // can't work at all with nElem == 0, so assume it's a mistake and fix 
    833         // it here. 
    834         if(nElem == 0) 
    835             nElem++; 
    836         TAState = TempAlloc.getState; 
    837         roots = newStack!(Node)(nElem, TAState); 
    838         usedSentinel = cast(Node*) roots.ptr; 
    839         foreach(ref root; roots) { 
    840             root.key =  K.init; 
    841             root.val = V.init; 
    842             root.next = usedSentinel; 
    843         } 
    844         keyTI = typeid(K); 
    845     } 
    846  
    847     ref V opIndex(K key) { 
    848         hash_t hash = getHash(key); 
    849  
    850         if(roots[hash].next == usedSentinel) { 
    851             roots[hash].key =  key; 
    852             roots[hash].next = null; 
    853             _length++; 
    854             return roots[hash].val; 
    855         } else if(roots[hash].key == key) { 
    856             return roots[hash].val; 
    857         } else {  // Collision.  Start chaining. 
    858             Node** next = &(roots[hash].next); 
    859             while(*next !is null) { 
    860                 if((**next).key ==  key) { 
    861                     return (**next).val; 
    862                 } 
    863                 next = &((**next).next); 
    864             } 
    865             *next = newNode(key); 
    866             _length++; 
    867             return (**next).val; 
    868         } 
    869     } 
    870  
    871     V opIndexAssign(V val, K key) { 
    872         hash_t hash = getHash(key); 
    873  
    874         if(roots[hash].next == usedSentinel) { 
    875             roots[hash].key =  key; 
    876             roots[hash].val = val; 
    877             roots[hash].next = null; 
    878             _length++; 
    879             return val; 
    880         } else if(roots[hash].key ==  key) { 
    881             roots[hash].val = val; 
    882             return val; 
    883         } else {  // Collision.  Start chaining. 
    884             Node** next = &(roots[hash].next); 
    885             while(*next !is null) { 
    886                 if((**next).key == key) { 
    887                     (**next).val = val; 
    888                     return val; 
    889                 } 
    890                 next = &((**next).next); 
    891             } 
    892             _length++; 
    893             *next = newNode(key, val); 
    894             return val; 
    895         } 
    896     } 
    897  
    898     V[] values() { 
    899         auto space = newVoid!(V)(_length); 
    900         return values(space); 
    901     } 
    902  
    903     V[] valStack() { 
    904         auto space = newStack!(V)(_length); 
    905         return values(space); 
    906     } 
    907  
    908     V[] values(V[] space) { 
    909         size_t pos; 
    910         foreach(r; roots) { 
    911             if(r.next == usedSentinel) 
    912                 continue; 
    913             space[pos++] = r.val; 
    914             Node* next = r.next; 
    915             while(next !is null) { 
    916                 space[pos++] = next.val; 
    917                 next = next.next; 
    918             } 
    919         } 
    920         return space; 
    921     } 
    922  
    923     Mutable!(K)[] keys() const { 
    924         auto space = newVoid!(Mutable!(K))(_length); 
    925         return keys(space); 
    926     } 
    927  
    928     Mutable!(K)[] keyStack() const { 
    929         auto space = newStack!(Mutable!(K))(_length); 
    930         return keys(space); 
    931     } 
    932  
    933     Mutable!(K)[] keys(Mutable!(K)[] space) const { 
    934         size_t pos; 
    935         foreach(r; roots) { 
    936             if(r.next == usedSentinel) 
    937                 continue; 
    938             space[pos++] = r.key; 
    939             const(Node)* next = r.next; 
    940             while(next !is null) { 
    941                 space[pos++] = next.key; 
    942                 next = next.next; 
    943             } 
    944         } 
    945         return space; 
    946     } 
    947  
    948     int opApply(int delegate(ref V value) dg) { 
    949         int res = 0; 
    950         outer: 
    951         foreach(r; roots) { 
    952             if(r.next == usedSentinel) 
    953                 continue; 
    954             res = dg(r.val); 
    955             if (res) break; 
    956             Node* next = r.next; 
    957             while(next !is null) { 
    958                 res = dg(next.val); 
    959                 if(res) break outer; 
    960                 next = next.next; 
    961             } 
    962         } 
    963         return res; 
    964    } 
    965  
    966    int opApply(int delegate(ref K key, ref V value) dg) { 
    967         int res = 0; 
    968         outer: 
    969         foreach(r; roots) { 
    970             if(r.next == usedSentinel) 
    971                 continue; 
    972             res = dg(r.key, r.val); 
    973             if (res) break; 
    974             Node* next = r.next; 
    975             while(next !is null) { 
    976                 res = dg(next.key, next.val); 
    977                 if(res) break outer; 
    978                 next = next.next; 
    979             } 
    980         } 
    981         return res; 
    982    } 
    983  
    984    V* opIn_r(K key) { 
    985         hash_t hash = getHash(key); 
    986  
    987         if(roots[hash].next == usedSentinel) { 
    988             return null; 
    989         } else if(roots[hash].key == key) { 
    990             return &(roots[hash].val); 
    991         } else {  // Collision.  Start chaining. 
    992             Node* next = roots[hash].next; 
    993             while(next !is null) { 
    994                 if(next.key == key) { 
    995                     return &(next.val); 
    996                 } 
    997                 next = next.next; 
    998             } 
    999             return null; 
    1000         } 
    1001    } 
    1002  
    1003    void remove(K key) { 
    1004         hash_t hash = getHash(key); 
    1005  
    1006         Node** next = &(roots[hash].next); 
    1007         if(roots[hash].next == usedSentinel) { 
    1008             return; 
    1009         } else if(roots[hash].key == key) { 
    1010             _length--; 
    1011             if(roots[hash].next is null) { 
    1012                 roots[hash].key = K.init; 
    1013                 roots[hash].val = V.init; 
    1014                 roots[hash].next = usedSentinel; 
    1015                 return; 
    1016             } else { 
    1017                 roots[hash].key = (**next).key; 
    1018                 roots[hash].val = (**next).val; 
    1019                 roots[hash].next = (**next).next; 
    1020                 return; 
    1021             } 
    1022         } else {  // Collision.  Start chaining. 
    1023             while(*next !is null) { 
    1024                 if((**next).key == key) { 
    1025                     _length--; 
    1026                     *next = (**next).next; 
    1027                     break; 
    1028                 } 
    1029                 next = &((**next).next); 
    1030             } 
    1031             return; 
    1032         } 
    1033    } 
    1034  
    1035    size_t length() { 
    1036        return _length; 
    1037    } 
    1038  
    1039    real efficiency() { 
    1040        uint used = 0; 
    1041        foreach(root; roots) { 
    1042            if(root.next != usedSentinel) { 
    1043                used++; 
    1044            } 
    1045        } 
    1046        return cast(real) used / roots.length; 
    1047    } 
    1048 } 
    1049  
    1050 unittest { 
    1051     alias StackHash!(string, uint) mySh; 
    1052     mixin(newFrame); 
    1053     auto data = mySh(2);  // Make sure we get some collisions. 
    1054     data["foo"] = 1; 
    1055     data["bar"] = 2; 
    1056     data["baz"] = 3; 
    1057     data["waldo"] = 4; 
    1058     assert(!("foobar" in data)); 
    1059     assert(*("foo" in data) == 1); 
    1060     assert(*("bar" in data) == 2); 
    1061     assert(*("baz" in data) == 3); 
    1062     assert(*("waldo" in data) == 4); 
    1063     assert(data["foo"] == 1); 
    1064     assert(data["bar"] == 2); 
    1065     assert(data["baz"] == 3); 
    1066     assert(data["waldo"] == 4); 
    1067     assert(data.keys.sort == ["bar", "baz", "foo", "waldo"]); 
    1068     assert(data.values.sort == [1U, 2, 3, 4]); 
    1069     foreach(k, v; data) { 
    1070         assert(data[k] == v); 
    1071     } 
    1072     foreach(v; data) { 
    1073         assert(v > 0 && v < 5); 
    1074     } 
    1075  
    1076     // Test remove. 
    1077  
    1078     alias StackHash!(uint, uint) mySh2; 
    1079     auto foo = mySh2(7); 
    1080     for(uint i = 0; i < 20; i++) { 
    1081         foo[i] = i; 
    1082     } 
    1083     assert(foo.length == 20); 
    1084     for(uint i = 0; i < 20; i += 2) { 
    1085         foo.remove(i); 
    1086     } 
    1087     for(uint i = 0; i < 20; i++) { 
    1088         if(i & 1) { 
    1089             assert(*(i in foo) == i); 
    1090         } else { 
    1091             assert(!(i in foo)); 
    1092         } 
    1093     } 
    1094     auto vals = foo.values; 
    1095     assert(foo.length == 10); 
    1096     assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]); 
    1097  
    1098     writeln("Passed StackHash test."); 
    1099 } 
    1100  
    1101 /* A hash set that uses TempAlloc to allocate space.  Useful for building 
    1102  * a quick set in a performance-critical function that can't be 
    1103  * performing tons of heap allocations.  Intentionally lacking ddoc because 
    1104  * the design or even existence of this is still likely to change.*/ 
    1105 struct StackSet(K) { 
    1106 private: 
    1107     // Choose smallest representation of the data. 
    1108     struct Node1 { 
    1109         Node1* next; 
    1110         K key; 
    1111     } 
    1112  
    1113     struct Node2 { 
    1114         K key; 
    1115         Node2* next; 
    1116     } 
    1117  
    1118     static if(Node1.sizeof < Node2.sizeof) { 
    1119         alias Node1 Node; 
    1120     } else { 
    1121         alias Node2 Node; 
    1122     } 
    1123  
    1124     Node[] roots; 
    1125     TempAlloc.State TAState; 
    1126     TypeInfo keyTI; 
    1127     size_t _length; 
    1128     Node* usedSentinel; 
    1129  
    1130     Node* newNode(K key) { 
    1131         Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState); 
    1132         ret.key = key; 
    1133         ret.next = null; 
    1134         return ret; 
    1135     } 
    1136  
    1137     hash_t getHash(K key) { 
    1138         static if(is(K : long) && K.sizeof <= hash_t.sizeof) { 
    1139             hash_t hash = cast(hash_t) key; 
    1140         } else static if(__traits(compiles, key.toHash())) { 
    1141             hash_t hash = key.toHash(); 
    1142         } else { 
    1143             hash_t hash = keyTI.getHash(&key); 
    1144         } 
    1145         hash %= roots.length; 
    1146         return hash; 
    1147     } 
    1148  
    1149 public: 
    1150     this(size_t nElem) { 
    1151         // Obviously, the caller can never mean zero, because this struct 
    1152         // can't work at all with nElem == 0, so assume it's a mistake and fix 
    1153         // it here. 
    1154         if(nElem == 0) 
    1155             nElem++; 
    1156         TAState = TempAlloc.getState; 
    1157         roots = newStack!(Node)(nElem, TAState); 
    1158         usedSentinel = cast(Node*) roots.ptr; 
    1159         foreach(ref root; roots) { 
    1160             root.key = K.init; 
    1161             root.next = usedSentinel; 
    1162         } 
    1163         keyTI = typeid(K); 
    1164     } 
    1165  
    1166     void insert(K key) { 
    1167         hash_t hash = getHash(key); 
    1168  
    1169         if(roots[hash].next == usedSentinel) { 
    1170             roots[hash].key = key; 
    1171             roots[hash].next = null; 
    1172             _length++; 
    1173             return; 
    1174         } else if(roots[hash].key == key) { 
    1175             return; 
    1176         } else {  // Collision.  Start chaining. 
    1177             Node** next = &(roots[hash].next); 
    1178             while(*next !is null) { 
    1179                 if((**next).key == key) { 
    1180                     return; 
    1181                 } 
    1182                 next = &((**next).next); 
    1183             } 
    1184             *next = newNode(key); 
    1185             _length++; 
    1186             return; 
    1187         } 
    1188     } 
    1189  
    1190     K[] elems() { 
    1191         auto space = newVoid!(K)(_length); 
    1192         return elems(space); 
    1193     } 
    1194  
    1195     K[] elemStack() { 
    1196         auto space = newStack!(K)(_length); 
    1197         return elems(space); 
    1198     } 
    1199  
    1200     K[] elems(K[] space) { 
    1201         size_t pos; 
    1202         foreach(r; roots) { 
    1203             if(r.next == usedSentinel) 
    1204                 continue; 
    1205             space[pos++] = r.key; 
    1206             Node* next = r.next; 
    1207             while(next !is null) { 
    1208                 space[pos++] = next.key; 
    1209                 next = next.next; 
    1210             } 
    1211         } 
    1212         return space; 
    1213     } 
    1214  
    1215    int opApply(int delegate(ref K key) dg) { 
    1216         int res = 0; 
    1217         outer: 
    1218         foreach(r; roots) { 
    1219             if(r.next == usedSentinel) 
    1220                 continue; 
    1221             res = dg(r.key); 
    1222             if (res) break; 
    1223             Node* next = r.next; 
    1224             while(next !is null) { 
    1225                 res = dg(next.key); 
    1226                 if(res) break outer; 
    1227                 next = next.next; 
    1228             } 
    1229         } 
    1230         return res; 
    1231    } 
    1232  
    1233    bool opIn_r(K key) { 
    1234         hash_t hash = getHash(key); 
    1235  
    1236         if(roots[hash].next == usedSentinel) { 
    1237             return false; 
    1238         } else if(roots[hash].key == key) { 
    1239             return true; 
    1240         } else {  // Collision.  Start chaining. 
    1241             Node* next = roots[hash].next; 
    1242             while(next !is null) { 
    1243                 if(next.key == key) { 
    1244                     return true; 
    1245                 } 
    1246                 next = next.next; 
    1247             } 
    1248             return false; 
    1249         } 
    1250    } 
    1251  
    1252    void remove(K key) { 
    1253         hash_t hash = getHash(key); 
    1254  
    1255         Node** next = &(roots[hash].next); 
    1256         if(roots[hash].next == usedSentinel) { 
    1257             return; 
    1258         } else if(roots[hash].key == key) { 
    1259             _length--; 
    1260             if(roots[hash].next is null) { 
    1261                 roots[hash].key = K.init; 
    1262                 roots[hash].next = usedSentinel; 
    1263                 return; 
    1264             } else { 
    1265                 roots[hash].key = (**next).key; 
    1266                 roots[hash].next = (**next).next; 
    1267                 return; 
    1268             } 
    1269         } else {  // Collision.  Start chaining. 
    1270             while(*next !is null) { 
    1271                 if((**next).key == key) { 
    1272                     _length--; 
    1273                     *next = (**next).next; 
    1274                     break; 
    1275                 } 
    1276                 next = &((**next).next); 
    1277             } 
    1278             return; 
    1279         } 
    1280    } 
    1281  
    1282    size_t length() { 
    1283        return _length; 
    1284    } 
    1285 } 
    1286  
    1287 unittest { 
    1288     mixin(newFrame); 
    1289     alias StackSet!(uint) mySS; 
    1290     mySS set = mySS(12); 
    1291     foreach(i; 0..20) { 
    1292         set.insert(i); 
    1293     } 
    1294     assert(set.elems.qsort == seq(0U, 20U)); 
    1295     assert(set.elemStack.qsort == seq(0U, 20U)); 
    1296  
    1297     for(uint i = 0; i < 20; i += 2) { 
    1298         set.remove(i); 
    1299     } 
    1300  
    1301     foreach(i; 0..20) { 
    1302         if(i & 1) { 
    1303             assert(i in set); 
    1304         } else { 
    1305             assert(!(i in set)); 
    1306         } 
    1307     } 
    1308     uint[] contents; 
    1309     foreach(elem; set) { 
    1310         contents ~= elem; 
    1311     } 
    1312     assert(contents.qsort == [1U,3,5,7,9,11,13,15,17,19]); 
    1313     writeln("Passed StackSet test."); 
    1314 } 
    1315  
    1316937/**Computes the intersect of two arrays, i.e. the elements that are in both 
    1317938 * arrays.  Time and space complexity are O(first.length + second.length). 
     
    1343964 
    1344965    size_t pos; 
    1345     foreach(key, count; firstSet) { 
    1346         if(count > 0) 
    1347             result[pos++] = key; 
    1348     } 
    1349  
     966    auto key = firstSet.keys; 
     967    auto count = firstSet.values; 
     968    while(!key.empty) { 
     969        if(count.front > 0) { 
     970            result[pos++] = key.front; 
     971        } 
     972        key.popFront; 
     973        count.popFront; 
     974    } 
    1350975    return result[0..pos]; 
    1351976} 
     
    14021027        } 
    14031028    } 
    1404  
    14051029    return result[0..resPos]; 
    14061030} 
     
    14151039    foreach(i; 0..1000) { 
    14161040        foreach(ref f; first) 
    1417             f = uniform(gen, 0U, 2500); 
     1041            f = uniform(0U, 2500); 
    14181042        foreach(ref s; second) 
    1419             s = uniform(gen, 0U, 5000); 
     1043            s = uniform(0U, 5000); 
    14201044        auto hash = qsort!("a > b")(intersect(first, second)); 
    14211045        auto sort = intersectSorted!("a > b") 
  • trunk/cor.d

    r20 r28  
    11/**Pearson, Spearman and Kendall correlations, covariance. 
    22 * 
    3  * Author:  David Simcha 
    4  * 
    5  * Copyright (c) 2009, David Simcha 
     3 * Author:  David Simcha*/ 
     4 /* 
     5 * You may use this software under your choice of either of the following 
     6 * licenses.  YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 
     7 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 
     8 * THE BSD LICENSE.  IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 
     9 * TO OBEY THE TERMS OF THE PHOBOS LICENSE.  IF YOU ARE A LAWYER LOOKING FOR 
     10 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 
     11 * GET A LIFE. 
     12 * 
     13 * ---------------------Phobos License: --------------------------------------- 
     14 * 
     15 *  Copyright (C) 2008-2009 by David Simcha. 
     16 * 
     17 *  This software is provided 'as-is', without any express or implied 
     18 *  warranty. In no event will the authors be held liable for any damages 
     19 *  arising from the use of this software. 
     20 * 
     21 *  Permission is granted to anyone to use this software for any purpose, 
     22 *  including commercial applications, and to alter it and redistribute it 
     23 *  freely, in both source and binary form, subject to the following 
     24 *  restrictions: 
     25 * 
     26 *  o  The origin of this software must not be misrepresented; you must not 
     27 *     claim that you wrote the original software. If you use this software 
     28 *     in a product, an acknowledgment in the product documentation would be 
     29 *     appreciated but is not required. 
     30 *  o  Altered source versions must be plainly marked as such, and must not 
     31 *     be misrepresented as being the original software. 
     32 *  o  This notice may not be removed or altered from any source 
     33 *     distribution. 
     34 * 
     35 * --------------------BSD License:  ----------------------------------------- 
     36 * 
     37 * Copyright (c) 2008-2009, David Simcha 
    638 * All rights reserved. 
    739 * 
     
    2961 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
    3062 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
    31  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 
     63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
     64 */ 
    3265 
    3366module dstats.cor; 
    3467 
    35 import core.memory
     68import core.memory, std.range
    3669 
    3770import dstats.sort, dstats.base, dstats.alloc; 
     
    4982/**Pearson correlation.  When the term correlation is used unqualified, it is 
    5083 * usually referring to this quantity.  Pearson correlation assumes the input 
    51  * data is normally distributed and is therefore a parametric test.*/ 
    52 real pcor(T, U)(const T[] input1, const U[] input2) 
    53 in { 
    54     assert(input1.length == input2.length); 
    55 } body
     84 * data is normally distributed and is therefore a parametric test. 
     85 * This function works with any pair of input ranges.  If they are of different 
     86 * lengths, it uses the first min(input1.length, input2.length) elements.*/ 
     87real pcor(T, U)(T input1, U input2) 
     88if(realInput!(T) && realInput!(U))
    5689    OnlinePcor corCalc; 
    57     foreach(i; 0..input1.length) { 
    58         corCalc.addElement(input1[i], input2[i]); 
     90    while(!input1.empty && !input2.empty) { 
     91        corCalc.put(input1.front, input2.front); 
     92        input1.popFront; 
     93        input2.popFront; 
    5994    } 
    6095 
     
    6398 
    6499unittest { 
    65     assert(approxEqual(pcor([1,2,3,4,5], [1,2,3,4,5]), 1)); 
    66     assert(approxEqual(pcor([1,2,3,4,5], [10.0, 8.0, 6.0, 4.0, 2.0]), -1)); 
    67     assert(approxEqual(pcor([2, 4, 1, 6, 19], [4, 5, 1, 3, 2]), -.2382314)); 
     100    assert(approxEqual(pcor([1,2,3,4,5].dup, [1,2,3,4,5].dup), 1)); 
     101    assert(approxEqual(pcor([1,2,3,4,5].dup, [10.0, 8.0, 6.0, 4.0, 2.0].dup), -1)); 
     102    assert(approxEqual(pcor([2, 4, 1, 6, 19].dup, [4, 5, 1, 3, 2].dup), -.2382314)); 
     103 
     104        // Make sure everything works with lowest common denominator range type. 
     105    struct Count { 
     106        uint num; 
     107        uint upTo; 
     108        uint front() { 
     109            return num; 
     110        } 
     111        void popFront() { 
     112            num++; 
     113        } 
     114        bool empty() { 
     115            return num >= upTo; 
     116        } 
     117    } 
     118 
     119    Count a, b; 
     120    a.upTo = 100; 
     121    b.upTo = 100; 
     122    assert(approxEqual(pcor(a, b), 1)); 
     123 
    68124    writefln("Passed pcor unittest."); 
    69125} 
     
    78134public: 
    79135    /// 
    80     void addElement(T, U)(T elem1, U elem2) { 
     136    void put(T, U)(T elem1, U elem2) { 
    81137        _k++; 
    82138        real kNeg1 = 1.0L / _k; 
     
    135191 
    136192/// 
    137 real covariance(T, U)(const T[] input1, const U[] input2) 
     193real covariance(T, U)(T input1, U input2) 
     194if(realInput!(T) && realInput!(U)) { 
     195    OnlinePcor covCalc; 
     196    while(!input1.empty && !input2.empty) { 
     197        covCalc.put(input1.front, input2.front); 
     198        input1.popFront; 
     199        input2.popFront; 
     200    } 
     201    return covCalc.cov; 
     202
     203 
     204unittest { 
     205    assert(approxEqual(covariance([1,4,2,6,3].dup, [3,1,2,6,2].dup), 2.05)); 
     206    writeln("Passed covariance test."); 
     207
     208 
     209/**Spearman's rank correlation.  Non-parametric.  This is essentially the 
     210 * Pearson correlation of the ranks of the data, with ties dealt with by 
     211 * averaging. 
     212 * 
     213 * Currently only works on input ranges with a length property.*/ 
     214real scor(R, S)(R input1, S input2) 
     215if(isInputRange!(R) && isInputRange!(S) && 
     216   dstats.base.hasLength!(R) && dstats.base.hasLength!(S)) 
    138217in { 
    139218    assert(input1.length == input2.length); 
    140219} body { 
    141     OnlinePcor covCalc; 
    142     foreach(i; 0..input1.length) { 
    143         covCalc.addElement(input1[i], input2[i]); 
    144     } 
    145     return covCalc.cov; 
    146 
    147  
    148 unittest { 
    149     assert(approxEqual(covariance([1,4,2,6,3], [3,1,2,6,2]), 2.05)); 
    150     writeln("Passed covariance test."); 
    151 
    152  
    153 /**Spearman's rank correlation.  Non-parametric.  This is essentially the 
    154  * Pearson correlation of the ranks of the data, with ties dealt with by 
    155  * averaging.*/ 
    156 real scor(T, U)(const T[] input1, const U[] input2) 
    157 in { 
    158     assert(input1.length == input2.length); 
    159 } body {  // Not using rank() so I can recycle some allocations. 
     220    alias ElementType!(R) T; 
     221    alias ElementType!(S) U; 
    160222    if(input1.length < 2) 
    161223        return real.nan; 
     
    171233    iDup = (cast(T*) TempAlloc.malloc(largerSize * input1.length)) 
    172234           [0..input1.length]; 
    173     iDup[] = input1[]
     235    rangeCopy(iDup, input1)
    174236    qsort(iDup, perms); 
    175237 
     
    189251    // for the larger of T, U. 
    190252    U[] iDup2 = (cast(U*) iDup.ptr)[0..input2.length]; 
    191     iDup2[] = input2[]
     253    rangeCopy(iDup2, input2)
    192254 
    193255    qsort(iDup2, perms); 
     
    202264unittest { 
    203265    //Test against a few known values. 
    204     assert(approxEqual(scor([1,2,3,4,5,6], [3,1,2,5,4,6]), 0.77143)); 
    205     assert(approxEqual(scor([3,1,2,5,4,6], [1,2,3,4,5,6] ), 0.77143)); 
    206     assert(approxEqual(scor([3,6,7,35,75], [1,63,53,67,3]), 0.3)); 
    207     assert(approxEqual(scor([1,63,53,67,3], [3,6,7,35,75]), 0.3)); 
    208     assert(approxEqual(scor([1.5,6.3,7.8,4.2,1.5], [1,63,53,67,3]), .56429)); 
    209     assert(approxEqual(scor([1,63,53,67,3], [1.5,6.3,7.8,4.2,1.5]), .56429)); 
    210     assert(approxEqual(scor([1.5,6.3,7.8,7.8,1.5], [1,63,53,67,3]), .79057)); 
    211     assert(approxEqual(scor([1,63,53,67,3], [1.5,6.3,7.8,7.8,1.5]), .79057)); 
    212     assert(approxEqual(scor([1.5,6.3,7.8,6.3,1.5], [1,63,53,67,3]), .63246)); 
    213     assert(approxEqual(scor([1,63,53,67,3], [1.5,6.3,7.8,6.3,1.5]), .63246)); 
    214     assert(approxEqual(scor([3,4,1,5,2,1,6,4], [1,3,2,6,4,2,6,7]), .6829268)); 
    215     assert(approxEqual(scor([1,3,2,6,4,2,6,7], [3,4,1,5,2,1,6,4]), .6829268)); 
     266    assert(approxEqual(scor([1,2,3,4,5,6].dup, [3,1,2,5,4,6].dup), 0.77143)); 
     267    assert(approxEqual(scor([3,1,2,5,4,6].dup, [1,2,3,4,5,6].dup ), 0.77143)); 
     268    assert(approxEqual(scor([3,6,7,35,75].dup, [1,63,53,67,3].dup), 0.3)); 
     269    assert(approxEqual(scor([1,63,53,67,3].dup, [3,6,7,35,75].dup), 0.3)); 
     270    assert(approxEqual(scor([1.5,6.3,7.8,4.2,1.5].dup, [1,63,53,67,3].dup), .56429)); 
     271    assert(approxEqual(scor([1,63,53,67,3].dup, [1.5,6.3,7.8,4.2,1.5].dup), .56429)); 
     272    assert(approxEqual(scor([1.5,6.3,7.8,7.8,1.5].dup, [1,63,53,67,3].dup), .79057)); 
     273    assert(approxEqual(scor([1,63,53,67,3].dup, [1.5,6.3,7.8,7.8,1.5].dup), .79057)); 
     274    assert(approxEqual(scor([1.5,6.3,7.8,6.3,1.5].dup, [1,63,53,67,3].dup), .63246)); 
     275    assert(approxEqual(scor([1,63,53,67,3].dup, [1.5,6.3,7.8,6.3,1.5].dup), .63246)); 
     276    assert(approxEqual(scor([3,4,1,5,2,1,6,4].dup, [1,3,2,6,4,2,6,7].dup), .6829268)); 
     277    assert(approxEqual(scor([1,3,2,6,4,2,6,7].dup, [3,4,1,5,2,1,6,4].dup), .6829268)); 
    216278    uint[] one = new uint[1000], two = new uint[1000]; 
    217279    foreach(i; 0..100) {  //Further sanity checks for things like commutativity. 
    218         size_t lowerBound = uniform(gen, 0, one.length); 
    219         size_t upperBound = uniform(gen, 0, one.length); 
     280        size_t lowerBound = uniform(0, one.length); 
     281        size_t upperBound = uniform(0, one.length); 
    220282        if(lowerBound > upperBound) swap(lowerBound, upperBound); 
    221283        foreach(ref o; one) { 
    222             o = uniform(gen, 1, 10);  //Generate lots of ties. 
     284            o = uniform(1, 10);  //Generate lots of ties. 
    223285        } 
    224286        foreach(ref o; two) { 
    225              o = uniform(gen, 1, 10);  //Generate lots of ties. 
     287             o = uniform(1, 10);  //Generate lots of ties. 
    226288        } 
    227289        real sOne = 
     
    245307        assert(approxEqual(sFour, sFive) || (isnan(sFour) && isnan(sFive))); 
    246308    } 
    247     writefln("Passed scor unittest."); 
     309 
     310    // Test input ranges. 
     311    struct Count { 
     312        uint num; 
     313        uint upTo; 
     314        uint front() { 
     315            return num; 
     316        } 
     317        void popFront() { 
     318            num++; 
     319        } 
     320        bool empty() { 
     321            return num >= upTo; 
     322        } 
     323        uint length() { 
     324            return upTo - num; 
     325        } 
     326    } 
     327 
     328    Count a, b; 
     329    a.upTo = 100; 
     330    b.upTo = 100; 
     331    assert(scor(a, b) == 1); 
     332 
     333    writeln("Passed scor unittest."); 
    248334} 
    249335 
     
    255341  * standard formulas, and therefore unlikely to have weird, subtle bugs.*/ 
    256342 
    257 real kcorOld(T, U)(const T[] input1, const U[] input2) 
     343private real kcorOld(T, U)(const T[] input1, const U[] input2) 
    258344in { 
    259345    assert(input1.length == input2.length); 
     
    294380 * bubble sort distance, or the number of swaps that would be needed in a 
    295381 * bubble sort to sort input2 into the same order as input1.  It is 
    296  * a robust, non-parametric correlation metric.*/ 
    297 real kcor(T, U)(const T[] input1, const U[] input2) { 
    298     auto i1d = input1.tempdup; 
     382 * a robust, non-parametric correlation metric. 
     383 * 
     384 * Since a copy of the inputs is made anyhow because they need to be sorted, 
     385 * this function can work with any input range.  However, the ranges must 
     386 * have the same length.*/ 
     387real kcor(T, U)(T input1, U input2) 
     388if(isInputRange!(T) && isInputRange!(U)) { 
     389    auto i1d = tempdup(input1); 
    299390    scope(exit) TempAlloc.free; 
    300     auto i2d = input2.tempdup
     391    auto i2d = tempdup(input2)
    301392    scope(exit) TempAlloc.free; 
    302393    return kcorDestructive(i1d, i2d); 
     
    304395 
    305396/**Kendall's Tau O(N log N) destroys input vectors but requires 
    306  * O(1) auxilliary space provided T.sizeof == U.sizeof.*/ 
     397 * O(1) auxilliary space provided T.sizeof == U.sizeof. 
     398 * Also only works on arrays.*/ 
    307399real kcorDestructive(T, U)(T[] input1, U[] input2) 
    308400in { 
     
    369461unittest { 
    370462    //Test against known values. 
    371     assert(approxEqual(kcor([1,2,3,4,5], [3,1,7,4,3]), 0.1054093)); 
    372     assert(approxEqual(kcor([3,6,7,35,75],[1,63,53,67,3]), 0.2)); 
    373     assert(approxEqual(kcor([1.5,6.3,7.8,4.2,1.5], [1,63,53,67,3]), .3162287)); 
     463    assert(approxEqual(kcor([1,2,3,4,5].dup, [3,1,7,4,3].dup), 0.1054093)); 
     464    assert(approxEqual(kcor([3,6,7,35,75].dup,[1,63,53,67,3].dup), 0.2)); 
     465    assert(approxEqual(kcor([1.5,6.3,7.8,4.2,1.5].dup, [1,63,53,67,3].dup), .3162287)); 
    374466    uint[] one = new uint[1000], two = new uint[1000]; 
    375467    // Test complex, fast implementation against straightforward, 
    376468    // slow implementation. 
    377469    foreach(i; 0..100) { 
    378         size_t lowerBound = uniform(gen, 0, 1000); 
    379         size_t upperBound = uniform(gen, 0, 1000); 
     470        size_t lowerBound = uniform(0, 1000); 
     471        size_t upperBound = uniform(0, 1000); 
    380472        if(lowerBound > upperBound) swap(lowerBound, upperBound); 
    381473        foreach(ref o; one) { 
    382             o = uniform(gen, 1, 10); 
     474            o = uniform(1, 10); 
    383475        } 
    384476        foreach(ref o; two) { 
    385              o = uniform(gen, 1, 10); 
     477             o = uniform(1, 10); 
    386478        } 
    387479        real kOne = 
     
    389481        real kTwo = 
    390482             kcorOld(one[lowerBound..upperBound], two[lowerBound..upperBound]); 
    391         assert(approxEqual(kOne, kTwo)); 
    392     } 
     483        assert(approxEqual(kOne, kTwo) || (isNaN(kOne) && isNaN(kTwo))); 
     484    } 
     485 
     486    // Make sure everything works with lowest common denominator range type. 
     487    struct Count { 
     488        uint num; 
     489        uint upTo; 
     490        uint front() { 
     491            return num; 
     492        } 
     493        void popFront() { 
     494            num++; 
     495        } 
     496        bool empty() { 
     497            return num >= upTo; 
     498        } 
     499    } 
     500 
     501    Count a, b; 
     502    a.upTo = 100; 
     503    b.upTo = 100; 
     504    assert(approxEqual(kcor(a, b), 1)); 
    393505    writefln("Passed kcor unittest."); 
    394506} 
  • trunk/distrib.d

    r20 r28  
    11/**Probability distribution CDFs, PDFs/PMFs, and a few inverse CDFs. 
    22 * 
    3  * Authors:  David Simcha, Don Clugston 
    4  
     3 * Authors:  David Simcha, Don Clugston*/ 
     4 /
    55 * Acknowledgements:  Some of this module was borrowed the mathstat module 
    66 * of Don Clugston's MathExtra library.  This was done to create a 
     
    251251 
    252252unittest { 
    253    Random gen = Random(unpredictableSeed); 
    254253   foreach(i; 0..1_000) { 
    255254       // Restricted variable ranges are because, in the tails, more than one 
     
    257256       // Obviously, this is one of those corner cases that nothing can be 
    258257       // done about. 
    259        real lambda = uniform(gen, .05L, 8.0L); 
    260        uint k = uniform(gen, 0UL, cast(uint) ceil(3.0L * lambda)); 
     258       real lambda = uniform(.05L, 8.0L); 
     259       uint k = uniform(0UL, cast(uint) ceil(3.0L * lambda)); 
    261260       real pVal = poissonCDF(k, lambda); 
    262261       assert(invPoissonCDF(pVal, lambda) == k); 
     
    377376       // Obviously, this is one of those corner cases that nothing can be 
    378377       // done about.  Using small n's, moderate p's prevents this. 
    379        uint n = uniform(gen, 5L, 10L); 
    380        uint k = uniform(gen, 0L, n); 
    381        real p = uniform(gen, 0.1L, 0.9L); 
     378       uint n = uniform(5L, 10L); 
     379       uint k = uniform(0L, n); 
     380       real p = uniform(0.1L, 0.9L); 
    382381       real pVal = binomialCDF(k, n, p); 
    383382       assert(invBinomialCDF(pVal, n, p) == k); 
     
    505504 
    506505real hyperExact(ulong x, ulong n1, ulong n2, ulong n, ulong startAt = 0) { 
    507     invariant real constPart = logFactorial(n1) + logFactorial(n2) + 
     506    immutable real constPart = logFactorial(n1) + logFactorial(n2) + 
    508507        logFactorial(n) + logFactorial(n1 + n2 - n) - logFactorial(n1 + n2); 
    509508    real sum = 0; 
     
    663662 
    664663private { 
    665 invariant static real P0[] = [ 
     664immutable static real P0[] = [ 
    666665   -0x1.758f4d969484bfdcp-7,    // -0.011400139698853582732 
    667666   0x1.53cee17a59259dd2p-3, // 0.16592193750979583221 
     
    674673]; 
    675674 
    676 invariant static real Q0[] = [ 
     675immutable static real Q0[] = [ 
    677676   -0x1.64b92ae791e64bb2p-7,    // -0.010886331510064192632 
    678677   0x1.7585c7d597298286p-3, // 0.1823840725000038842 
     
    685684]; 
    686685 
    687 invariant static real P1[] = [ 
     686immutable static real P1[] = [ 
    688687   0x1.20ceea49ea142f12p-13,    // 0.00013771451113809605662 
    689688   0x1.cbe8a7267aea80bp-7,  // 0.014035302749980729871 
     
    698697]; 
    699698 
    700 invariant static real Q1[] = [ 
     699immutable static real Q1[] = [ 
    701700   0x1.3a4ce1406cea98fap-13,    // 0.00014987006762866754669 
    702701   0x1.f45332623335cda2p-7, // 0.015268706895221911913 
     
    711710]; 
    712711 
    713 invariant static real P2[] = [ 
     712immutable static real P2[] = [ 
    714713   0x1.8c124a850116a6d8p-21,    // 7.3774056430545041787e-07 
    715714   0x1.534abda3c2fb90bap-13,    // 0.0001617870121822776094 
     
    722721]; 
    723722 
    724 invariant static real Q2[] = [ 
     723immutable static real Q2[] = [ 
    725724   0x1.af03f4fc0655e006p-21,    // 8.0282885006885383316e-07 
    726725   0x1.713192048d11fb2p-13, // 0.00017604524340842589303 
     
    733732]; 
    734733 
    735 invariant static real P3[] = [ 
     734immutable static real P3[] = [ 
    736735   -0x1.55da447ae3806168p-34,   // -7.7728283809481633868e-11 
    737736   -0x1.145635641f8778a6p-24,   // -6.4339663876133447143e-08 
     
    744743]; 
    745744 
    746 invariant static real Q3[] = [ 
     745immutable static real Q3[] = [ 
    747746   -0x1.74022dd5523e6f84p-34,   // -8.4584942637876803775e-11 
    748747   -0x1.2cb60d61e29ee836p-24,   // -7.0014768675591937804e-08 
     
    833832    // Just make sure invNormalCDF works like it should as the inverse. 
    834833    foreach(i; 0..1000) { 
    835         real x = uniform(gen, 0.0L, 1.0L); 
    836         real mean = uniform(gen, 0.0L, 100.0L); 
    837         real sd = uniform(gen, 1.0L, 3.0L); 
     834        real x = uniform(0.0L, 1.0L); 
     835        real mean = uniform(0.0L, 100.0L); 
     836        real sd = uniform(1.0L, 3.0L); 
    838837        real inv = invNormalCDF(x, mean, sd); 
    839838        real rec = normalCDF(inv, mean, sd); 
    840839        assert(approxEqual(x, rec)); 
    841840    } 
    842     writeln(stderr, "Passed invNormalCDF unittest."); 
     841    writeln("Passed invNormalCDF unittest."); 
    843842} 
    844843 
     
    919918assert(approxEqual(studentsTCDF(invStudentsTCDF(0.4L, 18), 18), .4L)); 
    920919assert(approxEqual(studentsTCDF( invStudentsTCDF(0.9L, 11), 11), 0.9L)); 
    921 writeln(stderr, "Passed studentsTCDF."); 
     920writeln("Passed studentsTCDF."); 
    922921} 
    923922 
     
    11611160    uint nSkipped; 
    11621161    foreach(i; 0..1000) { 
    1163         uint n = uniform(gen, 1u, 10u); 
    1164         real p = uniform(gen, 0.0L, 1L); 
    1165         uint k = uniform(gen, 0, 20); 
     1162        uint n = uniform(1u, 10u); 
     1163        real p = uniform(0.0L, 1L); 
     1164        uint k = uniform(0, 20); 
    11661165        real pVal = negBinomCDF(k, n, p); 
    11671166 
  • trunk/gamma.d

    r5 r28  
    55 * is trying to integrate the rest of this library into Tango, 
    66 * tango.math.GammaFunction would be a drop-in replacement. 
    7  * 
    8  * Copyright: Based on the CEPHES math library, which is 
     7 **/ 
     8 /* Copyright: Based on the CEPHES math library, which is 
    99 *            Copyright (C) 1994 Stephen L. Moshier (moshier@world.std.com). 
    1010 * Authors:   Stephen L. Moshier (original C code). Conversion to D by Don Clugston 
  • trunk/infotheory.d

    r24 r28  
    33 * quantities, i.e, entropy, mutual info, etc. are output in bits. 
    44 * 
    5  * Author:  David Simcha 
    6  * 
    7 * Copyright (c) 2009, David Simcha 
    8 * All rights reserved. 
    9 
    10 * Redistribution and use in source and binary forms, with or without 
    11 * modification, are permitted provided that the following conditions are met: 
    12 *     * Redistributions of source code must retain the above copyright 
    13 *       notice, this list of conditions and the following disclaimer. 
    14 *     * Redistributions in binary form must reproduce the above copyright 
    15 *       notice, this list of conditions and the following disclaimer in the 
    16 *       documentation and/or other materials provided with the distribution. 
    17 *     * Neither the name of the <organization> nor the 
    18 *       names of its contributors may be used to endorse or promote products 
    19 *       derived from this software without specific prior written permission. 
    20 
    21 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 
    22 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
    23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
    24 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY 
    25 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
    26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
    27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 
    28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
    29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
    30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 
     5 * Author:  David Simcha*/ 
     6 /* 
     7 * You may use this software under your choice of either of the following 
     8 * licenses.  YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 
     9 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 
     10 * THE BSD LICENSE.  IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 
     11 * TO OBEY THE TERMS OF THE PHOBOS LICENSE.  IF YOU ARE A LAWYER LOOKING FOR 
     12 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 
     13 * GET A LIFE. 
     14 * 
     15 * ---------------------Phobos License: --------------------------------------- 
     16 * 
     17 *  Copyright (C) 2008-2009 by David Simcha. 
     18 * 
     19 *  This software is provided 'as-is', without any express or implied 
     20 *  warranty. In no event will the authors be held liable for any damages 
     21 *  arising from the use of this software. 
     22 * 
     23 *  Permission is granted to anyone to use this software for any purpose, 
     24 *  including commercial applications, and to alter it and redistribute it 
     25 *  freely, in both source and binary form, subject to the following 
     26 *  restrictions: 
     27 * 
     28 *  o  The origin of this software must not be misrepresented; you must not 
     29 *     claim that you wrote the original software. If you use this software 
     30 *     in a product, an acknowledgment in the product documentation would be 
     31 *     appreciated but is not required. 
     32 *  o  Altered source versions must be plainly marked as such, and must not 
     33 *     be misrepresented as being the original software. 
     34 *  o  This notice may not be removed or altered from any source 
     35 *     distribution. 
     36 * 
     37 * --------------------BSD License:  ----------------------------------------- 
     38 * 
     39 * Copyright (c) 2008-2009, David Simcha 
     40 * All rights reserved. 
     41 * 
     42 * Redistribution and use in source and binary forms, with or without 
     43 * modification, are permitted provided that the following conditions are met: 
     44 * 
     45 *     * Redistributions of source code must retain the above copyright 
     46 *       notice, this list of conditions and the following disclaimer. 
     47 * 
     48 *     * Redistributions in binary form must reproduce the above copyright 
     49 *       notice, this list of conditions and the following disclaimer in the 
     50 *       documentation and/or other materials provided with the distribution. 
     51 * 
     52 *     * Neither the name of the authors nor the 
     53 *       names of its contributors may be used to endorse or promote products 
     54 *       derived from this software without specific prior written permission. 
     55 * 
     56 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 
     57 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
     58 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
     59 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY 
     60 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
     61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
     62 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 
     63 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
     64 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
     65 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
     66 */ 
    3167 
    3268module dstats.infotheory; 
    3369 
    34 import std.traits, std.math, std.typetuple, std.functional; 
     70import std.traits, std.math, std.typetuple, std.functional, std.range, 
     71       std.array, std.typecons; 
    3572 
    3673import dstats.sort, dstats.summary, dstats.base, dstats.alloc; 
     
    4279} 
    4380 
    44 /**This function calculates the Shannon entropy of an array that is 
     81/**This function calculates the Shannon entropy of a forward range that is 
    4582 * treated as frequency counts of a set of discrete observations. 
    4683 * 
     
    5390 * --- 
    5491 */ 
    55 real entropyCounts(T)(const T[] data) { 
    56     return entropyCounts(data, sum!(T, real)(data)); 
    57 
    58  
    59 real entropyCounts(T)(const T[] data, real n) { 
     92real entropyCounts(T)(T data) 
     93if(isForwardRange!(T)) { 
     94    auto save = data; 
     95    return entropyCounts(save, sum!(T, real)(data)); 
     96
     97 
     98real entropyCounts(T)(T data, real n) 
     99if(isForwardRange!(T)) { 
    60100    real nNeg1 = 1.0L / n; 
    61101    real entropy = 0; 
     
    70110 
    71111unittest { 
    72     real uniform3 = entropyCounts([4, 4, 4]); 
     112    real uniform3 = entropyCounts([4, 4, 4].dup); 
    73113    assert(approxEqual(uniform3, log2(3))); 
    74     real uniform4 = entropyCounts([5, 5, 5, 5]); 
     114    real uniform4 = entropyCounts([5, 5, 5, 5].dup); 
    75115    assert(approxEqual(uniform4, 2)); 
    76     assert(entropyCounts([2,2])==1); 
    77     assert(entropyCounts([5.1,5.1,5.1,5.1])==2); 
    78     assert(approxEqual(entropyCounts([1,2,3,4,5]), 2.1492553971685)); 
     116    assert(entropyCounts([2,2].dup)==1); 
     117    assert(entropyCounts([5.1,5.1,5.1,5.1].dup)==2); 
     118    assert(approxEqual(entropyCounts([1,2,3,4,5].dup), 2.1492553971685)); 
    79119    writefln("Passed entropyCounts unittest."); 
    80120} 
    81121 
    82 // Should be encapsulated within entropy().  Workaround for bug 1629. 
     122template FlattenType(T...) { 
     123    alias FlattenTypeImpl!(T).ret FlattenType; 
     124
     125 
     126template FlattenTypeImpl(T...) { 
     127    static if(T.length == 0) { 
     128        alias TypeTuple!() ret; 
     129    } else { 
     130        T[0] j; 
     131        static if(is(typeof(j._jointRanges))) { 
     132            alias TypeTuple!(typeof(j._jointRanges), FlattenType!(T[1..$])) ret; 
     133        } else { 
     134            alias TypeTuple!(T[0], FlattenType!(T[1..$])) ret; 
     135        } 
     136    } 
     137
     138 
     139private Joint!(FlattenType!(T, U)) flattenImpl(T, U...)(T start, U rest) { 
     140    static if(rest.length == 0) { 
     141        return start; 
     142    } else static if(is(typeof(rest[0]._jointRanges))) { 
     143        return flattenImpl(jointImpl(start.tupleof, rest[0]._jointRanges), rest[1..$]); 
     144    } else { 
     145        return flattenImpl(jointImpl(start.tupleof, rest[0]), rest[1..$]); 
     146    } 
     147
     148 
     149Joint!(FlattenType!(T)) flatten(T...)(T args) { 
     150    static assert(args.length > 0); 
     151    static if(is(typeof(args[0]._jointRanges))) { 
     152        auto myTuple = args[0]; 
     153    } else { 
     154        auto myTuple = jointImpl(args[0]); 
     155    } 
     156    static if(args.length == 1) { 
     157        return myTuple; 
     158    } else { 
     159        return flattenImpl(myTuple, args[1..$]); 
     160    } 
     161
     162 
     163/**Bind a set of ranges together to represent a joint probability distribution. 
     164 * 
     165 * Examples: 
     166 * --- 
     167 * auto foo = [1,2,3,1,1]; 
     168 * auto bar = [2,4,6,2,2]; 
     169 * auto e = entropy(joint(foo, bar));  // Calculate joint entropy of foo, bar. 
     170 */ 
     171Joint!(FlattenType!(T)) joint(T...)(T args) { 
     172    return jointImpl(flatten(args).tupleof); 
     173
     174 
     175Joint!(T) jointImpl(T...)(T args) { 
     176    return Joint!(T)(args); 
     177
     178 
     179/**Iterate over a set of ranges in lockstep and return an ObsEnt, 
     180 * which is used internally by entropy functions on each iteration.*/ 
     181struct Joint(T...) { 
     182    T _jointRanges; 
     183 
     184    auto front() { 
     185        alias ElementsTuple!(T) E; 
     186        alias ObsEnt!(E) rt; 
     187        rt ret; 
     188        foreach(ti, elem; _jointRanges) { 
     189            ret.tupleof[ti] = elem.front; 
     190        } 
     191        return ret; 
     192    } 
     193 
     194    void popFront() { 
     195        foreach(ti, elem; _jointRanges) { 
     196            _jointRanges[ti].popFront; 
     197        } 
     198    } 
     199 
     200    bool empty() { 
     201        foreach(elem; _jointRanges) { 
     202            if(elem.empty) { 
     203                return true; 
     204            } 
     205        } 
     206        return false; 
     207    } 
     208 
     209    static if(T.length > 0 && allSatisfy!(dstats.base.hasLength, T)) { 
     210        uint length() { 
     211            uint ret = uint.max; 
     212            foreach(range; _jointRanges) { 
     213                auto len = range.length; 
     214                if(len < ret) { 
     215                    ret = len; 
     216                } 
     217            } 
     218            return ret; 
     219        } 
     220    } 
     221
     222 
     223template ElementsTuple(T...) { 
     224    static if(T.length == 1) { 
     225        alias TypeTuple!(Unqual!(ElementType!(T[0]))) ElementsTuple; 
     226    } else { 
     227        alias TypeTuple!(Unqual!(ElementType!(T[0])), ElementsTuple!(T[1..$])) 
     228            ElementsTuple; 
     229    } 
     230
     231 
     232private template Comparable(T) { 
     233    enum bool Comparable = is(typeof({ 
     234        T a; 
     235        T b; 
     236        return a < b; })); 
     237
     238 
    83239struct ObsEnt(T...) { 
    84     atomsOfArrayTuple!(T) compRep; 
     240    T compRep; 
    85241 
    86242    hash_t toHash() { 
     
    99255    } 
    100256 
    101     bool opEquals(typeof(this) rhs) { 
     257    bool opEquals(ref typeof(this) rhs) { 
    102258        foreach(ti, elem; this.tupleof) { 
    103259            if(elem != rhs.tupleof[ti]) 
     
    106262        return true; 
    107263    } 
    108 
    109  
    110 /**Calculates the joint entropy of a set of observations.  Each array represets 
    111  * a vector of observations. If only one array is given, this reduces to the 
    112  * plain old entropy. 
     264 
     265    static if(allSatisfy!(Comparable, T)) { 
     266        int opCmp(ref typeof(this) rhs) { 
     267            foreach(ti, elem; this.tupleof) { 
     268                if(rhs.tupleof[ti] < elem) { 
     269                    return -1; 
     270                } else if(rhs.tupleof[ti] > elem) { 
     271                    return 1; 
     272                } 
     273            } 
     274            return 0; 
     275        } 
     276    } 
     277 
     278
     279 
     280/**Calculates the joint entropy of a set of observations.  Each input range 
     281 * represents a vector of observations. If only one range is given, this reduces 
     282 * to the plain old entropy.  Input range must have a length. 
     283 * 
     284 * Note:  This function specializes if ElementType!(T) is a byte, ubyte, or 
     285 * char, resulting in a much faster entropy calculation.  When possible, try 
     286 * to provide data in the form of a byte, ubyte, or char. 
    113287 * 
    114288 * Examples: 
     
    118292 * assert(approxEqual(entropyFoo, log2(3))); 
    119293 * int[] bar = [1, 2, 3, 1, 2, 3, 1, 2, 3]; 
    120  * real jointEntropyFooBar = entropy(foo, bar);  // Joint entropy of foo and bar. 
    121  * assert(approxEqual(jointEntropyFooBar, log2(9))); 
     294 * real HFooBar = entropy(joint(foo, bar));  // Joint entropy of foo and bar. 
     295 * assert(approxEqual(HFooBar, log2(9))); 
    122296 * --- 
    123297 */ 
    124 real entropy(T...)(const T data) 
    125 in { 
    126     static assert(data.length > 0); 
    127     size_t zeroLen = data[0].length; 
    128     foreach(array; data[1..$]) { 
    129         assert(array.length == zeroLen); 
    130     } 
    131 } body { 
    132  
    133     static if(data.length > 1) { 
    134         alias ObsEnt!(T) Obs; 
    135     } 
     298real entropy(T)(T data) 
     299if(isInputRange!(T) && dstats.base.hasLength!(T)) { 
     300    if(data.length <= ubyte.max) { 
     301        return entropyImpl!(ubyte, T)(data); 
     302    } else if(data.length <= ushort.max) { 
     303        return entropyImpl!(ushort, T)(data); 
     304    } else { 
     305        return entropyImpl!(uint, T)(data); 
     306    } 
     307
     308 
     309private real entropyImpl(U, T)(T data) 
     310if(ElementType!(T).sizeof > 1) {  // Generic version. 
     311    alias typeof(data.front()) E; 
    136312 
    137313    TempAlloc.frameInit; 
    138     static if(data.length > 1) { 
    139         alias StackHash!(Obs, uint) mySh; 
    140         mySh counts = mySh(data[0].length / 5); 
    141         foreach(i; 0..data[0].length) { 
    142             Obs obs; 
    143             foreach(ti, vec; data) { 
    144                 obs.tupleof[ti] = data[ti][i]; 
    145             } 
    146             counts[obs]++; 
    147         } 
    148     } else { 
    149         alias StackHash!(typeof(data[0][0]), uint) mySh; 
    150         mySh counts = mySh(data[0].length / 5); 
    151         foreach(elem; data[0]) { 
    152             counts[elem]++; 
    153         } 
    154     } 
    155  
    156     real nNeg1 = 1.0L / data[0].length; 
    157     real ans = 0; 
    158     foreach(value; counts) { 
    159         if(value == 0) 
    160             continue; 
    161         real pxi = cast(real) value * nNeg1; 
    162         ans -= pxi * log2(pxi); 
    163     } 
     314    alias StackHash!(E, U) mySh; 
     315    immutable len = data.length;  // In case length calculation is expensive. 
     316    mySh counts = mySh(len / 5); 
     317 
     318    foreach(elem; data)  { 
     319        counts[elem]++; 
     320    } 
     321 
     322    real ans = entropyCounts(counts.values, len); 
    164323    TempAlloc.frameFree; 
    165  
    166324    return ans; 
    167325} 
    168326 
    169 unittest { 
    170     int[] foo = [1, 1, 1, 2, 2, 2, 3, 3, 3]; 
    171     real entropyFoo = entropy(foo); 
    172     assert(approxEqual(entropyFoo, log2(3))); 
    173     int[] bar = [1, 2, 3, 1, 2, 3, 1, 2, 3]; 
    174     real jointEntropyFooBar = entropy(foo, bar);  // Joint entropy of foo and bar. 
    175     assert(approxEqual(jointEntropyFooBar, log2(9))); 
    176     // Testing with BigInts because they're more challenging to get right 
    177     // in terms of hashing and all. 
    178     BigInt[] bi = new BigInt[5]; 
    179     bi[0] = 1; 
    180     bi[1] = 2; 
    181     bi[2] = 3; 
    182     bi[3] = 1; 
    183     bi[4] = 1; 
    184     auto bi2 = bi.dup.reverse; 
    185     assert(approxEqual(entropy(bi, bi2), entropyCounts([2, 1, 1, 1]))); 
    186     assert(approxEqual(entropy(bi), entropyCounts([3, 1, 1]))); 
     327private real entropyImpl(U, T)(T data)  // byte/char specialization 
     328if(ElementType!(T).sizeof == 1) { 
     329    alias typeof(data.front()) E; 
     330 
     331    U[ubyte.max + 1] counts; 
     332 
     333    uint min = ubyte.max, max = 0; 
     334    foreach(elem; data)  { 
     335        static if(is(E == byte)) { 
     336            // Keep adjacent elements adjacent.  In real world use cases, 
     337            // probably will have ranges like [-1, 1]. 
     338            ubyte e = cast(ubyte) (elem) + byte.max; 
     339        } else { 
     340            ubyte e = cast(ubyte) elem; 
     341        } 
     342        counts[e]++; 
     343        if(e > max) { 
     344            max = e; 
     345        } 
     346        if(e < min) { 
     347            min = e; 
     348        } 
     349    } 
     350 
     351    return entropyCounts(counts.ptr[min..max + 1], data.length); 
     352
     353 
     354unittest { 
     355    { // Generic version. 
     356        int[] foo = [1, 1, 1, 2, 2, 2, 3, 3, 3]; 
     357        real entropyFoo = entropy(foo); 
     358        assert(approxEqual(entropyFoo, log2(3))); 
     359        int[] bar = [1, 2, 3, 1, 2, 3, 1, 2, 3]; 
     360        auto stuff = joint(foo, bar); 
     361        real jointEntropyFooBar = entropy(joint(foo, bar)); 
     362        assert(approxEqual(jointEntropyFooBar, log2(9))); 
     363    } 
     364    { // byte specialization 
     365        byte[] foo = [-1, -1, -1, 2, 2, 2, 3, 3, 3]; 
     366        real entropyFoo = entropy(foo); 
     367        assert(approxEqual(entropyFoo, log2(3))); 
     368        string bar = "ACTGGCTA"; 
     369        assert(entropy(bar) == 2); 
     370    } 
    187371    writeln("Passed entropy unittest."); 
    188372} 
     373 
     374/**Calculate the conditional entropy H(data | cond).*/ 
     375real condEntropy(T, U)(T data, U cond) 
     376if(isInputRange!(T) && isInputRange!(U)) { 
     377    return entropy(joint(data, cond)) - entropy(cond); 
     378} 
     379 
     380unittest { 
     381    // This shouldn't be easy to screw up.  Just really basic. 
     382    int[] foo = [1,2,2,1,1]; 
     383    int[] bar = [1,2,3,1,2]; 
     384    assert(approxEqual(entropy(foo) - condEntropy(foo, bar), 
     385           mutualInfo(foo, bar))); 
     386    writeln("Passed condEntroy unittest."); 
     387} 
     388 
    189389 
    190390 
    191391/**Calculates the mutual information of two vectors of observations. 
    192392 */ 
    193 real mutualInfo(T, U)(T[] x, U[] y) 
    194 in { 
    195     assert(x.length == y.length); 
    196 } body { 
    197     return entropy(x) + entropy(y) - entropy(x, y); 
     393real mutualInfo(T, U)(T x, U y) 
     394if(isInputRange!(T) && isInputRange!(U)) { 
     395    return entropy(x) + entropy(y) - entropy(joint(x, y)); 
    198396} 
    199397 
    200398unittest { 
    201399    // Values from R, but converted from base e to base 2. 
    202     assert(approxEqual(mutualInfo(bin([1,2,3,3,8], 10), 
    203            bin([8,6,7,5,3], 10)), 1.921928)); 
    204     assert(approxEqual(mutualInfo(bin([1,2,1,1,3,4,3,6], 2), 
    205            bin([2,7,9,6,3,1,7,40], 2)), .2935645)); 
    206     assert(approxEqual(mutualInfo(bin([1,2,1,1,3,4,3,6], 4), 
    207            bin([2,7,9,6,3,1,7,40], 4)), .5435671)); 
     400    assert(approxEqual(mutualInfo(bin([1,2,3,3,8].dup, 10), 
     401           bin([8,6,7,5,3].dup, 10)), 1.921928)); 
     402    assert(approxEqual(mutualInfo(bin([1,2,1,1,3,4,3,6].dup, 2), 
     403           bin([2,7,9,6,3,1,7,40].dup, 2)), .2935645)); 
     404    assert(approxEqual(mutualInfo(bin([1,2,1,1,3,4,3,6].dup, 4), 
     405           bin([2,7,9,6,3,1,7,40].dup, 4)), .5435671)); 
    208406 
    209407    writeln("Passed mutualInfo unittest."); 
    210408} 
    211409 
    212 /**Calculates the conditional mutual information I(x, y | z).  z can be any 
    213  * number of vectors.  If only two vectors are provided (equivalently if z is 
    214  * empty), this reduces to plain old mutual information.*/ 
    215 real condMutualInfo(T, U, V...)(T[] x, U[] y, V z) 
    216 in { 
    217     size_t zeroLen = z[0].length; 
    218     foreach(array; z[1..$]) { 
    219         assert(array.length == zeroLen); 
    220     } 
    221 } body { 
    222     static if(V.length == 0) 
    223         return mutualInfo(x, y); 
    224     else 
    225         return entropy(x, z) + entropy(y, z) - entropy(x, y, z) - entropy(z); 
     410/**Calculates the conditional mutual information I(x, y | z).*/ 
     411real condMutualInfo(T, U, V)(T x, U y, V z) { 
     412    return entropy(joint(x, z)) + entropy(joint(y, z)) - 
     413           entropy(joint(x, y, z)) - entropy(z); 
    226414} 
    227415 
    228416unittest { 
    229417    // Values from Matlab mi package by Hanchuan Peng. 
    230     auto res = condMutualInfo([1,2,1,2,1,2,1,2], [3,1,2,3,4,2,1,2]
    231                               [1,2,3,1,2,3,1,2]); 
     418    auto res = condMutualInfo([1,2,1,2,1,2,1,2].dup, [3,1,2,3,4,2,1,2].dup
     419                              [1,2,3,1,2,3,1,2].dup); 
    232420    assert(approxEqual(res, 0.4387)); 
    233     res = condMutualInfo([1,2,3,1,2], [2,1,3,2,1], [1,1,1,2,2], [2,2,2,1,1]); 
     421    res = condMutualInfo([1,2,3,1,2].dup, [2,1,3,2,1].dup, 
     422                         joint([1,1,1,2,2].dup, [2,2,2,1,1].dup)); 
    234423    assert(approxEqual(res, 1.3510)); 
    235424    writeln("Passed condMutualInfo unittest."); 
    236425} 
    237426 
    238 /**Calculates the entropy of any old array of observations more quickly than 
    239  * entropy(), provided that it is sorted.  If the input is sorted by more than 
    240  * one key, i.e. structs, the result will be the joint entropy of all of the 
    241  * keys.  The compFun alias will be used to compare adjacent elements and 
    242  * determine how many instances of each value exist.*/ 
    243 real entropySorted(alias compFun = "a == b", T)(T[] data) { 
     427/**Calculates the entropy of any old input range of observations more quickly 
     428 * than entropy(), provided that all equal values are adjacent.  If the input 
     429 * is sorted by more than one key, i.e. structs, the result will be the joint 
     430 * entropy of all of the keys.  The compFun alias will be used to compare 
     431 * adjacent elements and determine how many instances of each value exist.*/ 
     432real entropySorted(alias compFun = "a == b", T)(T data) 
     433if(isInputRange!(T)) { 
     434    alias ElementType!(T) E; 
    244435    alias binaryFun!(compFun) comp; 
    245     invariant size_t n = data.length; 
    246     invariant real nrNeg1 = 1.0L / n; 
     436    immutable n = data.length; 
     437    immutable nrNeg1 = 1.0L / n; 
    247438 
    248439    real sum = 0.0L; 
    249440    real nSame = 1.0L; 
    250     size_t pos = 1; 
    251     while(pos < n) { 
    252         if(comp(data[pos], data[pos - 1])) 
     441    auto last = data.front; 
     442    data.popFront; 
     443    foreach(elem; data) { 
     444        if(comp(elem, last)) 
    253445            nSame++; 
    254446        else { 
     
    257449            sum -= p * log2(p); 
    258450        } 
    259         pos++
     451        last = elem
    260452    } 
    261453    // Handle last run. 
     
    272464} 
    273465 
    274 template atomsOfArrayTuple(T...) { 
    275     mixin("alias TypeTuple!(Mutable!(" ~ (atomType!(T[0])).stringof ~ ")" ~ 
    276           atomsOfArrayTupleImpl!(T[1..$]) ~ " atomsOfArrayTuple;"); 
    277 } 
    278  
    279 template atomsOfArrayTupleImpl(T...) { 
    280     static if(T.length == 0) 
    281         const char[] atomsOfArrayTupleImpl = ")"; 
    282     else const char[] atomsOfArrayTupleImpl = ", Mutable!(" ~ 
    283                       (atomType!(T[0])).stringof ~ ")" ~ 
    284                       atomsOfArrayTupleImpl!(T[1..$]); 
    285 } 
    286  
    287 template atomType(T : T[]) { 
    288     alias T atomType; 
    289 } 
    290  
    291466// Verify that there are no TempAlloc memory leaks anywhere in the code covered 
    292467// by the unittest.  This should always be the last unittest of the module. 
  • trunk/random.d

    r17 r28  
    22 * distributions. 
    33 * 
    4  * Author:  David Simcha 
    5  * 
    6  * Copyright (c) 2009, David Simcha 
     4 * Author:  David Simcha*/ 
     5 /* 
     6 * You may use this software under your choice of either of the following 
     7 * licenses.  YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 
     8 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 
     9 * THE BSD LICENSE.  IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 
     10 * TO OBEY THE TERMS OF THE PHOBOS LICENSE.  IF YOU ARE A LAWYER LOOKING FOR 
     11 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 
     12 * GET A LIFE. 
     13 * 
     14 * ---------------------Phobos License: --------------------------------------- 
     15 * 
     16 *  Copyright (C) 2008-2009 by David Simcha. 
     17 * 
     18 *  This software is provided 'as-is', without any express or implied 
     19 *  warranty. In no event will the authors be held liable for any damages 
     20 *  arising from the use of this software. 
     21 * 
     22 *  Permission is granted to anyone to use this software for any purpose, 
     23 *  including commercial applications, and to alter it and redistribute it 
     24 *  freely, in both source and binary form, subject to the following 
     25 *  restrictions: 
     26 * 
     27 *  o  The origin of this software must not be misrepresented; you must not 
     28 *     claim that you wrote the original software. If you use this software 
     29 *     in a product, an acknowledgment in the product documentation would be 
     30 *     appreciated but is not required. 
     31 *  o  Altered source versions must be plainly marked as such, and must not 
     32 *     be misrepresented as being the original software. 
     33 *  o  This notice may not be removed or altered from any source 
     34 *     distribution. 
     35 * 
     36 * --------------------BSD License:  ----------------------------------------- 
     37 * 
     38 * Copyright (c) 2008-2009, David Simcha 
    739 * All rights reserved. 
    840 * 
     
    3062 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
    3163 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
    32  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 
     64 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
     65 */ 
    3366 
    3467 
     
    4881 
    4982///Generates a random number from normal(mean, sd) distribution. 
    50 real rNorm(RGen)(ref RGen gen, real mean = 0.0L, real sd = 1.0L) { 
    51     real p = uniform(gen, 0.0L, 1.0L)
     83real rNorm(RGen = Random)(real mean = 0.0L, real sd = 1.0L, ref RGen gen = rndGen) { 
     84    real p = uniform(0.0L, 1.0L, gen);
    5285    return invNormalCDF(p) * sd + mean; 
    5386} 
    5487 
    5588unittest { 
    56     Random gen; 
    57     gen.seed(unpredictableSeed); 
    5889    real[] observ = new real[100_000]; 
    5990    foreach(ref elem; observ) 
    60         elem = rNorm(gen); 
     91        elem = rNorm(); 
    6192    auto ksRes = ksPval(observ, parametrize!(normalCDF)(0.0L, 1.0L)); 
    6293    writeln("100k samples from normal(0, 1):  K-S P-val:  ", ksRes); 
     
    70101 
    71102///Random Cauchy-distributed number. 
    72 real rCauchy(RGen)(ref RGen gen, real X0 = 0, real gamma = 1) { 
    73     real p = uniform(gen, 0.0L, 1.0L)
     103real rCauchy(RGen = Random)(real X0 = 0, real gamma = 1, ref RGen gen = rndGen) { 
     104    real p = uniform(0.0L, 1.0L, gen);
    74105    return invCauchyCDF(p, X0, gamma); 
    75106} 
    76107 
    77108unittest { 
    78     Random gen; 
    79     gen.seed(unpredictableSeed); 
    80109    real[] observ = new real[100_000]; 
    81110    foreach(ref elem; observ) 
    82         elem = rCauchy(gen); 
     111        elem = rCauchy(); 
    83112    auto ksRes = ksPval(observ, parametrize!(cauchyCDF)(0.0L, 1.0L)); 
    84113    writeln("100k samples from Cauchy(0, 1):  K-S P-val:  ", ksRes); 
     
    91120} 
    92121 
    93 real rStudentT(RGen)(ref RGen gen, real df) { 
    94     real pVal = uniform(gen, 0.0L, 1.0L); 
     122real rStudentT(RGen = Random)(real df, ref RGen gen = rndGen) { 
     123    real pVal = uniform(0.0L, 1.0L, gen); 
    95124    return invStudentsTCDF(pVal, df); 
    96125} 
    97126 
    98127unittest { 
    99     Random gen; 
    100     gen.seed(unpredictableSeed); 
    101128    real[] observ = new real[10_000]; 
    102129    foreach(ref elem; observ) 
    103         elem = rStudentT(gen, 5); 
     130        elem = rStudentT(5); 
    104131    auto ksRes = ksPval(observ, parametrize!(studentsTCDF)(5)); 
    105132    writeln("10k samples from T(5):  K-S P-val:  ", ksRes); 
     
    113140 
    114141/**Generates a random number from the Poisson distribution.*/ 
    115 uint rPoisson(RGen)(ref RGen gen, real lambda) { 
    116     real pVal = uniform(gen, 0.0L, 1.0L); 
     142uint rPoisson(RGen = Random)(real lambda, ref RGen gen = rndGen) { 
     143    real pVal = uniform(0.0L, 1.0L, gen); 
    117144    uint guess = cast(uint) max(round( 
    118145          invNormalCDF(pVal, lambda, sqrt(lambda)) + 0.5), 0.0L); 
     
    142169 
    143170unittest { 
    144     Random gen; 
    145     gen.seed(unpredictableSeed); 
    146171    real lambda = 4L; 
    147172    uint[] observ = new uint[100_000]; 
    148173    foreach(ref elem; observ) 
    149         elem = rPoisson(gen, lambda); 
    150     auto ksRes = ksPval(observ, parametrize!(poissonCDF)(lambda)); 
    151     writeln("100k samples from poisson(", lambda, 
    152             "):  K-S P-val:  ", ksRes); 
     174        elem = rPoisson(lambda); 
     175    writeln("100k samples from poisson(", lambda, "):"); 
    153176    writeln("\tMean Expected: ", lambda, 
    154177            "  Observed:  ", mean(observ)); 
     
    164187 
    165188///Bernoulli r.v. with probability p of equaling 1. 
    166 uint rBernoulli(RGen)(ref RGen gen, real P = 0.5) { 
    167     real pVal = uniform(gen, 0.0L, 1.0L); 
     189uint rBernoulli(RGen = Random)(real P = 0.5, ref RGen gen = rndGen) { 
     190    real pVal = uniform(0.0L, 1.0L, gen); 
    168191    return cast(uint) (pVal <= P); 
    169192} 
     
    171194// bother writing a separate test for. 
    172195 
    173 import std.stdio; 
    174 long gue; 
    175  
    176196/**Generates a random number from the binionial distribution.*/ 
    177 uint rBinomial(RGen)(ref RGen gen, uint n, real p) { 
     197uint rBinomial(RGen = Random)(uint n, real p, ref RGen gen = rndGen) { 
    178198    // Generate p-value, get normal approx. as starting point, search for 
    179199    // binomial element that matches this starting point.  Normal approx. 
    180200    // is usually within 1 or 2 elements. 
    181     real pVal = uniform(gen, 0.0L, 1.0L); 
     201    real pVal = uniform(0.0L, 1.0L, gen); 
    182202    uint guess = cast(uint) max(round( 
    183203          invNormalCDF(pVal, n * p, sqrt(n * p * (1 - p)))) + 0.5, 0); 
     
    211231 
    212232unittest { 
    213     Random gen; 
    214     gen.seed(unpredictableSeed); 
    215233    real p = 0.7; 
    216234    uint n = 10; 
    217235    uint[] observ = new uint[100_000]; 
    218236    foreach(ref elem; observ) 
    219         elem = rBinomial(gen, n, p); 
     237        elem = rBinomial(n, p); 
    220238    auto ksRes = ksPval(observ, parametrize!(binomialCDF)(n, p)); 
    221     writeln("100k samples from binom.(", n, ", ", p, 
    222             "):  K-S P-val:  ", ksRes); 
     239    writeln("100k samples from binom.(", n, ", ", p, "):"); 
    223240    writeln("\tMean Expected: ", n * p, 
    224241            "  Observed:  ", mean(observ)); 
     
    237254 * Bugs:  Slow for large values of N.  Fairly naive implementation. 
    238255 * Reasonably fast, though, for small values (<100) of N.*/ 
    239 uint rHypergeometric(RGen)(ref RGen gen, uint N1, uint N2, uint N
     256uint rHypergeometric(RGen = Random)(uint N1, uint N2, uint N, ref RGen gen = rndGen
    240257in { 
    241258    assert(N <= (N1 + N2)); 
     
    247264    NR[1] = N1; 
    248265    foreach(i; 0..N) { 
    249         uint X = rBernoulli(gen, NR[1] / (NR[0] + NR[1])); 
     266        uint X = rBernoulli(NR[1] / (NR[0] + NR[1]), gen); 
    250267        NR[X] -= 1.0L; 
    251268        result += X; 
     
    255272 
    256273unittest { 
    257     Random gen; 
    258     gen.seed(unpredictableSeed); 
    259274    uint n1 = 20, n2 = 30, n = 10; 
    260275    uint[] observ = new uint[100_000]; 
    261276    foreach(ref elem; observ) 
    262         elem = rHypergeometric(gen, n1, n2, n); 
     277        elem = rHypergeometric(n1, n2, n); 
    263278    auto ksRes = ksPval(observ, parametrize!(hypergeometricCDF)(n1, n2, n)); 
    264     writeln("100k samples from hypergeom.(", n1, ", ", n2, ", ", n, 
    265             "):  K-S P-val:  ", ksRes); 
     279    writeln("100k samples from hypergeom.(", n1, ", ", n2, ", ", n, "):"); 
    266280    writeln("\tMean Expected: ", n * cast(real) n1 / (n1 + n2), 
    267281            "  Observed:  ", mean(observ)); 
     
    278292 * set n = 1, since geometric distribution is just a special case of neg. 
    279293 * binomial.*/ 
    280 uint rNegBinom(RGen)(ref RGen gen, uint n, real p
     294uint rNegBinom(RGen = Random)(uint n, real p, ref RGen gen = rndGen
    281295in { 
    282296    assert(p >= 0 && p <= 1); 
    283297    assert(n > 0); 
    284298} body { 
    285     real pVal = uniform(gen, 0.0L, 1.0L); 
     299    real pVal = uniform(0.0L, 1.0L, gen); 
    286300    // Normal or gamma approx, then adjust. 
    287301    real mean = n * (1 - p) / p; 
     
    348362    uint[] observ = new uint[100_000]; 
    349363    foreach(ref elem; observ) 
    350         elem = rNegBinom(gen, n, p); 
    351     auto ksRes = ksPval(observ, parametrize!(negBinomCDF)(n, p)); 
    352     writeln("100k samples from neg. binom.(", n,", ",  p, 
    353             "):  K-S P-val:  ", ksRes); 
     364        elem = rNegBinom(n, p); 
     365    writeln("100k samples from neg. binom.(", n,", ",  p, "):"); 
    354366    writeln("\tMean Expected: ", n * (1 - p) / p, 
    355367            "  Observed:  ", mean(observ)); 
     
    365377 
    366378/// 
    367 real rLaplace(RGen)(ref RGen gen, real mu = 0, real b = 1) { 
    368     real p = uniform(gen, 0.0L, 1.0L); 
     379real rLaplace(RGen = Random)(real mu = 0, real b = 1, ref RGen gen = rndGen) { 
     380    real p = uniform(0.0L, 1.0L, gen); 
    369381    return invLaplaceCDF(p, mu, b); 
    370382} 
     
    375387    real[] observ = new real[100_000]; 
    376388    foreach(ref elem; observ) 
    377         elem = rLaplace(gen); 
     389        elem = rLaplace(); 
    378390    auto ksRes = ksPval(observ, parametrize!(laplaceCDF)(0.0L, 1.0L)); 
    379391    writeln("100k samples from Laplace(0, 1):  K-S P-val:  ", ksRes); 
     
    389401 * gamma distribution, with b = 1.  However, it's a common special 
    390402 * case and this function is a lot faster than the more general rGamma.*/ 
    391 real rExponential(RGen)(ref RGen gen, real lambda) { 
    392     real p = uniform(gen, 0.0L, 1.0L); 
     403real rExponential(RGen = Random)(real lambda, ref RGen gen = rndGen) { 
     404    real p = uniform(0.0L, 1.0L, gen); 
    393405    return -log(p) / lambda; 
    394406} 
    395407 
    396408unittest { 
    397     Random gen; 
    398     gen.seed(unpredictableSeed); 
    399409    real[] observ = new real[100_000]; 
    400410    foreach(ref elem; observ) 
    401         elem = rExponential(gen, 2.0L); 
     411        elem = rExponential(2.0L); 
    402412    auto ksRes = ksPval(observ, parametrize!(gammaCDF)(2, 1)); 
    403413    writeln("100k samples from exponential(2):  K-S P-val:  ", ksRes); 
     
    414424 * this is a common special case, a separate exponential random 
    415425 * number generator is provided for speed.*/ 
    416 real rGamma(RGen)(ref RGen gen, real a = 1, real b = 1) { 
    417     real p = uniform(gen, 0.0L, 1.0L); 
     426real rGamma(RGen = Random)(real a = 1, real b = 1, ref RGen gen = rndGen) { 
     427    real p = uniform(0.0L, 1.0L, gen); 
    418428    return invGammaCDFR(p, a, b); 
    419429} 
    420430 
    421431unittest { 
    422     Random gen; 
    423     gen.seed(unpredictableSeed); 
    424432    real[] observ = new real[100_000]; 
    425433    foreach(ref elem; observ) 
    426         elem = rGamma(gen, 2.0L, 3.0L); 
     434        elem = rGamma(2.0L, 3.0L); 
    427435    auto ksRes = ksPval(observ, parametrize!(gammaCDF)(2, 3)); 
    428436    writeln("100k samples from gamma(2, 3):  K-S P-val:  ", ksRes); 
  • trunk/sort.d

    r23 r28  
    2323 * --- 
    2424 * 
    25  * Author:  David Simcha 
    26  * 
    27  * Copyright (c) 2009, David Simcha 
     25 * Author:  David Simcha*/ 
     26 /* 
     27 * You may use this software under your choice of either of the following 
     28 * licenses.  YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 
     29 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 
     30 * THE BSD LICENSE.  IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 
     31 * TO OBEY THE TERMS OF THE PHOBOS LICENSE.  IF YOU ARE A LAWYER LOOKING FOR 
     32 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 
     33 * GET A LIFE. 
     34 * 
     35 * ---------------------Phobos License: --------------------------------------- 
     36 * 
     37 *  Copyright (C) 2008-2009 by David Simcha. 
     38 * 
     39 *  This software is provided 'as-is', without any express or implied 
     40 *  warranty. In no event will the authors be held liable for any damages 
     41 *  arising from the use of this software. 
     42 * 
     43 *  Permission is granted to anyone to use this software for any purpose, 
     44 *  including commercial applications, and to alter it and redistribute it 
     45 *  freely, in both source and binary form, subject to the following 
     46 *  restrictions: 
     47 * 
     48 *  o  The origin of this software must not be misrepresented; you must not 
     49 *     claim that you wrote the original software. If you use this software 
     50 *     in a product, an acknowledgment in the product documentation would be 
     51 *     appreciated but is not required. 
     52 *  o  Altered source versions must be plainly marked as such, and must not 
     53 *     be misrepresented as being the original software. 
     54 *  o  This notice may not be removed or altered from any source 
     55 *     distribution. 
     56 * 
     57 * --------------------BSD License:  ----------------------------------------- 
     58 * 
     59 * Copyright (c) 2008-2009, David Simcha 
    2860 * All rights reserved. 
    2961 * 
     
    5183 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
    5284 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
    53  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 
     85 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
     86 */ 
    5487 
    5588module dstats.sort; 
    5689 
    5790import std.traits, std.algorithm, std.math, std.functional, std.math, 
    58        std.typetuple
     91       std.typetuple, std.range, std.array
    5992 
    6093import dstats.alloc; 
     
    6295version(unittest) { 
    6396    import std.stdio, std.random; 
    64     Random gen; 
    6597 
    6698    void main (){ 
     
    68100} 
    69101 
    70 void rotateLeft(T)(T[] input) { 
     102void rotateLeft(T)(T input) 
     103if(isRandomAccessRange!(T)) { 
    71104    if(input.length < 2) return; 
    72     T temp = input[0]; 
     105    ElementType!(T) temp = input[0]; 
    73106    foreach(i; 1..input.length) { 
    74107        input[i-1] = input[i]; 
     
    77110} 
    78111 
    79 void rotateRight(T)(T[] input) { 
     112void rotateRight(T)(T input) 
     113if(isRandomAccessRange!(T)) { 
    80114    if(input.length < 2) return; 
    81     T temp = input[$-1]; 
     115    ElementType!(T) temp = input[$-1]; 
    82116    for(size_t i = input.length - 1; i > 0; i--) { 
    83117        input[i] = input[i-1]; 
    84118    } 
    85119    input[0] = temp; 
    86 } 
    87  
    88 // For testing purposes for stable sorts.  Shuffles N arrays in lockstep. 
    89 version(unittest) { 
    90     void randomMultiShuffle(SomeRandomGen, T...)(ref SomeRandomGen r, T array) { 
    91         foreach (i; 0 .. array[0].length) { 
    92             // generate a random number i .. n 
    93             immutable which = i + uniform!(size_t)(r, 0u, array[0].length - i); 
    94             foreach(ti, element; array) { 
    95                 swap(element[i], element[which]); 
    96             } 
    97         } 
    98     } 
    99120} 
    100121 
     
    242263 
    243264unittest { 
    244     gen.seed(unpredictableSeed); 
    245265    {  // Test integer. 
    246266        uint[] test = new uint[1_000]; 
    247267        foreach(ref e; test) { 
    248             e = uniform(gen, 0, 100); 
     268            e = uniform(0, 100); 
    249269        } 
    250270        auto test2 = test.dup; 
    251271        foreach(i; 0..1_000) { 
    252             randomMultiShuffle(gen, test, test2); 
    253             uint len = uniform(gen, 0, 1_000); 
     272            randomShuffle(zip(test, test2)); 
     273            uint len = uniform(0, 1_000); 
    254274            qsort(test[0..len], test2[0..len]); 
    255275            assert(isSorted(test[0..len])); 
     
    260280        double[] test = new double[1_000]; 
    261281        foreach(ref e; test) { 
    262             e = uniform(gen, 0.0, 100_000); 
     282            e = uniform(0.0, 100_000); 
    263283        } 
    264284        auto test2 = test.dup; 
    265285        foreach(i; 0..1_000) { 
    266             randomMultiShuffle(gen, test, test2); 
    267             uint len = uniform(gen, 0, 1_000); 
     286            randomShuffle(zip(test, test2)); 
     287            uint len = uniform(0, 1_000); 
    268288            qsort!("a > b")(test[0..len], test2[0..len]); 
    269289            assert(isSorted!("a > b")(test[0..len])); 
     
    330350    uint[] temp1 = new uint[1_000], temp2 = new uint[1_000]; 
    331351    foreach(ref e; test) { 
    332         e = uniform(gen, 0, 100);  //Lots of ties. 
     352        e = uniform(0, 100);  //Lots of ties. 
    333353    } 
    334354    foreach(i; 0..100) { 
     
    337357            e = j; 
    338358        } 
    339         randomMultiShuffle(gen, test); 
    340         uint len = uniform(gen, 0, 1_000); 
     359        randomShuffle(test); 
     360        uint len = uniform(0, 1_000); 
    341361        // Testing bubble sort distance against bubble sort, 
    342362        // since bubble sort distance computed by bubble sort 
     
    361381            e = j; 
    362382        } 
    363         randomMultiShuffle(gen, test); 
    364         uint len = uniform(gen, 0, 1_000); 
     383        randomShuffle(test); 
     384        uint len = uniform(0, 1_000); 
    365385        if(i & 1)  // Test both temp and non-temp branches. 
    366386            mergeSort(test[0..len], stability[0..len]); 
     
    378398        double[] testL = new double[1_000]; 
    379399        foreach(ref e; testL) { 
    380             e = uniform(gen, 0.0, 100_000); 
     400            e = uniform(0.0, 100_000); 
    381401        } 
    382402        auto testL2 = testL.dup; 
    383403        foreach(i; 0..1_000) { 
    384             randomMultiShuffle(gen, testL, testL2); 
    385             uint len = uniform(gen, 0, 1_000); 
     404            randomShuffle(zip(testL, testL2)); 
     405            uint len = uniform(0, 1_000); 
    386406            mergeSort!("a > b")(testL[0..len], testL2[0..len]); 
    387407            assert(isSorted!("a > b")(testL[0..len])); 
     
    562582    uint[] test = new uint[1_000], stability = new uint[1_000]; 
    563583    foreach(ref e; test) { 
    564         e = uniform(gen, 0, 100);  //Lots of ties. 
     584        e = uniform(0, 100);  //Lots of ties. 
    565585    } 
    566586    uint[] test2 = test.dup; 
     
    569589            e = j; 
    570590        } 
    571         randomMultiShuffle(gen, test, test2); 
    572         uint len = uniform(gen, 0, 1_000); 
     591        randomShuffle(zip(test, test2)); 
     592        uint len = uniform(0, 1_000); 
    573593        mergeSortInPlace(test[0..len], test2[0..len], stability[0..len]); 
    574594        assert(isSorted(test[0..len])); 
     
    644664 
    645665    foreach(array; data) { 
    646         rotate(array[half1..middle + half2], array.ptr + middle); 
     666        bringToFront(array[half1..middle], array[middle..middle + half2]); 
    647667    } 
    648668    size_t newMiddle = half1 + half2; 
     
    688708 
    689709unittest { 
    690     gen.seed(unpredictableSeed); 
    691710    uint[] test = new uint[1_000]; 
    692711    foreach(ref e; test) { 
    693         e = uniform(gen, 0, 100_000); 
     712        e = uniform(0, 100_000); 
    694713    } 
    695714    auto test2 = test.dup; 
    696715    foreach(i; 0..1_000) { 
    697         randomMultiShuffle(gen, test, test2); 
    698         uint len = uniform(gen, 0, 1_000); 
     716        randomShuffle(zip(test, test2)); 
     717        uint len = uniform(0, 1_000); 
    699718        heapSort(test[0..len], test2[0..len]); 
    700719        assert(isSorted(test[0..len])); 
     
    769788 
    770789unittest { 
    771     gen.seed(unpredictableSeed); 
    772790    uint[] test = new uint[100], stability = new uint[100]; 
    773791    foreach(ref e; test) { 
    774         e = uniform(gen, 0, 100);  //Lots of ties. 
     792        e = uniform(0, 100);  //Lots of ties. 
    775793    } 
    776794    foreach(i; 0..1_000) { 
     
    779797            e = j; 
    780798        } 
    781         randomMultiShuffle(gen, test); 
    782         uint len = uniform(gen, 0, 100); 
     799        randomShuffle(test); 
     800        uint len = uniform(0, 100); 
    783801        // Testing bubble sort distance against bubble sort, 
    784802        // since bubble sort distance computed by bubble sort 
     
    932950 
    933951unittest { 
    934     gen.seed(unpredictableSeed); 
    935952    enum n = 1000; 
    936953    uint[] test = new uint[n]; 
     
    938955    uint[] lockstep = new uint[n]; 
    939956    foreach(ref e; test) { 
    940         e = uniform(gen, 0, 1000); 
     957        e = uniform(0, 1000); 
    941958    } 
    942959    foreach(i; 0..1_000) { 
    943         randomShuffle(test, gen); 
    944960        test2[] = test[]; 
    945961        lockstep[] = test[]; 
    946         uint len = uniform(gen, 0, n - 1) + 1; 
     962        uint len = uniform(0, n - 1) + 1; 
    947963        qsort!("a > b")(test2[0..len]); 
    948         int k = uniform(gen, 0, len); 
     964        int k = uniform(0, len); 
    949965        auto qsRes = partitionK!("a > b")(test[0..len], lockstep[0..len], k); 
    950966        assert(qsRes == test2[k]); 
     
    960976} 
    961977 
    962 /**Given a set of data points entered through the addElement function, 
     978/**Given a set of data points entered through the put function, this output range 
    963979 * maintains the invariant that the top N according to compFun will be 
    964980 * contained in the data structure.  Uses a heap internally, O(log N) insertion 
     
    977993 * randomShuffle(nums, gen); 
    978994 * foreach(n; nums) { 
    979  *     less.addElement(n); 
    980  *     more.addElement(n); 
     995 *     less.put(n); 
     996 *     more.put(n); 
    981997 * } 
    982998 *  assert(less.getSorted == [0U, 1,2,3,4,5,6,7,8,9]); 
     
    9991015 
    10001016    /** Insert an element into the topN struct.*/ 
    1001     void addElement(T elem) { 
     1017    void put(T elem) { 
    10021018        if(nAdded < n) { 
    10031019            nodes[nAdded] = elem; 
     
    10411057        randomShuffle(nums, gen); 
    10421058        foreach(n; nums) { 
    1043             less.addElement(n); 
    1044             more.addElement(n); 
     1059            less.put(n); 
     1060            more.put(n); 
    10451061        } 
    10461062        assert(less.getSorted == [0U, 1,2,3,4,5,6,7,8,9]); 
     
    10521068        randomShuffle(nums, gen); 
    10531069        foreach(n; nums[0..5]) { 
    1054             less.addElement(n); 
    1055             more.addElement(n); 
     1070            less.put(n); 
     1071            more.put(n); 
    10561072        } 
    10571073        assert(less.getSorted == qsort!("a < b")(nums[0..5])); 
  • trunk/stats.d

    r17 r28  
    1 /**Convenience module that simply publicly imports everything else. 
    2  * 
    3  * Author:  David Simcha 
    4  * 
    5  * Copyright (c) 2009, David Simcha 
    6  * All rights reserved. 
    7  * 
    8  * Redistribution and use in source and binary forms, with or without 
    9  * modification, are permitted provided that the following conditions are met: 
    10  * 
    11  *     * Redistributions of source code must retain the above copyright 
    12  *       notice, this list of conditions and the following disclaimer. 
    13  * 
    14  *     * Redistributions in binary form must reproduce the above copyright 
    15  *       notice, this list of conditions and the following disclaimer in the 
    16  *       documentation and/or other materials provided with the distribution. 
    17  * 
    18  *     * Neither the name of the authors nor the 
    19  *       names of its contributors may be used to endorse or promote products 
    20  *       derived from this software without specific prior written permission. 
    21  * 
    22  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 
    23  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
    24  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
    25  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY 
    26  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
    27  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
    28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 
    29  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
    30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
    31  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 
     1/**Convenience module that simply publicly imports everything else.*/ 
    322 
    333module dstats.stats; 
  • trunk/summary.d

    r20 r28  
    1 /**Summary dstats such as mean, median, sum, variance, skewness, kurtosis. 
     1/**Summary statistics such as mean, median, sum, variance, skewness, kurtosis. 
     2 * Except for median, which cannot be calculated online, all summary statistics 
     3 * have both an input range interface and an output range interface. 
    24 * 
    35 * Bugs:  This whole module assumes that input will be reals or types implicitly 
    46 *        convertible to real.  No allowances are made for user-defined numeric 
    5  *        types such as BigInts.  This is necessary for simplicity. 
    6  * 
    7  * Author:  David Simcha 
    8  * 
    9  * Copyright (c) 2009, David Simcha 
     7 *        types such as BigInts.  This is necessary for simplicity.  However, 
     8 *        if you have a function that converts your data to reals, most of 
     9 *        these functions work with any input range, so you can simply map 
     10 *        this function onto your range. 
     11 * 
     12 * Author:  David Simcha*/ 
     13 /* 
     14 * You may use this software under your choice of either of the following 
     15 * licenses.  YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 
     16 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 
     17 * THE BSD LICENSE.  IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 
     18 * TO OBEY THE TERMS OF THE PHOBOS LICENSE.  IF YOU ARE A LAWYER LOOKING FOR 
     19 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 
     20 * GET A LIFE. 
     21 * 
     22 * ---------------------Phobos License: --------------------------------------- 
     23 * 
     24 *  Copyright (C) 2008-2009 by David Simcha. 
     25 * 
     26 *  This software is provided 'as-is', without any express or implied 
     27 *  warranty. In no event will the authors be held liable for any damages 
     28 *  arising from the use of this software. 
     29 * 
     30 *  Permission is granted to anyone to use this software for any purpose, 
     31 *  including commercial applications, and to alter it and redistribute it 
     32 *  freely, in both source and binary form, subject to the following 
     33 *  restrictions: 
     34 * 
     35 *  o  The origin of this software must not be misrepresented; you must not 
     36 *     claim that you wrote the original software. If you use this software 
     37 *     in a product, an acknowledgment in the product documentation would be 
     38 *     appreciated but is not required. 
     39 *  o  Altered source versions must be plainly marked as such, and must not 
     40 *     be misrepresented as being the original software. 
     41 *  o  This notice may not be removed or altered from any source 
     42 *     distribution. 
     43 * 
     44 * --------------------BSD License:  ----------------------------------------- 
     45 * 
     46 * Copyright (c) 2008-2009, David Simcha 
    1047 * All rights reserved. 
    1148 * 
     
    3370 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
    3471 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
    35  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 
     72 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
     73 */ 
    3674 
    3775 
    3876module dstats.summary; 
    3977 
    40 import std.algorithm, std.functional, std.conv, std.string; 
     78import std.algorithm, std.functional, std.conv, std.string, std.range, 
     79       std.array; 
    4180 
    4281import dstats.sort, dstats.base, dstats.alloc; 
    4382 
    4483version(unittest) { 
    45     import std.stdio, std.random; 
    46  
    47     Random gen; 
     84    import std.stdio, std.random, std.algorithm, std.conv; 
    4885 
    4986    void main() { 
     
    5188} 
    5289 
    53 /**Finds median in O(N) time on average.  In the case of an even number of 
    54  * elements, the mean of the two middle elements is returned.  This is a 
    55  * convenience founction designed specifically for numeric types, where the 
    56  * averaging of the two middle elements is desired.  A more general selection 
    57  * algorithm that can handle any type with a total ordering, as well as 
    58  * selecting any position in the ordering, can be found at 
     90/**Finds median of an input range in O(N) time on average.  In the case of an 
     91 * even number of elements, the mean of the two middle elements is returned. 
     92 * This is a convenience founction designed specifically for numeric types, 
     93 * where the averaging of the two middle elements is desired.  A more general 
     94 * selection algorithm that can handle any type with a total ordering, as well 
     95 * as selecting any position in the ordering, can be found at 
    5996 * dstats.sort.quickSelect() and dstats.sort.partitionK(). 
    6097 * Allocates memory, does not reorder input data.*/ 
    61 real median(T)(const T[] data) { 
    62     auto TAState = TempAlloc.getState; 
    63     auto dataDup = data.tempdup(TAState);  scope(exit) TempAlloc.free(TAState); 
     98real median(T)(T data) 
     99if(realInput!(T)) { 
     100    // Allocate once on TempAlloc if possible, i.e. if we know the length. 
     101    // This can be done on TempAlloc.  Otherwise, have to use GC heap 
     102    // and appending. 
     103    auto dataDup = tempdup(data); 
     104    scope(exit) TempAlloc.free; 
    64105    return medianPartition(dataDup); 
    65106} 
     
    69110 * median, and elements larger than the median will have larger indices than 
    70111 * that of the median. Useful both for its partititioning and to avoid 
    71  * memory allocations.*/ 
    72 real medianPartition(T)(T[] data) 
     112 * memory allocations.  Requires a random access range with swappable 
     113 * elements.*/ 
     114real medianPartition(T)(T data) 
     115if(isRandomAccessRange!(T) && 
     116   is(ElementType!(T) : real) && 
     117   hasSwappableElements!(T) && 
     118   dstats.base.hasLength!(T)) 
    73119in { 
    74120    assert(data.length > 0); 
     
    77123    // with an index larger than the lower median, after the array is 
    78124    // partially sorted. 
    79     static T min(T[] data) { 
    80         T min = data[0]; 
    81         foreach(d; data[1..$]) { 
    82             if(d < min) 
    83                 min = d; 
    84         } 
    85         return min; 
    86     } 
    87  
    88125    if(data.length == 1) { 
    89126        return data[0]; 
     
    92129    } else { 
    93130        auto lower = partitionK(data, data.length / 2 - 1); 
    94         auto upper = min(data[data.length / 2..$]); 
     131        auto upper = ElementType!(T).max; 
     132 
     133        // Avoid requiring slicing to be supported. 
     134        foreach(i; data.length / 2..data.length) { 
     135            if(data[i] < upper) { 
     136                upper = data[i]; 
     137            } 
     138        } 
    95139        return lower * 0.5L + upper * 0.5L; 
    96140    } 
     
    105149    } 
    106150 
    107     gen.seed(unpredictableSeed); 
    108151    float[] test = new float[1000]; 
    109152    uint upperBound, lowerBound; 
    110153    foreach(testNum; 0..1000) { 
    111154        foreach(ref e; test) { 
    112             e = uniform(gen, 0f, 1000f); 
     155            e = uniform(0f, 1000f); 
    113156        } 
    114157        do { 
    115             upperBound = uniform(gen, 0u, test.length); 
    116             lowerBound = uniform(gen, 0u, test.length); 
     158            upperBound = uniform(0u, test.length); 
     159            lowerBound = uniform(0u, test.length); 
    117160        } while(lowerBound == upperBound); 
    118161        if(lowerBound > upperBound) { 
     
    126169        assert(approxEqual(quickRes, accurateRes)); 
    127170    } 
     171 
     172    // Make sure everything works with lowest common denominator range type. 
     173    struct Count { 
     174        uint num; 
     175        uint upTo; 
     176        uint front() { 
     177            return num; 
     178        } 
     179        void popFront() { 
     180            num++; 
     181        } 
     182        bool empty() { 
     183            return num >= upTo; 
     184        } 
     185    } 
     186 
     187    Count a; 
     188    a.upTo = 100; 
     189    assert(approxEqual(median(a), 49.5)); 
    128190    writeln("Passed median unittest."); 
    129191} 
    130192 
    131 /// 
    132 real mean(T)(const T[] data) { 
     193/**Finds the arithmetic mean of any input range whose elements are implicitly 
     194 * convertible to real.*/ 
     195real mean(T)(T data) 
     196if(realInput!(T)) { 
    133197    OnlineMean meanCalc; 
    134198    foreach(element; data) { 
    135         meanCalc.addElement(element); 
     199        meanCalc.put(element); 
    136200    } 
    137201    return meanCalc.mean; 
    138202} 
    139203 
    140 /**Struct to calculate the mean online.  Getter for mean costs a branch to 
     204/**Output range to calculate the mean online.  Getter for mean costs a branch to 
    141205 * check for N == 0.  This struct uses O(1) space and does *NOT* store the 
    142206 * individual elements. 
     
    145209 * --- 
    146210 * OnlineMean summ; 
    147  * summ.addElement(1); 
    148  * summ.addElement(2); 
    149  * summ.addElement(3); 
    150  * summ.addElement(4); 
    151  * summ.addElement(5); 
     211 * summ.put(1); 
     212 * summ.put(2); 
     213 * summ.put(3); 
     214 * summ.put(4); 
     215 * summ.put(5); 
    152216 * assert(summ.mean == 3); 
    153217 * ---*/ 
     
    157221    real k = 0; 
    158222public: 
    159     /// 
    160     void addElement(real element) { 
     223    /// Allow implicit casting to real, by returning the current mean. 
     224    alias mean this; 
     225 
     226    /// 
     227    void put(real element) { 
    161228        result += (element - result) / ++k; 
    162229    } 
     
    178245} 
    179246 
    180 ///Returns mean of absolute values. 
    181 real absMean(T)(const T[] data) { 
    182     OnlineMean meanCalc; 
    183     foreach(element; data) { 
    184         meanCalc.addElement(abs(element)); 
    185     } 
    186     return meanCalc.mean; 
    187 
    188  
    189 /**User has option of making U a different type than T to prevent overflows 
     247/// 
     248struct OnlineGeometricMean { 
     249private: 
     250    OnlineMean m; 
     251public: 
     252    ///Allow implicit casting to real, by returning current geometric mean. 
     253    alias geoMean this; 
     254 
     255    /// 
     256    void put(real element) { 
     257        m.put(log2(element)); 
     258    } 
     259 
     260    /// 
     261    real geoMean() const { 
     262        return exp2(m.mean); 
     263    } 
     264 
     265    /// 
     266    real N() const { 
     267        return m.k; 
     268    } 
     269 
     270    /// 
     271    string toString() { 
     272        return to!(string)(geoMean); 
     273    } 
     274
     275 
     276/// 
     277real geometricMean(T)(T data) 
     278if(realInput!(T)) { 
     279    OnlineGeometricMean m; 
     280    foreach(elem; data) { 
     281        m.put(elem); 
     282    } 
     283    return m.geoMean; 
     284
     285 
     286unittest { 
     287    string[] data = ["1", "2", "3", "4", "5"]; 
     288    auto result = geometricMean(map!(to!(uint, string))(data)); 
     289    assert(approxEqual(result, 2.60517)); 
     290    writeln("Passed geometricMean unittest."); 
     291
     292 
     293 
     294/**Finds the sum of an input range whose elements implicitly convert to real. 
     295 * User has option of making U a different type than T to prevent overflows 
    190296 * on large array summing operations.  However, by default, return type is 
    191297 * T (same as input type).*/ 
    192 U sum(T, U = Mutable!(T))(const T[] data) { 
     298U sum(T, U = Unqual!(ElementType!(T)))(T data) 
     299if(realInput!(T)) { 
    193300    U sum = 0; 
    194301    foreach(value; data) { 
     
    198305} 
    199306 
    200 /**User has option of making U a different type than T 
    201  * to prevent overflows on large array summing operations. 
    202  * However, by default, return type is T (same as input type).*/ 
    203 U absSum(T, U = Mutable!(T))(const T[] data) { 
    204     U sum=0; 
    205     foreach(value; data) { 
    206         sum+=abs(value); 
    207     } 
    208     return sum; 
    209 } 
    210  
    211307unittest { 
    212     assert(sum([1,2,3,4,5])==15); 
    213     assert(sum([40.0, 40.1, 5.2])==85.3); 
    214     assert(mean([1,2,3])==2); 
    215     assert(mean([1.0, 2.0, 3.0])==2.0); 
    216     assert(mean([1, 2, 5, 10, 17]) == 7); 
    217     assert(absSum([-1, 2, 3, -4, 5]) == 15); 
    218     assert(absMean([-1, 2, 3, -4, 5]) == 3); 
     308    assert(sum(cast(int[]) [1,2,3,4,5])==15); 
     309    assert(approxEqual( sum(cast(int[]) [40.0, 40.1, 5.2]), 85.3)); 
     310    assert(mean(cast(int[]) [1,2,3]) == 2); 
     311    assert(mean(cast(int[]) [1.0, 2.0, 3.0]) == 2.0); 
     312    assert(mean(cast(int[]) [1, 2, 5, 10, 17]) == 7); 
    219313    writefln("Passed sum/mean unittest."); 
    220314} 
    221315 
    222316 
    223 /**Allows computation of mean, stdev, variance online.  Getter methods 
     317/**Outpu range to compute mean, stdev, variance online.  Getter methods 
    224318 * for stdev, var cost a few floating point ops.  Getter for mean costs 
    225319 * a single branch to check for N == 0.  Relatively expensive floating point 
     
    230324 * --- 
    231325 * OnlineMeanSD summ; 
    232  * summ.addElement(1); 
    233  * summ.addElement(2); 
    234  * summ.addElement(3); 
    235  * summ.addElement(4); 
    236  * summ.addElement(5); 
     326 * summ.put(1); 
     327 * summ.put(2); 
     328 * summ.put(3); 
     329 * summ.put(4); 
     330 * summ.put(5); 
    237331 * assert(summ.mean == 3); 
    238332 * assert(summ.stdev == sqrt(2.5)); 
     
    246340public: 
    247341    /// 
    248     void addElement(real element) { 
     342    void put(real element) { 
    249343        real kNeg1 = 1.0L / ++_k; 
    250344        _var += (element * element - _var) * kNeg1; 
     
    292386} 
    293387 
    294 /// 
    295 real variance(T)(const T[] data) { 
     388/**Finds the variance of an input range with members implicitly convertible 
     389 * to reals.*/ 
     390real variance(T)(T data) 
     391if(realInput!(T)) { 
    296392    return meanVariance(data).SD; 
    297393} 
    298394 
    299 /// Calculates both mean and variance in one pass, returns a MeanSD struct. 
    300 MeanSD meanVariance(T)(const T[] data) { 
     395/**Calculates both mean and variance of an input range, returns a MeanSD 
     396 * struct.*/ 
     397MeanSD meanVariance(T)(T data) 
     398if(realInput!(T)) { 
    301399    OnlineMeanSD meanSDCalc; 
    302400    foreach(element; data) { 
    303         meanSDCalc.addElement(element); 
     401        meanSDCalc.put(element); 
    304402    } 
    305403 
     
    307405} 
    308406 
    309 /// Computes mean and standard deviation in one pass, returns both in a struct. 
    310 MeanSD meanStdev(T)(const T[] data) { 
     407/**Calculates both mean and standard deviation of an input range, returns a 
     408 * MeanSD struct.*/ 
     409MeanSD meanStdev(T)(T data) 
     410if(realInput!(T)) { 
    311411    auto ret = meanVariance(data); 
    312412    ret.SD = sqrt(ret.SD); 
     
    314414} 
    315415 
    316 /// 
    317 real stdev(T)(const T[] data) { 
     416/**Calculate the standard deviation of an input range with members 
     417 * implicitly converitble to real.*/ 
     418real stdev(T)(T data) 
     419if(realInput!(T)) { 
    318420    return meanStdev(data).SD; 
    319421} 
    320422 
    321423unittest { 
    322     auto res = meanStdev([3, 1, 4, 5]); 
     424    auto res = meanStdev(cast(int[]) [3, 1, 4, 5]); 
    323425    assert(approxEqual(res.SD, 1.7078)); 
    324426    assert(approxEqual(res.mean, 3.25)); 
    325     res = meanStdev([1.0, 2.0, 3.0, 4.0, 5.0]); 
     427    res = meanStdev(cast(double[]) [1.0, 2.0, 3.0, 4.0, 5.0]); 
    326428    assert(approxEqual(res.SD, 1.5811)); 
    327429    assert(approxEqual(res.mean, 3)); 
     
    329431} 
    330432 
    331 /// 
    332 real percentVariance(T) (const T[] data) { 
    333     MeanSD stats = meanStdev(data); 
    334     real PV = 100 * abs(stats.SD / stats.mean); 
    335     return abs(PV); 
    336 
    337  
    338 /**Allows computation of mean, stdev, variance, skewness, kurtosis, min, and 
     433/**Output range to compute mean, stdev, variance, skewness, kurtosis, min, and 
    339434 * max online. Using this struct is relatively expensive, so if you just need 
    340435 * mean and/or stdev, try OnlineMeanSD or OnlineMean. Getter methods for stdev, 
     
    347442 * --- 
    348443 * OnlineSummary summ; 
    349  * summ.addElement(1); 
    350  * summ.addElement(2); 
    351  * summ.addElement(3); 
    352  * summ.addElement(4); 
    353  * summ.addElement(5); 
     444 * summ.put(1); 
     445 * summ.put(2); 
     446 * summ.put(3); 
     447 * summ.put(4); 
     448 * summ.put(5); 
    354449 * assert(summ.N == 5); 
    355450 * assert(summ.mean == 3); 
     
    371466public: 
    372467    /// 
    373     void addElement(real element) { 
    374         invariant real kNeg1 = 1.0L / ++_k; 
     468    void put(real element) { 
     469        immutable real kNeg1 = 1.0L / ++_k; 
    375470        _min = (element < _min) ? element : _min; 
    376471        _max = (element > _max) ? element : _max; 
     
    438533 * the variance is due to infrequent, large deviations from the mean.  Low 
    439534 * kurtosis means that the variance is due to frequent, small deviations from 
    440  * the mean.  The normal distribution is defined as having kurtosis of 0.*/ 
    441 real kurtosis(T)(const T[] data) { 
     535 * the mean.  The normal distribution is defined as having kurtosis of 0. 
     536 * Input must be an input range with elements implicitly convertible to real.*/ 
     537real kurtosis(T)(T data) 
     538if(realInput!(T)) { 
    442539    OnlineSummary kCalc; 
    443540    foreach(elem; data) { 
    444         kCalc.addElement(elem); 
     541        kCalc.put(elem); 
    445542    } 
    446543    return kCalc.kurtosis; 
     
    449546unittest { 
    450547    // Values from Octave. 
    451     assert(approxEqual(kurtosis([1, 1, 1, 1, 10]), -.92)); 
    452     assert(approxEqual(kurtosis([2.5, 3.5, 4.5, 5.5]), -2.0775)); 
    453     assert(approxEqual(kurtosis([1,2,2,2,2,2,100]), 0.79523)); 
     548    assert(approxEqual(kurtosis([1, 1, 1, 1, 10].dup), -.92)); 
     549    assert(approxEqual(kurtosis([2.5, 3.5, 4.5, 5.5].dup), -2.0775)); 
     550    assert(approxEqual(kurtosis([1,2,2,2,2,2,100].dup), 0.79523)); 
    454551    writefln("Passed kurtosis unittest."); 
    455552} 
     
    458555 * means that the right tail is longer/fatter than the left tail.  Negative 
    459556 * skewness means the left tail is longer/fatter than the right tail.  Zero 
    460  * skewness indicates a symmetrical distribution.*/ 
    461 real skewness(T)(const T[] data) { 
     557 * skewness indicates a symmetrical distribution.  Input must be an input 
     558 * range with elements implicitly convertible to real.*/ 
     559real skewness(T)(T data) 
     560if(realInput!(T)) { 
    462561    OnlineSummary sCalc; 
    463562    foreach(elem; data) { 
    464         sCalc.addElement(elem); 
     563        sCalc.put(elem); 
    465564    } 
    466565    return sCalc.skewness; 
     
    469568unittest { 
    470569    // Values from Octave. 
    471     assert(approxEqual(skewness([1,2,3,4,5]), 0)); 
    472     assert(approxEqual(skewness([3,1,4,1,5,9,2,6,5]), 0.45618)); 
    473     assert(approxEqual(skewness([2,7,1,8,2,8,1,8,2,8,4,5,9]), -0.076783)); 
     570    assert(approxEqual(skewness([1,2,3,4,5].dup), 0)); 
     571    assert(approxEqual(skewness([3,1,4,1,5,9,2,6,5].dup), 0.45618)); 
     572    assert(approxEqual(skewness([2,7,1,8,2,8,1,8,2,8,4,5,9].dup), -0.076783)); 
     573 
     574    // Test handling of ranges that are not arrays. 
     575    string[] stringy = ["3", "1", "4", "1", "5", "9", "2", "6", "5"]; 
     576    auto intified = map!(to!(int, string))(stringy); 
     577    assert(approxEqual(skewness(intified), 0.45618)); 
    474578    writeln("Passed skewness test."); 
    475579} 
     
    510614} 
    511615 
    512 /**Calculates all summary dstats (mean, variance, standard dev., skewness 
    513  * and kurtosis) on an array.  Returns the results in a Summary struct.*/ 
    514 Summary summary(T)(const T[] data) { 
     616/**Calculates all summary stats (mean, variance, standard dev., skewness 
     617 * and kurtosis) on an input range with elements that can be implicitly 
     618 * converted to real.  Returns the results in a Summary struct.*/ 
     619Summary summary(T)(T data) 
     620if(realInput!(T)) { 
    515621    OnlineSummary summ; 
    516622    foreach(elem; data) { 
    517         summ.addElement(elem); 
     623        summ.put(elem); 
    518624    } 
    519625    return Summary(data.length, summ.mean, summ.var, summ.stdev, summ.skewness, 
  • trunk/tests.d

    r26 r28  
    1 /**Hypothesis testing beyond simple CDFs. 
    2  * 
    3  * Author:  David Simcha 
    4  * 
    5  * Copyright (c) 2009, David Simcha 
     1/**Hypothesis testing beyond simple CDFs.  All functions work with input 
     2 * ranges with elements implicitly convertible to real unless otherwise noted. 
     3 * 
     4 * Author:  David Simcha*/ 
     5 /* 
     6 * You may use this software under your choice of either of the following 
     7 * licenses.  YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 
     8 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 
     9 * THE BSD LICENSE.  IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 
     10 * TO OBEY THE TERMS OF THE PHOBOS LICENSE.  IF YOU ARE A LAWYER LOOKING FOR 
     11 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 
     12 * GET A LIFE. 
     13 * 
     14 * ---------------------Phobos License: --------------------------------------- 
     15 * 
     16 *  Copyright (C) 2008-2009 by David Simcha. 
     17 * 
     18 *  This software is provided 'as-is', without any express or implied 
     19 *  warranty. In no event will the authors be held liable for any damages 
     20 *  arising from the use of this software. 
     21 * 
     22 *  Permission is granted to anyone to use this software for any purpose, 
     23 *  including commercial applications, and to alter it and redistribute it 
     24 *  freely, in both source and binary form, subject to the following 
     25 *  restrictions: 
     26 * 
     27 *  o  The origin of this software must not be misrepresented; you must not 
     28 *     claim that you wrote the original software. If you use this software 
     29 *     in a product, an acknowledgment in the product documentation would be 
     30 *     appreciated but is not required. 
     31 *  o  Altered source versions must be plainly marked as such, and must not 
     32 *     be misrepresented as being the original software. 
     33 *  o  This notice may not be removed or altered from any source 
     34 *     distribution. 
     35 * 
     36 * --------------------BSD License:  ----------------------------------------- 
     37 * 
     38 * Copyright (c) 2008-2009, David Simcha 
    639 * All rights reserved. 
    740 * 
     
    2962 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
    3063 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
    31  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 
     64 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
     65 */ 
    3266module dstats.tests; 
    3367 
    3468import dstats.base, dstats.distrib, dstats.alloc, dstats.summary, dstats.sort, 
    35        std.algorithm, std.functional
     69       std.algorithm, std.functional, std.range
    3670 
    3771version(unittest) { 
     
    6195 * Alt.GREATER, meaning mean(data) > mean, and Alt.TWOSIDE, meaning mean(data) 
    6296 * != mean. 
     97 * 
    6398 * Returns:  The p-value against the given alternative.*/ 
    64 real studentsTTest(T)(const T[] data, real mean, Alt alt = Alt.TWOSIDE) { 
     99real studentsTTest(T)(T data, real mean, Alt alt = Alt.TWOSIDE) 
     100if(realInput!(T)) { 
    65101    auto meanSd = meanStdev(data); 
    66102    real t = (meanSd.mean - mean) / (meanSd.SD / sqrt(cast(real) data.length)); 
     
    75111 
    76112unittest { 
    77     assert(approxEqual(studentsTTest([1, 2, 3, 4, 5], 2), .2302)); 
    78     assert(approxEqual(studentsTTest([1, 2, 3, 4, 5], 2, Alt.LESS), .8849)); 
    79     assert(approxEqual(studentsTTest([1, 2, 3, 4, 5], 2, Alt.GREATER), .1151)); 
     113    assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2), .2302)); 
     114    assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.LESS), .8849)); 
     115    assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.GREATER), .1151)); 
    80116    writeln("Passed 1-sample studentsTTest test."); 
    81117} 
     
    86122 * mean(sample2), and Alt.TWOSIDE, meaning mean(sample1) != mean(sample2). 
    87123 * Returns:  The p-value against the given alternative.*/ 
    88 real studentsTTest(T, U)(const T[] sample1, const U[] sample2, Alt alt = Alt.TWOSIDE) { 
    89     size_t n1 = sample1.length; 
    90     size_t n2 = sample2.length; 
    91  
    92     auto s1summ = meanVariance(sample1); 
    93     auto s2summ = meanVariance(sample2); 
    94     real sx1x2 = sqrt(((n1 - 1) * s1summ.SD + (n2 - 1) * s2summ.SD) / 
     124real studentsTTest(T, U)(T sample1, U sample2, Alt alt = Alt.TWOSIDE) 
     125if(realInput!(T) && realInput!(U)) { 
     126    size_t n1, n2; 
     127    OnlineMeanSD s1summ, s2summ; 
     128    foreach(elem; sample1) { 
     129        s1summ.put(elem); 
     130        n1++; 
     131    } 
     132    foreach(elem; sample2) { 
     133        s2summ.put(elem); 
     134        n2++; 
     135    } 
     136 
     137    real sx1x2 = sqrt(((n1 - 1) * s1summ.var + (n2 - 1) * s2summ.var) / 
    95138                 (n1 + n2 - 2)); 
    96139    real t = (s1summ.mean - s2summ.mean) / 
     
    107150unittest { 
    108151    // Values from R. 
    109     assert(approxEqual(studentsTTest([1,2,3,4,5], [1,3,4,5,7,9]), 0.2346)); 
    110     assert(approxEqual(studentsTTest([1,2,3,4,5], [1,3,4,5,7,9], Alt.LESS), 
     152    assert(approxEqual(studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup), 0.2346)); 
     153    assert(approxEqual(studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.LESS), 
    111154           0.1173)); 
    112     assert(approxEqual(studentsTTest([1,2,3,4,5], [1,3,4,5,7,9], Alt.GREATER), 
     155    assert(approxEqual(studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.GREATER), 
    113156           0.8827)); 
    114     assert(approxEqual(studentsTTest([1,3,5,7,9,11], [2,2,1,3,4]), 0.06985)); 
    115     assert(approxEqual(studentsTTest([1,3,5,7,9,11], [2,2,1,3,4], Alt.LESS), 
     157    assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup), 0.06985)); 
     158    assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.LESS), 
    116159           0.965)); 
    117     assert(approxEqual(studentsTTest([1,3,5,7,9,11], [2,2,1,3,4], Alt.GREATER), 
     160    assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.GREATER), 
    118161           0.03492)); 
    119162    writeln("Passed 2-sample studentsTTest test."); 
     
    126169 * != mean(sample2). 
    127170 * Returns:  The p-value against the given alternative.*/ 
    128 real welchTTest(T, U)(const T[] sample1, const U[] sample2, Alt alt = Alt.TWOSIDE) { 
    129     size_t n1 = sample1.length; 
    130     size_t n2 = sample2.length; 
    131  
    132     auto s1summ = meanVariance(sample1); 
    133     auto s2summ = meanVariance(sample2); 
    134     real sx1x2 = sqrt(s1summ.SD / n1 + s2summ.SD / n2); 
     171real welchTTest(T, U)(T sample1, U sample2, Alt alt = Alt.TWOSIDE) 
     172if(realInput!(T) && realInput!(U)) { 
     173    size_t n1, n2; 
     174    OnlineMeanSD s1summ, s2summ; 
     175    foreach(elem; sample1) { 
     176        s1summ.put(elem); 
     177        n1++; 
     178    } 
     179    foreach(elem; sample2) { 
     180        s2summ.put(elem); 
     181        n2++; 
     182    } 
     183 
     184    auto v1 = s1summ.var, v2 = s2summ.var; 
     185    real sx1x2 = sqrt(v1 / n1 + v2 / n2); 
    135186    real t = (s1summ.mean - s2summ.mean) / sx1x2; 
    136     real numerator = s1summ.SD / n1 + s2summ.SD / n2; 
     187    real numerator = v1 / n1 + v2 / n2; 
    137188    numerator *= numerator; 
    138     real denom1 = s1summ.SD / n1; 
     189    real denom1 = v1 / n1; 
    139190    denom1 = denom1 * denom1 / (n1 - 1); 
    140     real denom2 = s2summ.SD / n2; 
     191    real denom2 = v2 / n2; 
    141192    denom2 = denom2 * denom2 / (n2 - 1); 
    142193    real df = numerator / (denom1 + denom2); 
     
    151202unittest { 
    152203        // Values from R. 
    153     assert(approxEqual(welchTTest([1,2,3,4,5], [1,3,4,5,7,9]), 0.2159)); 
    154     assert(approxEqual(welchTTest([1,2,3,4,5], [1,3,4,5,7,9], Alt.LESS), 
     204    assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup), 0.2159)); 
     205    assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.LESS), 
    155206           0.1079)); 
    156     assert(approxEqual(welchTTest([1,2,3,4,5], [1,3,4,5,7,9], Alt.GREATER), 
     207    assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.GREATER), 
    157208           0.892)); 
    158     assert(approxEqual(welchTTest([1,3,5,7,9,11], [2,2,1,3,4]), 0.06616)); 
    159     assert(approxEqual(welchTTest([1,3,5,7,9,11], [2,2,1,3,4], Alt.LESS), 
     209    assert(approxEqual(welchTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup), 0.06616)); 
     210    assert(approxEqual(welchTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.LESS), 
    160211           0.967)); 
    161     assert(approxEqual(welchTTest([1,3,5,7,9,11], [2,2,1,3,4], Alt.GREATER), 
     212    assert(approxEqual(welchTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.GREATER), 
    162213           0.03308)); 
    163214    writeln("Passed welchTTest test."); 
     
    170221 * greater than testMean, and Alt.TWOSIDE, meaning the true mean difference is not 
    171222 * equal to testMean. 
     223 * 
    172224 * Returns:  The p-value against the given alternative.*/ 
    173 real pairedTTest(T, U)(const T[] before, const U[] after, 
    174                        Alt alt = Alt.TWOSIDE, real testMean = 0) 
    175 in { 
    176     assert(before.length == after.length); 
    177 } body { 
     225real pairedTTest(T, U)(T before, U after, Alt alt = Alt.TWOSIDE, real testMean = 0) 
     226if(realInput!(T) && realInput!(U)) { 
    178227    OnlineMeanSD msd; 
    179     foreach(i; 0..before.length) { 
    180         real diff = cast(real) before[i] - cast(real) after[i]; 
    181         msd.addElement(diff); 
    182     } 
    183     real t = (msd.mean - testMean) / msd.stdev * sqrt(cast(real) before.length); 
     228    size_t len = 0; 
     229    while(!before.empty && !after.empty) { 
     230        real diff = cast(real) before.front - cast(real) after.front; 
     231        before.popFront; 
     232        after.popFront; 
     233        msd.put(diff); 
     234        len++; 
     235    } 
     236    real t = (msd.mean - testMean) / msd.stdev * sqrt(cast(real) len); 
    184237 
    185238    if(alt == Alt.LESS) { 
    186         return studentsTCDF(t, before.length - 1); 
     239        return studentsTCDF(t, len - 1); 
    187240    } else if(alt == Alt.GREATER) { 
    188         return studentsTCDF(-t, before.length - 1); 
     241        return studentsTCDF(-t, len - 1); 
    189242    } else if(t > 0) { 
    190         return 2 * studentsTCDF(-t, before.length - 1); 
     243        return 2 * studentsTCDF(-t, len - 1); 
    191244    } else { 
    192         return 2 * studentsTCDF(t, before.length - 1); 
     245        return 2 * studentsTCDF(t, len - 1); 
    193246    } 
    194247} 
     
    196249unittest { 
    197250    // Values from R. 
    198     assert(approxEqual(pairedTTest([3,2,3,4,5], [2,3,5,5,6], Alt.LESS), 0.0889)); 
    199     assert(approxEqual(pairedTTest([3,2,3,4,5], [2,3,5,5,6], Alt.GREATER), 0.9111)); 
    200     assert(approxEqual(pairedTTest([3,2,3,4,5], [2,3,5,5,6], Alt.TWOSIDE), 0.1778)); 
    201     assert(approxEqual(pairedTTest([3,2,3,4,5], [2,3,5,5,6], Alt.LESS, 1), 0.01066)); 
    202     assert(approxEqual(pairedTTest([3,2,3,4,5], [2,3,5,5,6], Alt.GREATER, 1), 0.9893)); 
    203     assert(approxEqual(pairedTTest([3,2,3,4,5], [2,3,5,5,6], Alt.TWOSIDE, 1), 0.02131)); 
     251    assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.LESS), 0.0889)); 
     252    assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.GREATER), 0.9111)); 
     253    assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.TWOSIDE), 0.1778)); 
     254    assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.LESS, 1), 0.01066)); 
     255    assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.GREATER, 1), 0.9893)); 
     256    assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.TWOSIDE, 1), 0.02131)); 
    204257    writeln("Passed pairedTTest unittest."); 
    205258} 
     
    210263 * dereference that can be used in wilcoxonRankSumPval() to adjust for ties in 
    211264 * the input data. 
     265 * 
     266 * Input ranges for this function must define a length. 
     267 * 
    212268 * Returns:  The Wilcoxon test statistic W.*/ 
    213 real wilcoxonRankSum(T)(const T[] sample1, const T[] sample2, real* tieSum = null) { 
    214         ulong n1 = sample1.length, n2 = sample2.length, N = n1 + n2; 
    215     auto combined = newStack!(Mutable!(T))(N); 
    216     combined[0..n1] = sample1[]; 
    217     combined[n1..$] = sample2[]; 
     269real wilcoxonRankSum(T)(T sample1, T sample2, real* tieSum = null) 
     270if(isInputRange!(T) && dstats.base.hasLength!(T)) { 
     271    ulong n1 = sample1.length, n2 = sample2.length, N = n1 + n2; 
     272    auto combined = newStack!(Unqual!(ElementType!(T)))(N); 
     273    rangeCopy(combined[0..n1], sample1); 
     274    rangeCopy(combined[n1..$], sample2); 
    218275 
    219276    float[] ranks = newStack!(float)(N); 
    220277    rankSort(combined, ranks); 
    221     real w = sum!(float, real)(ranks[0..n1]) - n1 * (n1 + 1) / 2UL; 
     278    real w = reduce!("a + b")(0.0L, ranks[0..n1]) - n1 * (n1 + 1) / 2UL; 
    222279    TempAlloc.free;  // Free ranks. 
    223280 
     
    248305 
    249306unittest { 
    250     assert(wilcoxonRankSum([1, 2, 3, 4, 5], [2, 4, 6, 8, 10]) == 5); 
    251     assert(wilcoxonRankSum([2, 4, 6, 8, 10], [1, 2, 3, 4, 5]) == 20); 
    252     assert(wilcoxonRankSum([3, 7, 21, 5, 9], [2, 4, 6, 8, 10]) == 15); 
     307    assert(wilcoxonRankSum([1, 2, 3, 4, 5].dup, [2, 4, 6, 8, 10].dup) == 5); 
     308    assert(wilcoxonRankSum([2, 4, 6, 8, 10].dup, [1, 2, 3, 4, 5].dup) == 20); 
     309    assert(wilcoxonRankSum([3, 7, 21, 5, 9].dup, [2, 4, 6, 8, 10].dup) == 15); 
    253310    writeln("Passed wilcoxonRankSum test."); 
    254311} 
     
    298355    assert(approxEqual(wilcoxonRankSumPval(1500, 50, 50, Alt.LESS), .957903)); 
    299356    assert(approxEqual(wilcoxonRankSumPval(8500, 100, 200, Alt.LESS), .01704)); 
    300     auto w = wilcoxonRankSum([2,4,6,8,12], [1,3,5,7,11,9]); 
     357    auto w = wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup); 
    301358    assert(approxEqual(wilcoxonRankSumPval(w, 5, 6), 0.9273)); 
    302359    assert(approxEqual(wilcoxonRankSumPval(w, 5, 6, Alt.GREATER), 0.4636)); 
     
    325382 * In these cases, exactThresh will be ignored. 
    326383 * 
     384 * Input ranges for this function must define a length. 
     385 * 
    327386 * Returns:  The p-value against the given alternative.*/ 
    328 real wilcoxonRankSumPval(T)(const T[] sample1, const T[] sample2, 
    329                          Alt alt = Alt.TWOSIDE, uint exactThresh = 50) { 
     387real wilcoxonRankSumPval(T)(T sample1, T sample2, Alt alt = Alt.TWOSIDE, 
     388    uint exactThresh = 50) if(isInputRange!(T) && dstats.base.hasLength!(T)) { 
     389 
    330390    real tieSum; 
    331391    real W = wilcoxonRankSum(sample1, sample2, &tieSum); 
     
    337397     // Values from R.  Simple stuff (no ties) first.  Testing approximate 
    338398     // calculation first. 
    339      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12], [1,3,5,7,11,9]
     399     assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup
    340400           Alt.TWOSIDE, 0), 0.9273)); 
    341      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12], [1,3,5,7,11,9]
     401     assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup
    342402           Alt.LESS, 0), 0.6079)); 
    343      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12], [1,3,5,7,11,9]
     403     assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup
    344404           Alt.GREATER, 0), 0.4636)); 
    345      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12], [3,5,7,8,13,15]
     405     assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup
    346406            Alt.TWOSIDE, 0), 0.4113)); 
    347      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12], [3,5,7,8,13,15]
     407     assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup
    348408            Alt.LESS, 0), 0.2057)); 
    349      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12], [3,5,7,8,13,15]
     409     assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup
    350410            Alt.GREATER, 0), 0.8423)); 
    351      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9], [2,4,6,8,10]
     411     assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup
    352412            Alt.TWOSIDE, 0), .6745)); 
    353      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9], [2,4,6,8,10]
     413     assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup
    354414            Alt.LESS, 0), .3372)); 
    355      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9], [2,4,6,8,10]
     415     assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup
    356416            Alt.GREATER, 0), .7346)); 
    357417 
    358418    // Now, lots of ties. 
    359     assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5], [2,3,4,5,6]
     419    assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup
    360420           Alt.TWOSIDE, 0), 0.3976)); 
    361     assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5], [2,3,4,5,6]
     421    assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup
    362422           Alt.LESS, 0), 0.1988)); 
    363     assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5], [2,3,4,5,6]
     423    assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup
    364424           Alt.GREATER, 0), 0.8548)); 
    365     assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2], [1,2,3,1,1]
     425    assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup
    366426           Alt.TWOSIDE, 0), 0.9049)); 
    367     assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2], [1,2,3,1,1]
     427    assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup
    368428           Alt.LESS, 0), 0.4524)); 
    369     assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2], [1,2,3,1,1]
     429    assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup
    370430           Alt.GREATER, 0), 0.64)); 
    371431 
    372432    // Now, testing the exact calculation on the same data. 
    373      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12], [1,3,5,7,11,9]
     433     assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup
    374434       Alt.TWOSIDE), 0.9307)); 
    375      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12], [1,3,5,7,11,9]
     435     assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup
    376436           Alt.LESS), 0.6039)); 
    377      assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12], [1,3,5,7,11,9]
     437     assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup
    378438           Alt.GREATER), 0.4654)); 
    379      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12], [3,5,7,8,13,15]
     439     assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup
    380440            Alt.TWOSIDE), 0.4286)); 
    381      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12], [3,5,7,8,13,15]
     441     assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup
    382442            Alt.LESS), 0.2143)); 
    383      assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12], [3,5,7,8,13,15]
     443     assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup
    384444            Alt.GREATER), 0.8355)); 
    385      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9], [2,4,6,8,10]
     445     assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup
    386446            Alt.TWOSIDE), .6905)); 
    387      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9], [2,4,6,8,10]
     447     assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup
    388448            Alt.LESS), .3452)); 
    389      assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9], [2,4,6,8,10]
     449     assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup
    390450            Alt.GREATER), .7262)); 
    391451    writeln("Passed wilcoxonRankSumPval test."); 
     
    501561 * dereference that can be used in wilcoxonRankSumPval() to adjust for ties in 
    502562 * the input data. 
     563 * 
     564 * Input ranges for this function must define a length. 
     565 * 
    503566 * Returns:  The Wilcoxon test statistic W.*/ 
    504 real wilcoxonSignedRank(T, U)(const T[] before, const U[] after, 
    505       real* tieSum = null) { 
     567real wilcoxonSignedRank(T, U)(T before, U after, real* tieSum = null) 
     568if(realInput!(T) && realInput!(U) && 
     569   dstats.base.hasLength!(T) && dstats.base.hasLength!(U)) { 
    506570    mixin(newFrame); 
    507571    float[] diffRanks = newStack!(float)(before.length); 
     
    517581    } 
    518582 
    519     foreach(i; 0..before.length) { 
    520         real diff = cast(real) before[i] - cast(real) after[i]; 
    521         signs[i] = sign(diff); 
    522         diffs[i] = abs(diff); 
     583    size_t ii = 0; 
     584    while(!before.empty && !after.empty) { 
     585        real diff = cast(real) before.front - cast(real) after.front; 
     586        signs[ii] = sign(diff); 
     587        diffs[ii] = abs(diff); 
     588        ii++; 
     589        before.popFront; 
     590        after.popFront; 
    523591    } 
    524592    rankSort(diffs, diffRanks); 
     
    556624 
    557625unittest { 
    558     assert(wilcoxonSignedRank([1,2,3,4,5], [2,1,4,5,3]) == 7.5); 
    559     assert(wilcoxonSignedRank([3,1,4,1,5], [2,7,1,8,2]) == 6); 
    560     assert(wilcoxonSignedRank([8,6,7,5,3], [0,9,8,6,7]) == 5); 
     626    assert(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup) == 7.5); 
     627    assert(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup) == 6); 
     628    assert(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup) == 5); 
    561629    writeln("Passed wilcoxonSignedRank unittest."); 
    562630} 
     
    583651 * or equal pairs. 
    584652 * 
     653 * The input ranges for this function must define a length and must be 
     654 * forward ranges. 
     655 * 
    585656 * Returns:  The p-value against the given alternative.*/ 
    586 real wilcoxonSignedRankPval(T)(const T[] before, const T[] after, 
     657real wilcoxonSignedRankPval(T)(T before, T after, 
    587658      Alt alt = Alt.TWOSIDE, uint exactThresh = 50) 
     659if(realInput!(T) && dstats.base.hasLength!(T) && isForwardRange!(T)) 
    588660in { 
    589661    assert(before.length == after.length); 
    590662} body { 
    591       real tieSum; 
    592       real W = wilcoxonSignedRank(before, after, &tieSum); 
    593       ulong N = before.length; 
    594       foreach(i; 0..before.length) { 
    595           if(before[i] == after[i]
     663    real tieSum; 
     664    real W = wilcoxonSignedRank(before, after, &tieSum); 
     665    ulong N = before.length; 
     666    while(!before.empty && !after.empty) { 
     667        if(before.front == after.front
    596668            N--; 
    597       } 
    598       return wilcoxonSignedRankPval(W, N, alt, tieSum, exactThresh); 
     669        before.popFront; 
     670        after.popFront; 
     671 
     672    } 
     673    return wilcoxonSignedRankPval(W, N, alt, tieSum, exactThresh); 
    599674} 
    600675 
     
    603678    alias approxEqual ae; 
    604679    // With ties, normal approx. 
    605     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5], [2,1,4,5,3]), 1)); 
    606     assert(ae(wilcoxonSignedRankPval([3,1,4,1,5], [2,7,1,8,2]), 0.7865)); 
    607     assert(ae(wilcoxonSignedRankPval([8,6,7,5,3], [0,9,8,6,7]), 0.5879)); 
    608     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5], [2,1,4,5,3], Alt.LESS), 0.5562)); 
    609     assert(ae(wilcoxonSignedRankPval([3,1,4,1,5], [2,7,1,8,2], Alt.LESS), 0.3932)); 
    610     assert(ae(wilcoxonSignedRankPval([8,6,7,5,3], [0,9,8,6,7], Alt.LESS), 0.2940)); 
    611     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5], [2,1,4,5,3], Alt.GREATER), 0.5562)); 
    612     assert(ae(wilcoxonSignedRankPval([3,1,4,1,5], [2,7,1,8,2], Alt.GREATER), 0.706)); 
    613     assert(ae(wilcoxonSignedRankPval([8,6,7,5,3], [0,9,8,6,7], Alt.GREATER), 0.7918)); 
     680    assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup), 1)); 
     681    assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup), 0.7865)); 
     682    assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup), 0.5879)); 
     683    assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.LESS), 0.5562)); 
     684    assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.LESS), 0.3932)); 
     685    assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.LESS), 0.2940)); 
     686    assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.GREATER), 0.5562)); 
     687    assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.GREATER), 0.706)); 
     688    assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.GREATER), 0.7918)); 
    614689 
    615690    // Exact. 
    616     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5], [2,-4,-8,16,32]), 0.625)); 
    617     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5], [2,-4,-8,16,32], Alt.LESS), 0.3125)); 
    618     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5], [2,-4,-8,16,32], Alt.GREATER), 0.7812)); 
    619     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5], [2,-4,-8,-16,32]), 0.8125)); 
    620     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5], [2,-4,-8,-16,32], Alt.LESS), 0.6875)); 
    621     assert(ae(wilcoxonSignedRankPval([1,2,3,4,5], [2,-4,-8,-16,32], Alt.GREATER), 0.4062)); 
     691    assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup), 0.625)); 
     692    assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.LESS), 0.3125)); 
     693    assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.GREATER), 0.7812)); 
     694    assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup), 0.8125)); 
     695    assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.LESS), 0.6875)); 
     696    assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.GREATER), 0.4062)); 
    622697    writeln("Passed wilcoxonSignedRankPval unittest."); 
    623698} 
     
    755830 * elements of after, and Alt.TWOSIDE, meaning that there is a significant 
    756831 * difference in either direction.*/ 
    757 real signTest(T)(const T[] before, const T[] after, Alt alt = Alt.TWOSIDE) 
    758 in { 
    759     assert(before.length == after.length); 
    760 } body { 
     832real signTest(T)(T before, T after, Alt alt = Alt.TWOSIDE) 
     833if(realInput!(T)) { 
    761834    uint greater, less; 
    762     foreach(i; 0..before.length) { 
    763         if(before[i] < after[i]
     835    while(!before.empty && !after.empty) { 
     836        if(before.front < after.front
    764837            less++; 
    765         else if(after[i] < before[i]
     838        else if(after.front < before.front
    766839            greater++; 
    767840        // Ignore equals. 
     841        before.popFront; 
     842        after.popFront; 
    768843    } 
    769844    if(alt == Alt.LESS) { 
     
    780855unittest { 
    781856    alias approxEqual ae; 
    782     assert(ae(signTest([1,3,4,2,5], [1,2,4,8,16]), 1)); 
    783     assert(ae(signTest([1,3,4,2,5], [1,2,4,8,16], Alt.LESS), 0.5)); 
    784     assert(ae(signTest([1,3,4,2,5], [1,2,4,8,16], Alt.GREATER), 0.875)); 
    785     assert(ae(signTest([5,3,4,6,8], [1,2,3,4,5], Alt.GREATER), 0.03125)); 
    786     assert(ae(signTest([5,3,4,6,8], [1,2,3,4,5], Alt.LESS), 1)); 
    787     assert(ae(signTest([5,3,4,6,8], [1,2,3,4,5]), 0.0625)); 
     857    assert(ae(signTest([1,3,4,2,5].dup, [1,2,4,8,16].dup), 1)); 
     858    assert(ae(signTest([1,3,4,2,5].dup, [1,2,4,8,16].dup, Alt.LESS), 0.5)); 
     859    assert(ae(signTest([1,3,4,2,5].dup, [1,2,4,8,16].dup, Alt.GREATER), 0.875)); 
     860    assert(ae(signTest([5,3,4,6,8].dup, [1,2,3,4,5].dup, Alt.GREATER), 0.03125)); 
     861    assert(ae(signTest([5,3,4,6,8].dup, [1,2,3,4,5].dup, Alt.LESS), 1)); 
     862    assert(ae(signTest([5,3,4,6,8].dup, [1,2,3,4,5].dup), 0.0625)); 
    788863} 
    789864 
     
    795870 * elements of after, and Alt.TWOSIDE, meaning that there is a significant 
    796871 * difference in either direction.*/ 
    797 real signTest(T, U)(const T[] data, U median, Alt alt = Alt.TWOSIDE) 
    798 if(!isArray!(U)) { 
     872real signTest(T, U)(T data, U median, Alt alt = Alt.TWOSIDE) 
     873if(realInput!(T) && isNumeric!(U)) { 
    799874    uint greater, less; 
    800     foreach(i; 0..data.length) { 
    801         if(data[i] < median) 
     875    foreach(elem; data) { 
     876        if(elem < median) 
    802877            less++; 
    803         else if(median < data[i]
     878        else if(median < elem
    804879            greater++; 
    805880        // Ignore equals. 
     
    817892 
    818893unittest { 
    819     assert(approxEqual(signTest([1,2,6,7,9], 2), 0.625)); 
     894    assert(approxEqual(signTest([1,2,6,7,9].dup, 2), 0.625)); 
    820895    writeln("Passed signTest unittest."); 
    821896} 
     
    828903 * To get the D value used in standard notation, 
    829904 * simply take the absolute value of this D value.*/ 
    830 real ksTest(T, U)(const T[] F, const U[] Fprime) { 
     905real ksTest(T, U)(T F, U Fprime) 
     906if(isInputRange!(T) && isInputRange!(U)) { 
    831907    auto TAState = TempAlloc.getState; 
    832908    scope(exit) { 
     
    834910        TempAlloc.free(TAState); 
    835911    } 
    836     return ksTestDestructive(F.tempdup(TAState), Fprime.tempdup(TAState)); 
     912    return ksTestDestructive(tempdup(F), tempdup(Fprime)); 
    837913} 
    838914 
    839915/**Same as ksTest, but sorts input data in place instead of duplicating 
    840  * data.  Therefore, less "safe" but doesn't allocate memory.*/ 
    841 real ksTestDestructive(T, U)(T[] F, U[] Fprime) { 
     916 * data.  Therefore, less "safe" but doesn't allocate memory.  F, 
     917 * Fprime must both be random access ranges w/ lengths.*/ 
     918real ksTestDestructive(T, U)(T F, U Fprime) 
     919if(isRandomAccessRange!(U) && isRandomAccessRange!(T) && 
     920   dstats.base.hasLength!(T) && dstats.base.hasLength!(U)) { 
    842921    qsort(F); 
    843922    qsort(Fprime); 
     
    865944unittest { 
    866945    // Values from R. 
    867     assert(approxEqual(ksTestDestructive([1,2,3,4,5], [1,2,3,4,5]), 0)); 
    868     assert(approxEqual(ksTestDestructive([1,2,3,4,5], [1,2,2,3,5]), -.2)); 
    869     assert(approxEqual(ksTestDestructive([-1,0,2,8, 6], [1,2,2,3,5]), .4)); 
    870     assert(approxEqual(ksTestDestructive([1,2,3,4,5], [1,2,2,3,5,7,8]), .2857)); 
    871     assert(approxEqual(ksTestDestructive([1, 2, 3, 4, 4, 4, 5]
    872            [1, 2, 3, 4, 5, 5, 5]), .2857)); 
     946    assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,3,4,5].dup), 0)); 
     947    assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,2,3,5].dup), -.2)); 
     948    assert(approxEqual(ksTestDestructive([-1,0,2,8, 6].dup, [1,2,2,3,5].dup), .4)); 
     949    assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,2,3,5,7,8].dup), .2857)); 
     950    assert(approxEqual(ksTestDestructive([1, 2, 3, 4, 4, 4, 5].dup
     951           [1, 2, 3, 4, 5, 5, 5].dup), .2857)); 
    873952    writeln("Passed 2-sample ksTestDestructive."); 
    874953} 
     
    885964 * --- 
    886965 */ 
    887 real ksTest(T, Func)(const T[] Femp, Func F) 
    888 if(is(Func == function) || is(Func == delegate)) { 
     966real ksTest(T, Func)(T Femp, Func F) 
     967if(realInput!(T) && (is(Func == function) || is(Func == delegate))) { 
    889968    scope(exit) TempAlloc.free; 
    890     return ksTestDestructive(Femp.tempdup, F); 
    891 
    892  
    893 /**One-sample KS test, sorts in place.*/ 
    894 real ksTestDestructive(T, Func)(T[] Femp, Func F) 
    895 if(is(Func == function) || is(Func == delegate)) { 
     969    return ksTestDestructive(tempdup(Femp), F); 
     970
     971 
     972/**One-sample KS test, sorts in place.  Femp must be a random access range 
     973 * with length.*/ 
     974real ksTestDestructive(T, Func)(T Femp, Func F) 
     975if(isRandomAccessRange!(T) && dstats.base.hasLength!(T) && 
     976    (is(Func == function) || is(Func == delegate))) { 
    896977    qsort(Femp); 
    897978    real D = 0; 
     
    909990    // Testing against values from R. 
    910991    auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); 
    911     assert(approxEqual(ksTestDestructive([1,2,3,4,5], stdNormal), -.8413)); 
    912     assert(approxEqual(ksTestDestructive([-1,0,2,8, 6], stdNormal), -.5772)); 
    913     auto lotsOfTies = [5,1,2,2,2,2,2,2,3,4]
     992    assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, stdNormal), -.8413)); 
     993    assert(approxEqual(ksTestDestructive([-1,0,2,8, 6].dup, stdNormal), -.5772)); 
     994    auto lotsOfTies = [5,1,2,2,2,2,2,2,3,4].dup
    914995    assert(approxEqual(ksTestDestructive(lotsOfTies, stdNormal), -0.8772)); 
    915996    writeln("Passed 1-sample ksTestDestructive."); 
     
    9401021/**KS-test, returns 2-sided P-val instead of D. 
    9411022 * Bugs:  Exact calculation not implemented.  Uses asymptotic approximation.*/ 
    942 real ksPval(T, U)(const T[] F, const U[] Fprime) { 
     1023real ksPval(T, U)(T F, U Fprime) 
     1024if(realInput!(T) && realInput!(U)) { 
    9431025    return ksPvalD(F.length, Fprime.length, ksTest(F, Fprime)); 
    9441026} 
    9451027 
    9461028unittest { 
    947     assert(approxEqual(ksPval([1, 2, 3, 4, 4, 4, 5], [1, 2, 3, 4, 5, 5, 5]), 
     1029    assert(approxEqual(ksPval([1, 2, 3, 4, 4, 4, 5].dup, [1, 2, 3, 4, 5, 5, 5].dup), 
    9481030           .9375)); 
    9491031    writeln("Passed ksPval 2-sample test."); 
     
    9521034/**One-sided K-S test, returns P-val instead of D. 
    9531035 * Bugs:  Exact calculation not implemented.  Uses asymptotic approximation.*/ 
    954 real ksPval(T, Func)(const T[] Femp, Func F) 
    955 if(is(Func == function) || is(Func == delegate)) { 
     1036real ksPval(T, Func)(T Femp, Func F) 
     1037if(realInput!(T) && (is(Func == function) || is(Func == delegate))) { 
    9561038    return ksPvalD(Femp.length, ksTest(Femp, F)); 
    9571039} 
     
    9591041unittest { 
    9601042    auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); 
    961     assert(approxEqual(ksPval([0,1,2,3,4], stdNormal), .03271)); 
     1043    assert(approxEqual(ksPval([0,1,2,3,4].dup, stdNormal), .03271)); 
    9621044 
    9631045    auto uniform01 = parametrize!(uniformCDF)(0, 1); 
    964     assert(approxEqual(ksPval([0.1, 0.3, 0.5, 0.9, 1], uniform01), 0.7591)); 
     1046    assert(approxEqual(ksPval([0.1, 0.3, 0.5, 0.9, 1].dup, uniform01), 0.7591)); 
    9651047 
    9661048    writeln("Passed ksPval 1-sample test."); 
     
    10141096    alias contingencyTable c; 
    10151097 
    1016     invariant uint n1 = c[0][0] + c[0][1], 
     1098    immutable uint n1 = c[0][0] + c[0][1], 
    10171099                   n2 = c[1][0] + c[1][1], 
    10181100                   n  = c[0][0] + c[1][0]; 
    10191101 
    1020     invariant uint mode = 
     1102    immutable uint mode = 
    10211103        cast(uint) ((cast(real) (n + 1) * (n1 + 1)) / (n1 + n2 + 2)); 
    1022     invariant real pExact = hypergeometricPMF(c[0][0], n1, n2, n); 
    1023     invariant real pMode = hypergeometricPMF(mode, n1, n2, n); 
     1104    immutable real pExact = hypergeometricPMF(c[0][0], n1, n2, n); 
     1105    immutable real pMode = hypergeometricPMF(mode, n1, n2, n); 
    10241106 
    10251107    if(approxEqual(pExact, pMode, 1e-7)) { 
    10261108        return 1; 
    10271109    } else if(c[0][0] < mode) { 
    1028         invariant real pLower = hypergeometricCDF(c[0][0], n1, n2, n); 
     1110        immutable real pLower = hypergeometricCDF(c[0][0], n1, n2, n); 
    10291111 
    10301112        // Special case to prevent binary search from getting stuck. 
     
    10391121                    (cast(ulong) max + cast(ulong) min) / 2UL; 
    10401122 
    1041             invariant real pGuess = hypergeometricPMF(guess, n1, n2, n); 
     1123            immutable real pGuess = hypergeometricPMF(guess, n1, n2, n); 
    10421124            if(pGuess <= pExact && 
    10431125                hypergeometricPMF(guess - 1, n1, n2, n) > pExact) { 
     
    10531135               hypergeometricCDFR(guess, n1, n2, n), 1.0L); 
    10541136    } else { 
    1055         invariant real pUpper = hypergeometricCDFR(c[0][0], n1, n2, n); 
     1137        immutable real pUpper = hypergeometricCDFR(c[0][0], n1, n2, n); 
    10561138 
    10571139        // Special case to prevent binary search from getting stuck. 
     
    10861168    // Simple, naive impl. of two-sided to test against. 
    10871169    static real naive(const uint[][] c) { 
    1088         invariant uint n1 = c[0][0] + c[0][1], 
     1170        immutable uint n1 = c[0][0] + c[0][1], 
    10891171                   n2 = c[1][0] + c[1][1], 
    10901172                   n  = c[0][0] + c[1][0]; 
    1091         invariant uint mode = 
     1173        immutable uint mode = 
    10921174            cast(uint) ((cast(real) (n + 1) * (n1 + 1)) / (n1 + n2 + 2)); 
    1093         invariant real pExact = hypergeometricPMF(c[0][0], n1, n2, n); 
    1094         invariant real pMode = hypergeometricPMF(mode, n1, n2, n); 
     1175        immutable real pExact = hypergeometricPMF(c[0][0], n1, n2, n); 
     1176        immutable real pMode = hypergeometricPMF(mode, n1, n2, n); 
    10951177        if(approxEqual(pExact, pMode, 1e-7)) 
    10961178            return 1; 
     
    11071189 
    11081190    foreach(i; 0..1000) { 
    1109         c[0][0] = uniform(gen, 0U, 51U); 
    1110         c[0][1] = uniform(gen, 0U, 51U); 
    1111         c[1][0] = uniform(gen, 0U, 51U); 
    1112         c[1][1] = uniform(gen, 0U, 51U); 
     1191        c[0][0] = uniform(0U, 51U); 
     1192        c[0][1] = uniform(0U, 51U); 
     1193        c[1][0] = uniform(0U, 51U); 
     1194        c[1][1] = uniform(0U, 51U); 
    11131195        real naiveAns = naive(c); 
    11141196        real fastAns = fisherExact(c); 
     
    11731255 * Bugs:  No exact calculation of the P-value.  Asymptotic approximation only. 
    11741256 */ 
    1175 real runsTest(alias positive = "a > 0", T)(const T[] obs, Alt alt = Alt.TWOSIDE) { 
    1176     OnlineRunsTest!(positive, T) r; 
     1257real runsTest(alias positive = "a > 0", T)(T obs, Alt alt = Alt.TWOSIDE) 
     1258if(isInputRange!(T)) { 
     1259    OnlineRunsTest!(positive, ElementType!(T)) r; 
    11771260    foreach(elem; obs) { 
    1178         r.addElement(elem); 
     1261        r.put(elem); 
    11791262    } 
    11801263    return r.pVal(alt); 
     
    11851268    // hard-coded as the equivalent to positive().  The median of this data 
    11861269    // is 0.5, so everything works. 
    1187     invariant int[] data = [1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1].idup; 
     1270    immutable int[] data = [1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1].idup; 
    11881271    assert(approxEqual(runsTest(data), 0.3581)); 
    11891272    assert(approxEqual(runsTest(data, Alt.LESS), 0.821)); 
     
    12061289 
    12071290    /// 
    1208     void addElement(T elem) { 
     1291    void put(T elem) { 
    12091292        bool curPos = pos(elem); 
    12101293        if(nRun == 0) { 
     
    13451428 * Returns:   An array of Q-values with indices 
    13461429 * corresponding to the indices of the p-values passed in.*/ 
    1347 real[] falseDiscoveryRate(T)(const T[] pVals) { 
     1430real[] falseDiscoveryRate(T)(T pVals) 
     1431if(realInput!(T)) { 
    13481432    // Not optimized at all because I can't imagine anyone writing code where 
    13491433    // FDR calculations are the main bottleneck. 
    1350     auto p = pVals.tempdup;  scope(exit) TempAlloc.free; 
     1434    auto p = tempdup(pVals);  scope(exit) TempAlloc.free; 
    13511435    auto perm = newStack!(uint)(pVals.length);  scope(exit) TempAlloc.free; 
    13521436    foreach(i, ref elem; perm) 
     
    13751459unittest { 
    13761460    // Comparing results to R's qvalue package. 
    1377     auto pVals = [.90, .01, .03, .03, .70, .60, .01]
     1461    auto pVals = [.90, .01, .03, .03, .70, .60, .01].dup
    13781462    auto qVals = falseDiscoveryRate(pVals); 
    13791463    assert(approxEqual(qVals[0], .9));