Changeset 28
- Timestamp:
- 04/23/09 23:30:02 (3 years ago)
- Files:
-
- trunk/alloc.d (modified) (11 diffs)
- trunk/base.d (modified) (34 diffs)
- trunk/cor.d (modified) (15 diffs)
- trunk/distrib.d (modified) (16 diffs)
- trunk/gamma.d (modified) (1 diff)
- trunk/infotheory.d (modified) (9 diffs)
- trunk/random.d (modified) (19 diffs)
- trunk/sort.d (modified) (24 diffs)
- trunk/stats.d (modified) (1 diff)
- trunk/summary.d (modified) (25 diffs)
- trunk/tests.d (modified) (42 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/alloc.d
r23 r28 1 1 /**Allocation functions. 2 * Author: David Simcha 3 * 4 * Copyright (c) 2009, David Simcha 2 * 3 * Author: David Simcha*/ 4 /* 5 * You may use this software under your choice of either of the following 6 * licenses. YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 7 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 8 * THE BSD LICENSE. IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 9 * TO OBEY THE TERMS OF THE PHOBOS LICENSE. IF YOU ARE A LAWYER LOOKING FOR 10 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 11 * GET A LIFE. 12 * 13 * ---------------------Phobos License: --------------------------------------- 14 * 15 * Copyright (C) 2008-2009 by David Simcha. 16 * 17 * This software is provided 'as-is', without any express or implied 18 * warranty. In no event will the authors be held liable for any damages 19 * arising from the use of this software. 20 * 21 * Permission is granted to anyone to use this software for any purpose, 22 * including commercial applications, and to alter it and redistribute it 23 * freely, in both source and binary form, subject to the following 24 * restrictions: 25 * 26 * o The origin of this software must not be misrepresented; you must not 27 * claim that you wrote the original software. If you use this software 28 * in a product, an acknowledgment in the product documentation would be 29 * appreciated but is not required. 30 * o Altered source versions must be plainly marked as such, and must not 31 * be misrepresented as being the original software. 32 * o This notice may not be removed or altered from any source 33 * distribution. 34 * 35 * --------------------BSD License: ----------------------------------------- 36 * 37 * Copyright (c) 2008-2009, David Simcha 5 38 * All rights reserved. 6 39 * … … 28 61 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 62 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 */ 31 65 32 66 module dstats.alloc; 33 67 34 import std.traits, core.memory, core.thread, std.c.stdio : stderr; 68 import std.traits, core.memory, core.thread, std.array, std.range, dstats.base; 69 static import std.c.stdio; 35 70 36 71 version(unittest) { 37 import std.stdio ;72 import std.stdio, std.conv, dstats.sort; 38 73 void main() {} 39 74 } … … 55 90 const bool isReferenceType = false; 56 91 } else static if (Types.length == 1) { 57 static if (IsType!( Mutable!(Types[0]), bool, byte, ubyte, short, ushort,92 static if (IsType!(Unqual!(Types[0]), bool, byte, ubyte, short, ushort, 58 93 int, uint, long, ulong, float, double, real, ifloat, 59 94 idouble, ireal, cfloat, cdouble, creal, char, dchar, … … 131 166 } 132 167 168 private template Appends(T, U) { 169 enum bool Appends = AppendsImpl!(T, U).ret; 170 } 171 172 private template AppendsImpl(T, U) { 173 T[] a; 174 U b; 175 enum bool ret = is(typeof(a ~= b)); 176 } 177 133 178 ///Appends to an array, deleting the old array if it has to be realloced. 134 179 void appendDelOld(T, U)(ref T[] to, U from) 135 if (is(Mutable!(T) : Mutable!(U)) || is(Mutable!(T[0]) : Mutable!(U))) {180 if(Appends!(T, U)) { 136 181 auto oldPtr = to.ptr; 137 182 to ~= from; … … 203 248 Stack!(Block) inUse; 204 249 Stack!(void*) freelist; 205 206 ~this() { // Blocks are pretty large. Prevent false ptrs.207 ntFree(lastAlloc.ptr);208 while(nblocks > 1) {209 ntFree((inUse.pop()).space);210 nblocks--;211 }212 ntFree(space);213 while(nfree > 0) {214 ntFree(freelist.pop);215 nfree--;216 }217 }218 250 } 219 251 … … 230 262 231 263 static void die() nothrow { 232 fprintf(std err, "TempAlloc error: Out of memory.\0".ptr);264 fprintf(std.c.stdio.stderr, "TempAlloc error: Out of memory.\0".ptr); 233 265 exit(1); 234 266 } … … 457 489 totalLen += array.length; 458 490 } 459 auto ret = newStack!( Mutable!(typeof(T[0][0])))(totalLen);491 auto ret = newStack!(Unqual!(typeof(T[0][0])))(totalLen); 460 492 461 493 size_t offset = 0; … … 467 499 } 468 500 469 /**Creates a duplicate of an array on the TempAlloc stack.*/ 470 auto tempdup(T)(T[] data) nothrow { 471 alias Mutable!(T) U; 472 U[] ret = newStack!(U)(data.length); 473 ret[] = data[]; 474 return ret; 475 } 476 477 /**Same as tempdup(T[]) but uses stateCopy cached on stack by caller 478 * to avoid a thread-local storage lookup. Strictly a speed hack.*/ 479 auto tempdup(T)(T[] data, TempAlloc.State state) nothrow { 480 alias Mutable!(T) U; 481 U[] ret = newStack!(U)(data.length, state); 482 ret[] = data; 483 return ret; 501 void rangeCopy(T, U)(T to, U from) { 502 static if(is(typeof(to[] = from[]))) { 503 to[] = from[]; 504 } else static if(isRandomAccessRange!(T)) { 505 size_t i = 0; 506 foreach(elem; from) { 507 to[i++] = elem; 508 } 509 } 510 } 511 512 /**Creates a duplicate of a range on the TempAlloc stack. Much faster 513 * if the range has a length, but works even if it doesn't.*/ 514 Unqual!(ElementType!(T))[] tempdup(T)(T data) 515 if(isInputRange!(T)) { 516 alias ElementType!(T) E; 517 alias Unqual!(E) U; 518 static if(dstats.base.hasLength!(T)) { 519 U[] ret = newStack!(U)(data.length); 520 rangeCopy(ret, data); 521 return ret; 522 } else { 523 auto state = TempAlloc.getState; 524 auto startPtr = TempAlloc(0, state); 525 size_t bytesCopied = 0; 526 527 while(!data.empty) { // Make sure range interface is being used. 528 auto elem = data.front; 529 if(state.used + U.sizeof <= TempAlloc.blockSize) { 530 data.popFront; 531 *(cast(U*) (startPtr + bytesCopied)) = elem; 532 bytesCopied += U.sizeof; 533 state.used += U.sizeof; 534 } else { 535 if(bytesCopied + U.sizeof >= TempAlloc.blockSize / 2) { 536 // Then just heap-allocate. 537 U[] result = newVoid!(U)(bytesCopied / U.sizeof); 538 result[] = (cast(U*) startPtr)[0..result.length]; 539 finishCopy(result, data); 540 TempAlloc.free; 541 state.lastAlloc[++state.totalAllocs] = result.ptr; 542 return result; 543 } else { 544 U[] oldData = (cast(U*) startPtr)[0..bytesCopied / U.sizeof]; 545 state.used -= bytesCopied; 546 state.totalAllocs--; 547 U[] newArray = newStack!(U)(bytesCopied / U.sizeof + 1, state); 548 newArray[0..oldData.length] = oldData[]; 549 startPtr = state.space; 550 newArray[$ - 1] = elem; 551 bytesCopied += U.sizeof; 552 data.popFront; 553 } 554 } 555 } 556 auto rem = bytesCopied % TempAlloc.alignBytes; 557 if(rem != 0) { 558 auto toAdd = 16 - rem; 559 if(state.used + toAdd < TempAlloc.blockSize) { 560 state.used += toAdd; 561 } else { 562 state.used = TempAlloc.blockSize; 563 } 564 } 565 return (cast(U*) startPtr)[0..bytesCopied / U.sizeof]; 566 } 567 } 568 569 private void finishCopy(T, U)(ref T[] result, U range) { 570 auto app = appender(&result); 571 foreach(elem; range) { 572 app.put(elem); 573 } 574 } 575 576 unittest { 577 // Create quick and dirty finite but lengthless range. 578 struct Count { 579 uint num; 580 uint upTo; 581 uint front() { 582 return num; 583 } 584 void popFront() { 585 num++; 586 } 587 bool empty() { 588 return num >= upTo; 589 } 590 } 591 592 TempAlloc(1024 * 1024 * 3); 593 Count count; 594 count.upTo = 1024 * 1025; 595 auto asArray = tempdup(count); 596 foreach(i, elem; asArray) { 597 assert(i == elem, to!(string)(i) ~ "\t" ~ to!(string)(elem)); 598 } 599 assert(asArray.length == 1024 * 1025); 600 TempAlloc.free; 601 TempAlloc.free; 602 while(TempAlloc.getState.freelist.index > 0) { 603 TempAlloc.getState.freelist.pop; 604 } 605 writeln("Passed tempdup unittest."); 484 606 } 485 607 … … 495 617 * are allocated, due to caching of data stored in thread-local 496 618 * storage.*/ 497 i nvariantchar[] newFrame =619 immutable char[] newFrame = 498 620 "TempAlloc.frameInit; scope(exit) TempAlloc.frameFree;"; 499 621 … … 509 631 TempAlloc(TempAlloc.alignBytes); 510 632 } 511 assert(TempAlloc.getState.nblocks == 5 );633 assert(TempAlloc.getState.nblocks == 5, to!string(TempAlloc.getState.nblocks)); 512 634 assert(TempAlloc.getState.nfree == 0); 513 635 foreach(i; 0..nIter) { … … 580 702 TempAlloc.free; 581 703 } 582 fprintf(stderr, "Passed TempAlloc test.\n\0".ptr); 583 } 704 fprintf(std.c.stdio.stderr, "Passed TempAlloc test.\n\0".ptr); 705 } 706 707 struct SHNode(K, V) { 708 alias SHNode!(K, V) SomeType; 709 SomeType* next; 710 Unqual!(K) key; 711 Unqual!(V) val; 712 } 713 714 /**Forward range struct for iterating over the keys or values of a 715 * StackHash or StackSet. The lifetime of this object must not exceed that 716 * of the underlying StackHash or StackSet.*/ 717 struct HashRange(K, S, bool vals = false) { 718 private: 719 S* set; 720 size_t index; 721 S.Node* next; 722 K* frontElem; 723 size_t _length; 724 725 this(S* set) { 726 this.set = set; 727 if(set.rNext[0] == set.usedSentinel) { 728 this.popFront; 729 } else { 730 static if(vals) { 731 frontElem = set.rVals.ptr; 732 } else { 733 frontElem = set.rKeys.ptr; 734 } 735 next = set.rNext[0]; 736 } 737 this._length = set.length; 738 } 739 740 public: 741 /// 742 void popFront() { 743 this._length--; 744 if(next is null) { 745 do { 746 index++; 747 if(index >= set.rNext.length) { 748 index = size_t.max; // Sentinel for empty. 749 return; 750 } 751 next = set.rNext[index]; 752 } while(set.rNext[index] == set.usedSentinel); 753 static if(vals) { 754 frontElem = &(set.rVals[index]); 755 } else { 756 frontElem = &(set.rKeys[index]); 757 } 758 } else { 759 static if(vals) { 760 frontElem = &(next.val); 761 } else { 762 frontElem = &(next.key); 763 } 764 next = next.next; 765 } 766 } 767 768 /// 769 Unqual!(K) front() { 770 return *frontElem; 771 } 772 773 /// 774 bool empty() { 775 return index == size_t.max; 776 } 777 778 /// 779 size_t length() { 780 return _length; 781 } 782 } 783 784 /**A hash table that allocates its memory on TempAlloc. Good for building a 785 * temporary hash tables that will not escape the current scope. 786 * 787 * To avoid TempAlloc memory leaks, use mixin(newFrame). 788 * 789 * Examples: 790 * --- 791 * mixin(newFrame); // To make sure all memory gets freed at end of scope. 792 * auto ss = StackHash!(uint)(5); 793 * foreach(i; 0..5) { 794 * ss[i]++; 795 * } 796 * assert(ss[3] == 1); 797 * --- 798 */ 799 struct StackHash(K, V) { 800 private: 801 alias SHNode!(K, V) Node; 802 803 // Using parallel arrays instead of structs to save on alignment overhead: 804 Unqual!(K)[] rKeys; 805 Unqual!(V)[] rVals; 806 Unqual!(Node*)[] rNext; 807 808 TempAlloc.State TAState; 809 TypeInfo keyTI; 810 size_t _length; 811 Node* usedSentinel; 812 813 Node* newNode(K key) { 814 Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState); 815 ret.key = key; 816 ret.val = V.init; 817 ret.next = null; 818 return ret; 819 } 820 821 Node* newNode(K key, V val) { 822 Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState); 823 ret.key = key; 824 ret.val = val; 825 ret.next = null; 826 return ret; 827 } 828 829 hash_t getHash(K key) { 830 static if(is(K : long) && K.sizeof <= hash_t.sizeof) { 831 hash_t hash = cast(hash_t) key; 832 } else static if(__traits(compiles, key.toHash())) { 833 hash_t hash = key.toHash(); 834 } else { 835 hash_t hash = keyTI.getHash(&key); 836 } 837 hash %= rNext.length; 838 return hash; 839 } 840 841 842 public: 843 /**Due to the nature of TempAlloc, you must specify on object creation 844 * the approximate number of elements your table will have. Too large a 845 * number will waste space and incur poor cache performance. Too low a 846 * number will make this struct perform like a linked list. Generally, 847 * if you're building a table from some other range, some fraction of the 848 * size of that range is a good guess.*/ 849 this(size_t nElem) { 850 // Obviously, the caller can never mean zero, because this struct 851 // can't work at all with nElem == 0, so assume it's a mistake and fix 852 // it here. 853 if(nElem == 0) 854 nElem++; 855 TAState = TempAlloc.getState; 856 rKeys = newStack!(K)(nElem, TAState); 857 rVals = newStack!(V)(nElem, TAState); 858 rNext = newStack!(Node*)(nElem, TAState); 859 usedSentinel = cast(Node*) rNext.ptr; 860 foreach(ref rKey; rKeys) { 861 rKey = K.init; 862 } 863 foreach(ref rVal; rVals) { 864 rVal = V.init; 865 } 866 foreach(ref r; rNext) { 867 r = usedSentinel; 868 } 869 keyTI = typeid(K); 870 } 871 872 /**Index an element of the range. If it does not exist, it will be created 873 * and initialized to V.init.*/ 874 ref V opIndex(K key) { 875 hash_t hash = getHash(key); 876 877 if(rNext[hash] == usedSentinel) { 878 rKeys[hash] = key; 879 rNext[hash] = null; 880 _length++; 881 return rVals[hash]; 882 } else if(rKeys[hash] == key) { 883 return rVals[hash]; 884 } else { // Collision. Start chaining. 885 Node** next = &(rNext[hash]); 886 while(*next !is null) { 887 if((**next).key == key) { 888 return (**next).val; 889 } 890 next = &((**next).next); 891 } 892 *next = newNode(key); 893 _length++; 894 return (**next).val; 895 } 896 } 897 898 /// 899 V opIndexAssign(V val, K key) { 900 hash_t hash = getHash(key); 901 902 if(rNext[hash] == usedSentinel) { 903 rKeys[hash] = key; 904 rVals[hash] = val; 905 rNext[hash] = null; 906 _length++; 907 return val; 908 } else if(rKeys[hash] == key) { 909 rVals[hash] = val; 910 return val; 911 } else { // Collision. Start chaining. 912 Node** next = &(rNext[hash]); 913 while(*next !is null) { 914 if((**next).key == key) { 915 (**next).val = val; 916 return val; 917 } 918 next = &((**next).next); 919 } 920 _length++; 921 *next = newNode(key, val); 922 return val; 923 } 924 } 925 926 /// 927 V* opIn_r(K key) { 928 hash_t hash = getHash(key); 929 930 if(rNext[hash] == usedSentinel) { 931 return null; 932 } else if(rKeys[hash] == key) { 933 return &(rVals[hash]); 934 } else { // Collision. Start chaining. 935 Node* next = rNext[hash]; 936 while(next !is null) { 937 if(next.key == key) { 938 return &(next.val); 939 } 940 next = next.next; 941 } 942 return null; 943 } 944 } 945 946 /// 947 void remove(K key) { 948 hash_t hash = getHash(key); 949 950 Node** next = &(rNext[hash]); 951 if(rNext[hash] == usedSentinel) { 952 return; 953 } else if(rKeys[hash] == key) { 954 _length--; 955 if(rNext[hash] is null) { 956 rKeys[hash] = K.init; 957 rVals[hash] = V.init; 958 rNext[hash] = usedSentinel; 959 return; 960 } else { 961 rKeys[hash] = (**next).key; 962 rVals[hash] = (**next).val; 963 rNext[hash] = (**next).next; 964 return; 965 } 966 } else { // Collision. Start chaining. 967 while(*next !is null) { 968 if((**next).key == key) { 969 _length--; 970 *next = (**next).next; 971 break; 972 } 973 next = &((**next).next); 974 } 975 return; 976 } 977 } 978 979 /**Returns a forward range to iterate over the keys of this table. 980 * The lifetime of the HashRange must not exceed the lifetime of this 981 * StackHash.*/ 982 HashRange!(K, StackHash!(K, V)) keys() { 983 return typeof(return)(&this); 984 } 985 986 /**Returns a forward range to iterate over the values of this table. 987 * The lifetime of the HashRange must not exceed the lifetime of this 988 * StackHash.*/ 989 HashRange!(V, StackHash!(K, V), true) values() { 990 return typeof(return)(&this); 991 } 992 993 /// 994 size_t length() const { 995 return _length; 996 } 997 998 real efficiency() { 999 uint used = 0; 1000 foreach(root; rNext) { 1001 if(root != usedSentinel) { 1002 used++; 1003 } 1004 } 1005 return cast(real) used / rNext.length; 1006 } 1007 } 1008 1009 unittest { 1010 alias StackHash!(string, uint) mySh; 1011 mixin(newFrame); 1012 auto data = mySh(2); // Make sure we get some collisions. 1013 data["foo"] = 1; 1014 data["bar"] = 2; 1015 data["baz"] = 3; 1016 data["waldo"] = 4; 1017 assert(!("foobar" in data)); 1018 assert(*("foo" in data) == 1); 1019 assert(*("bar" in data) == 2); 1020 assert(*("baz" in data) == 3); 1021 assert(*("waldo" in data) == 4); 1022 assert(data["foo"] == 1); 1023 assert(data["bar"] == 2); 1024 assert(data["baz"] == 3); 1025 assert(data["waldo"] == 4); 1026 auto myKeys = toArray(data.keys); 1027 qsort(myKeys); 1028 assert(myKeys == cast(string[]) ["bar", "baz", "foo", "waldo"]); 1029 auto myValues = toArray(data.values); 1030 qsort(myValues); 1031 assert(myValues == [1U, 2, 3, 4]); 1032 { 1033 auto k = data.keys; 1034 auto v = data.values; 1035 while(!k.empty) { 1036 assert(data[k.front] == v.front); 1037 k.popFront; 1038 v.popFront; 1039 } 1040 } 1041 foreach(v; data.values) { 1042 assert(v > 0 && v < 5); 1043 } 1044 1045 // Test remove. 1046 1047 alias StackHash!(uint, uint) mySh2; 1048 auto foo = mySh2(7); 1049 for(uint i = 0; i < 200; i++) { 1050 foo[i] = i; 1051 } 1052 assert(foo.length == 200); 1053 for(uint i = 0; i < 200; i += 2) { 1054 foo.remove(i); 1055 } 1056 foreach(i; 20..200) { 1057 foo.remove(i); 1058 } 1059 for(uint i = 0; i < 20; i++) { 1060 if(i & 1) { 1061 assert(*(i in foo) == i); 1062 } else { 1063 assert(!(i in foo)); 1064 } 1065 } 1066 auto vals = toArray(foo.values); 1067 assert(foo.length == 10); 1068 assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]); 1069 1070 writeln("Passed StackHash test."); 1071 } 1072 1073 /**A hash set that allocates its memory on TempAlloc. Good for building a 1074 * temporary set that will not escape the current scope. 1075 * 1076 * To avoid TempAlloc memory leaks, use mixin(newFrame). 1077 * 1078 * Examples: 1079 * --- 1080 * mixin(newFrame); // To make sure all memory gets freed at end of scope. 1081 * auto ss = StackSet!(uint)(5); 1082 * foreach(i; 0..5) { 1083 * ss.insert(i); 1084 * } 1085 * assert(3 in ss); 1086 * --- 1087 */ 1088 struct StackSet(K) { 1089 private: 1090 // Choose smallest representation of the data. 1091 struct Node1 { 1092 Node1* next; 1093 K key; 1094 } 1095 1096 struct Node2 { 1097 K key; 1098 Node2* next; 1099 } 1100 1101 static if(Node1.sizeof < Node2.sizeof) { 1102 alias Node1 Node; 1103 } else { 1104 alias Node2 Node; 1105 } 1106 1107 Unqual!(K)[] rKeys; 1108 Node*[] rNext; 1109 TempAlloc.State TAState; 1110 size_t _length; 1111 Node* usedSentinel; 1112 1113 Node* newNode(K key) { 1114 Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState); 1115 ret.key = key; 1116 ret.next = null; 1117 return ret; 1118 } 1119 1120 hash_t getHash(K key) { 1121 static if(is(K : long) && K.sizeof <= hash_t.sizeof) { 1122 hash_t hash = cast(hash_t) key; 1123 } else static if(__traits(compiles, key.toHash())) { 1124 hash_t hash = key.toHash(); 1125 } else { 1126 hash_t hash = typeid(K).getHash(&key); 1127 } 1128 hash %= rNext.length; 1129 return hash; 1130 } 1131 1132 public: 1133 /**Due to the nature of TempAlloc, you must specify on object creation 1134 * the approximate number of elements your set will have. Too large a 1135 * number will waste space and incur poor cache performance. Too low a 1136 * number will make this struct perform like a linked list. Generally, 1137 * if you're building a set from some other range, some fraction of the 1138 * size of that range is a good guess.*/ 1139 this(size_t nElem) { 1140 // Obviously, the caller can never mean zero, because this struct 1141 // can't work at all with nElem == 0, so assume it's a mistake and fix 1142 // it here. 1143 if(nElem == 0) 1144 nElem++; 1145 TAState = TempAlloc.getState; 1146 rNext = newStack!(Node*)(nElem, TAState); 1147 rKeys = newStack!(Unqual!(K))(nElem, TAState); 1148 usedSentinel = cast(Node*) rNext.ptr; 1149 foreach(ref root; rKeys) { 1150 root = K.init; 1151 } 1152 foreach(ref root; rNext) { 1153 root = usedSentinel; 1154 } 1155 } 1156 1157 /// 1158 void insert(K key) { 1159 hash_t hash = getHash(key); 1160 1161 if(rNext[hash] == usedSentinel) { 1162 rKeys[hash] = key; 1163 rNext[hash] = null; 1164 _length++; 1165 return; 1166 } else if(rKeys[hash] == key) { 1167 return; 1168 } else { // Collision. Start chaining. 1169 Node** next = &(rNext[hash]); 1170 while(*next !is null) { 1171 if((**next).key == key) { 1172 return; 1173 } 1174 next = &((**next).next); 1175 } 1176 *next = newNode(key); 1177 _length++; 1178 return; 1179 } 1180 } 1181 1182 /**Returns a forward range of the elements of this struct. The range's 1183 * lifetime must not exceed the lifetime of this object.*/ 1184 HashRange!(K, typeof(this)) elems() { 1185 auto ret = typeof(return)(&this); 1186 return ret; 1187 } 1188 1189 /// 1190 bool opIn_r(K key) { 1191 hash_t hash = getHash(key); 1192 1193 if(rNext[hash] == usedSentinel) { 1194 return false; 1195 } else if(rKeys[hash] == key) { 1196 return true; 1197 } else { // Collision. Start chaining. 1198 Node* next = rNext[hash]; 1199 while(next !is null) { 1200 if(next.key == key) { 1201 return true; 1202 } 1203 next = next.next; 1204 } 1205 return false; 1206 } 1207 } 1208 1209 /// 1210 void remove(K key) { 1211 hash_t hash = getHash(key); 1212 1213 Node** next = &(rNext[hash]); 1214 if(rNext[hash] == usedSentinel) { 1215 return; 1216 } else if(rKeys[hash] == key) { 1217 _length--; 1218 if(rNext[hash] is null) { 1219 rKeys[hash] = K.init; 1220 rNext[hash] = usedSentinel; 1221 return; 1222 } else { 1223 rKeys[hash] = (**next).key; 1224 rNext[hash] = (**next).next; 1225 return; 1226 } 1227 } else { // Collision. Start chaining. 1228 while(*next !is null) { 1229 if((**next).key == key) { 1230 _length--; 1231 *next = (**next).next; 1232 break; 1233 } 1234 next = &((**next).next); 1235 } 1236 return; 1237 } 1238 } 1239 1240 /// 1241 size_t length() { 1242 return _length; 1243 } 1244 } 1245 1246 unittest { 1247 mixin(newFrame); 1248 alias StackSet!(uint) mySS; 1249 mySS set = mySS(12); 1250 foreach(i; 0..20) { 1251 set.insert(i); 1252 } 1253 assert(toArray(set.elems).qsort == seq(0U, 20U)); 1254 1255 for(uint i = 0; i < 20; i += 2) { 1256 set.remove(i); 1257 } 1258 1259 foreach(i; 0..20) { 1260 if(i & 1) { 1261 assert(i in set); 1262 } else { 1263 assert(!(i in set)); 1264 } 1265 } 1266 uint[] contents; 1267 1268 foreach(elem; set.elems) { 1269 contents ~= elem; 1270 } 1271 assert(contents.qsort == [1U,3,5,7,9,11,13,15,17,19]); 1272 writeln("Passed StackSet test."); 1273 } trunk/base.d
r24 r28 1 1 /**Relatively low-level primitives on which to build higher-level math/stat 2 2 * functionality. Some are used internally, some are just things that may be 3 * useful to users of this library. 4 * 5 * Author: David Simcha 6 * 7 * Copyright (c) 2009, David Simcha 3 * useful to users of this library. This module is starting to take on the 4 * appearance of a small utility library. 5 * 6 * Author: David Simcha*/ 7 /* 8 * You may use this software under your choice of either of the following 9 * licenses. YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 10 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 11 * THE BSD LICENSE. IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 12 * TO OBEY THE TERMS OF THE PHOBOS LICENSE. IF YOU ARE A LAWYER LOOKING FOR 13 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 14 * GET A LIFE. 15 * 16 * ---------------------Phobos License: --------------------------------------- 17 * 18 * Copyright (C) 2008-2009 by David Simcha. 19 * 20 * This software is provided 'as-is', without any express or implied 21 * warranty. In no event will the authors be held liable for any damages 22 * arising from the use of this software. 23 * 24 * Permission is granted to anyone to use this software for any purpose, 25 * including commercial applications, and to alter it and redistribute it 26 * freely, in both source and binary form, subject to the following 27 * restrictions: 28 * 29 * o The origin of this software must not be misrepresented; you must not 30 * claim that you wrote the original software. If you use this software 31 * in a product, an acknowledgment in the product documentation would be 32 * appreciated but is not required. 33 * o Altered source versions must be plainly marked as such, and must not 34 * be misrepresented as being the original software. 35 * o This notice may not be removed or altered from any source 36 * distribution. 37 * 38 * --------------------BSD License: ----------------------------------------- 39 * 40 * Copyright (c) 2008-2009, David Simcha 8 41 * All rights reserved. 9 42 * … … 31 64 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 32 65 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 66 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 67 */ 34 68 35 69 module dstats.base; 36 70 37 71 public import std.math, std.traits, dstats.gamma, dstats.alloc; 38 private import dstats.sort, std.c.stdlib, std.bigint, 39 std.functional, std.algorithm ;72 private import dstats.sort, std.c.stdlib, std.bigint, std.typecons, 73 std.functional, std.algorithm, std.range, std.bitmanip; 40 74 41 75 invariant real[] staticLogFacTable; … … 43 77 enum : size_t { 44 78 staticFacTableLen = 10_000, 79 } 80 81 // Tests whether T is an input range whose elements can be implicitly 82 // converted to reals. 83 template realInput(T) { 84 enum realInput = isInputRange!(T) && is(ElementType!(T) : real); 85 } 86 87 // See Bugzilla 2873. This can be removed once that's fixed. 88 template hasLength(R) { 89 enum bool hasLength = is(typeof(R.init.length) : ulong) || 90 is(typeof(R.init.length()) : ulong); 45 91 } 46 92 … … 78 124 } 79 125 126 /**Converts any range to an array on the GC heap by the most efficient means 127 * available. If it is already an array, duplicates the range.*/ 128 Unqual!(ElementType!(T))[] toArray(T)(T range) if(isInputRange!(T)) { 129 static if(isArray!(T)) { 130 return range.dup; 131 } else static if(hasLength!(T)) { 132 auto ret = newVoid!(Unqual!(ElementType!(T)))(range.length); 133 static if(is(typeof(ret[] = range[]))) { 134 ret[] = range[]; 135 } else { 136 size_t pos = 0; 137 foreach(elem; range) { 138 ret[pos++] = elem; 139 } 140 } 141 return ret; 142 } else { 143 Unqual!(ElementType!(T))[] ret; 144 mate(range, appender(&ret)); 145 return ret; 146 } 147 } 148 149 /**Writes the contents of an input range to an output range. 150 * 151 * Returns: The output range.*/ 152 O mate(I, O)(I input, O output) 153 if(isInputRange!(I) && isOutputRange!(O, ElementType!(I))) { 154 foreach(elem; input) { 155 output.put(elem); 156 } 157 return output; 158 } 159 80 160 /**Bins data into nbin equal width bins, indexed from 81 161 * 0 to nbin - 1, with 0 being the smallest bin, etc. 82 162 * The values returned are the counts for each bin. Returns results on the GC 83 * heap by default, but uses TempAlloc stack if alloc == Alloc.STACK.*/ 84 Ret[] binCounts(Ret = ushort, T)(const T[] data, uint nbin, Alloc alloc = Alloc.HEAP) 163 * heap by default, but uses TempAlloc stack if alloc == Alloc.STACK. 164 * 165 * Works with any forward range with elements implicitly convertible to real.*/ 166 Ret[] binCounts(Ret = ushort, T)(T data, uint nbin, Alloc alloc = Alloc.HEAP) 167 if(isForwardRange!(T) && realInput!(T)) 85 168 in { 86 assert(data.length > 0);87 169 assert(nbin > 0); 88 170 } body { 89 T min = data[0], max = data[0]; 171 alias Unqual!(ElementType!(T)) E; 172 E min = data[0], max = data[0]; 90 173 foreach(elem; data[1..$]) { 91 174 if(elem > max) … … 94 177 min = elem; 95 178 } 96 Trange = max - min;179 E range = max - min; 97 180 98 181 Ret[] bins; … … 130 213 /**Bins data into nbin equal width bins, indexed from 131 214 * 0 to nbin - 1, with 0 being the smallest bin, etc. 132 * The values returned are the bin index for each element. Returns on GC 133 * heap by default, but TempAlloc stack if alloc == Alloc.STACK.*/ 134 Ret[] bin(Ret = ushort, T)(const T[] data, uint nbin, Alloc alloc = Alloc.HEAP) 215 * The values returned are the bin index for each element. 216 * 217 * Returns on GC heap by default, but TempAlloc stack if alloc == Alloc.STACK. 218 * Works with any forward range with elements implicitly convertible to real. 219 * 220 * Default return type is ubyte, because in the dstats.infotheory, 221 * entropy() and related functions specialize on ubytes, and become 222 * substandially faster. However, if you're using more than 255 bins, 223 * you'll have to provide a different return type as a template parameter.*/ 224 Ret[] bin(Ret = ubyte, T)(T data, uint nbin, Alloc alloc = Alloc.HEAP) 225 if(isForwardRange!(T) && realInput!(T) && isIntegral!(Ret)) 135 226 in { 136 assert( data.length > 0);227 assert(nbin <= Ret.max + 1); 137 228 assert(nbin > 0); 138 229 } body { 139 Mutable!(T) min = data[0], max = data[0]; 140 foreach(elem; data[1..$]) { 230 alias ElementType!(T) E; 231 Unqual!(E) min = data.front, max = data.front; 232 auto dminmax = data; 233 dminmax.popFront; 234 foreach(elem; dminmax) { 141 235 if(elem > max) 142 236 max = elem; … … 144 238 min = elem; 145 239 } 146 Trange = max - min;240 E range = max - min; 147 241 148 242 Ret[] bins; … … 171 265 double[] data = [0.0, .01, .03, .05, .11, .3, .5, .7, .89, 1]; 172 266 auto res = bin(data, 10); 173 assert(res == [cast(u short) 0, 0, 0, 0, 1, 3, 5, 7, 8, 9]);267 assert(res == [cast(ubyte) 0, 0, 0, 0, 1, 3, 5, 7, 8, 9]); 174 268 res = bin(data, 10, Alloc.STACK); 175 assert(res == [cast(u short) 0, 0, 0, 0, 1, 3, 5, 7, 8, 9]);269 assert(res == [cast(ubyte) 0, 0, 0, 0, 1, 3, 5, 7, 8, 9]); 176 270 TempAlloc.free; 177 271 writeln("Passed bin unittest."); … … 180 274 /**Bins data into nbin equal frequency bins, indexed from 181 275 * 0 to nbin - 1, with 0 being the smallest bin, etc. 182 * The values returned are the bin index for each element. Returns on GC 183 * heap by default, but TempAlloc stack if alloc == Alloc.STACK.*/ 184 Ret[] frqBin(Ret = ushort, T)(const T[] data, uint nbin, Alloc alloc = Alloc.HEAP) 276 * The values returned are the bin index for each element. 277 * 278 * Returns on GC heap by default, but TempAlloc stack if alloc == Alloc.STACK. 279 * Works with any forward range with elements implicitly convertible to real 280 * and a length property. 281 * 282 * Default return type is ubyte, because in the dstats.infotheory, 283 * entropy() and related functions specialize on ubytes, and become 284 * substandially faster. However, if you're using more than 256 bins, 285 * you'll have to provide a different return type as a template parameter.*/ 286 Ret[] frqBin(Ret = ubyte, T)(T data, uint nbin, Alloc alloc = Alloc.HEAP) 287 if(realInput!(T) && isForwardRange!(T) && hasLength!(T) && isIntegral!(Ret)) 185 288 in { 186 assert(data.length > 0);187 289 assert(nbin > 0); 188 290 assert(nbin <= data.length); 291 assert(nbin <= Ret.max + 1); 189 292 } body { 190 293 Ret[] result; … … 198 301 foreach(i, ref e; perm) 199 302 e = i; 200 auto dd = data.tempdup;303 auto dd = tempdup(data); 201 304 qsort(dd, perm); 202 305 TempAlloc.free; … … 217 320 double[] data = [5U, 1, 3, 8, 30, 10, 7]; 218 321 auto res = frqBin(data, 3); 219 assert(res == [cast(u short) 0, 0, 0, 1, 2, 2, 1]);322 assert(res == [cast(ubyte) 0, 0, 0, 1, 2, 2, 1]); 220 323 data = [3, 1, 4, 1, 5, 9, 2, 6, 5]; 221 324 res = frqBin(data, 4, Alloc.STACK); 222 assert(res == [cast(u short) 1, 0, 1, 0, 2, 3, 0, 3, 2]);325 assert(res == [cast(ubyte) 1, 0, 1, 0, 2, 3, 0, 3, 2]); 223 326 data = [3U, 1, 4, 1, 5, 9, 2, 6, 5, 3, 4, 8, 9, 7, 9, 2]; 224 327 res = frqBin(data, 4); 225 assert(res == [cast(u short) 1, 0, 1, 0, 2, 3, 0, 2, 2, 1, 1, 3, 3, 2, 3, 0]);328 assert(res == [cast(ubyte) 1, 0, 1, 0, 2, 3, 0, 2, 2, 1, 1, 3, 3, 2, 3, 0]); 226 329 TempAlloc.free; 227 330 writeln("Passed frqBin unittest."); … … 229 332 230 333 /**Generates a sequence from [start..end] by increment. Includes start, 231 * excludes end. 334 * excludes end. Does so eagerly as an array. 232 335 * 233 336 * Examples: … … 254 357 auto s = seq(0, 5); 255 358 assert(s == [0, 1, 2, 3, 4]); 256 writeln( stderr,"Passed seq test.");359 writeln("Passed seq test."); 257 360 } 258 361 259 362 /**Given an input array, outputs an array containing the rank from 260 363 * [1, input.length] corresponding to each element. Ties are dealt with by 261 * averaging. This function duplicates the input array, and does not reorder364 * averaging. This function duplicates the input range, and does not reorder 262 365 * it. Return type is float[] by default, but if you are sure you have no ties, 263 366 * ints can be used for efficiency, and if you need more precision when 264 367 * averaging ties, you can use double or real. 368 * 369 * Works with any input range. 265 370 * 266 371 * Examples: … … 271 376 * ---*/ 272 377 Ret[] rank(Ret = float, T)(const T[] input) { 273 auto iDup = input.tempdup;378 auto iDup = tempdup(input); 274 379 scope(exit) TempAlloc.free; 275 380 return rankSort!(Ret)(iDup); 276 381 } 277 382 278 /**Same as rank(), but sorts the input arrayin ascending order rather than383 /**Same as rank(), but sorts the input range in ascending order rather than 279 384 * duping it and working on a copy. The array returned will still be 280 385 * identical to that returned by rank(), i.e. the rank of each element will 281 386 * correspond to the ranks of the elements in the input array before sorting. 387 * 388 * Works with any random access range with a length property. 282 389 * 283 390 * Examples: … … 287 394 * assert(test == [1U, 2, 3, 4, 5]); 288 395 * ---*/ 289 Ret[] rankSort(Ret = float, T)(T[] input) { 396 Ret[] rankSort(Ret = float, T)(T input) 397 if(isRandomAccessRange!(T)) { 290 398 Ret[] ranks = newVoid!(Ret)(input.length); 291 399 rankSort!(Ret, T)(input, ranks); … … 294 402 295 403 // Speed hack used internally. 296 void rankSort(Ret, T)(T []input, Ret[] ranks) {404 void rankSort(Ret, T)(T input, Ret[] ranks) { 297 405 size_t[] perms = newStack!(size_t)(input.length); 298 406 scope(exit) TempAlloc.free; … … 316 424 assert(rankSort(test) == [3.5f, 5f, 3.5f, 1f, 2f]); 317 425 assert(test == [1U,2,3,3,5]); 318 writeln( stderr,"Passed rank test.");426 writeln("Passed rank test."); 319 427 } 320 428 321 429 // Used internally by rank(). 322 void averageTies(T, U)(T []sortedInput, U[] ranks, uint[] perms) nothrow430 void averageTies(T, U)(T sortedInput, U[] ranks, uint[] perms) nothrow 323 431 in { 324 432 assert(sortedInput.length == ranks.length); … … 350 458 } 351 459 352 /**Returns an AA of counts of every element in input. 460 /**Returns an AA of counts of every element in input. Works w/ any input range. 353 461 * 354 462 * Examples: … … 360 468 * assert(frq[4] == 1); 361 469 * ---*/ 362 uint[T] frequency(T)(const T[] input) pure nothrow { 363 uint[T] output; 470 uint[ElementType!(T)] frequency(T)(T input) 471 if(isInputRange!(T)) { 472 typeof(return) output; 364 473 foreach(i; input) { 365 474 output[i]++; … … 377 486 } 378 487 379 unittest {380 uint[int] temp=frequency([1,2,2,1,2]);381 assert(temp[1]==2);382 assert(temp[2]==3);383 writefln("Passed frequency unittest.");384 }385 386 488 /// 387 intsign(T)(T num) pure nothrow {489 T sign(T)(T num) pure nothrow { 388 490 if (num > 0) return 1; 389 491 if (num < 0) return -1; … … 403 505 * function is used, because caching would take up too much memory, and if 404 506 * done lazily, would cause threading issues.*/ 405 real logFactorial(uint n) {507 real logFactorial(uint n) { 406 508 //Input is uint, can't be less than 0, no need to check. 407 509 if(n < staticFacTableLen) { … … 440 542 } 441 543 442 /// No, nothing this horribly inefficient is used internally.443 BigInt factorial(uint N) {444 BigInt result = 1;445 for(uint i = 2; i <= N; i++) {446 result *= i;447 }448 return result;449 }450 451 unittest {452 assert(factorial(4) == 24);453 assert(factorial(5) == 120);454 assert(factorial(6) == 720);455 assert(factorial(7) == 5040);456 assert(factorial(3) == 6);457 writefln("Passed factorial test.");458 }544 /////No, nothing this horribly inefficient is used internally. 545 //BigInt factorial(uint N) { 546 // BigInt result = 1; 547 // for(uint i = 2; i <= N; i++) { 548 // result *= i; 549 // } 550 // return result; 551 //} 552 // 553 //unittest { 554 // assert(factorial(4) == 24); 555 // assert(factorial(5) == 120); 556 // assert(factorial(6) == 720); 557 // assert(factorial(7) == 5040); 558 // assert(factorial(3) == 6); 559 // writefln("Passed factorial test."); 560 //} 459 561 460 562 /**A struct that generates all possible permutations of a sequence, … … 463 565 * are references to the internal permutation state. This is dangerous, but 464 566 * necessary for performance. Therefore, you 465 * will have to dup them if you expect them not to change. 567 * will have to dup them if you expect them not to change. This is also 568 * the rationale for not making this struct an input range. 466 569 * 467 570 * Examples: … … 488 591 489 592 public: 490 /**Generate a sequence of seq(0, length) to permute based on. 491 * Exists only if T == uint.*/ 492 static if(is(T == uint)) { 493 this(uint length) { 494 perm = (new uint[length]).ptr; 495 foreach(i; 0..length) { 496 perm[i] = i; 497 } 498 Is = (new size_t[length]).ptr; 499 len = length; 500 } 501 } 502 503 /**Use user-provided sequence. Creates a duplicate of this sequence 593 /**Generate permutations from an input range. 594 * Create a duplicate of this sequence 504 595 * so that the original sequence is not modified.*/ 505 this(T[] input) { 506 perm = input.dup.ptr; 507 Is = (new size_t[input.length]).ptr; 508 len = input.length; 596 this(U)(U input) 597 if(isForwardRange!(U)) { 598 auto arr = toArray(input); 599 Is = (new size_t[arr.length]).ptr; 600 len = arr.length; 601 perm = arr.ptr; 509 602 } 510 603 … … 545 638 } 546 639 640 private template PermRet(T...) { 641 static if(isForwardRange!(T[0])) { 642 alias Perm!(ElementType!(T[0])) PermRet; 643 } else static if(T.length == 1) { 644 alias Perm!uint PermRet; 645 } else alias Perm!(T[0]) PermRet; 646 } 647 648 /**Create a Perm struct from a range or of a set of bounds. 649 * 650 * Note: PermRet is just a template to figure out what this should return. 651 * I would use auto if not for bug 2251. 652 * 653 * Examples: 654 * --- 655 * auto p = perm([1,2,3]); 656 * auto p = perm(5); // Permutations of integers on range [0, 5]. 657 * auto p = perm(-1, 2); // Permutations of integers on range [-1, 2]. 658 * --- 659 */ 660 PermRet!(T) perm(T...)(T stuff) { 661 alias typeof(return) rt; 662 static if(isForwardRange!(T[0])) { 663 return rt(stuff); 664 } else static if(T.length == 1) { 665 static assert(isIntegral!(T[0])); 666 return rt(seq(0U, cast(uint) stuff[0])); 667 } else { 668 return rt(seq(stuff)); 669 } 670 } 671 672 547 673 unittest { 548 674 double[][] res; 549 alias Perm!(double) PermD; 550 auto perm = PermD(cast(double[]) [1.0, 2.0, 3.0]); 551 foreach(p; perm) { 675 auto p1 = perm(cast(double[]) [1.0, 2.0, 3.0]); 676 foreach(p; p1) { 552 677 res ~= p.dup; 553 678 } 554 assert(res.canFind([1.0, 2.0, 3.0])); 555 assert(res.canFind([1.0, 3.0, 2.0])); 556 assert(res.canFind([2.0, 1.0, 3.0])); 557 assert(res.canFind([2.0, 3.0, 1.0])); 558 assert(res.canFind([3.0, 1.0, 2.0])); 559 assert(res.canFind([3.0, 2.0, 1.0])); 679 sort(res); 680 assert(res.canFindSorted([1.0, 2.0, 3.0])); 681 assert(res.canFindSorted([1.0, 3.0, 2.0])); 682 assert(res.canFindSorted([2.0, 1.0, 3.0])); 683 assert(res.canFindSorted([2.0, 3.0, 1.0])); 684 assert(res.canFindSorted([3.0, 1.0, 2.0])); 685 assert(res.canFindSorted([3.0, 2.0, 1.0])); 560 686 assert(res.length == 6); 561 687 uint[][] res2; 562 alias Perm!(uint) PermU; 563 auto perm2 = PermU(3); 688 auto perm2 = perm(3); 564 689 foreach(p; perm2) { 565 690 res2 ~= p.dup; 566 691 } 567 assert(res2.canFind([0u, 1, 2])); 568 assert(res2.canFind([0u, 2, 1])); 569 assert(res2.canFind([1u, 0, 2])); 570 assert(res2.canFind([1u, 2, 0])); 571 assert(res2.canFind([2u, 0, 1])); 572 assert(res2.canFind([2u, 1, 0])); 692 sort(res2); 693 assert(res2.canFindSorted([0u, 1, 2])); 694 assert(res2.canFindSorted([0u, 2, 1])); 695 assert(res2.canFindSorted([1u, 0, 2])); 696 assert(res2.canFindSorted([1u, 2, 0])); 697 assert(res2.canFindSorted([2u, 0, 1])); 698 assert(res2.canFindSorted([2u, 1, 0])); 573 699 assert(res2.length == 6); 574 700 … … 576 702 // them, and they contain what they're supposed to contain, the result is 577 703 // correct. 578 auto perm3 = PermU(6);704 auto perm3 = perm(6); 579 705 bool[uint[]] table; 580 706 foreach(p; perm3) { … … 585 711 assert(elem.dup.insertionSort == [0U, 1, 2, 3, 4, 5]); 586 712 } 587 auto perm4 = PermU(5);713 auto perm4 = perm(5); 588 714 bool[uint[]] table2; 589 715 foreach(p; perm4) { … … 594 720 assert(elem.dup.insertionSort == [0U, 1, 2, 3, 4]); 595 721 } 596 writeln(stderr, "Passed Perm test."); 722 writeln("Passed Perm test."); 723 } 724 725 private template CombRet(T) { 726 static if(isForwardRange!(T)) { 727 alias Comb!(Unqual!(ElementType!(T))) CombRet; 728 } else static if(is(T : uint)) { 729 alias Comb!uint CombRet; 730 } else static assert(0, "comb can only be created with range or uint."); 731 } 732 733 /**Create a Comb struct from a range or of a set of bounds. 734 * 735 * Note: CombRet is just a template to figure out what this should return. 736 * I would use auto if not for bug 2251. 737 * 738 * Examples: 739 * --- 740 * auto p = comb([1,2,3]); 741 * auto p = comb(5); // Permutations of integers on range [0, 5]. 742 * --- 743 */ 744 CombRet!(T) comb(T)(T stuff, uint r) { 745 alias typeof(return) rt; 746 static if(isForwardRange!(T)) { 747 return rt(stuff, r); 748 } else { 749 return rt(seq(0U, cast(uint) stuff), r); 750 } 597 751 } 598 752 … … 602 756 * are const references to the internal state of the Comb object. This is 603 757 * dangerous but necessary for performance. If you want to save them past the 604 * next iteration, you'll have to dup them yourself. 758 * next iteration, you'll have to dup them yourself. This is also the 759 * rationale for not making this struct an input range. 760 * 605 761 * Examples: 606 762 * --- … … 719 875 unittest { 720 876 // Test indexing verison first. 721 auto comb1 = Comb!(uint)(5, 2);877 auto comb1 = comb(5, 2); 722 878 uint[][] vals; 723 879 foreach(c; comb1) { 724 880 vals ~= c.dup; 725 881 } 726 assert(vals.canFind([0u,1].dup)); 727 assert(vals.canFind([0u,2].dup)); 728 assert(vals.canFind([0u,3].dup)); 729 assert(vals.canFind([0u,4].dup)); 730 assert(vals.canFind([1u,2].dup)); 731 assert(vals.canFind([1u,3].dup)); 732 assert(vals.canFind([1u,4].dup)); 733 assert(vals.canFind([2u,3].dup)); 734 assert(vals.canFind([2u,4].dup)); 735 assert(vals.canFind([3u,4].dup)); 882 sort(vals); 883 assert(vals.canFindSorted([0u,1].dup)); 884 assert(vals.canFindSorted([0u,2].dup)); 885 assert(vals.canFindSorted([0u,3].dup)); 886 assert(vals.canFindSorted([0u,4].dup)); 887 assert(vals.canFindSorted([1u,2].dup)); 888 assert(vals.canFindSorted([1u,3].dup)); 889 assert(vals.canFindSorted([1u,4].dup)); 890 assert(vals.canFindSorted([2u,3].dup)); 891 assert(vals.canFindSorted([2u,4].dup)); 892 assert(vals.canFindSorted([3u,4].dup)); 736 893 assert(vals.length == 10); 737 894 738 895 // Now, test the array version. 739 auto comb2 = Comb!(uint)(seq(5U, 10U), 3);896 auto comb2 = comb(seq(5U, 10U), 3); 740 897 vals = null; 741 898 foreach(c; comb2) { 742 899 vals ~= c.dup; 743 900 } 744 assert(vals.canFind([5u, 6, 7].dup)); 745 assert(vals.canFind([5u, 6, 8].dup)); 746 assert(vals.canFind([5u, 6, 9].dup)); 747 assert(vals.canFind([5u, 7, 8].dup)); 748 assert(vals.canFind([5u, 7, 9].dup)); 749 assert(vals.canFind([5u, 8, 9].dup)); 750 assert(vals.canFind([6U, 7, 8].dup)); 751 assert(vals.canFind([6u, 7, 9].dup)); 752 assert(vals.canFind([6u, 8, 9].dup)); 753 assert(vals.canFind([7u, 8, 9].dup)); 901 sort(vals); 902 assert(vals.canFindSorted([5u, 6, 7].dup)); 903 assert(vals.canFindSorted([5u, 6, 8].dup)); 904 assert(vals.canFindSorted([5u, 6, 9].dup)); 905 assert(vals.canFindSorted([5u, 7, 8].dup)); 906 assert(vals.canFindSorted([5u, 7, 9].dup)); 907 assert(vals.canFindSorted([5u, 8, 9].dup)); 908 assert(vals.canFindSorted([6U, 7, 8].dup)); 909 assert(vals.canFindSorted([6u, 7, 9].dup)); 910 assert(vals.canFindSorted([6u, 8, 9].dup)); 911 assert(vals.canFindSorted([7u, 8, 9].dup)); 754 912 assert(vals.length == 10); 755 913 … … 777 935 } 778 936 779 // This should be encapsulated within StackHash. Workaround for bug 1629.780 struct SHNode(K, V) {781 alias SHNode!(K, V) SomeType;782 SomeType* next;783 Mutable!(K) key;784 Mutable!(V) val;785 }786 787 /* A hash table that uses TempAlloc to allocate space. Useful for building788 * a quick symbol table in a performance-critical function that can't be789 * performing tons of heap allocations. Intentionally lacking ddoc because790 * the design or even existence of this is still likely to change.*/791 struct StackHash(K, V) {792 private:793 alias SHNode!(K, V) Node;794 Node[] roots;795 TempAlloc.State TAState;796 TypeInfo keyTI;797 size_t _length;798 Node* usedSentinel;799 800 Node* newNode(K key) {801 Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState);802 ret.key = key;803 ret.val = V.init;804 ret.next = null;805 return ret;806 }807 808 Node* newNode(K key, V val) {809 Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState);810 ret.key = key;811 ret.val = val;812 ret.next = null;813 return ret;814 }815 816 hash_t getHash(K key) {817 static if(is(K : long) && K.sizeof <= hash_t.sizeof) {818 hash_t hash = cast(hash_t) key;819 } else static if(__traits(compiles, key.toHash())) {820 hash_t hash = key.toHash();821 } else {822 hash_t hash = keyTI.getHash(&key);823 }824 hash %= roots.length;825 return hash;826 }827 828 829 public:830 this(size_t nElem) {831 // Obviously, the caller can never mean zero, because this struct832 // can't work at all with nElem == 0, so assume it's a mistake and fix833 // it here.834 if(nElem == 0)835 nElem++;836 TAState = TempAlloc.getState;837 roots = newStack!(Node)(nElem, TAState);838 usedSentinel = cast(Node*) roots.ptr;839 foreach(ref root; roots) {840 root.key = K.init;841 root.val = V.init;842 root.next = usedSentinel;843 }844 keyTI = typeid(K);845 }846 847 ref V opIndex(K key) {848 hash_t hash = getHash(key);849 850 if(roots[hash].next == usedSentinel) {851 roots[hash].key = key;852 roots[hash].next = null;853 _length++;854 return roots[hash].val;855 } else if(roots[hash].key == key) {856 return roots[hash].val;857 } else { // Collision. Start chaining.858 Node** next = &(roots[hash].next);859 while(*next !is null) {860 if((**next).key == key) {861 return (**next).val;862 }863 next = &((**next).next);864 }865 *next = newNode(key);866 _length++;867 return (**next).val;868 }869 }870 871 V opIndexAssign(V val, K key) {872 hash_t hash = getHash(key);873 874 if(roots[hash].next == usedSentinel) {875 roots[hash].key = key;876 roots[hash].val = val;877 roots[hash].next = null;878 _length++;879 return val;880 } else if(roots[hash].key == key) {881 roots[hash].val = val;882 return val;883 } else { // Collision. Start chaining.884 Node** next = &(roots[hash].next);885 while(*next !is null) {886 if((**next).key == key) {887 (**next).val = val;888 return val;889 }890 next = &((**next).next);891 }892 _length++;893 *next = newNode(key, val);894 return val;895 }896 }897 898 V[] values() {899 auto space = newVoid!(V)(_length);900 return values(space);901 }902 903 V[] valStack() {904 auto space = newStack!(V)(_length);905 return values(space);906 }907 908 V[] values(V[] space) {909 size_t pos;910 foreach(r; roots) {911 if(r.next == usedSentinel)912 continue;913 space[pos++] = r.val;914 Node* next = r.next;915 while(next !is null) {916 space[pos++] = next.val;917 next = next.next;918 }919 }920 return space;921 }922 923 Mutable!(K)[] keys() const {924 auto space = newVoid!(Mutable!(K))(_length);925 return keys(space);926 }927 928 Mutable!(K)[] keyStack() const {929 auto space = newStack!(Mutable!(K))(_length);930 return keys(space);931 }932 933 Mutable!(K)[] keys(Mutable!(K)[] space) const {934 size_t pos;935 foreach(r; roots) {936 if(r.next == usedSentinel)937 continue;938 space[pos++] = r.key;939 const(Node)* next = r.next;940 while(next !is null) {941 space[pos++] = next.key;942 next = next.next;943 }944 }945 return space;946 }947 948 int opApply(int delegate(ref V value) dg) {949 int res = 0;950 outer:951 foreach(r; roots) {952 if(r.next == usedSentinel)953 continue;954 res = dg(r.val);955 if (res) break;956 Node* next = r.next;957 while(next !is null) {958 res = dg(next.val);959 if(res) break outer;960 next = next.next;961 }962 }963 return res;964 }965 966 int opApply(int delegate(ref K key, ref V value) dg) {967 int res = 0;968 outer:969 foreach(r; roots) {970 if(r.next == usedSentinel)971 continue;972 res = dg(r.key, r.val);973 if (res) break;974 Node* next = r.next;975 while(next !is null) {976 res = dg(next.key, next.val);977 if(res) break outer;978 next = next.next;979 }980 }981 return res;982 }983 984 V* opIn_r(K key) {985 hash_t hash = getHash(key);986 987 if(roots[hash].next == usedSentinel) {988 return null;989 } else if(roots[hash].key == key) {990 return &(roots[hash].val);991 } else { // Collision. Start chaining.992 Node* next = roots[hash].next;993 while(next !is null) {994 if(next.key == key) {995 return &(next.val);996 }997 next = next.next;998 }999 return null;1000 }1001 }1002 1003 void remove(K key) {1004 hash_t hash = getHash(key);1005 1006 Node** next = &(roots[hash].next);1007 if(roots[hash].next == usedSentinel) {1008 return;1009 } else if(roots[hash].key == key) {1010 _length--;1011 if(roots[hash].next is null) {1012 roots[hash].key = K.init;1013 roots[hash].val = V.init;1014 roots[hash].next = usedSentinel;1015 return;1016 } else {1017 roots[hash].key = (**next).key;1018 roots[hash].val = (**next).val;1019 roots[hash].next = (**next).next;1020 return;1021 }1022 } else { // Collision. Start chaining.1023 while(*next !is null) {1024 if((**next).key == key) {1025 _length--;1026 *next = (**next).next;1027 break;1028 }1029 next = &((**next).next);1030 }1031 return;1032 }1033 }1034 1035 size_t length() {1036 return _length;1037 }1038 1039 real efficiency() {1040 uint used = 0;1041 foreach(root; roots) {1042 if(root.next != usedSentinel) {1043 used++;1044 }1045 }1046 return cast(real) used / roots.length;1047 }1048 }1049 1050 unittest {1051 alias StackHash!(string, uint) mySh;1052 mixin(newFrame);1053 auto data = mySh(2); // Make sure we get some collisions.1054 data["foo"] = 1;1055 data["bar"] = 2;1056 data["baz"] = 3;1057 data["waldo"] = 4;1058 assert(!("foobar" in data));1059 assert(*("foo" in data) == 1);1060 assert(*("bar" in data) == 2);1061 assert(*("baz" in data) == 3);1062 assert(*("waldo" in data) == 4);1063 assert(data["foo"] == 1);1064 assert(data["bar"] == 2);1065 assert(data["baz"] == 3);1066 assert(data["waldo"] == 4);1067 assert(data.keys.sort == ["bar", "baz", "foo", "waldo"]);1068 assert(data.values.sort == [1U, 2, 3, 4]);1069 foreach(k, v; data) {1070 assert(data[k] == v);1071 }1072 foreach(v; data) {1073 assert(v > 0 && v < 5);1074 }1075 1076 // Test remove.1077 1078 alias StackHash!(uint, uint) mySh2;1079 auto foo = mySh2(7);1080 for(uint i = 0; i < 20; i++) {1081 foo[i] = i;1082 }1083 assert(foo.length == 20);1084 for(uint i = 0; i < 20; i += 2) {1085 foo.remove(i);1086 }1087 for(uint i = 0; i < 20; i++) {1088 if(i & 1) {1089 assert(*(i in foo) == i);1090 } else {1091 assert(!(i in foo));1092 }1093 }1094 auto vals = foo.values;1095 assert(foo.length == 10);1096 assert(vals.qsort == [1U, 3, 5, 7, 9, 11, 13, 15, 17, 19]);1097 1098 writeln("Passed StackHash test.");1099 }1100 1101 /* A hash set that uses TempAlloc to allocate space. Useful for building1102 * a quick set in a performance-critical function that can't be1103 * performing tons of heap allocations. Intentionally lacking ddoc because1104 * the design or even existence of this is still likely to change.*/1105 struct StackSet(K) {1106 private:1107 // Choose smallest representation of the data.1108 struct Node1 {1109 Node1* next;1110 K key;1111 }1112 1113 struct Node2 {1114 K key;1115 Node2* next;1116 }1117 1118 static if(Node1.sizeof < Node2.sizeof) {1119 alias Node1 Node;1120 } else {1121 alias Node2 Node;1122 }1123 1124 Node[] roots;1125 TempAlloc.State TAState;1126 TypeInfo keyTI;1127 size_t _length;1128 Node* usedSentinel;1129 1130 Node* newNode(K key) {1131 Node* ret = cast(Node*) TempAlloc(Node.sizeof, TAState);1132 ret.key = key;1133 ret.next = null;1134 return ret;1135 }1136 1137 hash_t getHash(K key) {1138 static if(is(K : long) && K.sizeof <= hash_t.sizeof) {1139 hash_t hash = cast(hash_t) key;1140 } else static if(__traits(compiles, key.toHash())) {1141 hash_t hash = key.toHash();1142 } else {1143 hash_t hash = keyTI.getHash(&key);1144 }1145 hash %= roots.length;1146 return hash;1147 }1148 1149 public:1150 this(size_t nElem) {1151 // Obviously, the caller can never mean zero, because this struct1152 // can't work at all with nElem == 0, so assume it's a mistake and fix1153 // it here.1154 if(nElem == 0)1155 nElem++;1156 TAState = TempAlloc.getState;1157 roots = newStack!(Node)(nElem, TAState);1158 usedSentinel = cast(Node*) roots.ptr;1159 foreach(ref root; roots) {1160 root.key = K.init;1161 root.next = usedSentinel;1162 }1163 keyTI = typeid(K);1164 }1165 1166 void insert(K key) {1167 hash_t hash = getHash(key);1168 1169 if(roots[hash].next == usedSentinel) {1170 roots[hash].key = key;1171 roots[hash].next = null;1172 _length++;1173 return;1174 } else if(roots[hash].key == key) {1175 return;1176 } else { // Collision. Start chaining.1177 Node** next = &(roots[hash].next);1178 while(*next !is null) {1179 if((**next).key == key) {1180 return;1181 }1182 next = &((**next).next);1183 }1184 *next = newNode(key);1185 _length++;1186 return;1187 }1188 }1189 1190 K[] elems() {1191 auto space = newVoid!(K)(_length);1192 return elems(space);1193 }1194 1195 K[] elemStack() {1196 auto space = newStack!(K)(_length);1197 return elems(space);1198 }1199 1200 K[] elems(K[] space) {1201 size_t pos;1202 foreach(r; roots) {1203 if(r.next == usedSentinel)1204 continue;1205 space[pos++] = r.key;1206 Node* next = r.next;1207 while(next !is null) {1208 space[pos++] = next.key;1209 next = next.next;1210 }1211 }1212 return space;1213 }1214 1215 int opApply(int delegate(ref K key) dg) {1216 int res = 0;1217 outer:1218 foreach(r; roots) {1219 if(r.next == usedSentinel)1220 continue;1221 res = dg(r.key);1222 if (res) break;1223 Node* next = r.next;1224 while(next !is null) {1225 res = dg(next.key);1226 if(res) break outer;1227 next = next.next;1228 }1229 }1230 return res;1231 }1232 1233 bool opIn_r(K key) {1234 hash_t hash = getHash(key);1235 1236 if(roots[hash].next == usedSentinel) {1237 return false;1238 } else if(roots[hash].key == key) {1239 return true;1240 } else { // Collision. Start chaining.1241 Node* next = roots[hash].next;1242 while(next !is null) {1243 if(next.key == key) {1244 return true;1245 }1246 next = next.next;1247 }1248 return false;1249 }1250 }1251 1252 void remove(K key) {1253 hash_t hash = getHash(key);1254 1255 Node** next = &(roots[hash].next);1256 if(roots[hash].next == usedSentinel) {1257 return;1258 } else if(roots[hash].key == key) {1259 _length--;1260 if(roots[hash].next is null) {1261 roots[hash].key = K.init;1262 roots[hash].next = usedSentinel;1263 return;1264 } else {1265 roots[hash].key = (**next).key;1266 roots[hash].next = (**next).next;1267 return;1268 }1269 } else { // Collision. Start chaining.1270 while(*next !is null) {1271 if((**next).key == key) {1272 _length--;1273 *next = (**next).next;1274 break;1275 }1276 next = &((**next).next);1277 }1278 return;1279 }1280 }1281 1282 size_t length() {1283 return _length;1284 }1285 }1286 1287 unittest {1288 mixin(newFrame);1289 alias StackSet!(uint) mySS;1290 mySS set = mySS(12);1291 foreach(i; 0..20) {1292 set.insert(i);1293 }1294 assert(set.elems.qsort == seq(0U, 20U));1295 assert(set.elemStack.qsort == seq(0U, 20U));1296 1297 for(uint i = 0; i < 20; i += 2) {1298 set.remove(i);1299 }1300 1301 foreach(i; 0..20) {1302 if(i & 1) {1303 assert(i in set);1304 } else {1305 assert(!(i in set));1306 }1307 }1308 uint[] contents;1309 foreach(elem; set) {1310 contents ~= elem;1311 }1312 assert(contents.qsort == [1U,3,5,7,9,11,13,15,17,19]);1313 writeln("Passed StackSet test.");1314 }1315 1316 937 /**Computes the intersect of two arrays, i.e. the elements that are in both 1317 938 * arrays. Time and space complexity are O(first.length + second.length). … … 1343 964 1344 965 size_t pos; 1345 foreach(key, count; firstSet) { 1346 if(count > 0) 1347 result[pos++] = key; 1348 } 1349 966 auto key = firstSet.keys; 967 auto count = firstSet.values; 968 while(!key.empty) { 969 if(count.front > 0) { 970 result[pos++] = key.front; 971 } 972 key.popFront; 973 count.popFront; 974 } 1350 975 return result[0..pos]; 1351 976 } … … 1402 1027 } 1403 1028 } 1404 1405 1029 return result[0..resPos]; 1406 1030 } … … 1415 1039 foreach(i; 0..1000) { 1416 1040 foreach(ref f; first) 1417 f = uniform( gen,0U, 2500);1041 f = uniform(0U, 2500); 1418 1042 foreach(ref s; second) 1419 s = uniform( gen,0U, 5000);1043 s = uniform(0U, 5000); 1420 1044 auto hash = qsort!("a > b")(intersect(first, second)); 1421 1045 auto sort = intersectSorted!("a > b") trunk/cor.d
r20 r28 1 1 /**Pearson, Spearman and Kendall correlations, covariance. 2 2 * 3 * Author: David Simcha 4 * 5 * Copyright (c) 2009, David Simcha 3 * Author: David Simcha*/ 4 /* 5 * You may use this software under your choice of either of the following 6 * licenses. YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 7 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 8 * THE BSD LICENSE. IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 9 * TO OBEY THE TERMS OF THE PHOBOS LICENSE. IF YOU ARE A LAWYER LOOKING FOR 10 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 11 * GET A LIFE. 12 * 13 * ---------------------Phobos License: --------------------------------------- 14 * 15 * Copyright (C) 2008-2009 by David Simcha. 16 * 17 * This software is provided 'as-is', without any express or implied 18 * warranty. In no event will the authors be held liable for any damages 19 * arising from the use of this software. 20 * 21 * Permission is granted to anyone to use this software for any purpose, 22 * including commercial applications, and to alter it and redistribute it 23 * freely, in both source and binary form, subject to the following 24 * restrictions: 25 * 26 * o The origin of this software must not be misrepresented; you must not 27 * claim that you wrote the original software. If you use this software 28 * in a product, an acknowledgment in the product documentation would be 29 * appreciated but is not required. 30 * o Altered source versions must be plainly marked as such, and must not 31 * be misrepresented as being the original software. 32 * o This notice may not be removed or altered from any source 33 * distribution. 34 * 35 * --------------------BSD License: ----------------------------------------- 36 * 37 * Copyright (c) 2008-2009, David Simcha 6 38 * All rights reserved. 7 39 * … … 29 61 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 62 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 */ 32 65 33 66 module dstats.cor; 34 67 35 import core.memory ;68 import core.memory, std.range; 36 69 37 70 import dstats.sort, dstats.base, dstats.alloc; … … 49 82 /**Pearson correlation. When the term correlation is used unqualified, it is 50 83 * usually referring to this quantity. Pearson correlation assumes the input 51 * data is normally distributed and is therefore a parametric test. */52 real pcor(T, U)(const T[] input1, const U[] input2) 53 in { 54 assert(input1.length == input2.length); 55 } body{84 * data is normally distributed and is therefore a parametric test. 85 * This function works with any pair of input ranges. If they are of different 86 * lengths, it uses the first min(input1.length, input2.length) elements.*/ 87 real pcor(T, U)(T input1, U input2) 88 if(realInput!(T) && realInput!(U)) { 56 89 OnlinePcor corCalc; 57 foreach(i; 0..input1.length) { 58 corCalc.addElement(input1[i], input2[i]); 90 while(!input1.empty && !input2.empty) { 91 corCalc.put(input1.front, input2.front); 92 input1.popFront; 93 input2.popFront; 59 94 } 60 95 … … 63 98 64 99 unittest { 65 assert(approxEqual(pcor([1,2,3,4,5], [1,2,3,4,5]), 1)); 66 assert(approxEqual(pcor([1,2,3,4,5], [10.0, 8.0, 6.0, 4.0, 2.0]), -1)); 67 assert(approxEqual(pcor([2, 4, 1, 6, 19], [4, 5, 1, 3, 2]), -.2382314)); 100 assert(approxEqual(pcor([1,2,3,4,5].dup, [1,2,3,4,5].dup), 1)); 101 assert(approxEqual(pcor([1,2,3,4,5].dup, [10.0, 8.0, 6.0, 4.0, 2.0].dup), -1)); 102 assert(approxEqual(pcor([2, 4, 1, 6, 19].dup, [4, 5, 1, 3, 2].dup), -.2382314)); 103 104 // Make sure everything works with lowest common denominator range type. 105 struct Count { 106 uint num; 107 uint upTo; 108 uint front() { 109 return num; 110 } 111 void popFront() { 112 num++; 113 } 114 bool empty() { 115 return num >= upTo; 116 } 117 } 118 119 Count a, b; 120 a.upTo = 100; 121 b.upTo = 100; 122 assert(approxEqual(pcor(a, b), 1)); 123 68 124 writefln("Passed pcor unittest."); 69 125 } … … 78 134 public: 79 135 /// 80 void addElement(T, U)(T elem1, U elem2) {136 void put(T, U)(T elem1, U elem2) { 81 137 _k++; 82 138 real kNeg1 = 1.0L / _k; … … 135 191 136 192 /// 137 real covariance(T, U)(const T[] input1, const U[] input2) 193 real covariance(T, U)(T input1, U input2) 194 if(realInput!(T) && realInput!(U)) { 195 OnlinePcor covCalc; 196 while(!input1.empty && !input2.empty) { 197 covCalc.put(input1.front, input2.front); 198 input1.popFront; 199 input2.popFront; 200 } 201 return covCalc.cov; 202 } 203 204 unittest { 205 assert(approxEqual(covariance([1,4,2,6,3].dup, [3,1,2,6,2].dup), 2.05)); 206 writeln("Passed covariance test."); 207 } 208 209 /**Spearman's rank correlation. Non-parametric. This is essentially the 210 * Pearson correlation of the ranks of the data, with ties dealt with by 211 * averaging. 212 * 213 * Currently only works on input ranges with a length property.*/ 214 real scor(R, S)(R input1, S input2) 215 if(isInputRange!(R) && isInputRange!(S) && 216 dstats.base.hasLength!(R) && dstats.base.hasLength!(S)) 138 217 in { 139 218 assert(input1.length == input2.length); 140 219 } body { 141 OnlinePcor covCalc; 142 foreach(i; 0..input1.length) { 143 covCalc.addElement(input1[i], input2[i]); 144 } 145 return covCalc.cov; 146 } 147 148 unittest { 149 assert(approxEqual(covariance([1,4,2,6,3], [3,1,2,6,2]), 2.05)); 150 writeln("Passed covariance test."); 151 } 152 153 /**Spearman's rank correlation. Non-parametric. This is essentially the 154 * Pearson correlation of the ranks of the data, with ties dealt with by 155 * averaging.*/ 156 real scor(T, U)(const T[] input1, const U[] input2) 157 in { 158 assert(input1.length == input2.length); 159 } body { // Not using rank() so I can recycle some allocations. 220 alias ElementType!(R) T; 221 alias ElementType!(S) U; 160 222 if(input1.length < 2) 161 223 return real.nan; … … 171 233 iDup = (cast(T*) TempAlloc.malloc(largerSize * input1.length)) 172 234 [0..input1.length]; 173 iDup[] = input1[];235 rangeCopy(iDup, input1); 174 236 qsort(iDup, perms); 175 237 … … 189 251 // for the larger of T, U. 190 252 U[] iDup2 = (cast(U*) iDup.ptr)[0..input2.length]; 191 iDup2[] = input2[];253 rangeCopy(iDup2, input2); 192 254 193 255 qsort(iDup2, perms); … … 202 264 unittest { 203 265 //Test against a few known values. 204 assert(approxEqual(scor([1,2,3,4,5,6] , [3,1,2,5,4,6]), 0.77143));205 assert(approxEqual(scor([3,1,2,5,4,6] , [1,2,3,4,5,6]), 0.77143));206 assert(approxEqual(scor([3,6,7,35,75] , [1,63,53,67,3]), 0.3));207 assert(approxEqual(scor([1,63,53,67,3] , [3,6,7,35,75]), 0.3));208 assert(approxEqual(scor([1.5,6.3,7.8,4.2,1.5] , [1,63,53,67,3]), .56429));209 assert(approxEqual(scor([1,63,53,67,3] , [1.5,6.3,7.8,4.2,1.5]), .56429));210 assert(approxEqual(scor([1.5,6.3,7.8,7.8,1.5] , [1,63,53,67,3]), .79057));211 assert(approxEqual(scor([1,63,53,67,3] , [1.5,6.3,7.8,7.8,1.5]), .79057));212 assert(approxEqual(scor([1.5,6.3,7.8,6.3,1.5] , [1,63,53,67,3]), .63246));213 assert(approxEqual(scor([1,63,53,67,3] , [1.5,6.3,7.8,6.3,1.5]), .63246));214 assert(approxEqual(scor([3,4,1,5,2,1,6,4] , [1,3,2,6,4,2,6,7]), .6829268));215 assert(approxEqual(scor([1,3,2,6,4,2,6,7] , [3,4,1,5,2,1,6,4]), .6829268));266 assert(approxEqual(scor([1,2,3,4,5,6].dup, [3,1,2,5,4,6].dup), 0.77143)); 267 assert(approxEqual(scor([3,1,2,5,4,6].dup, [1,2,3,4,5,6].dup ), 0.77143)); 268 assert(approxEqual(scor([3,6,7,35,75].dup, [1,63,53,67,3].dup), 0.3)); 269 assert(approxEqual(scor([1,63,53,67,3].dup, [3,6,7,35,75].dup), 0.3)); 270 assert(approxEqual(scor([1.5,6.3,7.8,4.2,1.5].dup, [1,63,53,67,3].dup), .56429)); 271 assert(approxEqual(scor([1,63,53,67,3].dup, [1.5,6.3,7.8,4.2,1.5].dup), .56429)); 272 assert(approxEqual(scor([1.5,6.3,7.8,7.8,1.5].dup, [1,63,53,67,3].dup), .79057)); 273 assert(approxEqual(scor([1,63,53,67,3].dup, [1.5,6.3,7.8,7.8,1.5].dup), .79057)); 274 assert(approxEqual(scor([1.5,6.3,7.8,6.3,1.5].dup, [1,63,53,67,3].dup), .63246)); 275 assert(approxEqual(scor([1,63,53,67,3].dup, [1.5,6.3,7.8,6.3,1.5].dup), .63246)); 276 assert(approxEqual(scor([3,4,1,5,2,1,6,4].dup, [1,3,2,6,4,2,6,7].dup), .6829268)); 277 assert(approxEqual(scor([1,3,2,6,4,2,6,7].dup, [3,4,1,5,2,1,6,4].dup), .6829268)); 216 278 uint[] one = new uint[1000], two = new uint[1000]; 217 279 foreach(i; 0..100) { //Further sanity checks for things like commutativity. 218 size_t lowerBound = uniform( gen,0, one.length);219 size_t upperBound = uniform( gen,0, one.length);280 size_t lowerBound = uniform(0, one.length); 281 size_t upperBound = uniform(0, one.length); 220 282 if(lowerBound > upperBound) swap(lowerBound, upperBound); 221 283 foreach(ref o; one) { 222 o = uniform( gen,1, 10); //Generate lots of ties.284 o = uniform(1, 10); //Generate lots of ties. 223 285 } 224 286 foreach(ref o; two) { 225 o = uniform( gen,1, 10); //Generate lots of ties.287 o = uniform(1, 10); //Generate lots of ties. 226 288 } 227 289 real sOne = … … 245 307 assert(approxEqual(sFour, sFive) || (isnan(sFour) && isnan(sFive))); 246 308 } 247 writefln("Passed scor unittest."); 309 310 // Test input ranges. 311 struct Count { 312 uint num; 313 uint upTo; 314 uint front() { 315 return num; 316 } 317 void popFront() { 318 num++; 319 } 320 bool empty() { 321 return num >= upTo; 322 } 323 uint length() { 324 return upTo - num; 325 } 326 } 327 328 Count a, b; 329 a.upTo = 100; 330 b.upTo = 100; 331 assert(scor(a, b) == 1); 332 333 writeln("Passed scor unittest."); 248 334 } 249 335 … … 255 341 * standard formulas, and therefore unlikely to have weird, subtle bugs.*/ 256 342 257 real kcorOld(T, U)(const T[] input1, const U[] input2)343 private real kcorOld(T, U)(const T[] input1, const U[] input2) 258 344 in { 259 345 assert(input1.length == input2.length); … … 294 380 * bubble sort distance, or the number of swaps that would be needed in a 295 381 * bubble sort to sort input2 into the same order as input1. It is 296 * a robust, non-parametric correlation metric.*/ 297 real kcor(T, U)(const T[] input1, const U[] input2) { 298 auto i1d = input1.tempdup; 382 * a robust, non-parametric correlation metric. 383 * 384 * Since a copy of the inputs is made anyhow because they need to be sorted, 385 * this function can work with any input range. However, the ranges must 386 * have the same length.*/ 387 real kcor(T, U)(T input1, U input2) 388 if(isInputRange!(T) && isInputRange!(U)) { 389 auto i1d = tempdup(input1); 299 390 scope(exit) TempAlloc.free; 300 auto i2d = input2.tempdup;391 auto i2d = tempdup(input2); 301 392 scope(exit) TempAlloc.free; 302 393 return kcorDestructive(i1d, i2d); … … 304 395 305 396 /**Kendall's Tau O(N log N) destroys input vectors but requires 306 * O(1) auxilliary space provided T.sizeof == U.sizeof.*/ 397 * O(1) auxilliary space provided T.sizeof == U.sizeof. 398 * Also only works on arrays.*/ 307 399 real kcorDestructive(T, U)(T[] input1, U[] input2) 308 400 in { … … 369 461 unittest { 370 462 //Test against known values. 371 assert(approxEqual(kcor([1,2,3,4,5] , [3,1,7,4,3]), 0.1054093));372 assert(approxEqual(kcor([3,6,7,35,75] ,[1,63,53,67,3]), 0.2));373 assert(approxEqual(kcor([1.5,6.3,7.8,4.2,1.5] , [1,63,53,67,3]), .3162287));463 assert(approxEqual(kcor([1,2,3,4,5].dup, [3,1,7,4,3].dup), 0.1054093)); 464 assert(approxEqual(kcor([3,6,7,35,75].dup,[1,63,53,67,3].dup), 0.2)); 465 assert(approxEqual(kcor([1.5,6.3,7.8,4.2,1.5].dup, [1,63,53,67,3].dup), .3162287)); 374 466 uint[] one = new uint[1000], two = new uint[1000]; 375 467 // Test complex, fast implementation against straightforward, 376 468 // slow implementation. 377 469 foreach(i; 0..100) { 378 size_t lowerBound = uniform( gen,0, 1000);379 size_t upperBound = uniform( gen,0, 1000);470 size_t lowerBound = uniform(0, 1000); 471 size_t upperBound = uniform(0, 1000); 380 472 if(lowerBound > upperBound) swap(lowerBound, upperBound); 381 473 foreach(ref o; one) { 382 o = uniform( gen,1, 10);474 o = uniform(1, 10); 383 475 } 384 476 foreach(ref o; two) { 385 o = uniform( gen,1, 10);477 o = uniform(1, 10); 386 478 } 387 479 real kOne = … … 389 481 real kTwo = 390 482 kcorOld(one[lowerBound..upperBound], two[lowerBound..upperBound]); 391 assert(approxEqual(kOne, kTwo)); 392 } 483 assert(approxEqual(kOne, kTwo) || (isNaN(kOne) && isNaN(kTwo))); 484 } 485 486 // Make sure everything works with lowest common denominator range type. 487 struct Count { 488 uint num; 489 uint upTo; 490 uint front() { 491 return num; 492 } 493 void popFront() { 494 num++; 495 } 496 bool empty() { 497 return num >= upTo; 498 } 499 } 500 501 Count a, b; 502 a.upTo = 100; 503 b.upTo = 100; 504 assert(approxEqual(kcor(a, b), 1)); 393 505 writefln("Passed kcor unittest."); 394 506 } trunk/distrib.d
r20 r28 1 1 /**Probability distribution CDFs, PDFs/PMFs, and a few inverse CDFs. 2 2 * 3 * Authors: David Simcha, Don Clugston 4 *3 * Authors: David Simcha, Don Clugston*/ 4 /* 5 5 * Acknowledgements: Some of this module was borrowed the mathstat module 6 6 * of Don Clugston's MathExtra library. This was done to create a … … 251 251 252 252 unittest { 253 Random gen = Random(unpredictableSeed);254 253 foreach(i; 0..1_000) { 255 254 // Restricted variable ranges are because, in the tails, more than one … … 257 256 // Obviously, this is one of those corner cases that nothing can be 258 257 // done about. 259 real lambda = uniform( gen,.05L, 8.0L);260 uint k = uniform( gen,0UL, cast(uint) ceil(3.0L * lambda));258 real lambda = uniform(.05L, 8.0L); 259 uint k = uniform(0UL, cast(uint) ceil(3.0L * lambda)); 261 260 real pVal = poissonCDF(k, lambda); 262 261 assert(invPoissonCDF(pVal, lambda) == k); … … 377 376 // Obviously, this is one of those corner cases that nothing can be 378 377 // done about. Using small n's, moderate p's prevents this. 379 uint n = uniform( gen,5L, 10L);380 uint k = uniform( gen,0L, n);381 real p = uniform( gen,0.1L, 0.9L);378 uint n = uniform(5L, 10L); 379 uint k = uniform(0L, n); 380 real p = uniform(0.1L, 0.9L); 382 381 real pVal = binomialCDF(k, n, p); 383 382 assert(invBinomialCDF(pVal, n, p) == k); … … 505 504 506 505 real hyperExact(ulong x, ulong n1, ulong n2, ulong n, ulong startAt = 0) { 507 i nvariantreal constPart = logFactorial(n1) + logFactorial(n2) +506 immutable real constPart = logFactorial(n1) + logFactorial(n2) + 508 507 logFactorial(n) + logFactorial(n1 + n2 - n) - logFactorial(n1 + n2); 509 508 real sum = 0; … … 663 662 664 663 private { 665 i nvariantstatic real P0[] = [664 immutable static real P0[] = [ 666 665 -0x1.758f4d969484bfdcp-7, // -0.011400139698853582732 667 666 0x1.53cee17a59259dd2p-3, // 0.16592193750979583221 … … 674 673 ]; 675 674 676 i nvariantstatic real Q0[] = [675 immutable static real Q0[] = [ 677 676 -0x1.64b92ae791e64bb2p-7, // -0.010886331510064192632 678 677 0x1.7585c7d597298286p-3, // 0.1823840725000038842 … … 685 684 ]; 686 685 687 i nvariantstatic real P1[] = [686 immutable static real P1[] = [ 688 687 0x1.20ceea49ea142f12p-13, // 0.00013771451113809605662 689 688 0x1.cbe8a7267aea80bp-7, // 0.014035302749980729871 … … 698 697 ]; 699 698 700 i nvariantstatic real Q1[] = [699 immutable static real Q1[] = [ 701 700 0x1.3a4ce1406cea98fap-13, // 0.00014987006762866754669 702 701 0x1.f45332623335cda2p-7, // 0.015268706895221911913 … … 711 710 ]; 712 711 713 i nvariantstatic real P2[] = [712 immutable static real P2[] = [ 714 713 0x1.8c124a850116a6d8p-21, // 7.3774056430545041787e-07 715 714 0x1.534abda3c2fb90bap-13, // 0.0001617870121822776094 … … 722 721 ]; 723 722 724 i nvariantstatic real Q2[] = [723 immutable static real Q2[] = [ 725 724 0x1.af03f4fc0655e006p-21, // 8.0282885006885383316e-07 726 725 0x1.713192048d11fb2p-13, // 0.00017604524340842589303 … … 733 732 ]; 734 733 735 i nvariantstatic real P3[] = [734 immutable static real P3[] = [ 736 735 -0x1.55da447ae3806168p-34, // -7.7728283809481633868e-11 737 736 -0x1.145635641f8778a6p-24, // -6.4339663876133447143e-08 … … 744 743 ]; 745 744 746 i nvariantstatic real Q3[] = [745 immutable static real Q3[] = [ 747 746 -0x1.74022dd5523e6f84p-34, // -8.4584942637876803775e-11 748 747 -0x1.2cb60d61e29ee836p-24, // -7.0014768675591937804e-08 … … 833 832 // Just make sure invNormalCDF works like it should as the inverse. 834 833 foreach(i; 0..1000) { 835 real x = uniform( gen,0.0L, 1.0L);836 real mean = uniform( gen,0.0L, 100.0L);837 real sd = uniform( gen,1.0L, 3.0L);834 real x = uniform(0.0L, 1.0L); 835 real mean = uniform(0.0L, 100.0L); 836 real sd = uniform(1.0L, 3.0L); 838 837 real inv = invNormalCDF(x, mean, sd); 839 838 real rec = normalCDF(inv, mean, sd); 840 839 assert(approxEqual(x, rec)); 841 840 } 842 writeln( stderr,"Passed invNormalCDF unittest.");841 writeln("Passed invNormalCDF unittest."); 843 842 } 844 843 … … 919 918 assert(approxEqual(studentsTCDF(invStudentsTCDF(0.4L, 18), 18), .4L)); 920 919 assert(approxEqual(studentsTCDF( invStudentsTCDF(0.9L, 11), 11), 0.9L)); 921 writeln( stderr,"Passed studentsTCDF.");920 writeln("Passed studentsTCDF."); 922 921 } 923 922 … … 1161 1160 uint nSkipped; 1162 1161 foreach(i; 0..1000) { 1163 uint n = uniform( gen,1u, 10u);1164 real p = uniform( gen,0.0L, 1L);1165 uint k = uniform( gen,0, 20);1162 uint n = uniform(1u, 10u); 1163 real p = uniform(0.0L, 1L); 1164 uint k = uniform(0, 20); 1166 1165 real pVal = negBinomCDF(k, n, p); 1167 1166 trunk/gamma.d
r5 r28 5 5 * is trying to integrate the rest of this library into Tango, 6 6 * tango.math.GammaFunction would be a drop-in replacement. 7 * 8 * Copyright: Based on the CEPHES math library, which is7 **/ 8 /* Copyright: Based on the CEPHES math library, which is 9 9 * Copyright (C) 1994 Stephen L. Moshier (moshier@world.std.com). 10 10 * Authors: Stephen L. Moshier (original C code). Conversion to D by Don Clugston trunk/infotheory.d
r24 r28 3 3 * quantities, i.e, entropy, mutual info, etc. are output in bits. 4 4 * 5 * Author: David Simcha 6 * 7 * Copyright (c) 2009, David Simcha 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions are met: 12 * * Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * * Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * * Neither the name of the <organization> nor the 18 * names of its contributors may be used to endorse or promote products 19 * derived from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 22 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY 25 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 5 * Author: David Simcha*/ 6 /* 7 * You may use this software under your choice of either of the following 8 * licenses. YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 9 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 10 * THE BSD LICENSE. IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 11 * TO OBEY THE TERMS OF THE PHOBOS LICENSE. IF YOU ARE A LAWYER LOOKING FOR 12 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 13 * GET A LIFE. 14 * 15 * ---------------------Phobos License: --------------------------------------- 16 * 17 * Copyright (C) 2008-2009 by David Simcha. 18 * 19 * This software is provided 'as-is', without any express or implied 20 * warranty. In no event will the authors be held liable for any damages 21 * arising from the use of this software. 22 * 23 * Permission is granted to anyone to use this software for any purpose, 24 * including commercial applications, and to alter it and redistribute it 25 * freely, in both source and binary form, subject to the following 26 * restrictions: 27 * 28 * o The origin of this software must not be misrepresented; you must not 29 * claim that you wrote the original software. If you use this software 30 * in a product, an acknowledgment in the product documentation would be 31 * appreciated but is not required. 32 * o Altered source versions must be plainly marked as such, and must not 33 * be misrepresented as being the original software. 34 * o This notice may not be removed or altered from any source 35 * distribution. 36 * 37 * --------------------BSD License: ----------------------------------------- 38 * 39 * Copyright (c) 2008-2009, David Simcha 40 * All rights reserved. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions are met: 44 * 45 * * Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 48 * * Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 52 * * Neither the name of the authors nor the 53 * names of its contributors may be used to endorse or promote products 54 * derived from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 57 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 58 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 59 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY 60 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 62 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 63 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 64 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 65 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 66 */ 31 67 32 68 module dstats.infotheory; 33 69 34 import std.traits, std.math, std.typetuple, std.functional; 70 import std.traits, std.math, std.typetuple, std.functional, std.range, 71 std.array, std.typecons; 35 72 36 73 import dstats.sort, dstats.summary, dstats.base, dstats.alloc; … … 42 79 } 43 80 44 /**This function calculates the Shannon entropy of a n arraythat is81 /**This function calculates the Shannon entropy of a forward range that is 45 82 * treated as frequency counts of a set of discrete observations. 46 83 * … … 53 90 * --- 54 91 */ 55 real entropyCounts(T)(const T[] data) { 56 return entropyCounts(data, sum!(T, real)(data)); 57 } 58 59 real entropyCounts(T)(const T[] data, real n) { 92 real entropyCounts(T)(T data) 93 if(isForwardRange!(T)) { 94 auto save = data; 95 return entropyCounts(save, sum!(T, real)(data)); 96 } 97 98 real entropyCounts(T)(T data, real n) 99 if(isForwardRange!(T)) { 60 100 real nNeg1 = 1.0L / n; 61 101 real entropy = 0; … … 70 110 71 111 unittest { 72 real uniform3 = entropyCounts([4, 4, 4] );112 real uniform3 = entropyCounts([4, 4, 4].dup); 73 113 assert(approxEqual(uniform3, log2(3))); 74 real uniform4 = entropyCounts([5, 5, 5, 5] );114 real uniform4 = entropyCounts([5, 5, 5, 5].dup); 75 115 assert(approxEqual(uniform4, 2)); 76 assert(entropyCounts([2,2] )==1);77 assert(entropyCounts([5.1,5.1,5.1,5.1] )==2);78 assert(approxEqual(entropyCounts([1,2,3,4,5] ), 2.1492553971685));116 assert(entropyCounts([2,2].dup)==1); 117 assert(entropyCounts([5.1,5.1,5.1,5.1].dup)==2); 118 assert(approxEqual(entropyCounts([1,2,3,4,5].dup), 2.1492553971685)); 79 119 writefln("Passed entropyCounts unittest."); 80 120 } 81 121 82 // Should be encapsulated within entropy(). Workaround for bug 1629. 122 template FlattenType(T...) { 123 alias FlattenTypeImpl!(T).ret FlattenType; 124 } 125 126 template FlattenTypeImpl(T...) { 127 static if(T.length == 0) { 128 alias TypeTuple!() ret; 129 } else { 130 T[0] j; 131 static if(is(typeof(j._jointRanges))) { 132 alias TypeTuple!(typeof(j._jointRanges), FlattenType!(T[1..$])) ret; 133 } else { 134 alias TypeTuple!(T[0], FlattenType!(T[1..$])) ret; 135 } 136 } 137 } 138 139 private Joint!(FlattenType!(T, U)) flattenImpl(T, U...)(T start, U rest) { 140 static if(rest.length == 0) { 141 return start; 142 } else static if(is(typeof(rest[0]._jointRanges))) { 143 return flattenImpl(jointImpl(start.tupleof, rest[0]._jointRanges), rest[1..$]); 144 } else { 145 return flattenImpl(jointImpl(start.tupleof, rest[0]), rest[1..$]); 146 } 147 } 148 149 Joint!(FlattenType!(T)) flatten(T...)(T args) { 150 static assert(args.length > 0); 151 static if(is(typeof(args[0]._jointRanges))) { 152 auto myTuple = args[0]; 153 } else { 154 auto myTuple = jointImpl(args[0]); 155 } 156 static if(args.length == 1) { 157 return myTuple; 158 } else { 159 return flattenImpl(myTuple, args[1..$]); 160 } 161 } 162 163 /**Bind a set of ranges together to represent a joint probability distribution. 164 * 165 * Examples: 166 * --- 167 * auto foo = [1,2,3,1,1]; 168 * auto bar = [2,4,6,2,2]; 169 * auto e = entropy(joint(foo, bar)); // Calculate joint entropy of foo, bar. 170 */ 171 Joint!(FlattenType!(T)) joint(T...)(T args) { 172 return jointImpl(flatten(args).tupleof); 173 } 174 175 Joint!(T) jointImpl(T...)(T args) { 176 return Joint!(T)(args); 177 } 178 179 /**Iterate over a set of ranges in lockstep and return an ObsEnt, 180 * which is used internally by entropy functions on each iteration.*/ 181 struct Joint(T...) { 182 T _jointRanges; 183 184 auto front() { 185 alias ElementsTuple!(T) E; 186 alias ObsEnt!(E) rt; 187 rt ret; 188 foreach(ti, elem; _jointRanges) { 189 ret.tupleof[ti] = elem.front; 190 } 191 return ret; 192 } 193 194 void popFront() { 195 foreach(ti, elem; _jointRanges) { 196 _jointRanges[ti].popFront; 197 } 198 } 199 200 bool empty() { 201 foreach(elem; _jointRanges) { 202 if(elem.empty) { 203 return true; 204 } 205 } 206 return false; 207 } 208 209 static if(T.length > 0 && allSatisfy!(dstats.base.hasLength, T)) { 210 uint length() { 211 uint ret = uint.max; 212 foreach(range; _jointRanges) { 213 auto len = range.length; 214 if(len < ret) { 215 ret = len; 216 } 217 } 218 return ret; 219 } 220 } 221 } 222 223 template ElementsTuple(T...) { 224 static if(T.length == 1) { 225 alias TypeTuple!(Unqual!(ElementType!(T[0]))) ElementsTuple; 226 } else { 227 alias TypeTuple!(Unqual!(ElementType!(T[0])), ElementsTuple!(T[1..$])) 228 ElementsTuple; 229 } 230 } 231 232 private template Comparable(T) { 233 enum bool Comparable = is(typeof({ 234 T a; 235 T b; 236 return a < b; })); 237 } 238 83 239 struct ObsEnt(T...) { 84 atomsOfArrayTuple!(T)compRep;240 T compRep; 85 241 86 242 hash_t toHash() { … … 99 255 } 100 256 101 bool opEquals( typeof(this) rhs) {257 bool opEquals(ref typeof(this) rhs) { 102 258 foreach(ti, elem; this.tupleof) { 103 259 if(elem != rhs.tupleof[ti]) … … 106 262 return true; 107 263 } 108 } 109 110 /**Calculates the joint entropy of a set of observations. Each array represets 111 * a vector of observations. If only one array is given, this reduces to the 112 * plain old entropy. 264 265 static if(allSatisfy!(Comparable, T)) { 266 int opCmp(ref typeof(this) rhs) { 267 foreach(ti, elem; this.tupleof) { 268 if(rhs.tupleof[ti] < elem) { 269 return -1; 270 } else if(rhs.tupleof[ti] > elem) { 271 return 1; 272 } 273 } 274 return 0; 275 } 276 } 277 278 } 279 280 /**Calculates the joint entropy of a set of observations. Each input range 281 * represents a vector of observations. If only one range is given, this reduces 282 * to the plain old entropy. Input range must have a length. 283 * 284 * Note: This function specializes if ElementType!(T) is a byte, ubyte, or 285 * char, resulting in a much faster entropy calculation. When possible, try 286 * to provide data in the form of a byte, ubyte, or char. 113 287 * 114 288 * Examples: … … 118 292 * assert(approxEqual(entropyFoo, log2(3))); 119 293 * int[] bar = [1, 2, 3, 1, 2, 3, 1, 2, 3]; 120 * real jointEntropyFooBar = entropy(foo, bar); // Joint entropy of foo and bar.121 * assert(approxEqual( jointEntropyFooBar, log2(9)));294 * real HFooBar = entropy(joint(foo, bar)); // Joint entropy of foo and bar. 295 * assert(approxEqual(HFooBar, log2(9))); 122 296 * --- 123 297 */ 124 real entropy(T...)(const T data) 125 in { 126 static assert(data.length > 0); 127 size_t zeroLen = data[0].length; 128 foreach(array; data[1..$]) { 129 assert(array.length == zeroLen); 130 } 131 } body { 132 133 static if(data.length > 1) { 134 alias ObsEnt!(T) Obs; 135 } 298 real entropy(T)(T data) 299 if(isInputRange!(T) && dstats.base.hasLength!(T)) { 300 if(data.length <= ubyte.max) { 301 return entropyImpl!(ubyte, T)(data); 302 } else if(data.length <= ushort.max) { 303 return entropyImpl!(ushort, T)(data); 304 } else { 305 return entropyImpl!(uint, T)(data); 306 } 307 } 308 309 private real entropyImpl(U, T)(T data) 310 if(ElementType!(T).sizeof > 1) { // Generic version. 311 alias typeof(data.front()) E; 136 312 137 313 TempAlloc.frameInit; 138 static if(data.length > 1) { 139 alias StackHash!(Obs, uint) mySh; 140 mySh counts = mySh(data[0].length / 5); 141 foreach(i; 0..data[0].length) { 142 Obs obs; 143 foreach(ti, vec; data) { 144 obs.tupleof[ti] = data[ti][i]; 145 } 146 counts[obs]++; 147 } 148 } else { 149 alias StackHash!(typeof(data[0][0]), uint) mySh; 150 mySh counts = mySh(data[0].length / 5); 151 foreach(elem; data[0]) { 152 counts[elem]++; 153 } 154 } 155 156 real nNeg1 = 1.0L / data[0].length; 157 real ans = 0; 158 foreach(value; counts) { 159 if(value == 0) 160 continue; 161 real pxi = cast(real) value * nNeg1; 162 ans -= pxi * log2(pxi); 163 } 314 alias StackHash!(E, U) mySh; 315 immutable len = data.length; // In case length calculation is expensive. 316 mySh counts = mySh(len / 5); 317 318 foreach(elem; data) { 319 counts[elem]++; 320 } 321 322 real ans = entropyCounts(counts.values, len); 164 323 TempAlloc.frameFree; 165 166 324 return ans; 167 325 } 168 326 169 unittest { 170 int[] foo = [1, 1, 1, 2, 2, 2, 3, 3, 3]; 171 real entropyFoo = entropy(foo); 172 assert(approxEqual(entropyFoo, log2(3))); 173 int[] bar = [1, 2, 3, 1, 2, 3, 1, 2, 3]; 174 real jointEntropyFooBar = entropy(foo, bar); // Joint entropy of foo and bar. 175 assert(approxEqual(jointEntropyFooBar, log2(9))); 176 // Testing with BigInts because they're more challenging to get right 177 // in terms of hashing and all. 178 BigInt[] bi = new BigInt[5]; 179 bi[0] = 1; 180 bi[1] = 2; 181 bi[2] = 3; 182 bi[3] = 1; 183 bi[4] = 1; 184 auto bi2 = bi.dup.reverse; 185 assert(approxEqual(entropy(bi, bi2), entropyCounts([2, 1, 1, 1]))); 186 assert(approxEqual(entropy(bi), entropyCounts([3, 1, 1]))); 327 private real entropyImpl(U, T)(T data) // byte/char specialization 328 if(ElementType!(T).sizeof == 1) { 329 alias typeof(data.front()) E; 330 331 U[ubyte.max + 1] counts; 332 333 uint min = ubyte.max, max = 0; 334 foreach(elem; data) { 335 static if(is(E == byte)) { 336 // Keep adjacent elements adjacent. In real world use cases, 337 // probably will have ranges like [-1, 1]. 338 ubyte e = cast(ubyte) (elem) + byte.max; 339 } else { 340 ubyte e = cast(ubyte) elem; 341 } 342 counts[e]++; 343 if(e > max) { 344 max = e; 345 } 346 if(e < min) { 347 min = e; 348 } 349 } 350 351 return entropyCounts(counts.ptr[min..max + 1], data.length); 352 } 353 354 unittest { 355 { // Generic version. 356 int[] foo = [1, 1, 1, 2, 2, 2, 3, 3, 3]; 357 real entropyFoo = entropy(foo); 358 assert(approxEqual(entropyFoo, log2(3))); 359 int[] bar = [1, 2, 3, 1, 2, 3, 1, 2, 3]; 360 auto stuff = joint(foo, bar); 361 real jointEntropyFooBar = entropy(joint(foo, bar)); 362 assert(approxEqual(jointEntropyFooBar, log2(9))); 363 } 364 { // byte specialization 365 byte[] foo = [-1, -1, -1, 2, 2, 2, 3, 3, 3]; 366 real entropyFoo = entropy(foo); 367 assert(approxEqual(entropyFoo, log2(3))); 368 string bar = "ACTGGCTA"; 369 assert(entropy(bar) == 2); 370 } 187 371 writeln("Passed entropy unittest."); 188 372 } 373 374 /**Calculate the conditional entropy H(data | cond).*/ 375 real condEntropy(T, U)(T data, U cond) 376 if(isInputRange!(T) && isInputRange!(U)) { 377 return entropy(joint(data, cond)) - entropy(cond); 378 } 379 380 unittest { 381 // This shouldn't be easy to screw up. Just really basic. 382 int[] foo = [1,2,2,1,1]; 383 int[] bar = [1,2,3,1,2]; 384 assert(approxEqual(entropy(foo) - condEntropy(foo, bar), 385 mutualInfo(foo, bar))); 386 writeln("Passed condEntroy unittest."); 387 } 388 189 389 190 390 191 391 /**Calculates the mutual information of two vectors of observations. 192 392 */ 193 real mutualInfo(T, U)(T[] x, U[] y) 194 in { 195 assert(x.length == y.length); 196 } body { 197 return entropy(x) + entropy(y) - entropy(x, y); 393 real mutualInfo(T, U)(T x, U y) 394 if(isInputRange!(T) && isInputRange!(U)) { 395 return entropy(x) + entropy(y) - entropy(joint(x, y)); 198 396 } 199 397 200 398 unittest { 201 399 // Values from R, but converted from base e to base 2. 202 assert(approxEqual(mutualInfo(bin([1,2,3,3,8] , 10),203 bin([8,6,7,5,3] , 10)), 1.921928));204 assert(approxEqual(mutualInfo(bin([1,2,1,1,3,4,3,6] , 2),205 bin([2,7,9,6,3,1,7,40] , 2)), .2935645));206 assert(approxEqual(mutualInfo(bin([1,2,1,1,3,4,3,6] , 4),207 bin([2,7,9,6,3,1,7,40] , 4)), .5435671));400 assert(approxEqual(mutualInfo(bin([1,2,3,3,8].dup, 10), 401 bin([8,6,7,5,3].dup, 10)), 1.921928)); 402 assert(approxEqual(mutualInfo(bin([1,2,1,1,3,4,3,6].dup, 2), 403 bin([2,7,9,6,3,1,7,40].dup, 2)), .2935645)); 404 assert(approxEqual(mutualInfo(bin([1,2,1,1,3,4,3,6].dup, 4), 405 bin([2,7,9,6,3,1,7,40].dup, 4)), .5435671)); 208 406 209 407 writeln("Passed mutualInfo unittest."); 210 408 } 211 409 212 /**Calculates the conditional mutual information I(x, y | z). z can be any 213 * number of vectors. If only two vectors are provided (equivalently if z is 214 * empty), this reduces to plain old mutual information.*/ 215 real condMutualInfo(T, U, V...)(T[] x, U[] y, V z) 216 in { 217 size_t zeroLen = z[0].length; 218 foreach(array; z[1..$]) { 219 assert(array.length == zeroLen); 220 } 221 } body { 222 static if(V.length == 0) 223 return mutualInfo(x, y); 224 else 225 return entropy(x, z) + entropy(y, z) - entropy(x, y, z) - entropy(z); 410 /**Calculates the conditional mutual information I(x, y | z).*/ 411 real condMutualInfo(T, U, V)(T x, U y, V z) { 412 return entropy(joint(x, z)) + entropy(joint(y, z)) - 413 entropy(joint(x, y, z)) - entropy(z); 226 414 } 227 415 228 416 unittest { 229 417 // Values from Matlab mi package by Hanchuan Peng. 230 auto res = condMutualInfo([1,2,1,2,1,2,1,2] , [3,1,2,3,4,2,1,2],231 [1,2,3,1,2,3,1,2] );418 auto res = condMutualInfo([1,2,1,2,1,2,1,2].dup, [3,1,2,3,4,2,1,2].dup, 419 [1,2,3,1,2,3,1,2].dup); 232 420 assert(approxEqual(res, 0.4387)); 233 res = condMutualInfo([1,2,3,1,2], [2,1,3,2,1], [1,1,1,2,2], [2,2,2,1,1]); 421 res = condMutualInfo([1,2,3,1,2].dup, [2,1,3,2,1].dup, 422 joint([1,1,1,2,2].dup, [2,2,2,1,1].dup)); 234 423 assert(approxEqual(res, 1.3510)); 235 424 writeln("Passed condMutualInfo unittest."); 236 425 } 237 426 238 /**Calculates the entropy of any old array of observations more quickly than 239 * entropy(), provided that it is sorted. If the input is sorted by more than 240 * one key, i.e. structs, the result will be the joint entropy of all of the 241 * keys. The compFun alias will be used to compare adjacent elements and 242 * determine how many instances of each value exist.*/ 243 real entropySorted(alias compFun = "a == b", T)(T[] data) { 427 /**Calculates the entropy of any old input range of observations more quickly 428 * than entropy(), provided that all equal values are adjacent. If the input 429 * is sorted by more than one key, i.e. structs, the result will be the joint 430 * entropy of all of the keys. The compFun alias will be used to compare 431 * adjacent elements and determine how many instances of each value exist.*/ 432 real entropySorted(alias compFun = "a == b", T)(T data) 433 if(isInputRange!(T)) { 434 alias ElementType!(T) E; 244 435 alias binaryFun!(compFun) comp; 245 i nvariant size_tn = data.length;246 i nvariant realnrNeg1 = 1.0L / n;436 immutable n = data.length; 437 immutable nrNeg1 = 1.0L / n; 247 438 248 439 real sum = 0.0L; 249 440 real nSame = 1.0L; 250 size_t pos = 1; 251 while(pos < n) { 252 if(comp(data[pos], data[pos - 1])) 441 auto last = data.front; 442 data.popFront; 443 foreach(elem; data) { 444 if(comp(elem, last)) 253 445 nSame++; 254 446 else { … … 257 449 sum -= p * log2(p); 258 450 } 259 pos++;451 last = elem; 260 452 } 261 453 // Handle last run. … … 272 464 } 273 465 274 template atomsOfArrayTuple(T...) {275 mixin("alias TypeTuple!(Mutable!(" ~ (atomType!(T[0])).stringof ~ ")" ~276 atomsOfArrayTupleImpl!(T[1..$]) ~ " atomsOfArrayTuple;");277 }278 279 template atomsOfArrayTupleImpl(T...) {280 static if(T.length == 0)281 const char[] atomsOfArrayTupleImpl = ")";282 else const char[] atomsOfArrayTupleImpl = ", Mutable!(" ~283 (atomType!(T[0])).stringof ~ ")" ~284 atomsOfArrayTupleImpl!(T[1..$]);285 }286 287 template atomType(T : T[]) {288 alias T atomType;289 }290 291 466 // Verify that there are no TempAlloc memory leaks anywhere in the code covered 292 467 // by the unittest. This should always be the last unittest of the module. trunk/random.d
r17 r28 2 2 * distributions. 3 3 * 4 * Author: David Simcha 5 * 6 * Copyright (c) 2009, David Simcha 4 * Author: David Simcha*/ 5 /* 6 * You may use this software under your choice of either of the following 7 * licenses. YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 8 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 9 * THE BSD LICENSE. IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 10 * TO OBEY THE TERMS OF THE PHOBOS LICENSE. IF YOU ARE A LAWYER LOOKING FOR 11 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 12 * GET A LIFE. 13 * 14 * ---------------------Phobos License: --------------------------------------- 15 * 16 * Copyright (C) 2008-2009 by David Simcha. 17 * 18 * This software is provided 'as-is', without any express or implied 19 * warranty. In no event will the authors be held liable for any damages 20 * arising from the use of this software. 21 * 22 * Permission is granted to anyone to use this software for any purpose, 23 * including commercial applications, and to alter it and redistribute it 24 * freely, in both source and binary form, subject to the following 25 * restrictions: 26 * 27 * o The origin of this software must not be misrepresented; you must not 28 * claim that you wrote the original software. If you use this software 29 * in a product, an acknowledgment in the product documentation would be 30 * appreciated but is not required. 31 * o Altered source versions must be plainly marked as such, and must not 32 * be misrepresented as being the original software. 33 * o This notice may not be removed or altered from any source 34 * distribution. 35 * 36 * --------------------BSD License: ----------------------------------------- 37 * 38 * Copyright (c) 2008-2009, David Simcha 7 39 * All rights reserved. 8 40 * … … 30 62 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 63 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 64 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 */ 33 66 34 67 … … 48 81 49 82 ///Generates a random number from normal(mean, sd) distribution. 50 real rNorm(RGen )(ref RGen gen, real mean = 0.0L, real sd = 1.0L) {51 real p = uniform( gen, 0.0L, 1.0L);83 real rNorm(RGen = Random)(real mean = 0.0L, real sd = 1.0L, ref RGen gen = rndGen) { 84 real p = uniform(0.0L, 1.0L, gen);; 52 85 return invNormalCDF(p) * sd + mean; 53 86 } 54 87 55 88 unittest { 56 Random gen;57 gen.seed(unpredictableSeed);58 89 real[] observ = new real[100_000]; 59 90 foreach(ref elem; observ) 60 elem = rNorm( gen);91 elem = rNorm(); 61 92 auto ksRes = ksPval(observ, parametrize!(normalCDF)(0.0L, 1.0L)); 62 93 writeln("100k samples from normal(0, 1): K-S P-val: ", ksRes); … … 70 101 71 102 ///Random Cauchy-distributed number. 72 real rCauchy(RGen )(ref RGen gen, real X0 = 0, real gamma = 1) {73 real p = uniform( gen, 0.0L, 1.0L);103 real rCauchy(RGen = Random)(real X0 = 0, real gamma = 1, ref RGen gen = rndGen) { 104 real p = uniform(0.0L, 1.0L, gen);; 74 105 return invCauchyCDF(p, X0, gamma); 75 106 } 76 107 77 108 unittest { 78 Random gen;79 gen.seed(unpredictableSeed);80 109 real[] observ = new real[100_000]; 81 110 foreach(ref elem; observ) 82 elem = rCauchy( gen);111 elem = rCauchy(); 83 112 auto ksRes = ksPval(observ, parametrize!(cauchyCDF)(0.0L, 1.0L)); 84 113 writeln("100k samples from Cauchy(0, 1): K-S P-val: ", ksRes); … … 91 120 } 92 121 93 real rStudentT(RGen )(ref RGen gen, real df) {94 real pVal = uniform( gen, 0.0L, 1.0L);122 real rStudentT(RGen = Random)(real df, ref RGen gen = rndGen) { 123 real pVal = uniform(0.0L, 1.0L, gen); 95 124 return invStudentsTCDF(pVal, df); 96 125 } 97 126 98 127 unittest { 99 Random gen;100 gen.seed(unpredictableSeed);101 128 real[] observ = new real[10_000]; 102 129 foreach(ref elem; observ) 103 elem = rStudentT( gen,5);130 elem = rStudentT(5); 104 131 auto ksRes = ksPval(observ, parametrize!(studentsTCDF)(5)); 105 132 writeln("10k samples from T(5): K-S P-val: ", ksRes); … … 113 140 114 141 /**Generates a random number from the Poisson distribution.*/ 115 uint rPoisson(RGen )(ref RGen gen, real lambda) {116 real pVal = uniform( gen, 0.0L, 1.0L);142 uint rPoisson(RGen = Random)(real lambda, ref RGen gen = rndGen) { 143 real pVal = uniform(0.0L, 1.0L, gen); 117 144 uint guess = cast(uint) max(round( 118 145 invNormalCDF(pVal, lambda, sqrt(lambda)) + 0.5), 0.0L); … … 142 169 143 170 unittest { 144 Random gen;145 gen.seed(unpredictableSeed);146 171 real lambda = 4L; 147 172 uint[] observ = new uint[100_000]; 148 173 foreach(ref elem; observ) 149 elem = rPoisson(gen, lambda); 150 auto ksRes = ksPval(observ, parametrize!(poissonCDF)(lambda)); 151 writeln("100k samples from poisson(", lambda, 152 "): K-S P-val: ", ksRes); 174 elem = rPoisson(lambda); 175 writeln("100k samples from poisson(", lambda, "):"); 153 176 writeln("\tMean Expected: ", lambda, 154 177 " Observed: ", mean(observ)); … … 164 187 165 188 ///Bernoulli r.v. with probability p of equaling 1. 166 uint rBernoulli(RGen )(ref RGen gen, real P = 0.5) {167 real pVal = uniform( gen, 0.0L, 1.0L);189 uint rBernoulli(RGen = Random)(real P = 0.5, ref RGen gen = rndGen) { 190 real pVal = uniform(0.0L, 1.0L, gen); 168 191 return cast(uint) (pVal <= P); 169 192 } … … 171 194 // bother writing a separate test for. 172 195 173 import std.stdio;174 long gue;175 176 196 /**Generates a random number from the binionial distribution.*/ 177 uint rBinomial(RGen )(ref RGen gen, uint n, real p) {197 uint rBinomial(RGen = Random)(uint n, real p, ref RGen gen = rndGen) { 178 198 // Generate p-value, get normal approx. as starting point, search for 179 199 // binomial element that matches this starting point. Normal approx. 180 200 // is usually within 1 or 2 elements. 181 real pVal = uniform( gen, 0.0L, 1.0L);201 real pVal = uniform(0.0L, 1.0L, gen); 182 202 uint guess = cast(uint) max(round( 183 203 invNormalCDF(pVal, n * p, sqrt(n * p * (1 - p)))) + 0.5, 0); … … 211 231 212 232 unittest { 213 Random gen;214 gen.seed(unpredictableSeed);215 233 real p = 0.7; 216 234 uint n = 10; 217 235 uint[] observ = new uint[100_000]; 218 236 foreach(ref elem; observ) 219 elem = rBinomial( gen,n, p);237 elem = rBinomial(n, p); 220 238 auto ksRes = ksPval(observ, parametrize!(binomialCDF)(n, p)); 221 writeln("100k samples from binom.(", n, ", ", p, 222 "): K-S P-val: ", ksRes); 239 writeln("100k samples from binom.(", n, ", ", p, "):"); 223 240 writeln("\tMean Expected: ", n * p, 224 241 " Observed: ", mean(observ)); … … 237 254 * Bugs: Slow for large values of N. Fairly naive implementation. 238 255 * Reasonably fast, though, for small values (<100) of N.*/ 239 uint rHypergeometric(RGen )(ref RGen gen, uint N1, uint N2, uint N)256 uint rHypergeometric(RGen = Random)(uint N1, uint N2, uint N, ref RGen gen = rndGen) 240 257 in { 241 258 assert(N <= (N1 + N2)); … … 247 264 NR[1] = N1; 248 265 foreach(i; 0..N) { 249 uint X = rBernoulli( gen, NR[1] / (NR[0] + NR[1]));266 uint X = rBernoulli(NR[1] / (NR[0] + NR[1]), gen); 250 267 NR[X] -= 1.0L; 251 268 result += X; … … 255 272 256 273 unittest { 257 Random gen;258 gen.seed(unpredictableSeed);259 274 uint n1 = 20, n2 = 30, n = 10; 260 275 uint[] observ = new uint[100_000]; 261 276 foreach(ref elem; observ) 262 elem = rHypergeometric( gen,n1, n2, n);277 elem = rHypergeometric(n1, n2, n); 263 278 auto ksRes = ksPval(observ, parametrize!(hypergeometricCDF)(n1, n2, n)); 264 writeln("100k samples from hypergeom.(", n1, ", ", n2, ", ", n, 265 "): K-S P-val: ", ksRes); 279 writeln("100k samples from hypergeom.(", n1, ", ", n2, ", ", n, "):"); 266 280 writeln("\tMean Expected: ", n * cast(real) n1 / (n1 + n2), 267 281 " Observed: ", mean(observ)); … … 278 292 * set n = 1, since geometric distribution is just a special case of neg. 279 293 * binomial.*/ 280 uint rNegBinom(RGen )(ref RGen gen, uint n, real p)294 uint rNegBinom(RGen = Random)(uint n, real p, ref RGen gen = rndGen) 281 295 in { 282 296 assert(p >= 0 && p <= 1); 283 297 assert(n > 0); 284 298 } body { 285 real pVal = uniform( gen, 0.0L, 1.0L);299 real pVal = uniform(0.0L, 1.0L, gen); 286 300 // Normal or gamma approx, then adjust. 287 301 real mean = n * (1 - p) / p; … … 348 362 uint[] observ = new uint[100_000]; 349 363 foreach(ref elem; observ) 350 elem = rNegBinom(gen, n, p); 351 auto ksRes = ksPval(observ, parametrize!(negBinomCDF)(n, p)); 352 writeln("100k samples from neg. binom.(", n,", ", p, 353 "): K-S P-val: ", ksRes); 364 elem = rNegBinom(n, p); 365 writeln("100k samples from neg. binom.(", n,", ", p, "):"); 354 366 writeln("\tMean Expected: ", n * (1 - p) / p, 355 367 " Observed: ", mean(observ)); … … 365 377 366 378 /// 367 real rLaplace(RGen )(ref RGen gen, real mu = 0, real b = 1) {368 real p = uniform( gen, 0.0L, 1.0L);379 real rLaplace(RGen = Random)(real mu = 0, real b = 1, ref RGen gen = rndGen) { 380 real p = uniform(0.0L, 1.0L, gen); 369 381 return invLaplaceCDF(p, mu, b); 370 382 } … … 375 387 real[] observ = new real[100_000]; 376 388 foreach(ref elem; observ) 377 elem = rLaplace( gen);389 elem = rLaplace(); 378 390 auto ksRes = ksPval(observ, parametrize!(laplaceCDF)(0.0L, 1.0L)); 379 391 writeln("100k samples from Laplace(0, 1): K-S P-val: ", ksRes); … … 389 401 * gamma distribution, with b = 1. However, it's a common special 390 402 * case and this function is a lot faster than the more general rGamma.*/ 391 real rExponential(RGen )(ref RGen gen, real lambda) {392 real p = uniform( gen, 0.0L, 1.0L);403 real rExponential(RGen = Random)(real lambda, ref RGen gen = rndGen) { 404 real p = uniform(0.0L, 1.0L, gen); 393 405 return -log(p) / lambda; 394 406 } 395 407 396 408 unittest { 397 Random gen;398 gen.seed(unpredictableSeed);399 409 real[] observ = new real[100_000]; 400 410 foreach(ref elem; observ) 401 elem = rExponential( gen,2.0L);411 elem = rExponential(2.0L); 402 412 auto ksRes = ksPval(observ, parametrize!(gammaCDF)(2, 1)); 403 413 writeln("100k samples from exponential(2): K-S P-val: ", ksRes); … … 414 424 * this is a common special case, a separate exponential random 415 425 * number generator is provided for speed.*/ 416 real rGamma(RGen )(ref RGen gen, real a = 1, real b = 1) {417 real p = uniform( gen, 0.0L, 1.0L);426 real rGamma(RGen = Random)(real a = 1, real b = 1, ref RGen gen = rndGen) { 427 real p = uniform(0.0L, 1.0L, gen); 418 428 return invGammaCDFR(p, a, b); 419 429 } 420 430 421 431 unittest { 422 Random gen;423 gen.seed(unpredictableSeed);424 432 real[] observ = new real[100_000]; 425 433 foreach(ref elem; observ) 426 elem = rGamma( gen,2.0L, 3.0L);434 elem = rGamma(2.0L, 3.0L); 427 435 auto ksRes = ksPval(observ, parametrize!(gammaCDF)(2, 3)); 428 436 writeln("100k samples from gamma(2, 3): K-S P-val: ", ksRes); trunk/sort.d
r23 r28 23 23 * --- 24 24 * 25 * Author: David Simcha 26 * 27 * Copyright (c) 2009, David Simcha 25 * Author: David Simcha*/ 26 /* 27 * You may use this software under your choice of either of the following 28 * licenses. YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 29 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 30 * THE BSD LICENSE. IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 31 * TO OBEY THE TERMS OF THE PHOBOS LICENSE. IF YOU ARE A LAWYER LOOKING FOR 32 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 33 * GET A LIFE. 34 * 35 * ---------------------Phobos License: --------------------------------------- 36 * 37 * Copyright (C) 2008-2009 by David Simcha. 38 * 39 * This software is provided 'as-is', without any express or implied 40 * warranty. In no event will the authors be held liable for any damages 41 * arising from the use of this software. 42 * 43 * Permission is granted to anyone to use this software for any purpose, 44 * including commercial applications, and to alter it and redistribute it 45 * freely, in both source and binary form, subject to the following 46 * restrictions: 47 * 48 * o The origin of this software must not be misrepresented; you must not 49 * claim that you wrote the original software. If you use this software 50 * in a product, an acknowledgment in the product documentation would be 51 * appreciated but is not required. 52 * o Altered source versions must be plainly marked as such, and must not 53 * be misrepresented as being the original software. 54 * o This notice may not be removed or altered from any source 55 * distribution. 56 * 57 * --------------------BSD License: ----------------------------------------- 58 * 59 * Copyright (c) 2008-2009, David Simcha 28 60 * All rights reserved. 29 61 * … … 51 83 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 52 84 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 53 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 85 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 86 */ 54 87 55 88 module dstats.sort; 56 89 57 90 import std.traits, std.algorithm, std.math, std.functional, std.math, 58 std.typetuple ;91 std.typetuple, std.range, std.array; 59 92 60 93 import dstats.alloc; … … 62 95 version(unittest) { 63 96 import std.stdio, std.random; 64 Random gen;65 97 66 98 void main (){ … … 68 100 } 69 101 70 void rotateLeft(T)(T[] input) { 102 void rotateLeft(T)(T input) 103 if(isRandomAccessRange!(T)) { 71 104 if(input.length < 2) return; 72 Ttemp = input[0];105 ElementType!(T) temp = input[0]; 73 106 foreach(i; 1..input.length) { 74 107 input[i-1] = input[i]; … … 77 110 } 78 111 79 void rotateRight(T)(T[] input) { 112 void rotateRight(T)(T input) 113 if(isRandomAccessRange!(T)) { 80 114 if(input.length < 2) return; 81 Ttemp = input[$-1];115 ElementType!(T) temp = input[$-1]; 82 116 for(size_t i = input.length - 1; i > 0; i--) { 83 117 input[i] = input[i-1]; 84 118 } 85 119 input[0] = temp; 86 }87 88 // For testing purposes for stable sorts. Shuffles N arrays in lockstep.89 version(unittest) {90 void randomMultiShuffle(SomeRandomGen, T...)(ref SomeRandomGen r, T array) {91 foreach (i; 0 .. array[0].length) {92 // generate a random number i .. n93 immutable which = i + uniform!(size_t)(r, 0u, array[0].length - i);94 foreach(ti, element; array) {95 swap(element[i], element[which]);96 }97 }98 }99 120 } 100 121 … … 242 263 243 264 unittest { 244 gen.seed(unpredictableSeed);245 265 { // Test integer. 246 266 uint[] test = new uint[1_000]; 247 267 foreach(ref e; test) { 248 e = uniform( gen,0, 100);268 e = uniform(0, 100); 249 269 } 250 270 auto test2 = test.dup; 251 271 foreach(i; 0..1_000) { 252 random MultiShuffle(gen, test, test2);253 uint len = uniform( gen,0, 1_000);272 randomShuffle(zip(test, test2)); 273 uint len = uniform(0, 1_000); 254 274 qsort(test[0..len], test2[0..len]); 255 275 assert(isSorted(test[0..len])); … … 260 280 double[] test = new double[1_000]; 261 281 foreach(ref e; test) { 262 e = uniform( gen,0.0, 100_000);282 e = uniform(0.0, 100_000); 263 283 } 264 284 auto test2 = test.dup; 265 285 foreach(i; 0..1_000) { 266 random MultiShuffle(gen, test, test2);267 uint len = uniform( gen,0, 1_000);286 randomShuffle(zip(test, test2)); 287 uint len = uniform(0, 1_000); 268 288 qsort!("a > b")(test[0..len], test2[0..len]); 269 289 assert(isSorted!("a > b")(test[0..len])); … … 330 350 uint[] temp1 = new uint[1_000], temp2 = new uint[1_000]; 331 351 foreach(ref e; test) { 332 e = uniform( gen,0, 100); //Lots of ties.352 e = uniform(0, 100); //Lots of ties. 333 353 } 334 354 foreach(i; 0..100) { … … 337 357 e = j; 338 358 } 339 random MultiShuffle(gen,test);340 uint len = uniform( gen,0, 1_000);359 randomShuffle(test); 360 uint len = uniform(0, 1_000); 341 361 // Testing bubble sort distance against bubble sort, 342 362 // since bubble sort distance computed by bubble sort … … 361 381 e = j; 362 382 } 363 random MultiShuffle(gen,test);364 uint len = uniform( gen,0, 1_000);383 randomShuffle(test); 384 uint len = uniform(0, 1_000); 365 385 if(i & 1) // Test both temp and non-temp branches. 366 386 mergeSort(test[0..len], stability[0..len]); … … 378 398 double[] testL = new double[1_000]; 379 399 foreach(ref e; testL) { 380 e = uniform( gen,0.0, 100_000);400 e = uniform(0.0, 100_000); 381 401 } 382 402 auto testL2 = testL.dup; 383 403 foreach(i; 0..1_000) { 384 random MultiShuffle(gen, testL, testL2);385 uint len = uniform( gen,0, 1_000);404 randomShuffle(zip(testL, testL2)); 405 uint len = uniform(0, 1_000); 386 406 mergeSort!("a > b")(testL[0..len], testL2[0..len]); 387 407 assert(isSorted!("a > b")(testL[0..len])); … … 562 582 uint[] test = new uint[1_000], stability = new uint[1_000]; 563 583 foreach(ref e; test) { 564 e = uniform( gen,0, 100); //Lots of ties.584 e = uniform(0, 100); //Lots of ties. 565 585 } 566 586 uint[] test2 = test.dup; … … 569 589 e = j; 570 590 } 571 random MultiShuffle(gen, test, test2);572 uint len = uniform( gen,0, 1_000);591 randomShuffle(zip(test, test2)); 592 uint len = uniform(0, 1_000); 573 593 mergeSortInPlace(test[0..len], test2[0..len], stability[0..len]); 574 594 assert(isSorted(test[0..len])); … … 644 664 645 665 foreach(array; data) { 646 rotate(array[half1..middle + half2], array.ptr + middle);666 bringToFront(array[half1..middle], array[middle..middle + half2]); 647 667 } 648 668 size_t newMiddle = half1 + half2; … … 688 708 689 709 unittest { 690 gen.seed(unpredictableSeed);691 710 uint[] test = new uint[1_000]; 692 711 foreach(ref e; test) { 693 e = uniform( gen,0, 100_000);712 e = uniform(0, 100_000); 694 713 } 695 714 auto test2 = test.dup; 696 715 foreach(i; 0..1_000) { 697 random MultiShuffle(gen, test, test2);698 uint len = uniform( gen,0, 1_000);716 randomShuffle(zip(test, test2)); 717 uint len = uniform(0, 1_000); 699 718 heapSort(test[0..len], test2[0..len]); 700 719 assert(isSorted(test[0..len])); … … 769 788 770 789 unittest { 771 gen.seed(unpredictableSeed);772 790 uint[] test = new uint[100], stability = new uint[100]; 773 791 foreach(ref e; test) { 774 e = uniform( gen,0, 100); //Lots of ties.792 e = uniform(0, 100); //Lots of ties. 775 793 } 776 794 foreach(i; 0..1_000) { … … 779 797 e = j; 780 798 } 781 random MultiShuffle(gen,test);782 uint len = uniform( gen,0, 100);799 randomShuffle(test); 800 uint len = uniform(0, 100); 783 801 // Testing bubble sort distance against bubble sort, 784 802 // since bubble sort distance computed by bubble sort … … 932 950 933 951 unittest { 934 gen.seed(unpredictableSeed);935 952 enum n = 1000; 936 953 uint[] test = new uint[n]; … … 938 955 uint[] lockstep = new uint[n]; 939 956 foreach(ref e; test) { 940 e = uniform( gen,0, 1000);957 e = uniform(0, 1000); 941 958 } 942 959 foreach(i; 0..1_000) { 943 randomShuffle(test, gen);944 960 test2[] = test[]; 945 961 lockstep[] = test[]; 946 uint len = uniform( gen,0, n - 1) + 1;962 uint len = uniform(0, n - 1) + 1; 947 963 qsort!("a > b")(test2[0..len]); 948 int k = uniform( gen,0, len);964 int k = uniform(0, len); 949 965 auto qsRes = partitionK!("a > b")(test[0..len], lockstep[0..len], k); 950 966 assert(qsRes == test2[k]); … … 960 976 } 961 977 962 /**Given a set of data points entered through the addElement function,978 /**Given a set of data points entered through the put function, this output range 963 979 * maintains the invariant that the top N according to compFun will be 964 980 * contained in the data structure. Uses a heap internally, O(log N) insertion … … 977 993 * randomShuffle(nums, gen); 978 994 * foreach(n; nums) { 979 * less. addElement(n);980 * more. addElement(n);995 * less.put(n); 996 * more.put(n); 981 997 * } 982 998 * assert(less.getSorted == [0U, 1,2,3,4,5,6,7,8,9]); … … 999 1015 1000 1016 /** Insert an element into the topN struct.*/ 1001 void addElement(T elem) {1017 void put(T elem) { 1002 1018 if(nAdded < n) { 1003 1019 nodes[nAdded] = elem; … … 1041 1057 randomShuffle(nums, gen); 1042 1058 foreach(n; nums) { 1043 less. addElement(n);1044 more. addElement(n);1059 less.put(n); 1060 more.put(n); 1045 1061 } 1046 1062 assert(less.getSorted == [0U, 1,2,3,4,5,6,7,8,9]); … … 1052 1068 randomShuffle(nums, gen); 1053 1069 foreach(n; nums[0..5]) { 1054 less. addElement(n);1055 more. addElement(n);1070 less.put(n); 1071 more.put(n); 1056 1072 } 1057 1073 assert(less.getSorted == qsort!("a < b")(nums[0..5])); trunk/stats.d
r17 r28 1 /**Convenience module that simply publicly imports everything else. 2 * 3 * Author: David Simcha 4 * 5 * Copyright (c) 2009, David Simcha 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * * Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * * Neither the name of the authors nor the 19 * names of its contributors may be used to endorse or promote products 20 * derived from this software without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 23 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 24 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 25 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY 26 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 29 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 1 /**Convenience module that simply publicly imports everything else.*/ 32 2 33 3 module dstats.stats; trunk/summary.d
r20 r28 1 /**Summary dstats such as mean, median, sum, variance, skewness, kurtosis. 1 /**Summary statistics such as mean, median, sum, variance, skewness, kurtosis. 2 * Except for median, which cannot be calculated online, all summary statistics 3 * have both an input range interface and an output range interface. 2 4 * 3 5 * Bugs: This whole module assumes that input will be reals or types implicitly 4 6 * convertible to real. No allowances are made for user-defined numeric 5 * types such as BigInts. This is necessary for simplicity. 6 * 7 * Author: David Simcha 8 * 9 * Copyright (c) 2009, David Simcha 7 * types such as BigInts. This is necessary for simplicity. However, 8 * if you have a function that converts your data to reals, most of 9 * these functions work with any input range, so you can simply map 10 * this function onto your range. 11 * 12 * Author: David Simcha*/ 13 /* 14 * You may use this software under your choice of either of the following 15 * licenses. YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 16 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 17 * THE BSD LICENSE. IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 18 * TO OBEY THE TERMS OF THE PHOBOS LICENSE. IF YOU ARE A LAWYER LOOKING FOR 19 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 20 * GET A LIFE. 21 * 22 * ---------------------Phobos License: --------------------------------------- 23 * 24 * Copyright (C) 2008-2009 by David Simcha. 25 * 26 * This software is provided 'as-is', without any express or implied 27 * warranty. In no event will the authors be held liable for any damages 28 * arising from the use of this software. 29 * 30 * Permission is granted to anyone to use this software for any purpose, 31 * including commercial applications, and to alter it and redistribute it 32 * freely, in both source and binary form, subject to the following 33 * restrictions: 34 * 35 * o The origin of this software must not be misrepresented; you must not 36 * claim that you wrote the original software. If you use this software 37 * in a product, an acknowledgment in the product documentation would be 38 * appreciated but is not required. 39 * o Altered source versions must be plainly marked as such, and must not 40 * be misrepresented as being the original software. 41 * o This notice may not be removed or altered from any source 42 * distribution. 43 * 44 * --------------------BSD License: ----------------------------------------- 45 * 46 * Copyright (c) 2008-2009, David Simcha 10 47 * All rights reserved. 11 48 * … … 33 70 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 71 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 72 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 73 */ 36 74 37 75 38 76 module dstats.summary; 39 77 40 import std.algorithm, std.functional, std.conv, std.string; 78 import std.algorithm, std.functional, std.conv, std.string, std.range, 79 std.array; 41 80 42 81 import dstats.sort, dstats.base, dstats.alloc; 43 82 44 83 version(unittest) { 45 import std.stdio, std.random; 46 47 Random gen; 84 import std.stdio, std.random, std.algorithm, std.conv; 48 85 49 86 void main() { … … 51 88 } 52 89 53 /**Finds median in O(N) time on average. In the case of an even number of54 * e lements, the mean of the two middle elements is returned. This is a55 * convenience founction designed specifically for numeric types, where the56 * averaging of the two middle elements is desired. A more general selection57 * algorithm that can handle any type with a total ordering, as well as58 * selecting any position in the ordering, can be found at90 /**Finds median of an input range in O(N) time on average. In the case of an 91 * even number of elements, the mean of the two middle elements is returned. 92 * This is a convenience founction designed specifically for numeric types, 93 * where the averaging of the two middle elements is desired. A more general 94 * selection algorithm that can handle any type with a total ordering, as well 95 * as selecting any position in the ordering, can be found at 59 96 * dstats.sort.quickSelect() and dstats.sort.partitionK(). 60 97 * Allocates memory, does not reorder input data.*/ 61 real median(T)(const T[] data) { 62 auto TAState = TempAlloc.getState; 63 auto dataDup = data.tempdup(TAState); scope(exit) TempAlloc.free(TAState); 98 real median(T)(T data) 99 if(realInput!(T)) { 100 // Allocate once on TempAlloc if possible, i.e. if we know the length. 101 // This can be done on TempAlloc. Otherwise, have to use GC heap 102 // and appending. 103 auto dataDup = tempdup(data); 104 scope(exit) TempAlloc.free; 64 105 return medianPartition(dataDup); 65 106 } … … 69 110 * median, and elements larger than the median will have larger indices than 70 111 * that of the median. Useful both for its partititioning and to avoid 71 * memory allocations.*/ 72 real medianPartition(T)(T[] data) 112 * memory allocations. Requires a random access range with swappable 113 * elements.*/ 114 real medianPartition(T)(T data) 115 if(isRandomAccessRange!(T) && 116 is(ElementType!(T) : real) && 117 hasSwappableElements!(T) && 118 dstats.base.hasLength!(T)) 73 119 in { 74 120 assert(data.length > 0); … … 77 123 // with an index larger than the lower median, after the array is 78 124 // partially sorted. 79 static T min(T[] data) {80 T min = data[0];81 foreach(d; data[1..$]) {82 if(d < min)83 min = d;84 }85 return min;86 }87 88 125 if(data.length == 1) { 89 126 return data[0]; … … 92 129 } else { 93 130 auto lower = partitionK(data, data.length / 2 - 1); 94 auto upper = min(data[data.length / 2..$]); 131 auto upper = ElementType!(T).max; 132 133 // Avoid requiring slicing to be supported. 134 foreach(i; data.length / 2..data.length) { 135 if(data[i] < upper) { 136 upper = data[i]; 137 } 138 } 95 139 return lower * 0.5L + upper * 0.5L; 96 140 } … … 105 149 } 106 150 107 gen.seed(unpredictableSeed);108 151 float[] test = new float[1000]; 109 152 uint upperBound, lowerBound; 110 153 foreach(testNum; 0..1000) { 111 154 foreach(ref e; test) { 112 e = uniform( gen,0f, 1000f);155 e = uniform(0f, 1000f); 113 156 } 114 157 do { 115 upperBound = uniform( gen,0u, test.length);116 lowerBound = uniform( gen,0u, test.length);158 upperBound = uniform(0u, test.length); 159 lowerBound = uniform(0u, test.length); 117 160 } while(lowerBound == upperBound); 118 161 if(lowerBound > upperBound) { … … 126 169 assert(approxEqual(quickRes, accurateRes)); 127 170 } 171 172 // Make sure everything works with lowest common denominator range type. 173 struct Count { 174 uint num; 175 uint upTo; 176 uint front() { 177 return num; 178 } 179 void popFront() { 180 num++; 181 } 182 bool empty() { 183 return num >= upTo; 184 } 185 } 186 187 Count a; 188 a.upTo = 100; 189 assert(approxEqual(median(a), 49.5)); 128 190 writeln("Passed median unittest."); 129 191 } 130 192 131 /// 132 real mean(T)(const T[] data) { 193 /**Finds the arithmetic mean of any input range whose elements are implicitly 194 * convertible to real.*/ 195 real mean(T)(T data) 196 if(realInput!(T)) { 133 197 OnlineMean meanCalc; 134 198 foreach(element; data) { 135 meanCalc. addElement(element);199 meanCalc.put(element); 136 200 } 137 201 return meanCalc.mean; 138 202 } 139 203 140 /** Structto calculate the mean online. Getter for mean costs a branch to204 /**Output range to calculate the mean online. Getter for mean costs a branch to 141 205 * check for N == 0. This struct uses O(1) space and does *NOT* store the 142 206 * individual elements. … … 145 209 * --- 146 210 * OnlineMean summ; 147 * summ. addElement(1);148 * summ. addElement(2);149 * summ. addElement(3);150 * summ. addElement(4);151 * summ. addElement(5);211 * summ.put(1); 212 * summ.put(2); 213 * summ.put(3); 214 * summ.put(4); 215 * summ.put(5); 152 216 * assert(summ.mean == 3); 153 217 * ---*/ … … 157 221 real k = 0; 158 222 public: 159 /// 160 void addElement(real element) { 223 /// Allow implicit casting to real, by returning the current mean. 224 alias mean this; 225 226 /// 227 void put(real element) { 161 228 result += (element - result) / ++k; 162 229 } … … 178 245 } 179 246 180 ///Returns mean of absolute values. 181 real absMean(T)(const T[] data) { 182 OnlineMean meanCalc; 183 foreach(element; data) { 184 meanCalc.addElement(abs(element)); 185 } 186 return meanCalc.mean; 187 } 188 189 /**User has option of making U a different type than T to prevent overflows 247 /// 248 struct OnlineGeometricMean { 249 private: 250 OnlineMean m; 251 public: 252 ///Allow implicit casting to real, by returning current geometric mean. 253 alias geoMean this; 254 255 /// 256 void put(real element) { 257 m.put(log2(element)); 258 } 259 260 /// 261 real geoMean() const { 262 return exp2(m.mean); 263 } 264 265 /// 266 real N() const { 267 return m.k; 268 } 269 270 /// 271 string toString() { 272 return to!(string)(geoMean); 273 } 274 } 275 276 /// 277 real geometricMean(T)(T data) 278 if(realInput!(T)) { 279 OnlineGeometricMean m; 280 foreach(elem; data) { 281 m.put(elem); 282 } 283 return m.geoMean; 284 } 285 286 unittest { 287 string[] data = ["1", "2", "3", "4", "5"]; 288 auto result = geometricMean(map!(to!(uint, string))(data)); 289 assert(approxEqual(result, 2.60517)); 290 writeln("Passed geometricMean unittest."); 291 } 292 293 294 /**Finds the sum of an input range whose elements implicitly convert to real. 295 * User has option of making U a different type than T to prevent overflows 190 296 * on large array summing operations. However, by default, return type is 191 297 * T (same as input type).*/ 192 U sum(T, U = Mutable!(T))(const T[] data) { 298 U sum(T, U = Unqual!(ElementType!(T)))(T data) 299 if(realInput!(T)) { 193 300 U sum = 0; 194 301 foreach(value; data) { … … 198 305 } 199 306 200 /**User has option of making U a different type than T201 * to prevent overflows on large array summing operations.202 * However, by default, return type is T (same as input type).*/203 U absSum(T, U = Mutable!(T))(const T[] data) {204 U sum=0;205 foreach(value; data) {206 sum+=abs(value);207 }208 return sum;209 }210 211 307 unittest { 212 assert(sum([1,2,3,4,5])==15); 213 assert(sum([40.0, 40.1, 5.2])==85.3); 214 assert(mean([1,2,3])==2); 215 assert(mean([1.0, 2.0, 3.0])==2.0); 216 assert(mean([1, 2, 5, 10, 17]) == 7); 217 assert(absSum([-1, 2, 3, -4, 5]) == 15); 218 assert(absMean([-1, 2, 3, -4, 5]) == 3); 308 assert(sum(cast(int[]) [1,2,3,4,5])==15); 309 assert(approxEqual( sum(cast(int[]) [40.0, 40.1, 5.2]), 85.3)); 310 assert(mean(cast(int[]) [1,2,3]) == 2); 311 assert(mean(cast(int[]) [1.0, 2.0, 3.0]) == 2.0); 312 assert(mean(cast(int[]) [1, 2, 5, 10, 17]) == 7); 219 313 writefln("Passed sum/mean unittest."); 220 314 } 221 315 222 316 223 /** Allows computation ofmean, stdev, variance online. Getter methods317 /**Outpu range to compute mean, stdev, variance online. Getter methods 224 318 * for stdev, var cost a few floating point ops. Getter for mean costs 225 319 * a single branch to check for N == 0. Relatively expensive floating point … … 230 324 * --- 231 325 * OnlineMeanSD summ; 232 * summ. addElement(1);233 * summ. addElement(2);234 * summ. addElement(3);235 * summ. addElement(4);236 * summ. addElement(5);326 * summ.put(1); 327 * summ.put(2); 328 * summ.put(3); 329 * summ.put(4); 330 * summ.put(5); 237 331 * assert(summ.mean == 3); 238 332 * assert(summ.stdev == sqrt(2.5)); … … 246 340 public: 247 341 /// 248 void addElement(real element) {342 void put(real element) { 249 343 real kNeg1 = 1.0L / ++_k; 250 344 _var += (element * element - _var) * kNeg1; … … 292 386 } 293 387 294 /// 295 real variance(T)(const T[] data) { 388 /**Finds the variance of an input range with members implicitly convertible 389 * to reals.*/ 390 real variance(T)(T data) 391 if(realInput!(T)) { 296 392 return meanVariance(data).SD; 297 393 } 298 394 299 /// Calculates both mean and variance in one pass, returns a MeanSD struct. 300 MeanSD meanVariance(T)(const T[] data) { 395 /**Calculates both mean and variance of an input range, returns a MeanSD 396 * struct.*/ 397 MeanSD meanVariance(T)(T data) 398 if(realInput!(T)) { 301 399 OnlineMeanSD meanSDCalc; 302 400 foreach(element; data) { 303 meanSDCalc. addElement(element);401 meanSDCalc.put(element); 304 402 } 305 403 … … 307 405 } 308 406 309 /// Computes mean and standard deviation in one pass, returns both in a struct. 310 MeanSD meanStdev(T)(const T[] data) { 407 /**Calculates both mean and standard deviation of an input range, returns a 408 * MeanSD struct.*/ 409 MeanSD meanStdev(T)(T data) 410 if(realInput!(T)) { 311 411 auto ret = meanVariance(data); 312 412 ret.SD = sqrt(ret.SD); … … 314 414 } 315 415 316 /// 317 real stdev(T)(const T[] data) { 416 /**Calculate the standard deviation of an input range with members 417 * implicitly converitble to real.*/ 418 real stdev(T)(T data) 419 if(realInput!(T)) { 318 420 return meanStdev(data).SD; 319 421 } 320 422 321 423 unittest { 322 auto res = meanStdev( [3, 1, 4, 5]);424 auto res = meanStdev(cast(int[]) [3, 1, 4, 5]); 323 425 assert(approxEqual(res.SD, 1.7078)); 324 426 assert(approxEqual(res.mean, 3.25)); 325 res = meanStdev( [1.0, 2.0, 3.0, 4.0, 5.0]);427 res = meanStdev(cast(double[]) [1.0, 2.0, 3.0, 4.0, 5.0]); 326 428 assert(approxEqual(res.SD, 1.5811)); 327 429 assert(approxEqual(res.mean, 3)); … … 329 431 } 330 432 331 /// 332 real percentVariance(T) (const T[] data) { 333 MeanSD stats = meanStdev(data); 334 real PV = 100 * abs(stats.SD / stats.mean); 335 return abs(PV); 336 } 337 338 /**Allows computation of mean, stdev, variance, skewness, kurtosis, min, and 433 /**Output range to compute mean, stdev, variance, skewness, kurtosis, min, and 339 434 * max online. Using this struct is relatively expensive, so if you just need 340 435 * mean and/or stdev, try OnlineMeanSD or OnlineMean. Getter methods for stdev, … … 347 442 * --- 348 443 * OnlineSummary summ; 349 * summ. addElement(1);350 * summ. addElement(2);351 * summ. addElement(3);352 * summ. addElement(4);353 * summ. addElement(5);444 * summ.put(1); 445 * summ.put(2); 446 * summ.put(3); 447 * summ.put(4); 448 * summ.put(5); 354 449 * assert(summ.N == 5); 355 450 * assert(summ.mean == 3); … … 371 466 public: 372 467 /// 373 void addElement(real element) {374 i nvariantreal kNeg1 = 1.0L / ++_k;468 void put(real element) { 469 immutable real kNeg1 = 1.0L / ++_k; 375 470 _min = (element < _min) ? element : _min; 376 471 _max = (element > _max) ? element : _max; … … 438 533 * the variance is due to infrequent, large deviations from the mean. Low 439 534 * kurtosis means that the variance is due to frequent, small deviations from 440 * the mean. The normal distribution is defined as having kurtosis of 0.*/ 441 real kurtosis(T)(const T[] data) { 535 * the mean. The normal distribution is defined as having kurtosis of 0. 536 * Input must be an input range with elements implicitly convertible to real.*/ 537 real kurtosis(T)(T data) 538 if(realInput!(T)) { 442 539 OnlineSummary kCalc; 443 540 foreach(elem; data) { 444 kCalc. addElement(elem);541 kCalc.put(elem); 445 542 } 446 543 return kCalc.kurtosis; … … 449 546 unittest { 450 547 // Values from Octave. 451 assert(approxEqual(kurtosis([1, 1, 1, 1, 10] ), -.92));452 assert(approxEqual(kurtosis([2.5, 3.5, 4.5, 5.5] ), -2.0775));453 assert(approxEqual(kurtosis([1,2,2,2,2,2,100] ), 0.79523));548 assert(approxEqual(kurtosis([1, 1, 1, 1, 10].dup), -.92)); 549 assert(approxEqual(kurtosis([2.5, 3.5, 4.5, 5.5].dup), -2.0775)); 550 assert(approxEqual(kurtosis([1,2,2,2,2,2,100].dup), 0.79523)); 454 551 writefln("Passed kurtosis unittest."); 455 552 } … … 458 555 * means that the right tail is longer/fatter than the left tail. Negative 459 556 * skewness means the left tail is longer/fatter than the right tail. Zero 460 * skewness indicates a symmetrical distribution.*/ 461 real skewness(T)(const T[] data) { 557 * skewness indicates a symmetrical distribution. Input must be an input 558 * range with elements implicitly convertible to real.*/ 559 real skewness(T)(T data) 560 if(realInput!(T)) { 462 561 OnlineSummary sCalc; 463 562 foreach(elem; data) { 464 sCalc. addElement(elem);563 sCalc.put(elem); 465 564 } 466 565 return sCalc.skewness; … … 469 568 unittest { 470 569 // Values from Octave. 471 assert(approxEqual(skewness([1,2,3,4,5]), 0)); 472 assert(approxEqual(skewness([3,1,4,1,5,9,2,6,5]), 0.45618)); 473 assert(approxEqual(skewness([2,7,1,8,2,8,1,8,2,8,4,5,9]), -0.076783)); 570 assert(approxEqual(skewness([1,2,3,4,5].dup), 0)); 571 assert(approxEqual(skewness([3,1,4,1,5,9,2,6,5].dup), 0.45618)); 572 assert(approxEqual(skewness([2,7,1,8,2,8,1,8,2,8,4,5,9].dup), -0.076783)); 573 574 // Test handling of ranges that are not arrays. 575 string[] stringy = ["3", "1", "4", "1", "5", "9", "2", "6", "5"]; 576 auto intified = map!(to!(int, string))(stringy); 577 assert(approxEqual(skewness(intified), 0.45618)); 474 578 writeln("Passed skewness test."); 475 579 } … … 510 614 } 511 615 512 /**Calculates all summary dstats (mean, variance, standard dev., skewness 513 * and kurtosis) on an array. Returns the results in a Summary struct.*/ 514 Summary summary(T)(const T[] data) { 616 /**Calculates all summary stats (mean, variance, standard dev., skewness 617 * and kurtosis) on an input range with elements that can be implicitly 618 * converted to real. Returns the results in a Summary struct.*/ 619 Summary summary(T)(T data) 620 if(realInput!(T)) { 515 621 OnlineSummary summ; 516 622 foreach(elem; data) { 517 summ. addElement(elem);623 summ.put(elem); 518 624 } 519 625 return Summary(data.length, summ.mean, summ.var, summ.stdev, summ.skewness, trunk/tests.d
r26 r28 1 /**Hypothesis testing beyond simple CDFs. 2 * 3 * Author: David Simcha 4 * 5 * Copyright (c) 2009, David Simcha 1 /**Hypothesis testing beyond simple CDFs. All functions work with input 2 * ranges with elements implicitly convertible to real unless otherwise noted. 3 * 4 * Author: David Simcha*/ 5 /* 6 * You may use this software under your choice of either of the following 7 * licenses. YOU NEED ONLY OBEY THE TERMS OF EXACTLY ONE OF THE TWO LICENSES. 8 * IF YOU CHOOSE TO USE THE PHOBOS LICENSE, YOU DO NOT NEED TO OBEY THE TERMS OF 9 * THE BSD LICENSE. IF YOU CHOOSE TO USE THE BSD LICENSE, YOU DO NOT NEED 10 * TO OBEY THE TERMS OF THE PHOBOS LICENSE. IF YOU ARE A LAWYER LOOKING FOR 11 * LOOPHOLES AND RIDICULOUSLY NON-EXISTENT AMBIGUITIES IN THE PREVIOUS STATEMENT, 12 * GET A LIFE. 13 * 14 * ---------------------Phobos License: --------------------------------------- 15 * 16 * Copyright (C) 2008-2009 by David Simcha. 17 * 18 * This software is provided 'as-is', without any express or implied 19 * warranty. In no event will the authors be held liable for any damages 20 * arising from the use of this software. 21 * 22 * Permission is granted to anyone to use this software for any purpose, 23 * including commercial applications, and to alter it and redistribute it 24 * freely, in both source and binary form, subject to the following 25 * restrictions: 26 * 27 * o The origin of this software must not be misrepresented; you must not 28 * claim that you wrote the original software. If you use this software 29 * in a product, an acknowledgment in the product documentation would be 30 * appreciated but is not required. 31 * o Altered source versions must be plainly marked as such, and must not 32 * be misrepresented as being the original software. 33 * o This notice may not be removed or altered from any source 34 * distribution. 35 * 36 * --------------------BSD License: ----------------------------------------- 37 * 38 * Copyright (c) 2008-2009, David Simcha 6 39 * All rights reserved. 7 40 * … … 29 62 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 63 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ 64 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 */ 32 66 module dstats.tests; 33 67 34 68 import dstats.base, dstats.distrib, dstats.alloc, dstats.summary, dstats.sort, 35 std.algorithm, std.functional ;69 std.algorithm, std.functional, std.range; 36 70 37 71 version(unittest) { … … 61 95 * Alt.GREATER, meaning mean(data) > mean, and Alt.TWOSIDE, meaning mean(data) 62 96 * != mean. 97 * 63 98 * Returns: The p-value against the given alternative.*/ 64 real studentsTTest(T)(const T[] data, real mean, Alt alt = Alt.TWOSIDE) { 99 real studentsTTest(T)(T data, real mean, Alt alt = Alt.TWOSIDE) 100 if(realInput!(T)) { 65 101 auto meanSd = meanStdev(data); 66 102 real t = (meanSd.mean - mean) / (meanSd.SD / sqrt(cast(real) data.length)); … … 75 111 76 112 unittest { 77 assert(approxEqual(studentsTTest([1, 2, 3, 4, 5] , 2), .2302));78 assert(approxEqual(studentsTTest([1, 2, 3, 4, 5] , 2, Alt.LESS), .8849));79 assert(approxEqual(studentsTTest([1, 2, 3, 4, 5] , 2, Alt.GREATER), .1151));113 assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2), .2302)); 114 assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.LESS), .8849)); 115 assert(approxEqual(studentsTTest([1, 2, 3, 4, 5].dup, 2, Alt.GREATER), .1151)); 80 116 writeln("Passed 1-sample studentsTTest test."); 81 117 } … … 86 122 * mean(sample2), and Alt.TWOSIDE, meaning mean(sample1) != mean(sample2). 87 123 * Returns: The p-value against the given alternative.*/ 88 real studentsTTest(T, U)(const T[] sample1, const U[] sample2, Alt alt = Alt.TWOSIDE) { 89 size_t n1 = sample1.length; 90 size_t n2 = sample2.length; 91 92 auto s1summ = meanVariance(sample1); 93 auto s2summ = meanVariance(sample2); 94 real sx1x2 = sqrt(((n1 - 1) * s1summ.SD + (n2 - 1) * s2summ.SD) / 124 real studentsTTest(T, U)(T sample1, U sample2, Alt alt = Alt.TWOSIDE) 125 if(realInput!(T) && realInput!(U)) { 126 size_t n1, n2; 127 OnlineMeanSD s1summ, s2summ; 128 foreach(elem; sample1) { 129 s1summ.put(elem); 130 n1++; 131 } 132 foreach(elem; sample2) { 133 s2summ.put(elem); 134 n2++; 135 } 136 137 real sx1x2 = sqrt(((n1 - 1) * s1summ.var + (n2 - 1) * s2summ.var) / 95 138 (n1 + n2 - 2)); 96 139 real t = (s1summ.mean - s2summ.mean) / … … 107 150 unittest { 108 151 // Values from R. 109 assert(approxEqual(studentsTTest([1,2,3,4,5] , [1,3,4,5,7,9]), 0.2346));110 assert(approxEqual(studentsTTest([1,2,3,4,5] , [1,3,4,5,7,9], Alt.LESS),152 assert(approxEqual(studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup), 0.2346)); 153 assert(approxEqual(studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.LESS), 111 154 0.1173)); 112 assert(approxEqual(studentsTTest([1,2,3,4,5] , [1,3,4,5,7,9], Alt.GREATER),155 assert(approxEqual(studentsTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.GREATER), 113 156 0.8827)); 114 assert(approxEqual(studentsTTest([1,3,5,7,9,11] , [2,2,1,3,4]), 0.06985));115 assert(approxEqual(studentsTTest([1,3,5,7,9,11] , [2,2,1,3,4], Alt.LESS),157 assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup), 0.06985)); 158 assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.LESS), 116 159 0.965)); 117 assert(approxEqual(studentsTTest([1,3,5,7,9,11] , [2,2,1,3,4], Alt.GREATER),160 assert(approxEqual(studentsTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.GREATER), 118 161 0.03492)); 119 162 writeln("Passed 2-sample studentsTTest test."); … … 126 169 * != mean(sample2). 127 170 * Returns: The p-value against the given alternative.*/ 128 real welchTTest(T, U)(const T[] sample1, const U[] sample2, Alt alt = Alt.TWOSIDE) { 129 size_t n1 = sample1.length; 130 size_t n2 = sample2.length; 131 132 auto s1summ = meanVariance(sample1); 133 auto s2summ = meanVariance(sample2); 134 real sx1x2 = sqrt(s1summ.SD / n1 + s2summ.SD / n2); 171 real welchTTest(T, U)(T sample1, U sample2, Alt alt = Alt.TWOSIDE) 172 if(realInput!(T) && realInput!(U)) { 173 size_t n1, n2; 174 OnlineMeanSD s1summ, s2summ; 175 foreach(elem; sample1) { 176 s1summ.put(elem); 177 n1++; 178 } 179 foreach(elem; sample2) { 180 s2summ.put(elem); 181 n2++; 182 } 183 184 auto v1 = s1summ.var, v2 = s2summ.var; 185 real sx1x2 = sqrt(v1 / n1 + v2 / n2); 135 186 real t = (s1summ.mean - s2summ.mean) / sx1x2; 136 real numerator = s1summ.SD / n1 + s2summ.SD/ n2;187 real numerator = v1 / n1 + v2 / n2; 137 188 numerator *= numerator; 138 real denom1 = s1summ.SD/ n1;189 real denom1 = v1 / n1; 139 190 denom1 = denom1 * denom1 / (n1 - 1); 140 real denom2 = s2summ.SD/ n2;191 real denom2 = v2 / n2; 141 192 denom2 = denom2 * denom2 / (n2 - 1); 142 193 real df = numerator / (denom1 + denom2); … … 151 202 unittest { 152 203 // Values from R. 153 assert(approxEqual(welchTTest([1,2,3,4,5] , [1,3,4,5,7,9]), 0.2159));154 assert(approxEqual(welchTTest([1,2,3,4,5] , [1,3,4,5,7,9], Alt.LESS),204 assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup), 0.2159)); 205 assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.LESS), 155 206 0.1079)); 156 assert(approxEqual(welchTTest([1,2,3,4,5] , [1,3,4,5,7,9], Alt.GREATER),207 assert(approxEqual(welchTTest([1,2,3,4,5].dup, [1,3,4,5,7,9].dup, Alt.GREATER), 157 208 0.892)); 158 assert(approxEqual(welchTTest([1,3,5,7,9,11] , [2,2,1,3,4]), 0.06616));159 assert(approxEqual(welchTTest([1,3,5,7,9,11] , [2,2,1,3,4], Alt.LESS),209 assert(approxEqual(welchTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup), 0.06616)); 210 assert(approxEqual(welchTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.LESS), 160 211 0.967)); 161 assert(approxEqual(welchTTest([1,3,5,7,9,11] , [2,2,1,3,4], Alt.GREATER),212 assert(approxEqual(welchTTest([1,3,5,7,9,11].dup, [2,2,1,3,4].dup, Alt.GREATER), 162 213 0.03308)); 163 214 writeln("Passed welchTTest test."); … … 170 221 * greater than testMean, and Alt.TWOSIDE, meaning the true mean difference is not 171 222 * equal to testMean. 223 * 172 224 * Returns: The p-value against the given alternative.*/ 173 real pairedTTest(T, U)(const T[] before, const U[] after, 174 Alt alt = Alt.TWOSIDE, real testMean = 0) 175 in { 176 assert(before.length == after.length); 177 } body { 225 real pairedTTest(T, U)(T before, U after, Alt alt = Alt.TWOSIDE, real testMean = 0) 226 if(realInput!(T) && realInput!(U)) { 178 227 OnlineMeanSD msd; 179 foreach(i; 0..before.length) { 180 real diff = cast(real) before[i] - cast(real) after[i]; 181 msd.addElement(diff); 182 } 183 real t = (msd.mean - testMean) / msd.stdev * sqrt(cast(real) before.length); 228 size_t len = 0; 229 while(!before.empty && !after.empty) { 230 real diff = cast(real) before.front - cast(real) after.front; 231 before.popFront; 232 after.popFront; 233 msd.put(diff); 234 len++; 235 } 236 real t = (msd.mean - testMean) / msd.stdev * sqrt(cast(real) len); 184 237 185 238 if(alt == Alt.LESS) { 186 return studentsTCDF(t, before.length- 1);239 return studentsTCDF(t, len - 1); 187 240 } else if(alt == Alt.GREATER) { 188 return studentsTCDF(-t, before.length- 1);241 return studentsTCDF(-t, len - 1); 189 242 } else if(t > 0) { 190 return 2 * studentsTCDF(-t, before.length- 1);243 return 2 * studentsTCDF(-t, len - 1); 191 244 } else { 192 return 2 * studentsTCDF(t, before.length- 1);245 return 2 * studentsTCDF(t, len - 1); 193 246 } 194 247 } … … 196 249 unittest { 197 250 // Values from R. 198 assert(approxEqual(pairedTTest([3,2,3,4,5] , [2,3,5,5,6], Alt.LESS), 0.0889));199 assert(approxEqual(pairedTTest([3,2,3,4,5] , [2,3,5,5,6], Alt.GREATER), 0.9111));200 assert(approxEqual(pairedTTest([3,2,3,4,5] , [2,3,5,5,6], Alt.TWOSIDE), 0.1778));201 assert(approxEqual(pairedTTest([3,2,3,4,5] , [2,3,5,5,6], Alt.LESS, 1), 0.01066));202 assert(approxEqual(pairedTTest([3,2,3,4,5] , [2,3,5,5,6], Alt.GREATER, 1), 0.9893));203 assert(approxEqual(pairedTTest([3,2,3,4,5] , [2,3,5,5,6], Alt.TWOSIDE, 1), 0.02131));251 assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.LESS), 0.0889)); 252 assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.GREATER), 0.9111)); 253 assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.TWOSIDE), 0.1778)); 254 assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.LESS, 1), 0.01066)); 255 assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.GREATER, 1), 0.9893)); 256 assert(approxEqual(pairedTTest([3,2,3,4,5].dup, [2,3,5,5,6].dup, Alt.TWOSIDE, 1), 0.02131)); 204 257 writeln("Passed pairedTTest unittest."); 205 258 } … … 210 263 * dereference that can be used in wilcoxonRankSumPval() to adjust for ties in 211 264 * the input data. 265 * 266 * Input ranges for this function must define a length. 267 * 212 268 * Returns: The Wilcoxon test statistic W.*/ 213 real wilcoxonRankSum(T)(const T[] sample1, const T[] sample2, real* tieSum = null) { 214 ulong n1 = sample1.length, n2 = sample2.length, N = n1 + n2; 215 auto combined = newStack!(Mutable!(T))(N); 216 combined[0..n1] = sample1[]; 217 combined[n1..$] = sample2[]; 269 real wilcoxonRankSum(T)(T sample1, T sample2, real* tieSum = null) 270 if(isInputRange!(T) && dstats.base.hasLength!(T)) { 271 ulong n1 = sample1.length, n2 = sample2.length, N = n1 + n2; 272 auto combined = newStack!(Unqual!(ElementType!(T)))(N); 273 rangeCopy(combined[0..n1], sample1); 274 rangeCopy(combined[n1..$], sample2); 218 275 219 276 float[] ranks = newStack!(float)(N); 220 277 rankSort(combined, ranks); 221 real w = sum!(float, real)(ranks[0..n1]) - n1 * (n1 + 1) / 2UL;278 real w = reduce!("a + b")(0.0L, ranks[0..n1]) - n1 * (n1 + 1) / 2UL; 222 279 TempAlloc.free; // Free ranks. 223 280 … … 248 305 249 306 unittest { 250 assert(wilcoxonRankSum([1, 2, 3, 4, 5] , [2, 4, 6, 8, 10]) == 5);251 assert(wilcoxonRankSum([2, 4, 6, 8, 10] , [1, 2, 3, 4, 5]) == 20);252 assert(wilcoxonRankSum([3, 7, 21, 5, 9] , [2, 4, 6, 8, 10]) == 15);307 assert(wilcoxonRankSum([1, 2, 3, 4, 5].dup, [2, 4, 6, 8, 10].dup) == 5); 308 assert(wilcoxonRankSum([2, 4, 6, 8, 10].dup, [1, 2, 3, 4, 5].dup) == 20); 309 assert(wilcoxonRankSum([3, 7, 21, 5, 9].dup, [2, 4, 6, 8, 10].dup) == 15); 253 310 writeln("Passed wilcoxonRankSum test."); 254 311 } … … 298 355 assert(approxEqual(wilcoxonRankSumPval(1500, 50, 50, Alt.LESS), .957903)); 299 356 assert(approxEqual(wilcoxonRankSumPval(8500, 100, 200, Alt.LESS), .01704)); 300 auto w = wilcoxonRankSum([2,4,6,8,12] , [1,3,5,7,11,9]);357 auto w = wilcoxonRankSum([2,4,6,8,12].dup, [1,3,5,7,11,9].dup); 301 358 assert(approxEqual(wilcoxonRankSumPval(w, 5, 6), 0.9273)); 302 359 assert(approxEqual(wilcoxonRankSumPval(w, 5, 6, Alt.GREATER), 0.4636)); … … 325 382 * In these cases, exactThresh will be ignored. 326 383 * 384 * Input ranges for this function must define a length. 385 * 327 386 * Returns: The p-value against the given alternative.*/ 328 real wilcoxonRankSumPval(T)(const T[] sample1, const T[] sample2, 329 Alt alt = Alt.TWOSIDE, uint exactThresh = 50) { 387 real wilcoxonRankSumPval(T)(T sample1, T sample2, Alt alt = Alt.TWOSIDE, 388 uint exactThresh = 50) if(isInputRange!(T) && dstats.base.hasLength!(T)) { 389 330 390 real tieSum; 331 391 real W = wilcoxonRankSum(sample1, sample2, &tieSum); … … 337 397 // Values from R. Simple stuff (no ties) first. Testing approximate 338 398 // calculation first. 339 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12] , [1,3,5,7,11,9],399 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 340 400 Alt.TWOSIDE, 0), 0.9273)); 341 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12] , [1,3,5,7,11,9],401 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 342 402 Alt.LESS, 0), 0.6079)); 343 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12] , [1,3,5,7,11,9],403 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 344 404 Alt.GREATER, 0), 0.4636)); 345 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12] , [3,5,7,8,13,15],405 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 346 406 Alt.TWOSIDE, 0), 0.4113)); 347 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12] , [3,5,7,8,13,15],407 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 348 408 Alt.LESS, 0), 0.2057)); 349 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12] , [3,5,7,8,13,15],409 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 350 410 Alt.GREATER, 0), 0.8423)); 351 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9] , [2,4,6,8,10],411 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 352 412 Alt.TWOSIDE, 0), .6745)); 353 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9] , [2,4,6,8,10],413 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 354 414 Alt.LESS, 0), .3372)); 355 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9] , [2,4,6,8,10],415 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 356 416 Alt.GREATER, 0), .7346)); 357 417 358 418 // Now, lots of ties. 359 assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5] , [2,3,4,5,6],419 assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup, 360 420 Alt.TWOSIDE, 0), 0.3976)); 361 assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5] , [2,3,4,5,6],421 assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup, 362 422 Alt.LESS, 0), 0.1988)); 363 assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5] , [2,3,4,5,6],423 assert(approxEqual(wilcoxonRankSumPval([1,2,3,4,5].dup, [2,3,4,5,6].dup, 364 424 Alt.GREATER, 0), 0.8548)); 365 assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2] , [1,2,3,1,1],425 assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup, 366 426 Alt.TWOSIDE, 0), 0.9049)); 367 assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2] , [1,2,3,1,1],427 assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup, 368 428 Alt.LESS, 0), 0.4524)); 369 assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2] , [1,2,3,1,1],429 assert(approxEqual(wilcoxonRankSumPval([1,2,1,1,2].dup, [1,2,3,1,1].dup, 370 430 Alt.GREATER, 0), 0.64)); 371 431 372 432 // Now, testing the exact calculation on the same data. 373 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12] , [1,3,5,7,11,9],433 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 374 434 Alt.TWOSIDE), 0.9307)); 375 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12] , [1,3,5,7,11,9],435 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 376 436 Alt.LESS), 0.6039)); 377 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12] , [1,3,5,7,11,9],437 assert(approxEqual(wilcoxonRankSumPval([2,4,6,8,12].dup, [1,3,5,7,11,9].dup, 378 438 Alt.GREATER), 0.4654)); 379 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12] , [3,5,7,8,13,15],439 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 380 440 Alt.TWOSIDE), 0.4286)); 381 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12] , [3,5,7,8,13,15],441 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 382 442 Alt.LESS), 0.2143)); 383 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12] , [3,5,7,8,13,15],443 assert(approxEqual(wilcoxonRankSumPval([1,2,6,10,12].dup, [3,5,7,8,13,15].dup, 384 444 Alt.GREATER), 0.8355)); 385 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9] , [2,4,6,8,10],445 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 386 446 Alt.TWOSIDE), .6905)); 387 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9] , [2,4,6,8,10],447 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 388 448 Alt.LESS), .3452)); 389 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9] , [2,4,6,8,10],449 assert(approxEqual(wilcoxonRankSumPval([1,3,5,7,9].dup, [2,4,6,8,10].dup, 390 450 Alt.GREATER), .7262)); 391 451 writeln("Passed wilcoxonRankSumPval test."); … … 501 561 * dereference that can be used in wilcoxonRankSumPval() to adjust for ties in 502 562 * the input data. 563 * 564 * Input ranges for this function must define a length. 565 * 503 566 * Returns: The Wilcoxon test statistic W.*/ 504 real wilcoxonSignedRank(T, U)(const T[] before, const U[] after, 505 real* tieSum = null) { 567 real wilcoxonSignedRank(T, U)(T before, U after, real* tieSum = null) 568 if(realInput!(T) && realInput!(U) && 569 dstats.base.hasLength!(T) && dstats.base.hasLength!(U)) { 506 570 mixin(newFrame); 507 571 float[] diffRanks = newStack!(float)(before.length); … … 517 581 } 518 582 519 foreach(i; 0..before.length) { 520 real diff = cast(real) before[i] - cast(real) after[i]; 521 signs[i] = sign(diff); 522 diffs[i] = abs(diff); 583 size_t ii = 0; 584 while(!before.empty && !after.empty) { 585 real diff = cast(real) before.front - cast(real) after.front; 586 signs[ii] = sign(diff); 587 diffs[ii] = abs(diff); 588 ii++; 589 before.popFront; 590 after.popFront; 523 591 } 524 592 rankSort(diffs, diffRanks); … … 556 624 557 625 unittest { 558 assert(wilcoxonSignedRank([1,2,3,4,5] , [2,1,4,5,3]) == 7.5);559 assert(wilcoxonSignedRank([3,1,4,1,5] , [2,7,1,8,2]) == 6);560 assert(wilcoxonSignedRank([8,6,7,5,3] , [0,9,8,6,7]) == 5);626 assert(wilcoxonSignedRank([1,2,3,4,5].dup, [2,1,4,5,3].dup) == 7.5); 627 assert(wilcoxonSignedRank([3,1,4,1,5].dup, [2,7,1,8,2].dup) == 6); 628 assert(wilcoxonSignedRank([8,6,7,5,3].dup, [0,9,8,6,7].dup) == 5); 561 629 writeln("Passed wilcoxonSignedRank unittest."); 562 630 } … … 583 651 * or equal pairs. 584 652 * 653 * The input ranges for this function must define a length and must be 654 * forward ranges. 655 * 585 656 * Returns: The p-value against the given alternative.*/ 586 real wilcoxonSignedRankPval(T)( const T[] before, const T[]after,657 real wilcoxonSignedRankPval(T)(T before, T after, 587 658 Alt alt = Alt.TWOSIDE, uint exactThresh = 50) 659 if(realInput!(T) && dstats.base.hasLength!(T) && isForwardRange!(T)) 588 660 in { 589 661 assert(before.length == after.length); 590 662 } body { 591 real tieSum;592 real W = wilcoxonSignedRank(before, after, &tieSum);593 ulong N = before.length;594 foreach(i; 0..before.length) {595 if(before[i] == after[i])663 real tieSum; 664 real W = wilcoxonSignedRank(before, after, &tieSum); 665 ulong N = before.length; 666 while(!before.empty && !after.empty) { 667 if(before.front == after.front) 596 668 N--; 597 } 598 return wilcoxonSignedRankPval(W, N, alt, tieSum, exactThresh); 669 before.popFront; 670 after.popFront; 671 672 } 673 return wilcoxonSignedRankPval(W, N, alt, tieSum, exactThresh); 599 674 } 600 675 … … 603 678 alias approxEqual ae; 604 679 // With ties, normal approx. 605 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5] , [2,1,4,5,3]), 1));606 assert(ae(wilcoxonSignedRankPval([3,1,4,1,5] , [2,7,1,8,2]), 0.7865));607 assert(ae(wilcoxonSignedRankPval([8,6,7,5,3] , [0,9,8,6,7]), 0.5879));608 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5] , [2,1,4,5,3], Alt.LESS), 0.5562));609 assert(ae(wilcoxonSignedRankPval([3,1,4,1,5] , [2,7,1,8,2], Alt.LESS), 0.3932));610 assert(ae(wilcoxonSignedRankPval([8,6,7,5,3] , [0,9,8,6,7], Alt.LESS), 0.2940));611 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5] , [2,1,4,5,3], Alt.GREATER), 0.5562));612 assert(ae(wilcoxonSignedRankPval([3,1,4,1,5] , [2,7,1,8,2], Alt.GREATER), 0.706));613 assert(ae(wilcoxonSignedRankPval([8,6,7,5,3] , [0,9,8,6,7], Alt.GREATER), 0.7918));680 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup), 1)); 681 assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup), 0.7865)); 682 assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup), 0.5879)); 683 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.LESS), 0.5562)); 684 assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.LESS), 0.3932)); 685 assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.LESS), 0.2940)); 686 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,1,4,5,3].dup, Alt.GREATER), 0.5562)); 687 assert(ae(wilcoxonSignedRankPval([3,1,4,1,5].dup, [2,7,1,8,2].dup, Alt.GREATER), 0.706)); 688 assert(ae(wilcoxonSignedRankPval([8,6,7,5,3].dup, [0,9,8,6,7].dup, Alt.GREATER), 0.7918)); 614 689 615 690 // Exact. 616 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5] , [2,-4,-8,16,32]), 0.625));617 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5] , [2,-4,-8,16,32], Alt.LESS), 0.3125));618 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5] , [2,-4,-8,16,32], Alt.GREATER), 0.7812));619 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5] , [2,-4,-8,-16,32]), 0.8125));620 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5] , [2,-4,-8,-16,32], Alt.LESS), 0.6875));621 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5] , [2,-4,-8,-16,32], Alt.GREATER), 0.4062));691 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup), 0.625)); 692 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.LESS), 0.3125)); 693 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,16,32].dup, Alt.GREATER), 0.7812)); 694 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup), 0.8125)); 695 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.LESS), 0.6875)); 696 assert(ae(wilcoxonSignedRankPval([1,2,3,4,5].dup, [2,-4,-8,-16,32].dup, Alt.GREATER), 0.4062)); 622 697 writeln("Passed wilcoxonSignedRankPval unittest."); 623 698 } … … 755 830 * elements of after, and Alt.TWOSIDE, meaning that there is a significant 756 831 * difference in either direction.*/ 757 real signTest(T)(const T[] before, const T[] after, Alt alt = Alt.TWOSIDE) 758 in { 759 assert(before.length == after.length); 760 } body { 832 real signTest(T)(T before, T after, Alt alt = Alt.TWOSIDE) 833 if(realInput!(T)) { 761 834 uint greater, less; 762 foreach(i; 0..before.length) {763 if(before [i] < after[i])835 while(!before.empty && !after.empty) { 836 if(before.front < after.front) 764 837 less++; 765 else if(after [i] < before[i])838 else if(after.front < before.front) 766 839 greater++; 767 840 // Ignore equals. 841 before.popFront; 842 after.popFront; 768 843 } 769 844 if(alt == Alt.LESS) { … … 780 855 unittest { 781 856 alias approxEqual ae; 782 assert(ae(signTest([1,3,4,2,5] , [1,2,4,8,16]), 1));783 assert(ae(signTest([1,3,4,2,5] , [1,2,4,8,16], Alt.LESS), 0.5));784 assert(ae(signTest([1,3,4,2,5] , [1,2,4,8,16], Alt.GREATER), 0.875));785 assert(ae(signTest([5,3,4,6,8] , [1,2,3,4,5], Alt.GREATER), 0.03125));786 assert(ae(signTest([5,3,4,6,8] , [1,2,3,4,5], Alt.LESS), 1));787 assert(ae(signTest([5,3,4,6,8] , [1,2,3,4,5]), 0.0625));857 assert(ae(signTest([1,3,4,2,5].dup, [1,2,4,8,16].dup), 1)); 858 assert(ae(signTest([1,3,4,2,5].dup, [1,2,4,8,16].dup, Alt.LESS), 0.5)); 859 assert(ae(signTest([1,3,4,2,5].dup, [1,2,4,8,16].dup, Alt.GREATER), 0.875)); 860 assert(ae(signTest([5,3,4,6,8].dup, [1,2,3,4,5].dup, Alt.GREATER), 0.03125)); 861 assert(ae(signTest([5,3,4,6,8].dup, [1,2,3,4,5].dup, Alt.LESS), 1)); 862 assert(ae(signTest([5,3,4,6,8].dup, [1,2,3,4,5].dup), 0.0625)); 788 863 } 789 864 … … 795 870 * elements of after, and Alt.TWOSIDE, meaning that there is a significant 796 871 * difference in either direction.*/ 797 real signTest(T, U)( const T[]data, U median, Alt alt = Alt.TWOSIDE)798 if( !isArray!(U)) {872 real signTest(T, U)(T data, U median, Alt alt = Alt.TWOSIDE) 873 if(realInput!(T) && isNumeric!(U)) { 799 874 uint greater, less; 800 foreach( i; 0..data.length) {801 if( data[i]< median)875 foreach(elem; data) { 876 if(elem < median) 802 877 less++; 803 else if(median < data[i])878 else if(median < elem) 804 879 greater++; 805 880 // Ignore equals. … … 817 892 818 893 unittest { 819 assert(approxEqual(signTest([1,2,6,7,9] , 2), 0.625));894 assert(approxEqual(signTest([1,2,6,7,9].dup, 2), 0.625)); 820 895 writeln("Passed signTest unittest."); 821 896 } … … 828 903 * To get the D value used in standard notation, 829 904 * simply take the absolute value of this D value.*/ 830 real ksTest(T, U)(const T[] F, const U[] Fprime) { 905 real ksTest(T, U)(T F, U Fprime) 906 if(isInputRange!(T) && isInputRange!(U)) { 831 907 auto TAState = TempAlloc.getState; 832 908 scope(exit) { … … 834 910 TempAlloc.free(TAState); 835 911 } 836 return ksTestDestructive( F.tempdup(TAState), Fprime.tempdup(TAState));912 return ksTestDestructive(tempdup(F), tempdup(Fprime)); 837 913 } 838 914 839 915 /**Same as ksTest, but sorts input data in place instead of duplicating 840 * data. Therefore, less "safe" but doesn't allocate memory.*/ 841 real ksTestDestructive(T, U)(T[] F, U[] Fprime) { 916 * data. Therefore, less "safe" but doesn't allocate memory. F, 917 * Fprime must both be random access ranges w/ lengths.*/ 918 real ksTestDestructive(T, U)(T F, U Fprime) 919 if(isRandomAccessRange!(U) && isRandomAccessRange!(T) && 920 dstats.base.hasLength!(T) && dstats.base.hasLength!(U)) { 842 921 qsort(F); 843 922 qsort(Fprime); … … 865 944 unittest { 866 945 // Values from R. 867 assert(approxEqual(ksTestDestructive([1,2,3,4,5] , [1,2,3,4,5]), 0));868 assert(approxEqual(ksTestDestructive([1,2,3,4,5] , [1,2,2,3,5]), -.2));869 assert(approxEqual(ksTestDestructive([-1,0,2,8, 6] , [1,2,2,3,5]), .4));870 assert(approxEqual(ksTestDestructive([1,2,3,4,5] , [1,2,2,3,5,7,8]), .2857));871 assert(approxEqual(ksTestDestructive([1, 2, 3, 4, 4, 4, 5] ,872 [1, 2, 3, 4, 5, 5, 5] ), .2857));946 assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,3,4,5].dup), 0)); 947 assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,2,3,5].dup), -.2)); 948 assert(approxEqual(ksTestDestructive([-1,0,2,8, 6].dup, [1,2,2,3,5].dup), .4)); 949 assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, [1,2,2,3,5,7,8].dup), .2857)); 950 assert(approxEqual(ksTestDestructive([1, 2, 3, 4, 4, 4, 5].dup, 951 [1, 2, 3, 4, 5, 5, 5].dup), .2857)); 873 952 writeln("Passed 2-sample ksTestDestructive."); 874 953 } … … 885 964 * --- 886 965 */ 887 real ksTest(T, Func)( const T[]Femp, Func F)888 if( is(Func == function) || is(Func == delegate)) {966 real ksTest(T, Func)(T Femp, Func F) 967 if(realInput!(T) && (is(Func == function) || is(Func == delegate))) { 889 968 scope(exit) TempAlloc.free; 890 return ksTestDestructive(Femp.tempdup, F); 891 } 892 893 /**One-sample KS test, sorts in place.*/ 894 real ksTestDestructive(T, Func)(T[] Femp, Func F) 895 if(is(Func == function) || is(Func == delegate)) { 969 return ksTestDestructive(tempdup(Femp), F); 970 } 971 972 /**One-sample KS test, sorts in place. Femp must be a random access range 973 * with length.*/ 974 real ksTestDestructive(T, Func)(T Femp, Func F) 975 if(isRandomAccessRange!(T) && dstats.base.hasLength!(T) && 976 (is(Func == function) || is(Func == delegate))) { 896 977 qsort(Femp); 897 978 real D = 0; … … 909 990 // Testing against values from R. 910 991 auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); 911 assert(approxEqual(ksTestDestructive([1,2,3,4,5] , stdNormal), -.8413));912 assert(approxEqual(ksTestDestructive([-1,0,2,8, 6] , stdNormal), -.5772));913 auto lotsOfTies = [5,1,2,2,2,2,2,2,3,4] ;992 assert(approxEqual(ksTestDestructive([1,2,3,4,5].dup, stdNormal), -.8413)); 993 assert(approxEqual(ksTestDestructive([-1,0,2,8, 6].dup, stdNormal), -.5772)); 994 auto lotsOfTies = [5,1,2,2,2,2,2,2,3,4].dup; 914 995 assert(approxEqual(ksTestDestructive(lotsOfTies, stdNormal), -0.8772)); 915 996 writeln("Passed 1-sample ksTestDestructive."); … … 940 1021 /**KS-test, returns 2-sided P-val instead of D. 941 1022 * Bugs: Exact calculation not implemented. Uses asymptotic approximation.*/ 942 real ksPval(T, U)(const T[] F, const U[] Fprime) { 1023 real ksPval(T, U)(T F, U Fprime) 1024 if(realInput!(T) && realInput!(U)) { 943 1025 return ksPvalD(F.length, Fprime.length, ksTest(F, Fprime)); 944 1026 } 945 1027 946 1028 unittest { 947 assert(approxEqual(ksPval([1, 2, 3, 4, 4, 4, 5] , [1, 2, 3, 4, 5, 5, 5]),1029 assert(approxEqual(ksPval([1, 2, 3, 4, 4, 4, 5].dup, [1, 2, 3, 4, 5, 5, 5].dup), 948 1030 .9375)); 949 1031 writeln("Passed ksPval 2-sample test."); … … 952 1034 /**One-sided K-S test, returns P-val instead of D. 953 1035 * Bugs: Exact calculation not implemented. Uses asymptotic approximation.*/ 954 real ksPval(T, Func)( const T[]Femp, Func F)955 if( is(Func == function) || is(Func == delegate)) {1036 real ksPval(T, Func)(T Femp, Func F) 1037 if(realInput!(T) && (is(Func == function) || is(Func == delegate))) { 956 1038 return ksPvalD(Femp.length, ksTest(Femp, F)); 957 1039 } … … 959 1041 unittest { 960 1042 auto stdNormal = parametrize!(normalCDF)(0.0L, 1.0L); 961 assert(approxEqual(ksPval([0,1,2,3,4] , stdNormal), .03271));1043 assert(approxEqual(ksPval([0,1,2,3,4].dup, stdNormal), .03271)); 962 1044 963 1045 auto uniform01 = parametrize!(uniformCDF)(0, 1); 964 assert(approxEqual(ksPval([0.1, 0.3, 0.5, 0.9, 1] , uniform01), 0.7591));1046 assert(approxEqual(ksPval([0.1, 0.3, 0.5, 0.9, 1].dup, uniform01), 0.7591)); 965 1047 966 1048 writeln("Passed ksPval 1-sample test."); … … 1014 1096 alias contingencyTable c; 1015 1097 1016 i nvariantuint n1 = c[0][0] + c[0][1],1098 immutable uint n1 = c[0][0] + c[0][1], 1017 1099 n2 = c[1][0] + c[1][1], 1018 1100 n = c[0][0] + c[1][0]; 1019 1101 1020 i nvariantuint mode =1102 immutable uint mode = 1021 1103 cast(uint) ((cast(real) (n + 1) * (n1 + 1)) / (n1 + n2 + 2)); 1022 i nvariantreal pExact = hypergeometricPMF(c[0][0], n1, n2, n);1023 i nvariantreal pMode = hypergeometricPMF(mode, n1, n2, n);1104 immutable real pExact = hypergeometricPMF(c[0][0], n1, n2, n); 1105 immutable real pMode = hypergeometricPMF(mode, n1, n2, n); 1024 1106 1025 1107 if(approxEqual(pExact, pMode, 1e-7)) { 1026 1108 return 1; 1027 1109 } else if(c[0][0] < mode) { 1028 i nvariantreal pLower = hypergeometricCDF(c[0][0], n1, n2, n);1110 immutable real pLower = hypergeometricCDF(c[0][0], n1, n2, n); 1029 1111 1030 1112 // Special case to prevent binary search from getting stuck. … … 1039 1121 (cast(ulong) max + cast(ulong) min) / 2UL; 1040 1122 1041 i nvariantreal pGuess = hypergeometricPMF(guess, n1, n2, n);1123 immutable real pGuess = hypergeometricPMF(guess, n1, n2, n); 1042 1124 if(pGuess <= pExact && 1043 1125 hypergeometricPMF(guess - 1, n1, n2, n) > pExact) { … … 1053 1135 hypergeometricCDFR(guess, n1, n2, n), 1.0L); 1054 1136 } else { 1055 i nvariantreal pUpper = hypergeometricCDFR(c[0][0], n1, n2, n);1137 immutable real pUpper = hypergeometricCDFR(c[0][0], n1, n2, n); 1056 1138 1057 1139 // Special case to prevent binary search from getting stuck. … … 1086 1168 // Simple, naive impl. of two-sided to test against. 1087 1169 static real naive(const uint[][] c) { 1088 i nvariantuint n1 = c[0][0] + c[0][1],1170 immutable uint n1 = c[0][0] + c[0][1], 1089 1171 n2 = c[1][0] + c[1][1], 1090 1172 n = c[0][0] + c[1][0]; 1091 i nvariantuint mode =1173 immutable uint mode = 1092 1174 cast(uint) ((cast(real) (n + 1) * (n1 + 1)) / (n1 + n2 + 2)); 1093 i nvariantreal pExact = hypergeometricPMF(c[0][0], n1, n2, n);1094 i nvariantreal pMode = hypergeometricPMF(mode, n1, n2, n);1175 immutable real pExact = hypergeometricPMF(c[0][0], n1, n2, n); 1176 immutable real pMode = hypergeometricPMF(mode, n1, n2, n); 1095 1177 if(approxEqual(pExact, pMode, 1e-7)) 1096 1178 return 1; … … 1107 1189 1108 1190 foreach(i; 0..1000) { 1109 c[0][0] = uniform( gen,0U, 51U);1110 c[0][1] = uniform( gen,0U, 51U);1111 c[1][0] = uniform( gen,0U, 51U);1112 c[1][1] = uniform( gen,0U, 51U);1191 c[0][0] = uniform(0U, 51U); 1192 c[0][1] = uniform(0U, 51U); 1193 c[1][0] = uniform(0U, 51U); 1194 c[1][1] = uniform(0U, 51U); 1113 1195 real naiveAns = naive(c); 1114 1196 real fastAns = fisherExact(c); … … 1173 1255 * Bugs: No exact calculation of the P-value. Asymptotic approximation only. 1174 1256 */ 1175 real runsTest(alias positive = "a > 0", T)(const T[] obs, Alt alt = Alt.TWOSIDE) { 1176 OnlineRunsTest!(positive, T) r; 1257 real runsTest(alias positive = "a > 0", T)(T obs, Alt alt = Alt.TWOSIDE) 1258 if(isInputRange!(T)) { 1259 OnlineRunsTest!(positive, ElementType!(T)) r; 1177 1260 foreach(elem; obs) { 1178 r. addElement(elem);1261 r.put(elem); 1179 1262 } 1180 1263 return r.pVal(alt); … … 1185 1268 // hard-coded as the equivalent to positive(). The median of this data 1186 1269 // is 0.5, so everything works. 1187 i nvariantint[] data = [1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1].idup;1270 immutable int[] data = [1,0,0,0,1,1,0,0,1,0,1,0,1,0,1,1,1,0,0,1].idup; 1188 1271 assert(approxEqual(runsTest(data), 0.3581)); 1189 1272 assert(approxEqual(runsTest(data, Alt.LESS), 0.821)); … … 1206 1289 1207 1290 /// 1208 void addElement(T elem) {1291 void put(T elem) { 1209 1292 bool curPos = pos(elem); 1210 1293 if(nRun == 0) { … … 1345 1428 * Returns: An array of Q-values with indices 1346 1429 * corresponding to the indices of the p-values passed in.*/ 1347 real[] falseDiscoveryRate(T)(const T[] pVals) { 1430 real[] falseDiscoveryRate(T)(T pVals) 1431 if(realInput!(T)) { 1348 1432 // Not optimized at all because I can't imagine anyone writing code where 1349 1433 // FDR calculations are the main bottleneck. 1350 auto p = pVals.tempdup; scope(exit) TempAlloc.free;1434 auto p = tempdup(pVals); scope(exit) TempAlloc.free; 1351 1435 auto perm = newStack!(uint)(pVals.length); scope(exit) TempAlloc.free; 1352 1436 foreach(i, ref elem; perm) … … 1375 1459 unittest { 1376 1460 // Comparing results to R's qvalue package. 1377 auto pVals = [.90, .01, .03, .03, .70, .60, .01] ;1461 auto pVals = [.90, .01, .03, .03, .70, .60, .01].dup; 1378 1462 auto qVals = falseDiscoveryRate(pVals); 1379 1463 assert(approxEqual(qVals[0], .9));
