version(TangoPerformanceTest) {
import tango.stdc.stdio;

/// Reads the CPU time-stamp counter using the RDTSC instruction.
///
/// There is no explicit return statement: the function relies on the value
/// RDTSC leaves in EAX (the low 32 bits of the counter) being used as the
/// int return value. Callers below only ever use the difference between
/// two readings.
int clock() { asm { rdtsc; } }

// Working arrays for the timing runs. Only the first X1.length (2000)
// elements of each are initialised by testPerformance().
uint [2000] X1;
uint [2000] Y1;
uint [4000] Z1;

/// Prints a rough cycles-per-int figure for each multibyte primitive.
///
/// For most primitives the same routine is timed on 1000 and on 2000
/// elements; subtracting the two timings cancels the fixed per-call
/// overhead and leaves the cost of the extra 1000 elements, which is then
/// divided by 1000.0 when printed. Every measurement is preceded by one
/// untimed call of the same routine so that the first timed run does not
/// also pay any first-call cost.
void testPerformance()
{
    // The performance results at the top of this file were obtained using
    // a Windows device driver to access the CPU performance counters.
    // The code below is less accurate but more widely usable.
    // The value for division is quite inconsistent.
    for (int i=0; i<X1.length; ++i) { X1[i]=i; Y1[i]=i; Z1[i]=i; }
    int t, t0;

    // --- Shift right ---
    multibyteShr(Z1[0..2000], X1[0..2000], 7);      // untimed warm-up
    t0 = clock();
    multibyteShr(Z1[0..1000], X1[0..1000], 7);
    t = clock();
    multibyteShr(Z1[0..2000], X1[0..2000], 7);
    // (time for 2000) - (time for 1000)
    auto shrtime = (clock() - t) - (t - t0);

    // --- Add ---
    multibyteAddSub!('+')(Z1[0..2000], X1[0..2000], Y1[0..2000], 0);   // untimed warm-up
    t0 = clock();
    multibyteAddSub!('+')(Z1[0..1000], X1[0..1000], Y1[0..1000], 0);
    t = clock();
    multibyteAddSub!('+')(Z1[0..2000], X1[0..2000], Y1[0..2000], 0);
    auto addtime = (clock() - t) - (t - t0);

    // --- Multiply by a single word ---
    multibyteMul(Z1[0..2000], X1[0..2000], 7, 0);   // untimed warm-up
    t0 = clock();
    multibyteMul(Z1[0..1000], X1[0..1000], 7, 0);
    t = clock();
    multibyteMul(Z1[0..2000], X1[0..2000], 7, 0);
    auto multime = (clock() - t) - (t - t0);

    // --- Multiply-add ---
    multibyteMulAdd!('+')(Z1[0..2000], X1[0..2000], 217, 0);   // untimed warm-up
    t0 = clock();
    multibyteMulAdd!('+')(Z1[0..1000], X1[0..1000], 217, 0);
    t = clock();
    multibyteMulAdd!('+')(Z1[0..2000], X1[0..2000], 217, 0);
    auto muladdtime = (clock() - t) - (t - t0);

    // --- Multiply-accumulate, 32 x 32 words ---
    // A single timed call, with no differencing; the total is divided by
    // 32*32 when printed.
    multibyteMultiplyAccumulate(Z1[0..64], X1[0..32], Y1[0..32]);   // untimed warm-up
    t = clock();
    multibyteMultiplyAccumulate(Z1[0..64], X1[0..32], Y1[0..32]);
    auto accumtime = clock() - t;

    // --- Divide by a single word ---
    // Warm-up on the upper half of Z1, which nothing else in this function
    // touches, so the array contents seen by the timed runs are not altered
    // (the routine receives only one array, so it presumably works in place).
    multibyteDivAssign(Z1[2000..4000], 217, 0);
    // Here the 2000-element run comes first, so the subtraction is written
    // the other way round: (time for 2000) - (time for 1000).
    t0 = clock();
    multibyteDivAssign(Z1[0..2000], 217, 0);
    t = clock();
    multibyteDivAssign(Z1[0..1000], 37, 0);
    auto divtime = (t - t0) - (clock() - t);

    printf("-- BigInt asm performance (cycles/int) --\n");
    printf("Add: %.2f\n", addtime/1000.0);
    printf("Shr: %.2f\n", shrtime/1000.0);
    printf("Mul: %.2f\n", multime/1000.0);
    printf("MulAdd: %.2f\n", muladdtime/1000.0);
    printf("Div: %.2f\n", divtime/1000.0);
    printf("MulAccum32: %.2f*n*n (total %d)\n\n", accumtime/(32.0*32.0), accumtime);
}

// Module constructor: runs the benchmark once at program start-up whenever
// this version identifier is enabled.
static this()
{
    testPerformance();
}
}
|---|
| | 900 | |
|---|
| | 901 | |
|---|