|
Revision 177, 1.5 kB
(checked in by Don Clugston, 5 months ago)
|
Added performance test using MSR registers; the device driver comes from Agner Fog. Only works on my system (path is hard-coded, only works on Windows, Pentium M and Pentium Core (not core2)).
|
- Property svn:mime-type set to
text/x-dsrc
- Property svn:eol-style set to
native
|
| Line | |
|---|
| 1 |
/** |
|---|
| 2 |
* Blade Performance tests |
|---|
| 3 |
*/ |
|---|
| 4 |
module PerfTest; |
|---|
| 5 |
import Blade.Performance.Perform; |
|---|
| 6 |
import std.stdio; |
|---|
| 7 |
//import std.cpuid; |
|---|
| 8 |
import win32.core; |
|---|
| 9 |
import std.string; |
|---|
| 10 |
|
|---|
| 11 |
|
|---|
| 12 |
/* Values for agner |
|---|
| 13 |
|
|---|
| 14 |
LDDQU xmm, mm128. uops fused, latency, uops unfused (port unknown) |
|---|
| 15 |
HADDPS xmm, xmm (same as HSUBPS) |
|---|
| 16 |
HADDPD xmm, xmm. uops fused, uops unfused p01. |
|---|
| 17 |
|
|---|
| 18 |
PMOVMSKB |
|---|
| 19 |
*/ |
|---|
| 20 |
|
|---|
| 21 |
// Operand buffers for the benchmark kernel. daxpyFunc reads X and
// read-modify-writes Y; Z is only cleared by the init step (the asm lines
// that would use it are commented out).
// NOTE(review): the kernel accesses X and Y with MOVAPD, which needs
// 16-byte-aligned addresses; nothing here enforces that alignment - confirm
// the compiler/linker places these arrays on a 16-byte boundary.
double[1000] X;
double[2000] Y;
double[5000] Z;

// Scalar multiplier loaded by the kernel.
double da = 3.5;

// 128-bit mask XORed into the multiplier by the kernel. It is all zero bits
// here, so the XOR leaves the multiplier unchanged.
// NOTE(review): used as a memory operand of XORPD, so it also needs 16-byte
// alignment - confirm.
double[2] SignBit = [0.0, 0.0];
|---|
| 26 |
|
|---|
| 27 |
/**
 * DAXPY-style benchmark kernel: for each pair of doubles,
 *     Y[32 + i] += (da ^ SignBit) * X[i]
 * using packed SSE2 arithmetic (two doubles per loop iteration).
 *
 * Params:
 *   REPS = number of BYTES of X to process (not an element count).
 *          The loop advances 16 bytes at a time, so the amount processed is
 *          REPS rounded up to a multiple of 16. Values <= 0 do nothing.
 *          The caller must keep REPS within the size of X (8000 bytes);
 *          no upper-bound check is made here.
 *
 * The base pointers are derived from REPS so that the accessed range always
 * starts at X[0] and at byte offset 256 of Y. With fixed bases, a large REPS
 * would index in front of the arrays.
 *
 * MOVAPD/ADDPD/XORPD memory operands must be 16-byte aligned; this relies on
 * X, Y and SignBit being placed on 16-byte boundaries.
 */
void daxpyFunc(int REPS)
{
    asm {
        push ESI;
        push EDI;

        mov EAX, REPS;              // EAX = byte count
        lea ESI, X;
        lea EDI, Y+256;             // Y is accessed 256 bytes (32 doubles) in
        add ESI, EAX;               // ESI = end of the X range to read
        add EDI, EAX;               // EDI = end of the Y range to update

        movsd XMM2, da;             // low lane = da
        xorpd XMM2, SignBit;        // apply mask (all zero => unchanged)
        shufpd XMM2, XMM2, 0;       // broadcast low lane to both lanes

        neg EAX;                    // EAX = -REPS: negative index counting up to 0
        jns Ldone;                  // REPS <= 0: nothing to do

        align 16;
    L1:
        movapd XMM1, [ESI+EAX];     // load two doubles of X
        mulpd XMM1, XMM2;           // scale by the multiplier
        add EAX, 16;                // advance index; sets SF for the js below
        addpd XMM1, [EDI+EAX-16];   // add the matching two doubles of Y
        movapd [EDI+EAX-16], XMM1;  // store back into Y (SSE ops leave flags alone)
        js L1;                      // loop while the index is still negative

    Ldone:
        pop EDI;
        pop ESI;
    }
}
|---|
| 60 |
|
|---|
| 61 |
/**
 * Benchmark driver. main() calls this after aligning the stack.
 *
 * Builds three nested callbacks and hands them to a PerformanceTester:
 * a run of daxpyFunc over 200*8 bytes, a step that zeroes the buffers, and a
 * run of daxpyFunc over 100*8 bytes. Results are then reported through
 * describe().
 * NOTE(review): how DoPerformanceTest uses the three callbacks (call order,
 * repetition count, what is measured) is defined in Blade.Performance.Perform
 * and is not visible here.
 */
void alignedmain()
{
    // Zero all three global buffers.
    void clearBuffers()
    {
        X[] = 0;
        Y[] = 0;
        Z[] = 0;
    }

    // Kernel run with the larger byte count.
    void longRun()
    {
        daxpyFunc(200*8);
    }

    // Kernel run with half that byte count.
    void shortRun()
    {
        daxpyFunc(100*8);
    }

    scope PerformanceTester tester = new PerformanceTester;
    tester.DoPerformanceTest(&longRun, &clearBuffers, &shortRun);
    tester.describe();
}
|---|
| 74 |
|
|---|
| 75 |
|
|---|
| 76 |
// Align the stack to a multiple of 64 bytes (this improves repeatability). |
|---|
| 77 |
/**
 * Program entry point. Written as a naked function so that it can force the
 * stack pointer down to a 64-byte boundary before running the benchmark in
 * alignedmain(), then restore the original stack pointer.
 *
 * Because the function is naked, no prologue/epilogue is generated: every
 * register it changes must be saved and restored by hand.
 */
void main()
{
    asm {
        naked;
        push EBP;               // EBP is callee-saved: preserve the caller's frame pointer
        mov EBP, ESP;           // remember the unaligned stack pointer
        and ESP, 0xFFFF_FFC0;   // round ESP down to a multiple of 64
        call alignedmain;
        mov ESP, EBP;           // undo the alignment
        pop EBP;
        // NOTE(review): a naked body bypasses any compiler-generated return
        // value for void main, so report success explicitly - confirm that
        // the runtime takes the exit status from EAX.
        xor EAX, EAX;
        ret;
    }
}
|---|