root/trunk/blade/PerfTest.d

Revision 177, 1.5 kB (checked in by Don Clugston, 5 months ago)

Added performance test using MSR registers; the device driver comes from Agner Fog. Only works on my system (path is hard-coded, only works on Windows, Pentium M and Pentium Core (not core2)).

  • Property svn:mime-type set to text/x-dsrc
  • Property svn:eol-style set to native
Line 
1 /**
2  * Blade Performance tests
3  */
4 module PerfTest;
5 import Blade.Performance.Perform;
6 import std.stdio;
7 //import std.cpuid;
8 import win32.core;
9 import std.string;
10
11
12 /* Values for agner
13
14 LDDQU xmm, m128.  uops fused, latency, uops unfused (port unknown)
15 HADDPS xmm, xmm (same as HSUBPS)
16 HADDPD xmm, xmm. uops fused, uops unfused p01.
17
18 PMOVMSKB
19 */
20
// Benchmark data. daxpyFunc addresses X, Y, da and SignBit by name from its
// inline assembler.
// NOTE(review): daxpyFunc accesses X and Y with movapd, which needs 16-byte
// aligned addresses; nothing in these declarations enforces that alignment --
// confirm it holds for the compiler/linker in use.
double[1000] X;             // daxpy input vector
double[2000] Y;             // daxpy accumulator (read, updated, written back)
double[5000] Z;             // zeroed by alignedmain; its asm uses are commented out
double da = 3.5;            // scalar multiplier applied to X
double[2] SignBit = [0,0];  // XORed into da by daxpyFunc; all zero bits, so da is unchanged
26
/**
 * DAXPY benchmark kernel (x86-32, SSE2): adds da * X to Y, two doubles per
 * loop iteration.
 *
 * The X stream starts at X[0]; the Y stream starts 256 bytes into Y. This
 * keeps the same X-to-Y address offset (Y - X + 256) as the original
 * hard-coded base pointers.
 *
 * Params:
 *   REPS = number of BYTES to process from each stream (not an element
 *          count). Must be a multiple of 16, and small enough that
 *          REPS <= 8000 (X is double[1000]) and REPS + 256 <= 16000
 *          (Y is double[2000]). Values <= 0 do nothing.
 *
 * Register roles inside the loop:
 *   EAX  = negative byte index, counts up towards 0
 *   ESI  = one past the end of the X region
 *   EDI  = one past the end of the Y region
 *   XMM2 = { da, da }
 *   XMM0, XMM1 = scratch
 *
 * NOTE(review): movapd requires X and Y to be 16-byte aligned; this function
 * cannot enforce that -- confirm for the target toolchain.
 */
void daxpyFunc(int REPS)
{
    asm {
        push ESI;
        push EDI;
        push EBX;                   // EBX is only touched by the disabled Z experiment below
        mov EAX, REPS;
        neg EAX;                    // EAX = -REPS
        jns Ldone;                  // REPS <= 0: skip the loop entirely

        // Derive the end pointers from REPS so that the first access lands
        // exactly on the start of each stream (EAX is -REPS here).
        lea ESI, X;
        sub ESI, EAX;               // ESI = X + REPS
        lea EDI, Y+256;
        sub EDI, EAX;               // EDI = Y + 256 + REPS
//      lea ebx, Z+8*50000;

        movsd XMM2, da;
        movupd XMM0, SignBit;       // unaligned load: SignBit is not guaranteed 16-byte aligned
        xorpd XMM2, XMM0;
        shufpd XMM2, XMM2, 0;       // copy the low double into both halves
        align 16;
L1:
//      movapd xmm3, [edi+eax];
//      cmp esi, 0;
        movapd XMM1, [ESI+EAX];     // load two doubles of X
        mulpd XMM1, XMM2;           // multiply by da
//      addpd xmm3, [ebx+eax];
//      addpd xmm1, xmm3;
//      add edi, 0;
        add EAX, 16;                // advance the index; this sets the flags tested by js
        addpd XMM1, [EDI+EAX-16];   // add two doubles of Y (-16: index already advanced)
        movapd [EDI+EAX-16], XMM1;  // write the result back to Y
        js L1;                      // repeat while the index is still negative
Ldone:
        pop EBX;
        pop EDI;
        pop ESI;
    }
}
60
/**
 * Body of the benchmark program; main() calls this after aligning the stack.
 *
 * Hands a PerformanceTester two daxpyFunc workloads of different length and
 * a routine that zeroes the data arrays, then asks it to describe the result.
 * NOTE(review): what DoPerformanceTest does with its three callbacks is
 * defined in Blade.Performance.Perform, not here -- verify against that module.
 */
void alignedmain()
{
    // daxpyFunc takes a byte count, not an element count.
    const int LONG_RUN_BYTES  = 200*8;
    const int SHORT_RUN_BYTES = 100*8;

    void resetArrays()
    {
        X[] = 0;
        Y[] = 0;
        Z[] = 0;
    }

    void longRun()
    {
        daxpyFunc(LONG_RUN_BYTES);
    }

    void shortRun()
    {
        daxpyFunc(SHORT_RUN_BYTES);
    }

    scope PerformanceTester tester = new PerformanceTester;
    tester.DoPerformanceTest(&longRun, &resetArrays, &shortRun);
    tester.describe();
}
74
75
/**
 * Program entry point.
 *
 * Aligns the stack to a multiple of 64 bytes (this improves repeatability),
 * runs alignedmain() on the aligned stack, then restores the original stack
 * pointer and returns 0.
 *
 * The function is naked, so the compiler emits no prologue or epilogue:
 * everything the caller expects to be preserved must be handled here.
 */
void main()
{
    asm {
        naked;
        push EBP;               // EBP is callee-saved: keep the caller's value
        mov EBP, ESP;           // remember the unaligned stack pointer
        and ESP, 0xFFFF_FFC0;   // round ESP down to a 64-byte boundary
        call alignedmain;
        mov ESP, EBP;           // undo the alignment
        pop EBP;
        xor EAX, EAX;           // return 0 rather than whatever alignedmain left in EAX
        ret;
    }
}
Note: See TracBrowser for help on using the browser.