Changeset 93
- Timestamp:
- 04/05/07 15:35:52 (2 years ago)
- Files:
-
- trunk/mathextra/Blade.d (modified) (20 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/mathextra/Blade.d
r92 r93 14 14 * - Supports any mix of vector addition, subtraction, dot product, and multiplication 15 15 * by a real or pure imaginary scalar. 16 * - Generates either x87 asm code or pure D, depending on the complexity of 17 * the expression, and the availability of inline asm. 16 18 * - 80-bit precision is used whenever possible. 17 19 * - Supports mixed-length operations (eg, real[] + double[] + float[]). 18 20 * - Supports both real and imaginary vectors, and detects type mismatches between them. 19 21 * (Complex numbers are not yet supported). 20 * - When static arrays are used, mismatches in array length are detected at compile time. 22 * - When static arrays are used, mismatches in array length are detected 23 * at compile time. 21 24 * 22 25 * BUGS/ FUTURE DIRECTIONS: … … 30 33 * - Not optimal for the case of multiple real vectors (they could share a counter). 31 34 * - Not optimal for the case where all vectors are 80-bit (two counters are used, but only is required). 32 * - Doesn't take advantage of length being known at compile time (loop unrolling35 * - Doesn't take full advantage of length being known at compile time (loop unrolling 33 36 * is possible). 34 37 * - Doesn't use EBP register -- this would allow an extra vector in expressions. 35 38 * - Doesn't support complex numbers. 36 * - Need a D and an SSE2 version.39 * - Need an SSE2 version. 37 40 * 38 41 * THEORY: … … 56 59 } 57 60 58 59 61 char [] itoa(ulong x) 60 62 { … … 81 83 // PART 1 -- Expression Templates 82 84 // ------------------------------------------- 85 // Unlike expression templates in other languages, there is no 86 // 'abstract syntax tree' constructed from multiple templates. Instead, 87 // the expression is constructed as a normal, human-readable text string 88 // (for example, "a+(b*c-d)"). 89 // This string is a template parameter for a struct, which 90 // contains a tuple of all the arguments used in the expression. 91 // The advantage of this approach is that it's very easy to manipulate the 92 // text string. 83 93 84 94 // When we join two expressions together, we need to 'shuffle' all the variables 85 // in the firstone along.95 // in the second one along. 86 96 char [] joinExpressions(char [] op, char [] first, char [] second) 87 97 { … … 109 119 } 110 120 121 // Check for type mismatches when performing vector assignment. 111 122 template CompatibleVectors(A, B) 112 123 { … … 119 130 120 131 /** 121 * A proxy for a vector operation returning a vector type. 132 * A proxy for a vector operation returning a vector type; conceptually, it 133 * is an array of type BaseType[]. 122 134 * Stores all the arguments as a tuple, and the operations 123 135 * as a character string. 124 136 * Note that if operations=="a", it's a single vector; otherwise, it's a temporary. 125 * 'knownlength' is the length of vector, when known at compile time .126 * If 'knownlength' == 0, it is unknown at compile time.137 * 'knownlength' is the length of vector, when known at compile time; 138 * if 'knownlength' == 0, it is unknown at compile time. 127 139 */ 128 140 struct VectorExpr(BaseType_, char[] operations, int knownlength, B...) { … … 144 156 } 145 157 public: 158 // Operations which are valid for both vectors and temporaries. 159 // All they do is update the expression string and the tuple, creating a new 160 // VectorExpr. The existing VectorExpr will not be used again. 146 161 static if (operations.length>1 && operations[$-2]=='*') { 147 162 // Optimisation: already a scalar multiply, so constant fold it. … … 168 183 169 184 // The opAssign operations are only valid for single vectors, not for temporaries 185 // They actually perform the calculation. 170 186 static if (operations=="a") { 171 187 void opAssign(A)(A expr) { 172 188 static assert(CompatibleVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] = " ~ A.BaseType.stringof ~ "[]"); 173 189 static assert(len==0 || expr.len == 0 || len == expr.len, "Vector lengths must match"); 174 performOperation!( expr.ops, "=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values);190 performOperation!(void, expr.ops, "=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 175 191 } 176 192 void opAddAssign(A)(A expr) { 177 193 static assert(CompatibleVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] += " ~ A.BaseType.stringof ~ "[]"); 178 194 static assert(len==0 || expr.len == 0 || len == expr.len, "Vector lengths must match"); 179 performOperation!( expr.ops, "+=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values);195 performOperation!(void, expr.ops, "+=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 180 196 } 181 197 void opSubAssign(A)(A expr) { 182 198 static assert(CompatibleVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] -= " ~ A.BaseType.stringof ~ "[]"); 183 199 static assert(len==0 || expr.len == 0 || len == expr.len, "Vector lengths must match"); 184 performOperation!( expr.ops, "-=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values);200 performOperation!(void, expr.ops, "-=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 185 201 } 186 202 void opMulAssign(A)(A w) { // Use a template to avoid unnecessary code generation 187 203 static assert(is (A: real), "Vector type mismatch in " ~ BaseType.stringof ~ "[] *= real"); 188 performOperation!( "a", "*=", knownlength, real, B[0])(w, values);204 performOperation!(void, "a", "*=", knownlength, real, B[0])(w, values); 189 205 } 190 206 } … … 210 226 static if (is(A.BaseType: ireal) && is(B.BaseType:ireal)) 211 227 const MUL = -1; 212 else static if (is(typeof(A.BaseType*B.BaseType): ireal))213 const MUL = 1i;214 228 else const MUL = 1; 215 return MUL * performOperation!(joinExpressions(".", A.ops, B.ops), ".", a.len==0? b.len:a.len, A.ValueTuple, B.ValueTuple)(a.values, b.values); 216 } 217 218 real performOperation(char [] operations, char [] finaloperation, int knownlength, X...)(X expr) 219 { 220 const char [] tupstr = vectorTupleToString!(X); 221 version(BladeDebug) { 222 pragma(msg, finaloperation ~ " " ~ operations ~ 223 "\nPostfix: " ~ makePostfixForX87(operations, tupstr) ~ "\nTuple: " ~ tupstr); 224 static if (knownlength!=0) pragma(msg, "Length is known!"); 225 226 const char [] qqq = makeAsmX87(tupstr, makePostfixForX87(operations, tupstr), finaloperation); 227 pragma(msg, "Generated code:"\n ~ qqq); 229 return MUL * performOperation!(typeof(A.BaseType*B.BaseType), joinExpressions(".", A.ops, B.ops), ".", a.len==0? b.len:a.len, A.ValueTuple, B.ValueTuple)(a.values, b.values); 230 } 231 232 /** This the only place where any code is generated. 233 * Firstly, runtime checks are made of vector lengths. 234 */ 235 ReturnType performOperation(ReturnType, char [] operations, char [] finaloperation, int knownlength, X...)(X expr) 236 { 237 const char [] tupstr = vectorTupleToString!(X); 238 mixin(generateVectorLengthChecks(tupstr)); 239 version(BladeDebug) { 240 pragma(msg, finaloperation ~ " " ~ operations ~ 241 "\nPostfix: " ~ makePostfixForX87(operations, tupstr) ~ "\nTuple: " ~ tupstr); 242 static if (knownlength!=0) pragma(msg, "Length is known!"); 243 244 const char [] qqq = generateCodeForAsmX87(knownlength, tupstr, makePostfixForX87(operations, tupstr), finaloperation); 245 pragma(msg, "Generated code:"\n ~ qqq); 246 } 247 248 // Decide which code generator to use, based on expression complexity and 249 // assembler availability. 250 if (isX87AsmPossible(tupstr, operations)) { 251 mixin(generateCodeForAsmX87(knownlength, tupstr, makePostfixForX87(operations, tupstr), finaloperation)); 252 } else { 253 mixin(generateCodeForD!(ReturnType)(knownlength, tupstr, finaloperation,operations)); 228 254 } 229 255 230 mixin(makeAsmX87(tupstr, makePostfixForX87(operations, tupstr), finaloperation)); 231 } 232 256 } 233 257 234 258 // ------------------------------------------------ 235 // PART 2 -- Convert expression to postfix form + convert tuple to string. 259 // PART 2 -- Convert tuple to string. 260 // ------------------------------------------------ 261 // This is only necessary because CTFE functions cannot iterate over the elements 262 // of a tuple. The solution is to use templates to create a string representation 263 // of the types in the tuple. 264 265 template singleType(A) 266 { 267 static if (is(A == real[]) || is(A==ireal[])) const char [] singleType = "R"; 268 else static if (is(A == double[])|| is(A == idouble[]))const char [] singleType = "D"; 269 else static if (is(A == float[]) || is(A==ifloat[])) const char [] singleType = "F"; 270 else static if (is(A == real)) const char [] singleType = "S"; 271 else const char [] singleType = "?"; 272 } 273 274 // A CTFE function can't randomly index a tuple, so convert the type information 275 // into a char[]. 276 template vectorTupleToString(X...) 277 { 278 static if (X.length==1) const char [] vectorTupleToString = singleType!(X[0]); 279 else const char [] vectorTupleToString = singleType!(X[0]) ~ vectorTupleToString!(X[1..$]); 280 } 281 282 // And some functions to grab information from the typestring. 283 284 // Return the first index in the tuple which is of vector type 285 int findFirstVector(char [] typelist) 286 { 287 for (int i=0; i< typelist.length;++i) { 288 if (isVector(typelist[i])) return i; 289 } 290 } 291 292 bool isVector(char var) 293 { 294 return (var=='R' || var=='D' || var=='F'); 295 } 296 297 // --------------------------------------- 298 // PART 3 -- Mixins to generate D code 299 // --------------------------------------- 300 301 // Return a string to mixed in, which asserts that all vectors are of 302 // equal length. 303 char [] generateVectorLengthChecks(char [] typelist) 304 { 305 int firstvec = findFirstVector(typelist); 306 char [] result=""; 307 for (int i=firstvec+1; i< typelist.length;++i) { 308 if (isVector(typelist[i])) { 309 result ~= " assert(expr[" ~ itoa(i) ~ "].length" 310 ~ " == expr[" ~ itoa(firstvec) ~ `].length, "vectors are different lengths");`\n; 311 } 312 } 313 return result; 314 } 315 316 /** Generate D code to evaluate the given vector expression 317 * 318 */ 319 char [] generateCodeForD(RetType)(int knownlength, char [] typelist, char [] finalop, char [] operation) 320 { 321 char [] iter=""; 322 foreach(int i, ch; operation) { 323 if (ch=='.') { iter~="*"; } else 324 if (ch>='a' && ch<='z') { 325 iter ~= "expr[" ~ itoa(ch-'a') ~ "]"; 326 if (isVector(typelist[ch-'a'])) iter~="[i]"; 327 } else iter ~= ch; 328 } 329 char [] result; 330 if (knownlength>0) result = "int length=" ~ itoa(knownlength) ~ ";\n"; 331 else result = " int length = expr[" ~ itoa(findFirstVector(typelist))~ "].length;\n"; 332 if (finalop == ".") { 333 static if ( is(RetType: ireal)) result ~= " ireal sum=0.0i;\n"; 334 else result ~= " real sum=0.0;\n"; 335 result ~= " for (int i=0; i<length; ++i) {\n" 336 ~ " sum+=" ~iter~";"~ "\n }\n return sum;\n"; 337 } else { 338 result ~= " for (int i=0; i<length; ++i){\n" 339 ~ " expr[" ~ itoa(typelist.length-1L)~"][i]" ~ finalop ~ iter ~";\n }\n"; 340 } 341 return result; 342 } 343 344 // ------------------------------------------------ 345 // PART 4 -- Convert expression to postfix form 236 346 // ------------------------------------------------ 237 347 … … 292 402 if (operations[x+1]=='-') { 293 403 if (second[$-1]=='*' && first[$-1]!='*') { 294 return second ~ first ~ " ~";404 return second ~ first ~ "_"; 295 405 } 296 406 } … … 302 412 } 303 413 if (second.length==1 && typelist[second[0]-'a']=='R' && operations[x+1]=='-') { 304 return second ~ first ~ " ~";414 return second ~ first ~ "_"; 305 415 } 306 416 return first ~ second ~ operations[x+1..x+2]; 307 417 } 308 418 309 template singleType(A)310 {311 static if (is(A == real[]) || is(A==ireal[])) const char [] singleType = "R";312 else static if (is(A == double[])|| is(A == idouble[]))const char [] singleType = "D";313 else static if (is(A == float[]) || is(A==ifloat[])) const char [] singleType = "F";314 else static if (is(A == real)) const char [] singleType = "S";315 else const char [] singleType = "?";316 }317 318 // A CTFE function can't randomly index a tuple, so convert the type information319 // into a char[].320 template vectorTupleToString(X...)321 {322 static if (X.length==1) const char [] vectorTupleToString = singleType!(X[0]);323 else const char [] vectorTupleToString = singleType!(X[0]) ~ vectorTupleToString!(X[1..$]);324 }325 326 419 // ------------------------------- 327 // PART 3 -- Parsing420 // PART 5 -- Mixins to generate x87 ASM code 328 421 // ------------------------------- 329 422 330 423 bool isInstruction(char op) 331 424 { 332 return (op=='+' || op=='*' || op=='-'|| op=='.'|| op=='~'); 333 } 334 335 bool isVector(char var) 336 { 337 return (var=='R' || var=='D' || var=='F'); 338 } 339 340 int numVectors(char [] typelist) 425 return (op=='+' || op=='*' || op=='-'|| op=='.'|| op=='_'); 426 } 427 428 // Count the number of vectors in the typestring 429 int countVectors(char [] typelist) 341 430 { 342 431 int numVecs=0; … … 347 436 } 348 437 349 350 438 int vectorNum(char [] typelist, char var) 351 439 { … … 366 454 } 367 455 368 // -------------------------------369 // PART 4 -- Generate x87 ASM code370 // -------------------------------371 456 372 457 char [] operandSize(char var) … … 386 471 case '+': return "fadd"; 387 472 case '-': return "fsub"; 388 case ' ~': return "fsubr";473 case '_': return "fsubr"; 389 474 } 390 475 } … … 406 491 // use EBX, ESI, and EDI. Finally, use the frame register EBP. 407 492 const char [][5] vectorRegister = ["EAX", "ECX", "EDX", "EBX", "EDI"]; 493 494 // Is this expression simple enough for our code generator? 495 bool isX87AsmPossible(char [] typelist, char [] operations) { 496 version (D_InlineAsm_X86) { 497 // Are there enough index registers? 498 if (countVectors(typelist) >= vectorRegister.length) return false; 499 // BUG: should also check if it will overflow the FPU stack 500 return true; 501 } else { 502 // Without an assembler, there's no chance! 503 return false; 504 } 505 } 408 506 409 507 // Create code to push all used vector registors. … … 473 571 474 572 */ 475 char [] makeAsmX87(char [] typelist, char [] operations, char [] finaloperation)573 char [] generateCodeForAsmX87(int knownlength, char [] typelist, char [] operations, char [] finaloperation) 476 574 { 477 575 char [] result=""; … … 479 577 480 578 // Create local variables for everything (avoid bug #1028) 481 // Also assert that all vectors are the same length482 bool firstvec=true;483 579 int vecnum = 0; 484 int scalarnum = 0;485 580 for (int i=0; i< typelist.length;++i) { 486 581 if (typelist[i]=='S'){ 487 582 result~= " real var" ~ itoa(i) ~ " = expr[" ~ itoa(i) ~ "];\n"; 488 ++scalarnum;489 583 } else { 490 584 result~= " auto vec" ~ itoa(i) ~ " = expr[" ~itoa(i) ~"].ptr;\n"; … … 492 586 incrementRealVectors ~= " add " ~ vectorRegister[vecnum] ~ ", " ~ REALSIZE ~ ";\n"; 493 587 } 494 if (firstvec) {495 result~= " int veclength = expr[" ~itoa(i) ~"].length;\n";496 firstvec = false;497 } else result~=" assert(veclength == expr[" ~ itoa(i) ~ `].length, "vectors are different lengths");`\n;498 588 ++vecnum; 499 589 } 500 590 } 501 assert(vecnum-1 < vectorRegister.length, "Too many vectors!"); 591 result ~= " int veclength = expr[" ~itoa(findFirstVector(typelist)) ~"].length;\n"; 592 502 593 bool isDotProduct = (operations[$-1]=='.'); 503 594 int numScalarsOnStack=0; 504 595 505 result~= \n"asm {"\n ~ pushRegisters(vecnum) ~ 506 " mov ESI, veclength;"\n; // ESI will be the counter 596 result~= \n"asm {"\n ~ pushRegisters(vecnum); 597 // ESI will be the counter 598 if (knownlength>0) result~= " mov ESI, " ~ itoa(knownlength) ~";\n"; 599 else result ~= " mov ESI, veclength;"\n; 507 600 508 601 // Load all the vector pointers into registers, and push all the scalars onto the stack … … 528 621 529 622 if (isDotProduct) result ~= " fldz;"\n; 530 result ~= " xor ESI, ESI; "\n 531 " sub ESI, veclength; // counter=-length"\n 532 " jz short L3; // test for length==0"\n; 623 if (knownlength>0) result~= " mov ESI, -" ~ itoa(knownlength) ~";\n"; 624 else { 625 result ~= " xor ESI, ESI; "\n 626 " sub ESI, veclength; // counter=-length"\n 627 " jz short L3; // test for length==0"\n; 628 } 533 629 if (!isDotProduct && operations.length==1 && finaloperation[0]=='*') { 534 630 ++numScalarsOnStack; … … 542 638 char [] mainbody = ""; 543 639 char [] firstbody = ""; 640 641 // We need to keep track of how many things are on the FPU stack. 642 // Every time something is pushed, the indices of our variables change! 544 643 int numOnStack = 0; // How much of the FP stack is being used? 545 644
