Changeset 187 for trunk/blade/Blade.d
- Timestamp:
- 04/30/08 16:05:32 (6 months ago)
- Files:
-
- trunk/blade/Blade.d (modified) (33 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/blade/Blade.d
r176 r187 1 1 // Written in the D programming language 1.0 2 2 /** 3 * BLADE 0. 4Alpha -- Basic Linear Algebra D Expressions3 * BLADE 0.6Alpha -- Basic Linear Algebra D Expressions 4 4 * 5 5 * Generate near-optimal x87/SSE2 asm code for BLAS1 basic vector operations at compile time. … … 13 13 * 14 14 * FEATURES: 15 * - Supports any mix of vector addition, subtraction, dot product, unary minus, 16 * multiplication by a scalar, sum(), abs(), and multidimensional slicing. 15 * - Supports any mix of vector addition, subtraction, unary minus, 16 * multiplication by a scalar, 17 * cumulation via dot product, sum() and prod(), and multidimensional slicing. 17 18 * - Generates either x87 asm code, SSE or SSE2 asm code or pure D, depending on 18 19 * the complexity of the expression, and the availability of inline asm. … … 24 25 * 25 26 * SPEED/ACCURACY TRADEOFF: 26 * IEEE floating point multiplication and addition are not associative.27 * Tradeoff arises because IEEE floating point multiplication and addition are not associative. 27 28 * Assuming that overflow and underflow do not occur: 28 29 * (a*b)*c may differ from a*(b*c) in the last bit. … … 34 35 * 35 36 * FUTURE DIRECTIONS (in order of expected implementation): 36 * - trace() 37 * - Loop unrolling for cumulative operations dot, sum, trace. 37 * - nested D expressions 38 * - cumulative operations min, max 39 * - Loop unrolling for cumulative operations dot, sum, prod. 38 40 * - Dense matrix support. 39 41 * - Triangular, banded, symmetric, and sparse matrix support … … 46 48 * which accepts the tuple. 47 49 * 50 * COMPILER BUGS/LIMITATIONS AFFECTING THIS LIBRARY 51 * - Local arrays are not aligned to a 128-bit boundary, so use of aligned SSE is not 52 * always possible. 53 * - Bugzilla #1125 -- structs in a tuple can't be used in asm. 54 * - Bugzilla #1382 -- CTFE strings never get deleted --> SLOOOOOW compilation. KILLER BUG. 55 * - Bugzilla #1768 -- in CTFE, arrays of arrays aren't initialized properly 56 * 48 57 * HISTORY: 49 58 * 0.1 - Used classes to make expression templates. 50 59 * 0.2 - Support for a wider variety of expressions. Dot product, imaginary numbers, etc. 51 60 * 0.3 - Based on string mixins. Most of the new features of 0.2 are gone, but SSE2 is added. 52 * 0.4 - Added D code generator. Nice error messages. Optimal parameter passing. 61 * 0.4 - Added D code generator. Nice error messages. Optimal parameter passing. 53 62 * (passes pointers, not arrays). 54 63 * 0.5 - Expression simplification step. Slicing support. 55 * 0.6 - Dot product, nested expressions, intrinsics: abs, sqrt, sum. 64 * 0.6 - Dot product, nested expressions (asm only), intrinsics: abs, sqrt, sum. 65 * 0.7 - Intrinsics: prod 56 66 */ 57 67 … … 73 83 // FOR MIXIN: Generate code to evaluate the given vector expression. 74 84 char [] vectorize(char [] expr) 75 { 85 { 76 86 debug (BladeFrontEnd) { 77 87 return `pragma(msg, \n ~ "// " __FILE__ ~ "(" ~__LINE__.stringof[0..$-1] ~ ") ` ~ enquote(expr) ~ `" ~ \n ~ ` ~ mixin_tupleAndSyntaxtreeof("makeVectorCode", expr) ~ "~\\n);" … … 82 92 } 83 93 84 // Simplify the expression, categorise it, 94 // Simplify the expression, categorise it, 85 95 // and dispatch to the appropriate code generator. 86 96 char [] makeVectorCode(Types...)(AbstractSyntaxTree tree) … … 89 99 if (revised.errorMessage.length>0) return `static assert(0, "BLADE: ` ~ enquote(revised.errorMessage) ~ `");`; 90 100 VecExpressionType exprType = categorizeExpression(revised); 91 InvocationCode q; 101 InvocationCode q; 92 102 if (exprType == VecExpressionType.SSE2Expression || exprType == VecExpressionType.SSE1Expression) { 93 103 q = invokeSSE((exprType == VecExpressionType.SSE2Expression), revised); … … 105 115 { 106 116 // TODO: 107 return expression; 117 return expression; 108 118 } 109 119 110 120 template X87RetType(char [] expr) { 111 static if (expr[0]!='0' ) alias void X87RetType;121 static if (expr[0]!='0' && expr[0]!='1') alias void X87RetType; 112 122 else alias real X87RetType; 113 123 } … … 130 140 131 141 /** Function to implement BLAS1 operations using X87 assembler. 132 * Every member of the Values tuple must only be real, 142 * Every member of the Values tuple must only be real, 133 143 * float[], double [], or real[], or BladeStrided!(float), !(double), !(real) 134 144 */ 135 145 X87RetType!(expr) X87VECGEN(char [] expr, int numStrides, Values...)(int veclength, Values values) { 136 debug(BladeBackEnd) { 146 debug(BladeBackEnd) { 137 147 pragma(msg, generateCodeForAsmX87!(numStrides, Values)(expr)); 138 148 } … … 146 156 static ulong[2] SSE_SIGNBITpd = [0x8000_0000_0000_0000L, 0x8000_0000_0000_0000L]; 147 157 static uint[4] SSE_SIGNBITps = [0x8000_0000,0x8000_0000,0x8000_0000, 0x8000_0000]; 158 // The value 1.0 for a parallel SSE register 159 static ulong[2] SSE_ONEpd = [0x3FF0_0000_0000_0000L, 0x3FF0_0000_0000_0000L]; 160 static uint[4] SSE_ONEps = [0x3F0_000, 0x3F0_000, 0x3F0_000, 0x3F0_000]; 148 161 149 162 private: … … 156 169 // SSE1 is possible only if all vectors are floats. 157 170 // X87 is possible for any mix of real, double, and float vectors. 158 // BUG: for X87, should also check number of temporaries (don't overflow the FPU stack 171 // BUG: for X87, should also check number of temporaries (don't overflow the FPU stack) 159 172 enum VecExpressionType { SSE1Expression, SSE2Expression, X87Expression, DExpression }; 160 173 … … 164 177 bool SSE1 = true; 165 178 bool X87 = true; 166 bool strided = false; // true if any strided vector or matrix operations exist 179 bool strided = false; // true if any strided vector or matrix operations exist 167 180 version (D_InlineAsm_X86) {} else { 168 181 // Without an assembler, there's no chance! … … 199 212 y = tree.compounds[x-tree.symbolTable.length][0]-'A'; 200 213 // Check for a stride.. 201 if (tree.compounds[x-tree.symbolTable.length][$-1]==']') { 214 if (tree.compounds[x-tree.symbolTable.length][$-1]==']') { 202 215 strided |= isStrided(tree.compounds[x-tree.symbolTable.length]); 203 216 } 204 217 } 205 218 206 219 char [] t = tree.symbolTable[y].element; 207 220 if (t == "double") { … … 218 231 } 219 232 } 220 // It's not worth doing strided operations with SSE. 233 // It's not worth doing strided operations with SSE. 221 234 if (strided) { SSE1=false; SSE2=false; } 222 235 if (numRealScalars > MAX_87_REALSCALARSPLUSTEMPORARIES) X87 = false; … … 224 237 if (numvectors > MAX_SSE_VECTORS) { SSE1=false; SSE2=false; } 225 238 if (SSE1) return VecExpressionType.SSE1Expression; 226 if (SSE2) return VecExpressionType.SSE2Expression; 227 return X87 ? VecExpressionType.X87Expression : VecExpressionType.DExpression; 239 if (SSE2) return VecExpressionType.SSE2Expression; 240 return X87 ? VecExpressionType.X87Expression : VecExpressionType.DExpression; 228 241 } 229 242 … … 256 269 char [] stridelist=""; 257 270 char [] alltypes=""; 258 271 259 272 char [][] typelist; 260 273 261 274 char [] vals; 262 275 int numstrides=0; … … 283 296 } else { // for arrays, the type is the type of the original array 284 297 t = tree.symbolTable[tree.compounds[x-tree.symbolTable.length][0]-'A'].element; 285 // Check for a stride.. 298 // Check for a stride.. 286 299 if (tree.compounds[x-tree.symbolTable.length][$-1]==']') { 287 300 strided = isStrided(tree.compounds[x-tree.symbolTable.length]); 288 301 if (strided) ++numstrides; 289 302 } 290 303 291 304 } 292 305 } … … 296 309 // long, ulong, and real must become real. 297 310 // We convert everything else to double, since that uses less 298 // FPU stack space. 311 // FPU stack space. 299 312 if (t == "real" || t == "double" || t == "float") alltypes ~= t; 300 313 else if (t == "long" || t == "ulong") result ~= "real"; … … 310 323 alltypes ~= t ~ "*"; 311 324 // for vectors, we only need the pointer, not the length 312 vals ~= "&" ~ v ~ "[0]"; 325 //vals ~= "&" ~ v ~ "[0]"; 326 vals ~= v ~ ".ptr"; 313 327 } 314 328 } … … 323 337 result ~= ")("; 324 338 int firstVector = findVectorForLength(tree); 325 return InvocationCode(result ~ getValueForSymbol(tree.mapping[firstVector], tree).invoker ~ ".length" 339 return InvocationCode(result ~ getValueForSymbol(tree.mapping[firstVector], tree).invoker ~ ".length" 326 340 ~ vals ~ stridelist ~ ")", assertions); 327 341 } … … 342 356 char [] postfix = makePostfixForSSE(tree.expression, tree.rank); 343 357 char [] retType = "void"; 344 if (postfix[0]=='0' ) retType = (SSE2? "double" : "float");358 if (postfix[0]=='0' || postfix[0]=='1') retType = (SSE2? "double" : "float"); 345 359 346 360 char [] result = "SSEVECGEN!(" ~ retType ~ `,"` ~ enquote(postfix) ~ `"`; … … 352 366 else result ~= SSE2? ",double*" : ",float*"; 353 367 vals ~= ","; 354 if (rnk=='1') vals ~= "&";368 // if (rnk=='1') vals ~= "&"; 355 369 InvocationCode q = getValueForSymbol(tree.mapping[i], tree); 356 370 vals ~= q.invoker; 357 371 assertions ~= q.assertions; 358 372 // for vectors, we only need the pointer, not the length 359 if (rnk=='1') vals ~= " [0]";360 } 361 373 if (rnk=='1') vals ~= ".ptr"; 374 } 375 362 376 result ~= ")("; 363 377 int firstVector = findVectorForLength(tree); … … 384 398 // result ~= "static "; 385 399 // } 386 result ~= "assert(" 400 result ~= "assert(" 387 401 ~ getDimensionLengthForSymbol(tree.mapping[i], tree, 1) 388 402 ~ "==" ~ getDimensionLengthForSymbol(tree.mapping[firstVector], tree, 1) … … 399 413 for (int i=0; i<tree.mapping.length;++i) { 400 414 if (tree.rank[i]=='1'){ 401 result ~= "assert( (cast(size_t)( &" ~ getValueForSymbol(tree.mapping[i], tree).invoker402 ~ " [0])& 0x0F) == 0, `SSE Vector misalignment: " ~ getValueForSymbol(tree.mapping[i], tree).invoker ~ "`);"\n;415 result ~= "assert( (cast(size_t)(" ~ getValueForSymbol(tree.mapping[i], tree).invoker 416 ~ ".ptr)& 0x0F) == 0, `SSE Vector misalignment: " ~ getValueForSymbol(tree.mapping[i], tree).invoker ~ "`);"\n; 403 417 } 404 418 } … … 438 452 } 439 453 } 440 return dynamic>=0? dynamic : strided; 454 return dynamic>=0? dynamic : strided; 441 455 } 442 456 … … 458 472 } else { // else it's a compound or an indexed array 459 473 char [] comp = tree.compounds[c-'A'-tree.symbolTable.length]; 460 474 461 475 if (comp[$-1]!=']') { // simple compound expression 462 476 foreach(d; comp) { … … 477 491 char [] nextIndex; 478 492 char [] sliceTo; 479 480 for (int k = comp.length-1;k>=1; --k) { 493 494 for (int k = comp.length-1;k>=1; --k) { 481 495 char d = comp[k]; 482 496 if (d == ']') { ++numbracks; } 483 497 if (d == '[') { --numbracks; } 484 498 485 499 if (d == ']' && numbracks == 1) { nextIndex = ""; } 486 500 else if (numbracks == 1 && comp[k-1..k+1]=="..") { 487 501 isSlice = true; 488 502 sliceTo = nextIndex; 489 nextIndex = ""; 503 nextIndex = ""; 490 504 --k; 491 505 } else if ((d == '[' && numbracks==0) || (d==',' && numbracks==1)) { … … 539 553 } 540 554 RevisedExpression revised = remapCompounds(expr, ranks, symbolTable); 541 555 542 556 VecExpressionType exprType = categorizeExpression(revised); 543 557 if (exprType == VecExpressionType.SSE2Expression || exprType == VecExpressionType.SSE1Expression) { … … 560 574 if (c-'A'<tree.symbolTable.length) { 561 575 v = tree.symbolTable[c-'A'].value; 562 } else { // else it's a compound or an indexed array 576 } else { // else it's a compound or an indexed array 563 577 char [] comp = tree.compounds[c-'A'-tree.symbolTable.length]; 564 578 565 579 if (comp[$-1]!=']') { // compound expression (not an indexed array) 566 580 if (comp[0]>='a' && comp[0]<='z') { … … 590 604 bool isSlice = false; 591 605 char [] newSlice; 592 593 for (int k = comp.length-1;k>=1; --k) { 606 607 for (int k = comp.length-1;k>=1; --k) { 594 608 char d = comp[k]; 595 609 if (d == ']') { ++numbracks; } 596 610 if (d == '[') { --numbracks; } 597 611 598 612 if (d==']' && numbracks == 1) { newSlice = ""; } 599 else if (d=='.' && numbracks == 1) { isSlice = true; 600 if(numSlicesRemaining>0){ newSlice = ""; } 613 else if (d=='.' && numbracks == 1) { isSlice = true; 614 if(numSlicesRemaining>0){ newSlice = ""; } 601 615 else newSlice = "." ~ newSlice; 602 616 } … … 645 659 char [] assertions=""; 646 660 int wholerank = exprRank(tree.expression, tree.rank); 647 if (wholerank ==1) { 661 if (wholerank ==1) { 648 662 int lenvec = findVectorForLength(tree); 649 result = "for (int blade_index=0; blade_index<" 663 result = "for (int blade_index=0; blade_index<" 650 664 ~ getDimensionLengthForSymbol(tree.mapping[lenvec], tree, 1) 651 665 ~ "; ++blade_index) {"\n; 652 666 } 653 667 foreach (c; tree.expression) { 654 if (c>='A' && c<'Z') { 668 if (c>='A' && c<'Z') { 655 669 // restore all symbols into the expression 656 670 // If it's a vector, index it … … 667 681 } else result ~= c; 668 682 } 669 if (wholerank==0) return InvocationCode(result, assertions); 683 if (wholerank==0) return InvocationCode(result, assertions); 670 684 return InvocationCode(result ~ "; }", assertions); 671 685 }
