Changeset 171
- Timestamp:
- 01/08/08 07:11:22 (8 months ago)
- Files:
-
- trunk/blade/Blade.d (modified) (2 diffs)
- trunk/blade/BladeDemo.d (modified) (2 diffs)
- trunk/blade/BladeRank.d (modified) (3 diffs)
- trunk/blade/BladeSimplify.d (modified) (6 diffs)
- trunk/blade/CodegenX86.d (modified) (7 diffs)
- trunk/blade/PostfixX86.d (modified) (4 diffs)
- trunk/blade/SyntaxTree.d (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/blade/Blade.d
r170 r171 13 13 * 14 14 * FEATURES: 15 * - Supports any mix of vector addition, subtraction, dot product, slicing, and16 * multiplication by a scalar .15 * - Supports any mix of vector addition, subtraction, dot product, unary minus, 16 * multiplication by a scalar, sum(), abs(), and multidimensional slicing. 17 17 * - Generates either x87 asm code, SSE or SSE2 asm code or pure D, depending on 18 18 * the complexity of the expression, and the availability of inline asm. … … 143 143 } 144 144 145 private: 146 // Masks for setting or clearing the signbit in SSE registers. 147 static ulong[2] SSE_SIGNMASKpd = [0x7FFF_FFFF_FFFF_FFFFL, 0x7FFF_FFFF_FFFF_FFFFL]; 148 static uint[4] SSE_SIGNMASKps = [0x7FFF_FFFF, 0x7FFF_FFFF, 0x7FFF_FFFF, 0x7FFF_FFFF]; 149 static ulong[2] SSE_SIGNBITpd = [0x8000_0000_0000_0000L, 0x8000_0000_0000_0000L]; 150 static uint[4] SSE_SIGNBITps = [0x8000_0000,0x8000_0000,0x8000_0000, 0x8000_0000]; 145 151 146 152 private: trunk/blade/BladeDemo.d
r169 r171 42 42 mixin(vectorize("a+=6*another[1,0..$]")); 43 43 mixin(vectorize("a+=6*(another[1,0..$]+another[1,0..$])")); 44 44 45 45 46 mixin(vectorize("a+=6*another[1][0..$]")); … … 52 53 mixin(vectorize("another[0..$,1]=6*a[0..2]")); 53 54 54 // Simplifies to q*= 2*dot(q,q)*dot(q *q).55 // Simplifies to q*= 2*dot(q,q)*dot(q,q). 55 56 mixin(vectorize("q *=dot(q,q*dot(2*q,q))")); 56 57 double u; 57 58 mixin(vectorize("u = dot(q,q*dot(q,q))")); 58 59 mixin(vectorize("u = dot(a, q)")); 59 mixin(vectorize("u = sum(a)")); 60 mixin(vectorize("u = sum(abs(-p))")); 61 mixin(vectorize("a = -a")); 60 62 61 63 writefln("a=", a); trunk/blade/BladeRank.d
r169 r171 93 93 RankMismatchDotProduct = -10, 94 94 ExtraCharsAfterArrayLiteral = -11, 95 ArrayLiteralRankMismatch = -12 95 ArrayLiteralRankMismatch = -12, 96 AbsDimensionality = -13 96 97 } 97 98 … … 107 108 "Dimensionality mismatch (addition or subtraction)", 108 109 "Dimensionality mismatch in concatenation", 109 "Dimenionality error in dot product" 110 "Extra characters after array literal" 111 "Rank mismatch in array literal" 110 "Dimenionality error in dot product", 111 "Extra characters after array literal", 112 "Rank mismatch in array literal", 113 "Can only use abs with scalar or vector" 112 114 ][-err-1]; 113 115 } … … 135 137 if (lrank<0) return lrank; // propagate errors 136 138 return 0; 137 } 139 } else if (func=="abs") { 140 auto lrank = doVisit(this_,args[0]); 141 if (lrank>1) return RankError.AbsDimensionality; 142 return 0; 143 } 138 144 assert(0, "BLADE ICE: Unsupported function:" ~ func); 139 145 return 0; trunk/blade/BladeSimplify.d
r169 r171 51 51 bool isBladeIntrinsic(char [] str) 52 52 { 53 return str=="dot" || str=="sum" ;53 return str=="dot" || str=="sum" || str=="abs"; 54 54 } 55 55 … … 90 90 int numdups=0; 91 91 char [] mapping = ""; // The new letter which this symbol should become, 92 // or '!' if it is an intrinsic92 // or '!' if it is an intrinsic 93 93 for (int i=0; i<tree.symbolTable.length; ++i) { 94 94 char c = 'A'+i; … … 112 112 char c = tree.expression[i]; 113 113 if (c>='A' && c<='Z') { 114 if (mapping[c-'A']=='!') e~=tree.symbolTable[c-'A'].value;115 else e~= mapping[c-'A'];114 if (mapping[c-'A']=='!') e~=tree.symbolTable[c-'A'].value; 115 else e~= mapping[c-'A']; 116 116 } else e~=c; 117 117 } … … 281 281 } 282 282 } 283 ReturnType onVisitFunction(This this_, char [] func, char [][] args) { 284 if (func=="dot") { // dot product. 285 // Each element is reduced seperately 286 char [] left = wrapInParens(doVisit(this_,args[0])); 287 char [] right = wrapInParens(doVisit(this_, args[1])); 288 return func ~ "(" ~ left ~ "," ~ right ~ ")"; 289 } else if (func=="sum") { 290 char [] left = wrapInParens(doVisit(this_,args[0])); 291 return func ~ "(" ~ left ~ ")"; 292 } 293 assert(0, "BLADE ICE: Unsupported function"); 294 return ""; 283 ReturnType onVisitFunction(This this_, char [] func, char [][] args) { 284 switch (func) { 285 case "dot": 286 // Each element is reduced seperately 287 char [] left = wrapInParens(doVisit(this_,args[0])); 288 char [] right = wrapInParens(doVisit(this_, args[1])); 289 return func ~ "(" ~ left ~ "," ~ right ~ ")"; 290 case "sum": 291 case "abs": 292 char [] left = wrapInParens(doVisit(this_,args[0])); 293 return func ~ "(" ~ left ~ ")"; 294 default: 295 assert(0, "BLADE ICE: Unsupported function"); 296 return ""; 297 } 295 298 } 296 299 ReturnType onVisitPrefix(This this_, char [] op, char [] expr) { 297 300 assert(this_.slicing.length==0, "BLADE ICE"); 298 return op ~ doVisit(this_, expr);301 return op ~ wrapInParens(doVisit(this_, expr)); 299 302 } 300 303 ReturnType onVisitPostfix(This this_, char [] op, char [] expr) { 301 304 assert(this_.slicing.length==0, "BLADE ICE"); 302 return doVisit(this_, expr) ~ op;305 return wrapInParens(doVisit(this_, expr)) ~ op; 303 306 } 304 307 // Includes multi-dimensional slicing and indexing. … … 459 462 else return ScalarFold(sym, ""); 460 463 } 461 ReturnType onVisitFunction(This this_, char [] func, char [][] args) { 462 if (func=="dot") { // dot product. 464 ReturnType onVisitFunction(This this_, char [] func, char [][] args) { 465 switch(func) { 466 case "dot": 463 467 ScalarFold left = doVisit(this_,args[0]); 464 468 ScalarFold right = doVisit(this_, args[1]); 465 469 return ScalarFold("", combineMul(combineMul(left.multiplier, right.multiplier), "{" ~ func ~ "(" ~ wrapInParens(left.expr) ~ "," ~ wrapInParens(right.expr) ~ ")}")); 466 } else if (func=="sum") {470 case "sum": 467 471 ScalarFold left = doVisit(this_,args[0]); 468 472 return ScalarFold("", combineMul(left.multiplier, "{" ~ func ~ "(" ~ wrapInParens(left.expr) ~ ")}")); 469 } else { 473 case "abs": 474 ScalarFold left = doVisit(this_,args[0]); 475 return ScalarFold((left.expr!="" ? func ~ "(" ~ wrapInParens(left.expr) ~ ")" : ""), (left.multiplier.length>0 && left.multiplier!="-")? func ~ "(" ~ wrapInParens(left.multiplier) ~ ")":""); 476 default: 470 477 assert(0, "BLADE: Unsupported function"); 471 478 return ScalarFold("",""); … … 483 490 } else { 484 491 ScalarFold f = doVisit(this_, expr); 485 assert(f.expr=="" );492 assert(f.expr=="", "BLADE ICE: Prefix error" ~ op ~ " " ~ expr); 486 493 return ScalarFold("", op ~ wrapInParens(f.multiplier)); 487 494 } trunk/blade/CodegenX86.d
r170 r171 38 38 * This string is converted to postfix. The postfix string is converted to 39 39 * a string containing x87 asm, which is then mixed into a function which accepts the tuple. 40 *41 40 */ 42 41 … … 44 43 * POTENTIAL FROM RECENT INSTRUCTION SETS: 45 44 * SSE5(AMD): fmaddpd can dramatically improve both performance and accuracy. 46 * SSE4(Intel): dppd has limited use.45 * SSE4(Intel): dppd has some limited use. 47 46 */ 48 47 … … 399 398 ++done; 400 399 numOnStack--; 400 } else if (operations[done]=='a') { 401 mainbody ~= " fabs;"\n; 402 ++done; 403 } else if (operations[done]=='n') { 404 mainbody ~= " fchs;"\n; 405 ++done; 401 406 } else if (!isInstruction(operations[done+1])){ 402 407 // load a vector onto the FPU stack, to begin a new subexpression. … … 498 503 499 504 private: 505 500 506 // split off from the template to make code coverage work 501 507 char [] generateCodeForSSEImpl(bool usingDoubles, char [] ranklist, char [] operations, char cumulatingOp=0) … … 564 570 ++done; 565 571 numOnStack--; 572 } else if (operations[done]=='a') { // abs 573 mainbody ~= " andp" ~ suffix ~ XMM(numOnStack-1) ~ ", SSE_SIGNMASKp" ~ suffix ~"; // abs"\n; 574 extra ~= " andp" ~ suffix ~ XMM(numOnStack-1) ~ ", SSE_SIGNMASKp" ~ suffix ~ "; // abs"\n; 575 ++done; 576 } else if (operations[done]=='n') { // neg 577 mainbody ~= " xorp" ~ suffix ~ XMM(numOnStack-1) ~ ", SSE_SIGNBITp" ~ suffix ~"; // negate"\n; 578 extra ~= " xorp" ~ suffix ~ XMM(numOnStack-1) ~ ", SSE_SIGNBITp" ~ suffix ~ "; // negate"\n; 579 ++done; 566 580 } else if (!isInstruction(operations[done+1])){ 567 581 // load a vector onto the FPU stack, to begin a new subexpression. … … 571 585 ++done; 572 586 numOnStack++; 587 } else if (operations[done]==',') { 588 // operation on self, eg XX+ --> don't need to load it again. 589 comment = "; // " ~ operations[done..done+2] ~ \n; 590 mainbody ~= " " ~ opToSSE[operations[done+1]] ~ suffix ~ " " ~ XMM(numOnStack-1) ~ ", " 591 ~ XMM(numOnStack-1) ~ comment; 592 extra ~= " " ~ opToSSESingle[operations[done+1]] ~ suffix ~ " " ~ XMM(numOnStack-1) ~ ", " 593 ~ XMM(numOnStack-1) ~ comment; 594 done +=2; 573 595 } else if (ranklist[operations[done]-'A']=='1') { 574 596 // An operation will be performed between the stack top and a vector. … … 577 599 mainbody ~= " movap" ~ suffix ~ indexedSSEVector(ranklist, operations[$-2], vectorsize) ~ ", XMM" ~ itoa(numOnStack-1) ~ comment; 578 600 extra ~= " movs" ~ suffix ~ indexedSSENext(ranklist, operations[$-2], vectorsize) ~ ", XMM" ~ itoa(numOnStack-1) ~ comment; 579 } else580 if (operations[done-1]==operations[done]) {581 // operation on self, eg XX+ --> don't need to load it again.582 int cumvector = (operations[done-1]=='0')? numScalarsOnStack : numOnStack-1;583 mainbody ~= " " ~ opToSSE[operations[done+1]] ~ suffix ~ " " ~ XMM(numOnStack-1) ~ ", "584 ~ XMM(numOnStack-1) ~ comment;585 extra ~= " " ~ opToSSESingle[operations[done+1]] ~ suffix ~ " " ~ XMM(numOnStack-1) ~ ", "586 ~ XMM(numOnStack-1) ~ comment;587 601 } else { 588 602 mainbody ~= " " ~ opToSSE[operations[done+1]] ~ suffix ~ " " ~ XMM(numOnStack-1) ~ ", " trunk/blade/PostfixX86.d
r170 r171 14 14 * _ ST(1)-ST(0) and pop stack 15 15 * = store stack top and pop stack 16 * , duplicate stack top (so ,* means ST=ST*ST, ,+ means ST*=2) 17 * a abs 18 * n unary negation 16 19 * 17 20 * NOT YET IMPLEMENTED: 18 21 * 1 the literal one (used to initialize a product, for example) 19 * sc ST(0) = sine(ST(0)) ST(0) = cos(ST(0))20 * q ST(0) = sqrt(ST(0))22 * sc sin, cos 23 * q sqrt 21 24 */ 22 25 … … 57 60 return "0" ~ doVisit(this_,args[0]) ~ doVisit(this_, args[1]) ~ "*+"; 58 61 } 62 if (func=="sum") return "0" ~ doVisit(this_, args[0]) ~ "+"; 63 if (func=="abs") return doVisit(this_,args[0]) ~ "a"; 59 64 assert(0, "BLADE ICE: Unsupported"); 60 65 } 61 66 ReturnType onVisitPrefix(This this_, char [] op, char [] expr) { 67 if (op=="-") return doVisit(this_, expr) ~ "n"; // unary minus 62 68 assert(0, "BLADE ICE: Unsupported"); 63 69 } … … 134 140 } 135 141 if (func=="sum") return "0" ~ doVisit(this_, args[0]) ~ "+"; 142 if (func=="abs") return doVisit(this_,args[0]) ~ "a"; 136 143 assert(0, "BLADE ICE: Unsupported"); 137 144 } 138 145 ReturnType onVisitPrefix(This this_, char [] op, char [] expr) { 146 if (op=="-") return doVisit(this_, expr) ~ "n"; // unary minus 139 147 assert(0, "BLADE ICE: Unsupported"); 140 148 } … … 157 165 return second ~ first ~ "="; 158 166 } 167 if (second == first) return first ~ "," ~ op; 159 168 160 169 // SSE OPTIMISATION #1 trunk/blade/SyntaxTree.d
r157 r171 178 178 } 179 179 if (symbol.length>0) { 180 // Find it, and add it to the symbol table if not already present. 181 int k=0; 182 while(k < symbols.length && symbol!=symbols[k]) ++k; 183 code ~= cast(char)('A' + k); 184 if (k==symbols.length) { 185 symbols ~= symbol; 186 } 187 symbol=""; 180 // Add the new symbol to the symbol table 181 code ~= cast(char)('A' + symbols.length); 182 symbols ~= symbol; 183 symbol = ""; 188 184 } 189 185 if (c=='.') { // it was opSlice. Skip the next . as well. … … 217 213 return symbols; 218 214 } 219 220 215 221 216 // ==== SYNTAX PASS ====
