Changeset 108
- Timestamp: 09/09/07 15:57:44 (1 year ago)
- Files:
  - trunk/blade/Blade.d (modified) (28 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/blade/Blade.d
r107 r108 71 71 // ------------------------------------------------ 72 72 73 /// Convert tuple to string.74 ///75 /// This is only necessary because CTFE functions cannot iterate over the elements76 /// of a tuple. The solution is to use templates to create a string representation77 /// of the types in the tuple.78 template vectorTupleToString(X...)79 {80 static if (X.length==0) const char [][] vectorTupleToString = [];81 else const char [][] vectorTupleToString = vectorTupleToString!(X[0..$-1]) ~ [X[$-1].stringof];82 }83 84 unittest {85 static assert( vectorTupleToString!(real [], double) == ["real[]", "double"]);86 static assert( vectorTupleToString!() == cast(char[][])([]));87 }88 89 90 73 /// Determine the (tensor) rank of type T; ie 0 if T is a scalar, 1 for vector, 91 74 /// 2 for matrix, 3 for rank-3 tensor, etc … … 148 131 } 149 132 150 151 133 // ------------------------------------------------ 152 134 // Convert infix string to postfix … … 173 155 } 174 156 175 /// Returns the (tensor) rank of the expression expr. 176 /// 177 /// Params: 178 /// expr Placeholder expression (A,B,... correspond to tuple[0],[1],...) 179 /// rank The rank of each tuple member A, B, C, ... 157 /** Returns the (tensor) rank of the expression expr. 158 * 159 * Params: 160 * expr Placeholder expression (A,B,... correspond to tuple[0],[1],...) 161 * rank The rank of each tuple member A, B, C, ... 162 */ 180 163 int exprRank(char [] expr, int [] rank) 181 164 { … … 203 186 } 204 187 205 /// Returns the return type of the expression expr. 206 /// 207 /// Params: 208 /// expr Placeholder expression (A,B,... correspond to tuple[0],[1],...) 209 /// T Every type in the expression 188 /** Returns the resultant element type of the tensor expression expr. 189 * 190 * Note that since D doesn't have array operations, the expression is not 191 * normally valid D code. 192 * 193 * Params: 194 * expr Placeholder expression (A,B,... correspond to tuple[0],[1],...) 
195 * T Every type in the expression 196 */ 210 197 template exprElementType(char [] expr, T...) 211 198 { … … 234 221 /// Converts an infix string into postfix. 235 222 /// Apply x87-specific optimisations during the conversion. 236 char [] makePostfixForX87(char [] operations, char [][] typelist, int[] ranklist =null)223 char [] makePostfixForX87(char [] operations, char [][] typelist, int[] ranklist) 237 224 { 238 225 if (operations.length==1) return operations; … … 252 239 if (first[0]=='(') { 253 240 first = makePostfixForX87(first[1..first.length-1], typelist, ranklist); 254 } 241 }else assert(first.length<2, "Missing () in expression: " ~ first); 255 242 if (second[0]=='(') { 256 243 second = makePostfixForX87(second[1..second.length-1], typelist, ranklist); 257 } 244 }else assert(second.length<2, "Missing () in expression: " ~ second); 258 245 if (op=="=") { 259 246 return second ~ first ~ "="; … … 273 260 // When an operation is performed between a real[] and a non-real[], 274 261 // we want to have the real[] being the one which is loaded first. 
275 if (second.length==1 && typelist[second[0]-'A']=="real []") {262 if (second.length==1 && typelist[second[0]-'A']=="real" && ranklist[second[0]-'A']==1) { 276 263 return second ~ first ~ oprvs; 277 264 } … … 282 269 283 270 unittest { 284 assert(makePostfixForX87("A=B", vectorTupleToString!(double[], double))=="BA="); 285 assert(makePostfixForX87("A+(B*C)", vectorTupleToString!(int, int, int))=="BC*A+"); 286 assert(makePostfixForX87("(B*C)+A", vectorTupleToString!(double[], float[], float[]))=="BC*A+"); 287 assert(makePostfixForX87("(B*C)+A", vectorTupleToString!(real[], float[], float[]))=="ABC*+"); 288 assert(makePostfixForX87("A-(B*C)", vectorTupleToString!(double[], float, float))=="BC*A_"); 289 assert(makePostfixForX87("(B*C)-A", vectorTupleToString!(float[], float, float))=="BC*A-"); 290 assert(makePostfixForX87("(B*C)-A", vectorTupleToString!(real[], float, float))=="ABC*_"); 291 assert(makePostfixForX87("C+=((B*C)-A)", vectorTupleToString!(real[], float, float))=="CABC*_+C="); 292 assert(makePostfixForX87("C-=((B*C)-A)", vectorTupleToString!(real[], float, float))=="CABC*_-C="); 293 assert(makePostfixForX87("C-=(B*A)", vectorTupleToString!(real, float, float[])) =="BA*C_C="); 294 assert(makePostfixForX87("C-=(B*A)", vectorTupleToString!(real, float, real[])) =="BA*C_C="); 295 assert(makePostfixForX87("((A*B)+(C*D))+(E*F)", vectorTupleToString!(int, int, int))=="EF*AB*CD*++"); 271 assert(makePostfixForX87("A=B", elementTupleToString!(double, double),[1,1])=="BA="); 272 assert(makePostfixForX87("(B*C)+A", elementTupleToString!(double, float, float),[1,1,1])=="BC*A+"); 273 assert(makePostfixForX87("(B*C)+A", elementTupleToString!(real, float, float),[1,1,1])=="ABC*+"); 274 assert(makePostfixForX87("A-(B*C)", elementTupleToString!(double, float, float),[1,0,0])=="BC*A_"); 275 assert(makePostfixForX87("(B*C)-A", elementTupleToString!(float, float, float),[1,0,0])=="BC*A-"); 276 assert(makePostfixForX87("(B*C)-A", elementTupleToString!(real, float, 
float),[1,0,0])=="ABC*_"); 277 assert(makePostfixForX87("C+=((B*C)-A)", elementTupleToString!(real, float, float),[1,0,1])=="CABC*_+C="); 278 assert(makePostfixForX87("C-=((B*C)-A)", elementTupleToString!(real, float, float),[1,0,1])=="CABC*_-C="); 279 assert(makePostfixForX87("C-=(B*A)", elementTupleToString!(real, float, float),[1,0,1]) =="BA*C_C="); 280 assert(makePostfixForX87("C-=(B*A)", elementTupleToString!(real, float, real),[1,0,1]) =="BA*C_C="); 281 assert(makePostfixForX87("((A*B)+(C*D))+(E*F)", elementTupleToString!(int, int, int),[0,0,0])=="EF*AB*CD*++"); 296 282 297 283 } … … 316 302 if (first[0]=='(') { 317 303 first = makePostfixForSSE(first[1..first.length-1], ranklist); 318 } 304 }else assert(first.length<2, "Missing () in expression: " ~ first); 319 305 if (second[0]=='(') { 320 306 second = makePostfixForSSE(second[1..second.length-1], ranklist); 321 } 307 }else assert(second.length<2, "Missing () in expression: " ~ second); 322 308 if (op=="=") { 323 309 return second ~ first ~ "="; … … 392 378 } 393 379 394 int vectorNum(char [][] typelist, char var)395 {396 int numVecs=0;397 for (int i=0; i<var-'A'; ++i) {398 if (typelist[i]=="real[]" || typelist[i]=="double[]" || typelist[i]=="float[]") ++numVecs;399 }400 return numVecs;401 }402 403 404 int realScalarNum(char [][] typelist, char var)405 {406 int k=0;407 for (int i=0; i<var-'A'; ++i) {408 if (typelist[i]=="real") ++k;409 }410 return k;411 }412 413 int scalarNum(char [][]typelist, char var)414 {415 int k=0;416 for (int i=0; i<var-'A'; ++i) {417 if (typelist[i]=="real" || typelist[i]=="double" || typelist[i]=="float") ++k;418 }419 return k;420 }421 422 423 380 char [] operandSize(char [] typestr) 424 381 { 425 382 switch(typestr) { 426 case "real": 427 case "real[]": return "real ptr "; 428 case "double": 429 case "double[]": return "double ptr "; 430 case "float": 431 case "float[]": return "float ptr "; 383 case "real": return "real ptr "; 384 case "double": return "double ptr "; 385 case 
"float": return "float ptr "; 432 386 default: 433 387 assert(0, typestr); … … 452 406 { 453 407 switch (typestr) { 454 case "double[]":455 408 case "double": return "8"; 456 case "float[]":457 409 case "float": return "4"; 458 case "real[]":459 410 case "real": return REALSIZE; 460 411 } … … 469 420 } 470 421 return numVecs; 422 } 423 424 int realScalarNum(char [][] typelist, int [] ranklist, char var) 425 { 426 int k=0; 427 for (int i=0; i<var-'A'; ++i) { 428 if (ranklist[i]==0 && typelist[i]=="real") ++k; 429 } 430 return k; 471 431 } 472 432 … … 489 449 490 450 // Is this expression simple enough for the x87 code generator? 491 bool isX87AsmPossible(char [][] typelist, char [] operations) {451 bool isX87AsmPossible(char [][] typelist, int [] ranklist, char [] operations) { 492 452 version (D_InlineAsm_X86) { 493 453 // Are there enough index registers? 494 if (countVectors( typelist) > vectorRegister.length) return false;454 if (countVectors(ranklist) > vectorRegister.length) return false; 495 455 // Does it contain any types we can't deal with? 456 foreach(r; ranklist) { 457 if (r>1) return false; 458 } 496 459 foreach(ch; typelist) { 497 460 // can only do float, double, and 80-bit vectors, and scalars. 498 if (ch!="real []" && ch!="double[]" && ch!="float[]" && ch!="real" && ch!="double" && ch!="float") return false;461 if (ch!="real" && ch!="double" && ch!="float") return false; 499 462 } 500 463 // BUG: should also check if it will overflow the FPU stack … … 539 502 540 503 // indexed by i. 
541 char [] indexedVector(char [][] typelist, char var)542 { 543 if (typelist[var-'A']=="real []") return " real ptr [" ~ vectorRegister[vectorNum(typelist, var)] ~ "]";504 char [] indexedVector(char [][] typelist, int [] ranklist, char var) 505 { 506 if (typelist[var-'A']=="real") return " real ptr [" ~ vectorRegister[vectorNum(ranklist, var)] ~ "]"; 544 507 return operandSize(typelist[var-'A']) ~ "[" ~ 545 vectorRegister[vectorNum( typelist, var)] ~ " + " ~ vectorSize(typelist[var-'A']) ~ "*EAX]";508 vectorRegister[vectorNum(ranklist, var)] ~ " + " ~ vectorSize(typelist[var-'A']) ~ "*EAX]"; 546 509 } 547 510 548 511 // indexed by i-1 549 char [] indexedVectorPrev(char [][] typelist, char var)512 char [] indexedVectorPrev(char [][] typelist, int [] ranklist, char var) 550 513 { 551 514 char [] stride = " - " ~ vectorSize(typelist[var-'A']); 552 if (typelist[var-'A'] == "real []") return " real ptr [" ~ vectorRegister[vectorNum(typelist, var)] ~ stride ~ "]";515 if (typelist[var-'A'] == "real") return " real ptr [" ~ vectorRegister[vectorNum(ranklist, var)] ~ stride ~ "]"; 553 516 return operandSize(typelist[var-'A']) ~ "[" ~ 554 vectorRegister[vectorNum( typelist, var)] ~ " + " ~ vectorSize(typelist[var-'A']) ~ "*EAX" ~ stride ~ "]";517 vectorRegister[vectorNum(ranklist, var)] ~ " + " ~ vectorSize(typelist[var-'A']) ~ "*EAX" ~ stride ~ "]"; 555 518 } 556 519 … … 560 523 } 561 524 562 char [] indexedVectorWithStride(char [][] typelist, char var, int stride)525 char [] indexedVectorWithStride(char [][] typelist, int [] ranklist, char var, int stride) 563 526 { 564 527 char [] stridestr = " - " ~ vectorSize(typelist[var-'A']) ~ "*" ~ itoa(stride); 565 if (typelist[var-'A'] == "real []") return " real ptr [" ~ vectorRegister[vectorNum(typelist, var)] ~ stridestr ~ "]";528 if (typelist[var-'A'] == "real") return " real ptr [" ~ vectorRegister[vectorNum(ranklist, var)] ~ stridestr ~ "]"; 566 529 return operandSize(typelist[var-'A']) ~ "[" ~ 567 vectorRegister[vectorNum( 
typelist, var)] ~ " + " ~ vectorSize(typelist[var-'A']) ~ "*EAX" ~ stridestr ~ "]";530 vectorRegister[vectorNum(ranklist, var)] ~ " + " ~ vectorSize(typelist[var-'A']) ~ "*EAX" ~ stridestr ~ "]"; 568 531 } 569 532 570 533 // Some functions to grab information from the typestring. 571 572 bool isVector(char [] typestr)573 {574 return typestr=="double[]" || typestr=="real[]" || typestr=="float[]";575 }576 577 // Return the first index in the tuple which is of vector type578 int findFirstVector(char [][] typelist)579 {580 for (int i=0; i< typelist.length;++i) {581 if (isVector(typelist[i])) return i;582 }583 return 0;584 }585 534 586 535 int findFirstVector(int [] ranklist) … … 592 541 } 593 542 543 594 544 // Count the number of vectors in the typestring 595 int countVectors( char [][] typelist)545 int countVectors(int[] ranklist) 596 546 { 597 547 int numVecs=0; 598 for (int i=0; i< typelist.length; ++i) {599 if ( isVector(typelist[i])) ++numVecs;548 for (int i=0; i<ranklist.length; ++i) { 549 if (ranklist[i]==1) ++numVecs; 600 550 } 601 551 return numVecs; … … 648 598 649 599 */ 650 char [] generateCodeForAsmX87(char [][] typelist, char[] ranklist, char [] infixOperations, char cumulatingOp=0)651 { 652 char [] operations = makePostfixForX87(infixOperations, typelist );600 char [] generateCodeForAsmX87(char [][] typelist, int [] ranklist, char [] infixOperations, char cumulatingOp=0) 601 { 602 char [] operations = makePostfixForX87(infixOperations, typelist, ranklist); 653 603 char [] result=""; 654 604 char [] incrementRealVectors=""; … … 658 608 // Create local variables for pointers to vectors (avoid bug #1125) 659 609 int vecnum = 0; 660 for (int i=0; i< typelist.length;++i) {661 if ( isVector(typelist[i])){610 for (int i=0; i< ranklist.length;++i) { 611 if (ranklist[i]==1){ 662 612 result~= " auto vec" ~ itoa(i) ~ " = values[" ~itoa(i) ~"].ptr; // " ~ cast(char)('A'+i)~ \n; 663 if (typelist[i]=="real []") {613 if (typelist[i]=="real") { 664 614 
incrementRealVectors ~= " add " ~ vectorRegister[vecnum] ~ ", " ~ REALSIZE ~ ";\n"; 665 615 } 666 616 ++vecnum; 667 } 668 } 669 result ~= " int veclength = values[" ~itoa(findFirstVector(typelist)) ~"].length;\n"; 617 } else result~= " alias values["~itoa(i)~"] val" ~ itoa(i) ~ "; // " ~ cast(char)('A'+i)~ \n; 618 } 619 620 result ~= " int veclength = values[" ~itoa(findFirstVector(ranklist)) ~"].length;\n"; 670 621 671 622 int numScalarsOnStack=0; … … 679 630 int numvecs=0; 680 631 int numconsts=0; 681 for (int i=0; i< typelist.length; ++i) {682 if ( isVector(typelist[i])) {683 if (typelist[i]=="real []") {632 for (int i=0; i<ranklist.length; ++i) { 633 if (ranklist[i]==1) { 634 if (typelist[i]=="real") { 684 635 result ~= " mov " ~ vectorRegister[numvecs] ~ ", vec" ~ itoa(i) ~ ";"; 685 636 } else { … … 718 669 719 670 if (operations.length>2 && operations[$-1]=='=') { 720 storage ~= " fstp " ~ indexedVectorPrev(typelist, operations[$-2] ) ~ "; // " ~ operations[done..done+2] ~ \n;671 storage ~= " fstp " ~ indexedVectorPrev(typelist, ranklist, operations[$-2] ) ~ "; // " ~ operations[done..done+2] ~ \n; 721 672 operations=operations[0..$-2]; 722 673 } … … 733 684 // load a vector onto the FPU stack, to begin a new subexpression. 734 685 int u = operations[done]-'A'; 735 next = " fld " ~ indexedVector(typelist, operations[done] ) ~ "; //" ~ operations[done] ~\n;686 next = " fld " ~ indexedVector(typelist, ranklist, operations[done] ) ~ "; //" ~ operations[done] ~\n; 736 687 mainbody ~= next; firstbody ~= next; 737 688 ++done; 738 689 numOnStack++; 739 } else if ( isVector(typelist[operations[done]-'A'])) {690 } else if (ranklist[operations[done]-'A']==1) { 740 691 // An operation will be performed between the stack top and a vector. 741 692 // If it's a float or double, we can combine the load+arithmetic op 742 693 // into a single instruction. 
743 if (typelist[operations[done]-'A']=="real []") {694 if (typelist[operations[done]-'A']=="real") { 744 695 // 80-bit vectors must be loaded onto the FPU stack first 745 next = " fld real ptr [" ~ vectorRegister[vectorNum( typelist, operations[done])] ~ "]; //" ~ operations[done] ~ \n696 next = " fld real ptr [" ~ vectorRegister[vectorNum(ranklist, operations[done])] ~ "]; //" ~ operations[done] ~ \n 746 697 ~ " " ~ opToX87[operations[done+1]] ~ "p ST(1), ST; //" ~ operations[done+1] ~\n; 747 698 } else { // floats and doubles can be used directly 748 699 next = " " ~ opToX87[operations[done+1]] ~ " " 749 ~ indexedVector(typelist, operations[done] ) ~ "; //" ~ operations[done..done+2] ~ \n;700 ~ indexedVector(typelist, ranklist, operations[done] ) ~ "; //" ~ operations[done..done+2] ~ \n; 750 701 } 751 702 mainbody ~= next; firstbody ~= next; … … 754 705 if (typelist[operations[done]-'A']=="real") { 755 706 // Multiply by real scalar, which is already on the stack. Note that there's an extra item on the stack when we're in the body of the loop. 756 firstbody ~= " fmul ST, ST(" ~ itoa(numOnStack + numScalarsOnStack - realScalarNum(typelist, operations[done]-'A')-1) ~ "); // * " ~ operations[done] ~ \n;757 mainbody ~= " fmul ST, ST(" ~ itoa(1 + numOnStack + numScalarsOnStack - realScalarNum(typelist, operations[done]-'A')-1) ~ "); // * " ~ operations[done] ~ \n;707 firstbody ~= " fmul ST, ST(" ~ itoa(numOnStack + numScalarsOnStack - realScalarNum(typelist, ranklist, operations[done]-'A')-1) ~ "); // * " ~ operations[done] ~ \n; 708 mainbody ~= " fmul ST, ST(" ~ itoa(1 + numOnStack + numScalarsOnStack - realScalarNum(typelist, ranklist, operations[done]-'A')-1) ~ "); // * " ~ operations[done] ~ \n; 758 709 } else { 759 710 // For scalar float or double values, we can multiply directly, saving one slot on the FP stack. 
760 next = " fmul " ~ operandSize(typelist[operations[done]-'A']) ~ "val ues[" ~ itoa(operations[done]-'A') ~"];\n";711 next = " fmul " ~ operandSize(typelist[operations[done]-'A']) ~ "val" ~ itoa(operations[done]-'A') ~";\n"; 761 712 mainbody ~= next; firstbody ~= next; 762 713 } … … 775 726 else result ~= storage; 776 727 777 result ~= "L2: \n" 778 728 result ~= "L2: \n" 779 729 ~ incrementRealVectors // Update the counters 780 730 ~ " inc EAX;\n jnz L1;\n"; … … 817 767 result~= " auto vec" ~ itoa(i) ~ " = values[" ~itoa(i) ~"].ptr; // " ~ cast(char)('A'+i)~ \n; 818 768 ++vecnum; 819 } else result~= " auto val" ~ itoa(i) ~ " = values["~itoa(i)~"]; "\n;769 } else result~= " auto val" ~ itoa(i) ~ " = values["~itoa(i)~"]; // " ~ cast(char)('A'+i)~ \n; 820 770 } 821 771 result ~= " int veclength = values[" ~itoa(findFirstVector(ranklist)) ~"].length;\n"; … … 950 900 } 951 901 952 static if (isX87AsmPossible(typelist, expr)) {902 static if (isX87AsmPossible(typelist, ranklist, expr)) { 953 903 // pragma(msg, "x87 is possible"); 954 904 // pragma(msg, makePostfixForX87(expr, typelist)); … … 966 916 writefln("Param: ", K); 967 917 } 968 const typelist = vectorTupleToString!(Values);918 const typelist = elementTupleToString!(Values); 969 919 const ranklist = TupleRank!(Values); 970 920 static if (rawexpr.length>6 && rawexpr[0..4]=="dot(" && rawexpr[$-1]==')') { … … 983 933 } 984 934 } 985 986 935 987 936 import std.stdio; … … 1009 958 r[i]= q[i]*2213.3L; 1010 959 } 1011 // SSEVEC!("A+=B+C")(a, d, d); 1012 SSEVEC!("A=(B*C)")(a, d, 2.354); 1013 // SSEVEC!("A=(B*C)")(a, d, 2.0); 960 SSEVEC!("A=(B*C)")(a, d, 2.0); 1014 961 writefln("a=", a); 1015 962 /*
