Changeset 93

Show
Ignore:
Timestamp:
04/05/07 15:35:52 (2 years ago)
Author:
Don Clugston
Message:

Added a code generator for D code. Preliminary use of known vector length. Code cleanup. Better comments.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/mathextra/Blade.d

    r92 r93  
    1414*  - Supports any mix of vector addition, subtraction, dot product, and multiplication 
    1515*    by a real or pure imaginary scalar. 
     16*  - Generates either x87 asm code or pure D, depending on the complexity of 
     17*    the expression, and the availability of inline asm. 
    1618*  - 80-bit precision is used whenever possible. 
    1719*  - Supports mixed-length operations (eg, real[] + double[] + float[]). 
    1820*  - Supports both real and imaginary vectors, and detects type mismatches between them. 
    1921*    (Complex numbers are not yet supported). 
    20 *  - When static arrays are used, mismatches in array length are detected at compile time. 
     22*  - When static arrays are used, mismatches in array length are detected 
     23*    at compile time. 
    2124* 
    2225* BUGS/ FUTURE DIRECTIONS: 
     
    3033*  - Not optimal for the case of multiple real vectors (they could share a counter). 
    3134*  - Not optimal for the case where all vectors are 80-bit (two counters are used, but only is required). 
    32 *  - Doesn't take advantage of length being known at compile time (loop unrolling 
     35*  - Doesn't take full advantage of length being known at compile time (loop unrolling 
    3336*     is possible). 
    3437*  - Doesn't use EBP register -- this would allow an extra vector in expressions. 
    3538*  - Doesn't support complex numbers. 
    36 *  - Need a D and an SSE2 version. 
     39*  - Need an SSE2 version. 
    3740* 
    3841* THEORY: 
     
    5659} 
    5760 
    58  
    5961char [] itoa(ulong x) 
    6062{ 
     
    8183//    PART 1 -- Expression Templates 
    8284// ------------------------------------------- 
     85// Unlike expression templates in other languages, there is no 
     86// 'abstract syntax tree' constructed from multiple templates. Instead, 
     87// the expression is constructed as a normal, human-readable text string 
     88// (for example, "a+(b*c-d)"). 
     89// This string is a template parameter for a struct, which 
     90// contains a tuple of all the arguments used in the expression. 
     91// The advantage of this approach is that it's very easy to manipulate the 
     92// text string. 
    8393 
    8494// When we join two expressions together, we need to 'shuffle' all the variables 
    85 // in the first one along. 
     95// in the second one along. 
    8696char [] joinExpressions(char [] op, char [] first, char [] second) 
    8797{ 
     
    109119} 
    110120 
     121// Check for type mismatches when performing vector assignment. 
    111122template CompatibleVectors(A, B) 
    112123{ 
     
    119130 
    120131/** 
    121  * A proxy for a vector operation returning a vector type. 
     132 * A proxy for a vector operation returning a vector type; conceptually, it 
     133 * is an array of type BaseType[]. 
    122134 * Stores all the arguments as a tuple, and the operations 
    123135 * as a character string. 
    124136 * Note that if operations=="a", it's a single vector; otherwise, it's a temporary. 
    125  * 'knownlength' is the length of vector, when known at compile time. 
    126  * If 'knownlength' == 0, it is unknown at compile time. 
     137 * 'knownlength' is the length of vector, when known at compile time; 
     138 *   if 'knownlength' == 0, it is unknown at compile time. 
    127139 */ 
    128140struct VectorExpr(BaseType_, char[] operations, int knownlength, B...) { 
     
    144156    } 
    145157public: 
     158    // Operations which are valid for both vectors and temporaries. 
     159    // All they do is update the expression string and the tuple, creating a new 
     160    // VectorExpr. The existing VectorExpr will not be used again. 
    146161    static if (operations.length>1 && operations[$-2]=='*') { 
    147162        // Optimisation: already a scalar multiply, so constant fold it. 
     
    168183 
    169184    // The opAssign operations are only valid for single vectors, not for temporaries 
     185    // They actually perform the calculation. 
    170186  static if (operations=="a") { 
    171187    void opAssign(A)(A expr) { 
    172188        static assert(CompatibleVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] = " ~ A.BaseType.stringof ~ "[]"); 
    173189        static assert(len==0 || expr.len == 0 || len == expr.len, "Vector lengths must match"); 
    174         performOperation!(expr.ops, "=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 
     190        performOperation!(void, expr.ops, "=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 
    175191    } 
    176192    void opAddAssign(A)(A expr) { 
    177193        static assert(CompatibleVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] += " ~ A.BaseType.stringof ~ "[]"); 
    178194        static assert(len==0 || expr.len == 0 || len == expr.len, "Vector lengths must match"); 
    179         performOperation!(expr.ops, "+=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 
     195        performOperation!(void, expr.ops, "+=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 
    180196    } 
    181197    void opSubAssign(A)(A expr) { 
    182198        static assert(CompatibleVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] -= " ~ A.BaseType.stringof ~ "[]"); 
    183199        static assert(len==0 || expr.len == 0 || len == expr.len, "Vector lengths must match"); 
    184         performOperation!(expr.ops, "-=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 
     200        performOperation!(void, expr.ops, "-=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 
    185201    } 
    186202    void opMulAssign(A)(A w) { // Use a template to avoid unnecessary code generation 
    187203        static assert(is (A: real), "Vector type mismatch in " ~ BaseType.stringof ~ "[] *= real"); 
    188         performOperation!("a", "*=", knownlength, real, B[0])(w, values); 
     204        performOperation!(void, "a", "*=", knownlength, real, B[0])(w, values); 
    189205    } 
    190206  } 
     
    210226    static if (is(A.BaseType: ireal) && is(B.BaseType:ireal)) 
    211227        const MUL = -1; 
    212     else static if (is(typeof(A.BaseType*B.BaseType): ireal)) 
    213         const MUL = 1i; 
    214228    else const MUL = 1; 
    215     return MUL * performOperation!(joinExpressions(".", A.ops, B.ops), ".", a.len==0? b.len:a.len, A.ValueTuple, B.ValueTuple)(a.values, b.values); 
    216 
    217  
    218 real performOperation(char [] operations, char [] finaloperation, int knownlength, X...)(X expr) 
    219 
    220     const char [] tupstr = vectorTupleToString!(X); 
    221   version(BladeDebug) { 
    222     pragma(msg, finaloperation ~ " " ~ operations ~ 
    223         "\nPostfix: " ~ makePostfixForX87(operations, tupstr) ~ "\nTuple: " ~ tupstr); 
    224     static if (knownlength!=0) pragma(msg, "Length is known!"); 
    225  
    226     const char [] qqq = makeAsmX87(tupstr, makePostfixForX87(operations, tupstr), finaloperation); 
    227     pragma(msg, "Generated code:"\n ~ qqq); 
     229    return MUL * performOperation!(typeof(A.BaseType*B.BaseType), joinExpressions(".", A.ops, B.ops), ".", a.len==0? b.len:a.len, A.ValueTuple, B.ValueTuple)(a.values, b.values); 
     230
     231 
     232/** This the only place where any code is generated. 
     233 * Firstly, runtime checks are made of vector lengths. 
     234 */ 
     235ReturnType performOperation(ReturnType, char [] operations, char [] finaloperation, int knownlength, X...)(X expr) 
     236
     237  const char [] tupstr = vectorTupleToString!(X); 
     238  mixin(generateVectorLengthChecks(tupstr)); 
     239          version(BladeDebug) { 
     240            pragma(msg, finaloperation ~ " " ~ operations ~ 
     241                "\nPostfix: " ~ makePostfixForX87(operations, tupstr) ~ "\nTuple: " ~ tupstr); 
     242            static if (knownlength!=0) pragma(msg, "Length is known!"); 
     243 
     244            const char [] qqq = generateCodeForAsmX87(knownlength, tupstr, makePostfixForX87(operations, tupstr), finaloperation); 
     245            pragma(msg, "Generated code:"\n ~ qqq); 
     246          } 
     247 
     248  // Decide which code generator to use, based on expression complexity and 
     249  // assembler availability. 
     250  if (isX87AsmPossible(tupstr, operations)) { 
     251     mixin(generateCodeForAsmX87(knownlength, tupstr, makePostfixForX87(operations, tupstr), finaloperation)); 
     252  } else { 
     253     mixin(generateCodeForD!(ReturnType)(knownlength, tupstr, finaloperation,operations)); 
    228254  } 
    229255 
    230     mixin(makeAsmX87(tupstr, makePostfixForX87(operations, tupstr), finaloperation)); 
    231 
    232  
     256
    233257 
    234258// ------------------------------------------------ 
    235 //   PART 2 -- Convert expression to postfix form + convert tuple to string. 
     259//   PART 2 -- Convert tuple to string. 
     260// ------------------------------------------------ 
     261// This is only necessary because CTFE functions cannot iterate over the elements 
     262// of a tuple. The solution is to use templates to create a string representation 
     263// of the types in the tuple. 
     264 
     265template singleType(A) 
     266
     267         static if (is(A == real[])  || is(A==ireal[]))    const char [] singleType = "R"; 
     268    else static if (is(A == double[])|| is(A == idouble[]))const char [] singleType = "D"; 
     269    else static if (is(A == float[]) || is(A==ifloat[]))   const char [] singleType = "F"; 
     270    else static if (is(A == real))   const char [] singleType = "S"; 
     271    else const char [] singleType = "?"; 
     272
     273 
     274// A CTFE function can't randomly index a tuple, so convert the type information 
     275// into a char[]. 
     276template vectorTupleToString(X...) 
     277
     278    static if (X.length==1) const char [] vectorTupleToString = singleType!(X[0]); 
     279    else  const char [] vectorTupleToString = singleType!(X[0]) ~ vectorTupleToString!(X[1..$]); 
     280
     281 
     282// And some functions to grab information from the typestring. 
     283 
     284// Return the first index in the tuple which is of vector type 
     285int findFirstVector(char [] typelist) 
     286
     287    for (int i=0; i< typelist.length;++i) { 
     288        if (isVector(typelist[i])) return i; 
     289    } 
     290
     291 
     292bool isVector(char var) 
     293
     294    return (var=='R' || var=='D' || var=='F'); 
     295
     296 
     297// --------------------------------------- 
     298//   PART 3 -- Mixins to generate D code 
     299// --------------------------------------- 
     300 
     301// Return a string to mixed in, which asserts that all vectors are of 
     302// equal length. 
     303char [] generateVectorLengthChecks(char [] typelist) 
     304
     305    int firstvec = findFirstVector(typelist); 
     306    char [] result=""; 
     307    for (int i=firstvec+1; i< typelist.length;++i) { 
     308        if (isVector(typelist[i])) { 
     309            result ~= "  assert(expr[" ~ itoa(i) ~ "].length" 
     310                ~ " == expr[" ~ itoa(firstvec) ~ `].length, "vectors are different lengths");`\n; 
     311        } 
     312    } 
     313    return result; 
     314
     315 
     316/** Generate D code to evaluate the given vector expression 
     317 * 
     318 */ 
     319char [] generateCodeForD(RetType)(int knownlength, char [] typelist, char [] finalop, char [] operation) 
     320
     321    char [] iter=""; 
     322    foreach(int i, ch; operation) { 
     323        if (ch=='.') { iter~="*"; } else 
     324        if (ch>='a' && ch<='z') { 
     325            iter ~= "expr[" ~ itoa(ch-'a') ~ "]"; 
     326            if (isVector(typelist[ch-'a'])) iter~="[i]"; 
     327        } else iter ~= ch; 
     328    } 
     329    char [] result; 
     330    if (knownlength>0) result = "int length=" ~ itoa(knownlength) ~ ";\n"; 
     331    else result = " int length = expr[" ~ itoa(findFirstVector(typelist))~ "].length;\n"; 
     332    if (finalop == ".") { 
     333        static if ( is(RetType: ireal)) result ~= " ireal sum=0.0i;\n"; 
     334                                   else result ~= " real sum=0.0;\n"; 
     335        result ~= " for (int i=0; i<length; ++i) {\n" 
     336            ~ "    sum+=" ~iter~";"~ "\n }\n return sum;\n"; 
     337        } else { 
     338        result ~= " for (int i=0; i<length; ++i){\n" 
     339            ~ "    expr[" ~ itoa(typelist.length-1L)~"][i]" ~ finalop ~ iter ~";\n }\n"; 
     340    } 
     341    return result; 
     342
     343 
     344// ------------------------------------------------ 
     345//   PART 4 -- Convert expression to postfix form 
    236346// ------------------------------------------------ 
    237347 
     
    292402    if (operations[x+1]=='-') { 
    293403        if (second[$-1]=='*' && first[$-1]!='*') { 
    294            return second ~ first ~ "~"; 
     404           return second ~ first ~ "_"; 
    295405        } 
    296406    } 
     
    302412    } 
    303413    if (second.length==1 && typelist[second[0]-'a']=='R' && operations[x+1]=='-') { 
    304            return second ~ first ~ "~"; 
     414           return second ~ first ~ "_"; 
    305415    } 
    306416    return first ~ second ~ operations[x+1..x+2]; 
    307417} 
    308418 
    309 template singleType(A) 
    310 { 
    311          static if (is(A == real[])  || is(A==ireal[]))    const char [] singleType = "R"; 
    312     else static if (is(A == double[])|| is(A == idouble[]))const char [] singleType = "D"; 
    313     else static if (is(A == float[]) || is(A==ifloat[]))   const char [] singleType = "F"; 
    314     else static if (is(A == real))   const char [] singleType = "S"; 
    315     else const char [] singleType = "?"; 
    316 } 
    317  
    318 // A CTFE function can't randomly index a tuple, so convert the type information 
    319 // into a char[]. 
    320 template vectorTupleToString(X...) 
    321 { 
    322     static if (X.length==1) const char [] vectorTupleToString = singleType!(X[0]); 
    323     else  const char [] vectorTupleToString = singleType!(X[0]) ~ vectorTupleToString!(X[1..$]); 
    324 } 
    325  
    326419// ------------------------------- 
    327 //   PART 3 -- Parsing 
     420//   PART 5 -- Mixins to generate x87 ASM code 
    328421// ------------------------------- 
    329422 
    330423bool isInstruction(char op) 
    331424{ 
    332     return (op=='+' || op=='*' || op=='-'|| op=='.'|| op=='~'); 
    333 
    334  
    335 bool isVector(char var) 
    336 
    337     return (var=='R' || var=='D' || var=='F'); 
    338 
    339  
    340 int numVectors(char [] typelist) 
     425    return (op=='+' || op=='*' || op=='-'|| op=='.'|| op=='_'); 
     426
     427 
     428// Count the number of vectors in the typestring 
     429int countVectors(char [] typelist) 
    341430{ 
    342431    int numVecs=0; 
     
    347436} 
    348437 
    349  
    350438int vectorNum(char [] typelist, char var) 
    351439{ 
     
    366454} 
    367455 
    368 // ------------------------------- 
    369 //   PART 4 -- Generate x87 ASM code 
    370 // ------------------------------- 
    371456 
    372457char [] operandSize(char var) 
     
    386471        case '+': return "fadd"; 
    387472        case '-': return "fsub"; 
    388         case '~': return "fsubr"; 
     473        case '_': return "fsubr"; 
    389474    } 
    390475} 
     
    406491// use EBX, ESI, and EDI. Finally, use the frame register EBP. 
    407492const char [][5] vectorRegister = ["EAX", "ECX", "EDX", "EBX", "EDI"]; 
     493 
     494// Is this expression simple enough for our code generator? 
     495bool isX87AsmPossible(char [] typelist, char [] operations) { 
     496  version (D_InlineAsm_X86) { 
     497        // Are there enough index registers? 
     498        if (countVectors(typelist) >= vectorRegister.length) return false; 
     499        // BUG: should also check if it will overflow the FPU stack 
     500        return true; 
     501  } else { 
     502      // Without an assembler, there's no chance! 
     503      return false; 
     504  } 
     505} 
    408506 
    409507// Create code to push all used vector registors. 
     
    473571 
    474572*/ 
    475 char [] makeAsmX87(char [] typelist, char [] operations, char [] finaloperation) 
     573char [] generateCodeForAsmX87(int knownlength, char [] typelist, char [] operations, char [] finaloperation) 
    476574{ 
    477575    char [] result=""; 
     
    479577 
    480578    // Create local variables for everything (avoid bug #1028) 
    481     // Also assert that all vectors are the same length 
    482     bool firstvec=true; 
    483579    int vecnum = 0; 
    484     int scalarnum = 0; 
    485580    for (int i=0; i< typelist.length;++i) { 
    486581        if (typelist[i]=='S'){ 
    487582            result~= "  real var" ~ itoa(i) ~ " = expr[" ~ itoa(i) ~ "];\n"; 
    488             ++scalarnum; 
    489583        } else { 
    490584            result~= "  auto vec" ~ itoa(i) ~ " = expr[" ~itoa(i) ~"].ptr;\n"; 
     
    492586                incrementRealVectors ~= "  add " ~ vectorRegister[vecnum] ~ ", " ~ REALSIZE ~ ";\n"; 
    493587            } 
    494             if (firstvec) { 
    495                 result~= "  int veclength = expr[" ~itoa(i) ~"].length;\n"; 
    496                 firstvec = false; 
    497             } else result~="  assert(veclength == expr[" ~ itoa(i) ~ `].length, "vectors are different lengths");`\n; 
    498588            ++vecnum; 
    499589        } 
    500590    } 
    501     assert(vecnum-1 < vectorRegister.length, "Too many vectors!"); 
     591    result ~= "  int veclength = expr[" ~itoa(findFirstVector(typelist)) ~"].length;\n"; 
     592 
    502593    bool isDotProduct = (operations[$-1]=='.'); 
    503594    int numScalarsOnStack=0; 
    504595 
    505     result~= \n"asm {"\n ~ pushRegisters(vecnum) ~ 
    506         "  mov ESI, veclength;"\n; // ESI will be the counter 
     596    result~= \n"asm {"\n ~ pushRegisters(vecnum); 
     597    // ESI will be the counter 
     598    if (knownlength>0) result~= "  mov ESI, " ~ itoa(knownlength) ~";\n"; 
     599    else result ~= "  mov ESI, veclength;"\n; 
    507600 
    508601    // Load all the vector pointers into registers, and push all the scalars onto the stack 
     
    528621 
    529622    if (isDotProduct) result ~= "  fldz;"\n; 
    530     result ~= "  xor ESI, ESI; "\n 
    531         "  sub ESI, veclength; // counter=-length"\n 
    532         "  jz short L3; // test for length==0"\n; 
     623    if (knownlength>0) result~= "  mov ESI, -" ~ itoa(knownlength) ~";\n"; 
     624    else { 
     625        result ~= "  xor ESI, ESI; "\n 
     626            "  sub ESI, veclength; // counter=-length"\n 
     627            "  jz short L3; // test for length==0"\n; 
     628    } 
    533629    if (!isDotProduct && operations.length==1 && finaloperation[0]=='*') { 
    534630            ++numScalarsOnStack; 
     
    542638    char [] mainbody = ""; 
    543639    char [] firstbody = ""; 
     640 
     641    // We need to keep track of how many things are on the FPU stack. 
     642    // Every time something is pushed, the indices of our variables change! 
    544643    int numOnStack = 0; // How much of the FP stack is being used? 
    545644