Changeset 97

Show
Ignore:
Timestamp:
04/12/07 17:20:00 (1 year ago)
Author:
Don Clugston
Message:

* No longer converts all scalars to reals/ireals (important for SSE support; also improves memory bandwidth). This also simplifies the expression template code.

* Added work-in-progress on article.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/blade/Blade.d

    r96 r97  
    8686// 'abstract syntax tree' constructed from multiple templates. Instead, 
    8787// the expression is constructed as a normal, human-readable text string 
    88 // (for example, "#a+(#b*#c-#d)"). 
     88// (for example, "#a+(#b*#c-#d)"). The expression arguments (vectors or scalars) 
     89// are represented by a # symbol followed by an alphabetic character, beginning 
     90// at #a. 
    8991// This string is a template parameter for a struct, which 
    9092// contains a tuple of all the arguments used in the expression. 
     
    122124 
    123125// Check for type mismatches when performing vector assignment. 
    124 template CompatibleVectors(A, B) 
     126template AssignableVectors(A, B) 
    125127{ 
    126128    static if(is (A:real) && is(B: real) 
    127129           || is (A:ireal) && is(B: ireal) 
    128130           || is (A:creal) && is(B: creal)) 
    129      const bool CompatibleVectors=true; 
    130     else const bool CompatibleVectors=false; 
     131     const bool AssignableVectors=true; 
     132    else const bool AssignableVectors=false; 
    131133} 
    132134 
     
    171173    } else { 
    172174        // trick: typeof(C*C) converts imag to real, but leaves real & complex unchanged. 
    173         JoinResult!(typeof(BaseType*C), "*", "#a", typeof(C*C*1.0L)) opMul(C)(C x) { 
     175        JoinResult!(typeof(BaseType*C), "*", "#a", typeof(C*C)) opMul(C)(C x) { 
    174176            static assert(is(C: real) || is(C:ireal) || is(C:creal), "Can only multiply by scalars"); 
    175177            static if (is(C: ireal)) { 
    176                 return JoinResult!(typeof(BaseType*C), "*", "#a", real)(values, x.im); 
    177             } else static if (is(C: real)) { 
    178                 return JoinResult!(BaseType, "*", "#a", real)(values, x); 
    179             } else { 
     178                return JoinResult!(typeof(BaseType*C), "*", "#a", typeof(C*C))(values, x.im); 
     179            } else 
    180180                return JoinResult!(typeof(BaseType*C), "*", "#a", C)(values, x); 
    181             } 
    182181        } 
    183182    } 
     
    193192  static if (operations=="#a") { 
    194193    void opAssign(A)(A expr) { 
    195         static assert(CompatibleVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] = " ~ A.BaseType.stringof ~ "[]"); 
     194        static assert(AssignableVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] = " ~ A.BaseType.stringof ~ "[]"); 
    196195        static assert(len==0 || expr.len == 0 || len == expr.len, "Vector lengths must match"); 
    197196        performOperation!(void, expr.ops, "=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 
    198197    } 
    199198    void opAddAssign(A)(A expr) { 
    200         static assert(CompatibleVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] += " ~ A.BaseType.stringof ~ "[]"); 
     199        static assert(AssignableVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] += " ~ A.BaseType.stringof ~ "[]"); 
    201200        static assert(len==0 || expr.len == 0 || len == expr.len, "Vector lengths must match"); 
    202201        performOperation!(void, expr.ops, "+=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 
    203202    } 
    204203    void opSubAssign(A)(A expr) { 
    205         static assert(CompatibleVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] -= " ~ A.BaseType.stringof ~ "[]"); 
     204        static assert(AssignableVectors!(BaseType,A.BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] -= " ~ A.BaseType.stringof ~ "[]"); 
    206205        static assert(len==0 || expr.len == 0 || len == expr.len, "Vector lengths must match"); 
    207206        performOperation!(void, expr.ops, "-=", len==0? expr.len : len, expr.ValueTuple, B[0])(expr.values, values); 
    208207    } 
    209     void opMulAssign(A)(A w) { // Use a template to avoid unnecessary code generation 
    210         static assert((is (BaseType: creal) && is(A:ireal) || is(A:creal)) || is (A: real), "Vector type mismatch in " ~ BaseType.stringof ~ "[] *= " ~ A.stringof); 
     208    void opMulAssign(A)(A w) { // *= is not allowed to change the vector type. 
     209        static assert(is (typeof(BaseType*A) : BaseType), "Vector type mismatch in " ~ BaseType.stringof ~ "[] *= " ~ A.stringof); 
    211210        performOperation!(void, "#a", "*=", knownlength, A, B[0])(w, values); 
    212211    } 
     
    227226 
    228227// Dot product of two vectors. 
    229 // Returns ireal if one of A or B is real, and the other is imaginary. 
     228// Note: Returns ireal if one of A or B is real, and the other is imaginary. 
    230229typeof(A.BaseType*B.BaseType) dot(A, B)(A a, B b) 
    231230{ 
     
    276275// of the types in the tuple. 
    277276 
     277// Create a single-character string representing the type A. 
    278278template singleType(A) 
    279279{ 
    280          static if (is(A == real[])  || is(A==ireal[]))    const char [] singleType = "R"; 
    281     else static if (is(A == creal[]) || is(A==cdouble[])||is(A==cfloat[])) const char [] singleType = "Z"; 
     280         static if (is(A == real[])  || is(A == ireal[]))    const char [] singleType = "R"; 
    282281    else static if (is(A == double[])|| is(A == idouble[]))const char [] singleType = "D"; 
    283     else static if (is(A == float[]) || is(A==ifloat[]))   const char [] singleType = "F"; 
    284     else static if (is(A == real) || is (A == ireal))   const char [] singleType = "S"; 
    285     else static if (is(A == creal))                     const char [] singleType = "C"; 
     282    else static if (is(A == float[]) || is(A == ifloat[]))   const char [] singleType = "F"; 
     283    else static if (is(A == creal[]) || is(A == cdouble[]) || is(A==cfloat[])) const char [] singleType = "Z"; 
     284 
     285    else static if (is(A == real)    || is (A == ireal))   const char [] singleType = "r"; 
     286    else static if (is(A == double)  || is (A == idouble))   const char [] singleType = "d"; 
     287    else static if (is(A == float)   || is (A == ifloat))   const char [] singleType = "f"; 
     288    else static if (is(A : creal))                     const char [] singleType = "z"; 
    286289    else const char [] singleType = "?"; 
    287290} 
     
    376379} 
    377380 
    378 // Converts an infix string into postfix. 
     381// Converts an infix string into postfix. Also strips off the # symbols. 
    379382// Apply x87-specific optimisations during the conversion. 
    380383char [] makePostfixForX87(char [] operations, char [] typelist) 
     
    397400    // result of a multiply. Since + is commutative, we can achieve this 
    398401    // by calculating the value with the multiply, before the other one. 
     402    // Note that there are a few cases that could still be improved, e.g. with 
     403    //    ((a*b)+(c*d))+(e*f),  all three multiplies could be performed 
     404    // before any of the additions. This would require stack rotation 
     405    // operations (can't be done with simple postfix), greatly increasing the 
     406    // complexity of the mini-compiler. 
    399407    if (operations[x+1]=='+') { 
    400408        if (second[$-1]=='*' && first[$-1]!='*') { 
     
    402410        } 
    403411    } 
    404     // We can also do the same thing with -, but we need to use fsubr 
    405     // instead of fsub. 
     412    // We can also do the same thing with -, but we'll need to use fsubr 
     413    // instead of fsub. We use _ to mean reversed subtraction. 
    406414    if (operations[x+1]=='-') { 
    407415        if (second[$-1]=='*' && first[$-1]!='*') { 
     
    449457} 
    450458 
    451 int scalarNum(char [] typelist, char var) 
     459int realScalarNum(char [] typelist, char var) 
    452460{ 
    453461    int k=0; 
    454462    for (int i=0; i<var-'a'; ++i) { 
    455         if (typelist[i]=='S') ++k; 
     463        if (typelist[i]=='r') ++k; 
    456464    } 
    457465    return k; 
     
    462470{ 
    463471    switch(var) { 
     472        case 'r': 
    464473        case 'R': return "real ptr "; 
     474        case 'd': 
    465475        case 'D': return "double ptr "; 
     476        case 'f': 
    466477        case 'F': return "float ptr "; 
    467478    } 
     
    503514        // Does it contain any types we can't deal with? 
    504515        foreach(ch; typelist) { 
    505             // can only do float, double, and 80-bit vectors and scalars. 
    506             if (ch!='R' && ch!='D' && ch!='F' && ch!='S') return false; 
     516            // can only do float, double, and 80-bit vectors, and scalars. 
     517            if (ch!='R' && ch!='D' && ch!='F' && ch!='r' && ch!='d' && ch!='s') return false; 
    507518        } 
    508519        // BUG: should also check if it will overflow the FPU stack 
     
    520531        // Does it contain any types we can't deal with? 
    521532        foreach(ch; typelist) { 
    522             // can only do double vectors and scalars. 
    523             if (ch!='D' && ch!='S') return false; 
     533            // can only do double vectors and double scalars. 
     534            if (ch!='D' && ch!='d') return false; 
    524535        } 
    525536        return false; // not yet implemented 
     
    563574} 
    564575 
     576char [] opToSSE2(char op) 
     577{ 
     578    switch (op) { 
     579        case '*': 
     580        case '.': return "mulpd"; 
     581        case '+': return "addpd"; 
     582        case '-': return "subpd"; 
     583        case '_': return "**BUG**"; // Non-existent! 
     584    } 
     585} 
     586 
     587char [] opToSSE(char op) 
     588{ 
     589    switch (op) { 
     590        case '*': 
     591        case '.': return "mulps"; 
     592        case '+': return "addps"; 
     593        case '-': return "subps"; 
     594        case '_': return "**BUG**"; // Non-existent! 
     595    } 
     596} 
     597 
     598 
    565599char [] generateCodeForAsmSSE2(int knownlength, char [] typelist, char [] operations, char [] finaloperation) 
    566600{ 
     601// Use ESI as the index register. 
    567602    char [] result="asm {"\n 
    568603    ~"L1: \n" 
    569604    ~ "  movapd XMM1, [ESI+EAX];"\n 
    570605    ~ "  mulpd XMM1, XMM2;"\n 
    571     ~ "  addpd XMM1, [EDI+EAX];"\n 
    572     ~ "  movapd [EDI+EAX], XMM1;"\n 
    573     ~ "  add EAX, 16;"\n 
     606    ~ "  addpd XMM1, [EDI+ESI];"\n 
     607    ~ "  movapd [EDI+ESI], XMM1;"\n 
     608    ~ "  add ESI, 16;"\n 
    574609    ~ "  js L1;"\n 
    575610    ~ "}"\n; 
     
    615650    char [] incrementRealVectors=""; 
    616651 
    617     // Create local variables for everything (avoid bug #1028)
     652    // Create local variables for pointers to vectors (avoid bug #1125)
    618653    int vecnum = 0; 
    619654    for (int i=0; i< typelist.length;++i) { 
    620         if (typelist[i]=='S'){ 
    621             result~= "  real var" ~ itoa(i) ~ " = expr[" ~ itoa(i) ~ "];\n"; 
    622         } else { 
     655        if (isVector(typelist[i])){ 
    623656            result~= "  auto vec" ~ itoa(i) ~ " = expr[" ~itoa(i) ~"].ptr;\n"; 
    624657            if (typelist[i]=='R') { 
     
    628661        } 
    629662    } 
    630     result ~= "  int veclength = expr[" ~itoa(findFirstVector(typelist)) ~"].length;\n"; 
     663    if (knownlength==0) { 
     664        result ~= "  int veclength = expr[" ~itoa(findFirstVector(typelist)) ~"].length;\n"; 
     665    } 
    631666 
    632667    bool isDotProduct = (operations[$-1]=='.'); 
     
    652687         } 
    653688        ++numvecs; 
    654       } else
    655           result ~= "  fld real ptr var"~ itoa(i) ~";\n"; 
     689      } else if (typelist[i]=='r')
     690          result ~= "  fld real ptr expr["~ itoa(i) ~"];\n"; 
    656691          ++numconsts; 
    657692          ++numScalarsOnStack; 
     
    669704            ++numScalarsOnStack; 
    670705            // load multiplier for *= 
    671             result ~= "  fld double ptr var0;\n"; 
     706            result ~= "  fld double ptr expr[0];\n"; 
    672707    } 
    673708    int done=0; 
     
    712747        mainbody ~= next; firstbody ~= next; 
    713748        done +=2; 
    714       } else { // multiply by scalar. Note that there's an extra item on the stack when we're in the body of the loop. 
    715         firstbody ~= "  fmul ST, ST(" ~ itoa(numOnStack + numScalarsOnStack - scalarNum(typelist, operations[done]-'a')) ~ "); //var" ~ itoa(operations[done]-'a') ~ \n; 
    716         mainbody ~= "  fmul ST, ST(" ~ itoa(1 + numOnStack + numScalarsOnStack - scalarNum(typelist, operations[done]-'a')) ~ "); //var" ~ itoa(operations[done]-'a') ~ \n; 
    717         // NOTE: For scalar float or double values, we can multiply directly, saving one slot on the FP stack. 
    718            // next = "  " ~ opToX87(operations[done+1]) ~ " double ptr var" ~ itoa(operations[done]-'a') ~";\n"; 
    719            // mainbody ~= next; firstbody ~= next; 
     749      } else { // multiply by scalar. 
     750        if (typelist[operations[done]-'a']=='r') { 
     751             // Multiply by real scalar, which is already on the stack. Note that there's an extra item on the stack when we're in the body of the loop. 
     752            firstbody ~= "  fmul ST, ST(" ~ itoa(numOnStack + numScalarsOnStack - realScalarNum(typelist, operations[done]-'a')) ~ "); //var" ~ itoa(operations[done]-'a') ~ \n; 
     753            mainbody ~= "  fmul ST, ST(" ~ itoa(1 + numOnStack + numScalarsOnStack - realScalarNum(typelist, operations[done]-'a')) ~ "); //var" ~ itoa(operations[done]-'a') ~ \n; 
     754        } else { 
     755            // For scalar float or double values, we can multiply directly, saving one slot on the FP stack. 
     756            next = "  fmul " ~ operandSize(typelist[operations[done]-'a']) ~ "expr[" ~ itoa(operations[done]-'a') ~"];\n"; 
     757           mainbody ~= next; firstbody ~= next; 
     758       } 
    720759            done +=2; 
    721760      } 
     
    811850    writefln(d, " ", e); 
    812851    assert(d==2267.625); 
    813     q*=2.1L
     852    q*=2.1
    814853 
    815854/*