Show
Ignore:
Timestamp:
04/30/08 16:05:32 (6 months ago)
Author:
Don Clugston
Message:

Added prod(). Use .ptr to get raw data, so it works with Bill Baxter's ArrayView?.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/blade/Blade.d

    r176 r187  
    11//  Written in the D programming language 1.0 
    22/** 
    3 * BLADE 0.4Alpha -- Basic Linear Algebra D Expressions 
     3* BLADE 0.6Alpha -- Basic Linear Algebra D Expressions 
    44* 
    55* Generate near-optimal x87/SSE2 asm code for BLAS1 basic vector operations at compile time. 
     
    1313* 
    1414* FEATURES: 
    15 *  - Supports any mix of vector addition, subtraction, dot product, unary minus, 
    16 *    multiplication by a scalar, sum(), abs(), and multidimensional slicing. 
     15*  - Supports any mix of vector addition, subtraction, unary minus, 
     16*    multiplication by a scalar, 
     17*    cumulation via dot product, sum() and prod(), and multidimensional slicing. 
    1718*  - Generates either x87 asm code, SSE or SSE2 asm code or pure D, depending on 
    1819*    the complexity of the expression, and the availability of inline asm. 
     
    2425* 
    2526* SPEED/ACCURACY TRADEOFF: 
    26 * IEEE floating point multiplication and addition are not associative. 
     27* Tradeoff arises because IEEE floating point multiplication and addition are not associative. 
    2728*  Assuming that overflow and underflow do not occur: 
    2829*  (a*b)*c may differ from a*(b*c) in the last bit. 
     
    3435* 
    3536* FUTURE DIRECTIONS (in order of expected implementation): 
    36 * - trace() 
    37 * - Loop unrolling for cumulative operations dot, sum, trace. 
     37* - nested D expressions 
     38* - cumulative operations min, max 
     39* - Loop unrolling for cumulative operations dot, sum, prod. 
    3840* - Dense matrix support. 
    3941* - Triangular, banded, symmetric, and sparse matrix support 
     
    4648* which accepts the tuple. 
    4749* 
     50* COMPILER BUGS/LIMITATIONS AFFECTING THIS LIBRARY 
     51* - Local arrays are not aligned to a 128-bit boundary, so use of aligned SSE is not 
     52*   always possible. 
     53* - Bugzilla #1125 -- structs in a tuple can't be used in asm. 
     54* - Bugzilla #1382 -- CTFE strings never get deleted --> SLOOOOOW compilation. KILLER BUG. 
     55* - Bugzilla #1768 -- in CTFE, arrays of arrays aren't initialized properly 
     56* 
    4857* HISTORY: 
    4958* 0.1 - Used classes to make expression templates. 
    5059* 0.2 - Support for a wider variety of expressions. Dot product, imaginary numbers, etc. 
    5160* 0.3 - Based on string mixins. Most of the new features of 0.2 are gone, but SSE2 is added. 
    52 * 0.4 - Added D code generator. Nice error messages. Optimal parameter passing.  
     61* 0.4 - Added D code generator. Nice error messages. Optimal parameter passing. 
    5362*       (passes pointers, not arrays). 
    5463* 0.5 - Expression simplification step. Slicing support. 
    55 * 0.6 - Dot product, nested expressions, intrinsics: abs, sqrt, sum. 
     64* 0.6 - Dot product, nested expressions (asm only), intrinsics: abs, sqrt, sum. 
     65* 0.7 - Intrinsics: prod 
    5666*/ 
    5767 
     
    7383// FOR MIXIN: Generate code to evaluate the given vector expression. 
    7484char [] vectorize(char [] expr) 
    75 {     
     85{ 
    7686    debug (BladeFrontEnd) { 
    7787    return `pragma(msg, \n ~ "// " __FILE__ ~ "(" ~__LINE__.stringof[0..$-1] ~ ") ` ~ enquote(expr) ~ `" ~ \n ~ ` ~ mixin_tupleAndSyntaxtreeof("makeVectorCode", expr) ~ "~\\n);" 
     
    8292} 
    8393 
    84 // Simplify the expression, categorise it,  
     94// Simplify the expression, categorise it, 
    8595// and dispatch to the appropriate code generator. 
    8696char [] makeVectorCode(Types...)(AbstractSyntaxTree tree) 
     
    8999    if (revised.errorMessage.length>0)  return `static assert(0, "BLADE: ` ~ enquote(revised.errorMessage) ~ `");`; 
    90100    VecExpressionType exprType = categorizeExpression(revised); 
    91     InvocationCode q;     
     101    InvocationCode q; 
    92102    if (exprType == VecExpressionType.SSE2Expression || exprType == VecExpressionType.SSE1Expression) { 
    93103        q = invokeSSE((exprType == VecExpressionType.SSE2Expression), revised); 
     
    105115{ 
    106116    // TODO: 
    107     return expression;     
     117    return expression; 
    108118} 
    109119 
    110120template X87RetType(char [] expr) { 
    111     static if (expr[0]!='0') alias void X87RetType; 
     121    static if (expr[0]!='0' && expr[0]!='1') alias void X87RetType; 
    112122    else alias real X87RetType; 
    113123} 
     
    130140 
    131141/** Function to implement BLAS1 operations using X87 assembler. 
    132  * Every member of the Values tuple must only be real,  
     142 * Every member of the Values tuple must only be real, 
    133143 * float[], double [], or real[], or BladeStrided!(float), !(double), !(real) 
    134144 */ 
    135145X87RetType!(expr) X87VECGEN(char [] expr, int numStrides, Values...)(int veclength, Values values) { 
    136     debug(BladeBackEnd) {     
     146    debug(BladeBackEnd) { 
    137147        pragma(msg, generateCodeForAsmX87!(numStrides, Values)(expr)); 
    138148    } 
     
    146156static ulong[2] SSE_SIGNBITpd = [0x8000_0000_0000_0000L, 0x8000_0000_0000_0000L]; 
    147157static uint[4] SSE_SIGNBITps = [0x8000_0000,0x8000_0000,0x8000_0000, 0x8000_0000]; 
     158// The value 1.0 for a parallel SSE register 
     159static ulong[2] SSE_ONEpd = [0x3FF0_0000_0000_0000L, 0x3FF0_0000_0000_0000L]; 
     160static uint[4] SSE_ONEps = [0x3F0_000, 0x3F0_000, 0x3F0_000, 0x3F0_000]; 
    148161 
    149162private: 
     
    156169// SSE1 is possible only if all vectors are floats. 
    157170// X87 is possible for any mix of real, double, and float vectors. 
    158 // BUG: for X87, should also check number of temporaries (don't overflow the FPU stack 
     171// BUG: for X87, should also check number of temporaries (don't overflow the FPU stack) 
    159172enum VecExpressionType { SSE1Expression, SSE2Expression, X87Expression, DExpression }; 
    160173 
     
    164177    bool SSE1 = true; 
    165178    bool X87 = true; 
    166     bool strided = false; // true if any strided vector or matrix operations exist     
     179    bool strided = false; // true if any strided vector or matrix operations exist 
    167180version (D_InlineAsm_X86) {} else { 
    168181    // Without an assembler, there's no chance! 
     
    199212            y = tree.compounds[x-tree.symbolTable.length][0]-'A'; 
    200213            // Check for a stride.. 
    201             if (tree.compounds[x-tree.symbolTable.length][$-1]==']') {                
     214            if (tree.compounds[x-tree.symbolTable.length][$-1]==']') { 
    202215                strided |= isStrided(tree.compounds[x-tree.symbolTable.length]); 
    203216            } 
    204217        } 
    205          
     218 
    206219        char [] t = tree.symbolTable[y].element; 
    207220        if (t == "double") { 
     
    218231        } 
    219232    } 
    220     // It's not worth doing strided operations with SSE.    
     233    // It's not worth doing strided operations with SSE. 
    221234    if (strided) { SSE1=false; SSE2=false; } 
    222235    if (numRealScalars > MAX_87_REALSCALARSPLUSTEMPORARIES) X87 = false; 
     
    224237    if (numvectors > MAX_SSE_VECTORS) { SSE1=false; SSE2=false; } 
    225238    if (SSE1) return VecExpressionType.SSE1Expression; 
    226     if (SSE2) return VecExpressionType.SSE2Expression;     
    227     return X87 ? VecExpressionType.X87Expression : VecExpressionType.DExpression;  
     239    if (SSE2) return VecExpressionType.SSE2Expression; 
     240    return X87 ? VecExpressionType.X87Expression : VecExpressionType.DExpression; 
    228241} 
    229242 
     
    256269    char [] stridelist=""; 
    257270    char [] alltypes=""; 
    258      
     271 
    259272    char [][] typelist; 
    260      
     273 
    261274    char [] vals; 
    262275    int numstrides=0; 
     
    283296            } else { // for arrays, the type is the type of the original array 
    284297                t = tree.symbolTable[tree.compounds[x-tree.symbolTable.length][0]-'A'].element; 
    285                         // Check for a stride..                 
     298                        // Check for a stride.. 
    286299                if (tree.compounds[x-tree.symbolTable.length][$-1]==']') { 
    287300                    strided = isStrided(tree.compounds[x-tree.symbolTable.length]); 
    288301                    if (strided) ++numstrides; 
    289302                } 
    290                  
     303 
    291304            } 
    292305        } 
     
    296309            // long, ulong, and real must become real. 
    297310            // We convert everything else to double, since that uses less 
    298             // FPU stack space.            
     311            // FPU stack space. 
    299312            if (t == "real" || t == "double" || t == "float") alltypes ~= t; 
    300313            else if (t == "long" || t == "ulong") result ~= "real"; 
     
    310323                alltypes ~= t ~ "*"; 
    311324                // for vectors, we only need the pointer, not the length 
    312                 vals ~= "&" ~  v ~ "[0]"; 
     325                //vals ~= "&" ~  v ~ "[0]"; 
     326                vals ~= v ~ ".ptr"; 
    313327            } 
    314328        } 
     
    323337    result ~= ")("; 
    324338    int firstVector = findVectorForLength(tree); 
    325     return InvocationCode(result ~ getValueForSymbol(tree.mapping[firstVector], tree).invoker ~ ".length"  
     339    return InvocationCode(result ~ getValueForSymbol(tree.mapping[firstVector], tree).invoker ~ ".length" 
    326340        ~ vals ~ stridelist  ~ ")", assertions); 
    327341} 
     
    342356    char [] postfix = makePostfixForSSE(tree.expression, tree.rank); 
    343357    char [] retType = "void"; 
    344     if (postfix[0]=='0') retType = (SSE2? "double" : "float"); 
     358    if (postfix[0]=='0' || postfix[0]=='1') retType = (SSE2? "double" : "float"); 
    345359 
    346360    char [] result = "SSEVECGEN!(" ~ retType ~ `,"` ~ enquote(postfix) ~ `"`; 
     
    352366        else result ~= SSE2? ",double*" : ",float*"; 
    353367        vals ~= ","; 
    354         if (rnk=='1') vals ~= "&"; 
     368//        if (rnk=='1') vals ~= "&"; 
    355369        InvocationCode q = getValueForSymbol(tree.mapping[i], tree); 
    356370        vals ~= q.invoker; 
    357371        assertions ~= q.assertions; 
    358372        // for vectors, we only need the pointer, not the length 
    359         if (rnk=='1') vals ~= "[0]"; 
    360     } 
    361              
     373        if (rnk=='1') vals ~= ".ptr"; 
     374    } 
     375 
    362376    result ~= ")("; 
    363377    int firstVector = findVectorForLength(tree); 
     
    384398//                    result ~= "static "; 
    385399//                } 
    386                 result ~= "assert("  
     400                result ~= "assert(" 
    387401                 ~ getDimensionLengthForSymbol(tree.mapping[i], tree, 1) 
    388402                    ~ "==" ~ getDimensionLengthForSymbol(tree.mapping[firstVector], tree, 1) 
     
    399413    for (int i=0; i<tree.mapping.length;++i) { 
    400414        if (tree.rank[i]=='1'){ 
    401             result ~= "assert( (cast(size_t)(&" ~ getValueForSymbol(tree.mapping[i], tree).invoker 
    402                     ~ "[0])& 0x0F) == 0, `SSE Vector misalignment: " ~ getValueForSymbol(tree.mapping[i], tree).invoker ~ "`);"\n; 
     415            result ~= "assert( (cast(size_t)(" ~ getValueForSymbol(tree.mapping[i], tree).invoker 
     416                    ~ ".ptr)& 0x0F) == 0, `SSE Vector misalignment: " ~ getValueForSymbol(tree.mapping[i], tree).invoker ~ "`);"\n; 
    403417        } 
    404418    } 
     
    438452        } 
    439453    } 
    440     return dynamic>=0? dynamic : strided;     
     454    return dynamic>=0? dynamic : strided; 
    441455} 
    442456 
     
    458472    } else {  // else it's a compound or an indexed array 
    459473        char [] comp = tree.compounds[c-'A'-tree.symbolTable.length]; 
    460          
     474 
    461475        if (comp[$-1]!=']') { // simple compound expression 
    462476            foreach(d; comp) { 
     
    477491            char [] nextIndex; 
    478492            char [] sliceTo; 
    479      
    480             for (int k = comp.length-1;k>=1; --k) {             
     493 
     494            for (int k = comp.length-1;k>=1; --k) { 
    481495                char d = comp[k]; 
    482496                if (d == ']') { ++numbracks; } 
    483497                if (d == '[') { --numbracks; } 
    484                  
     498 
    485499                if (d == ']' && numbracks == 1) { nextIndex = ""; } 
    486500                else if (numbracks == 1 && comp[k-1..k+1]=="..") { 
    487501                    isSlice = true; 
    488502                    sliceTo = nextIndex; 
    489                     nextIndex = "";                      
     503                    nextIndex = ""; 
    490504                    --k; 
    491505                } else if ((d == '[' && numbracks==0) || (d==',' && numbracks==1)) { 
     
    539553    } 
    540554    RevisedExpression revised = remapCompounds(expr, ranks, symbolTable); 
    541      
     555 
    542556    VecExpressionType exprType = categorizeExpression(revised); 
    543557    if (exprType == VecExpressionType.SSE2Expression || exprType == VecExpressionType.SSE1Expression) { 
     
    560574    if (c-'A'<tree.symbolTable.length) { 
    561575        v = tree.symbolTable[c-'A'].value; 
    562     } else {  // else it's a compound or an indexed array        
     576    } else {  // else it's a compound or an indexed array 
    563577        char [] comp = tree.compounds[c-'A'-tree.symbolTable.length]; 
    564          
     578 
    565579        if (comp[$-1]!=']') { // compound expression (not an indexed array) 
    566580            if (comp[0]>='a' && comp[0]<='z') { 
     
    590604            bool isSlice = false; 
    591605            char [] newSlice; 
    592      
    593             for (int k = comp.length-1;k>=1; --k) {             
     606 
     607            for (int k = comp.length-1;k>=1; --k) { 
    594608                char d = comp[k]; 
    595609                if (d == ']') { ++numbracks; } 
    596610                if (d == '[') { --numbracks; } 
    597                 
     611 
    598612                if (d==']' && numbracks == 1) { newSlice = ""; } 
    599                 else if (d=='.' && numbracks == 1) { isSlice = true;  
    600                     if(numSlicesRemaining>0){ newSlice = ""; }  
     613                else if (d=='.' && numbracks == 1) { isSlice = true; 
     614                    if(numSlicesRemaining>0){ newSlice = ""; } 
    601615                    else newSlice = "." ~ newSlice; 
    602616                } 
     
    645659    char [] assertions=""; 
    646660    int wholerank = exprRank(tree.expression, tree.rank); 
    647     if (wholerank ==1) {   
     661    if (wholerank ==1) { 
    648662        int lenvec = findVectorForLength(tree); 
    649         result = "for (int blade_index=0; blade_index<"  
     663        result = "for (int blade_index=0; blade_index<" 
    650664               ~ getDimensionLengthForSymbol(tree.mapping[lenvec], tree, 1) 
    651665               ~ "; ++blade_index) {"\n; 
    652666    } 
    653667    foreach (c; tree.expression) { 
    654         if (c>='A' && c<'Z') {             
     668        if (c>='A' && c<'Z') { 
    655669            // restore all symbols into the expression 
    656670            // If it's a vector, index it 
     
    667681        } else result ~= c; 
    668682    } 
    669     if (wholerank==0) return InvocationCode(result, assertions);    
     683    if (wholerank==0) return InvocationCode(result, assertions); 
    670684    return InvocationCode(result ~ "; }", assertions); 
    671685}