Changeset 122

Show
Ignore:
Timestamp:
10/30/07 03:21:19 (10 months ago)
Author:
Don Clugston
Message:

x87 code now explicitly sets the tuple parameters. This means that BLADE can now cope with integer multiplies (not just floating-point).
Asserts for vector length are now in the invoker code; this results in very nice error messages.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/blade/Blade.d

    r121 r122  
    4141* entries of which correspond to A, B, C, ... 
    4242* This string is converted to postfix. The postfix string is converted to 
    43 * a string containing x87 asm, which is then mixed into a function which accepts the tuple. 
     43* a string containing asm instructions, which is then mixed into a function 
     44* which accepts the tuple. 
    4445* 
    4546* HISTORY: 
     
    5758 
    5859public: 
     60// These functions have the complete expression encoded in the template type. 
     61// One of these functions is instantiated for each expression. 
     62// One difficulty is that the only way to transfer information from the CTFE code 
     63// into the function is via the template parameters. So, from inside the function, 
     64// we must reassemble the type information and use it to generate the asm code. 
     65 
    5966/** Function to implement BLAS1 operations using SSE2 assembler. 
    6067 * Every member of the Values tuple must only be double or double []. 
    6168 */ 
    62 void SSEVECGEN(char [] rawexpr, Values...)(Values values) { 
     69void SSEVECGEN(char [] expr, Values...)(Values values) { 
     70    const ranklist = TupleRank!(Values);     
     71    pragma(msg, generateCodeForSSE2(ranklist, expr)); 
     72    mixin(generateCodeForSSE2(ranklist, expr)); 
     73 } 
     74 
     75/** Function to implement BLAS1 operations using X87 assembler. 
     76 * Every member of the Values tuple must only be real, float[], double [], or real[]. 
     77 */ 
     78void X87VECGEN(char [] rawexpr, Values...)(Values values) { 
    6379    const typelist = elementTupleToString!(Values); 
    6480    const ranklist = TupleRank!(Values); 
    6581    const char [] expr = rawexpr; 
    66     const char  combineOp = 0; 
    6782     
    68     pragma(msg, generateCodeForSSE2(ranklist, expr, combineOp)); 
    69     mixin(generateCodeForSSE2(ranklist, expr, combineOp)); 
    70  } 
    71  
    72 /** Function to implement BLAS1 operations using X87 assembler. 
    73 */ 
    74 template X87VECGEN(char [] rawexpr) { 
    75  void X87VECGEN(Values...)(Values values) { 
    76     const typelist = elementTupleToString!(Values); 
    77     const ranklist = TupleRank!(Values); 
    78     const char [] expr = rawexpr; 
    79     const char  combineOp = 0; 
    80      
    81     pragma(msg, generateCodeForAsmX87(typelist, ranklist, expr, combineOp)); 
    82     mixin(generateCodeForAsmX87(typelist, ranklist, expr, combineOp)); 
    83  } 
     83    pragma(msg, generateCodeForAsmX87(typelist, ranklist, expr)); 
     84    mixin(generateCodeForAsmX87(typelist, ranklist, expr)); 
     85
     86 
     87// ------------------------------------ 
     88// return true if the first characters of x are y. 
     89bool startsWith(char [] x, char [] y) 
     90
     91    return (x.length>=y.length && x[0..y.length]==y); 
    8492} 
    8593 
     
    114122            continue; // assume we can always do scalars 
    115123        } 
    116         if ((t.length>7 && t[0..7]=="double[")) { 
     124        if (startsWith(t, "double[")) { 
    117125            ++numvectors; 
    118126            SSE1 = false; 
    119         } else if ((t.length>6 && t[0..6]=="float[")) { 
     127        } else if (startsWith(t, "float[")) { 
    120128            ++numvectors; 
    121129            SSE2 = false; 
     
    123131            SSE1 = false; 
    124132            SSE2 = false; 
    125             if ((t.length>5 && t[0..5]=="real[")) { ++numvectors; } 
     133            if (startsWith(t, "real[")) { ++numvectors; } 
    126134            else X87 = false; 
    127135        } 
     
    134142} 
    135143 
     144 
     145//------------------------------------------------------- 
     146//                Invoker functions 
     147//------------------------------------------------------- 
     148// These are CTFE functions which, when mixed in, will call 
     149// the BLAS function. They ensure that all types are converted into standard 
     150// simple forms, ensure that the vector lengths are equal, and pass in all 
     151// of the parameters. 
     152 
     153/// Generate code which will call the X87 function 
     154char [] invokeX87(AbstractSyntaxTree tree) 
     155{ 
     156    char [] result = "X87VECGEN!(" ~ wrapInQuotes(tree.expression); 
     157    for (int i=0; i<tree.symbolTable.length;++i) { 
     158        char [] t = tree.symbolTable[i].type; 
     159        if (tree.symbolTable[i].rank==0) { 
     160            // Convert scalars into standard form. 
     161            // long, ulong, and real must become real. 
     162            // We convert everything else to double, since that uses less 
     163            // FPU stack space.            
     164            if (t == "real" || t=="double" || t=="float") result ~= "," ~ t; 
     165            else if (t=="long" || t=="ulong") result~=",real"; 
     166            else result ~= "double"; // Convert all other scalars into doubles. 
     167         } 
     168        else if (startsWith(t, "real[")) result ~= ",real[]"; 
     169        else if (startsWith(t, "float[")) result ~= ",float[]"; 
     170        else if (startsWith(t, "double[")) result~= ",double[]"; 
     171        // else error. 
     172    } 
     173    result ~= ")("; 
     174    int knt=0; 
     175    for (int i=0; i<tree.symbolTable.length;++i) { 
     176        if (knt>0) result ~=","; 
     177        result ~= tree.symbolTable[i].value; 
     178        ++knt; 
     179    } 
     180    return result~ ");";         
     181} 
     182 
     183/// CTFE helper: generates code which asserts that every rank-1 symbol has the same length as the first rank-1 symbol, then calls the SSE2 function (SSEVECGEN). 
     184char [] invokeSSE2(AbstractSyntaxTree tree) 
     185{ 
     186    char [] result =""; 
     187    int firstVector = -1; 
     188    for (int i=0; i<tree.symbolTable.length;++i) { 
     189        if (tree.symbolTable[i].rank==1){ 
     190            if (firstVector==-1) { 
     191                firstVector = i; 
     192            } else { 
     193                result ~= "assert(" ~ tree.symbolTable[i].value  
     194                    ~ ".length==" ~ tree.symbolTable[firstVector].value 
     195                    ~ ".length, `Vector length mismatch`);"\n; 
     196            } 
     197        } 
     198    } 
     199     
     200    result ~= "SSEVECGEN!(" ~ wrapInQuotes(tree.expression);         
     201    // For SSE2, everything must be implicitly convertible to double: rank-0 symbols are passed as double, all others as double[]. 
     202    for (int i=0; i<tree.symbolTable.length;++i) { 
     203        if (tree.symbolTable[i].rank==0) result ~= ",double"; 
     204        else result ~= ",double[]"; 
     205    } 
     206    result ~= ")("; 
     207    int knt=0; 
     208    for (int i=0; i<tree.symbolTable.length;++i) { 
     209        if (knt>0) result ~=","; 
     210        result ~= tree.symbolTable[i].value; 
     211        ++knt; 
     212    } 
     213    return result ~ ");"; 
     214} 
     215 
     216// Categorise the expression, and dispatch to the appropriate code generator. 
    136217char [] makeVectorCode(AbstractSyntaxTree tree) 
    137218{ 
    138219    VecExpressionType exprType = categorizeExpression(tree); 
    139220    if (exprType == VecExpressionType.SSE2Expression) { 
    140         char [] result = "SSEVECGEN!(" ~ wrapInQuotes(tree.expression);         
    141         // For SSE2, everything must be implicitly convertible to double. 
    142         for (int i=0; i<tree.symbolTable.length;++i) { 
    143             if (tree.symbolTable[i].rank==0) result ~= ",double"; 
    144             else result ~= ",double[]"; 
    145         } 
    146         result ~= ")("; 
    147         int knt=0; 
    148         for (int i=0; i<tree.symbolTable.length;++i) { 
    149             if (knt>0) result ~=","; 
    150             result ~= tree.symbolTable[i].value; 
    151             ++knt; 
    152         } 
    153         return result ~ ");"; 
     221        return invokeSSE2(tree); 
    154222    } else if (exprType == VecExpressionType.X87Expression) { 
    155         char [] result = "X87VECGEN!(" ~ wrapInQuotes(tree.expression) ~ ")("; 
    156         int knt=0; 
    157         for (int i=0; i<tree.symbolTable.length;++i) { 
    158             if (knt>0) result ~=","; 
    159             result ~= tree.symbolTable[i].value; 
    160             ++knt; 
    161         } 
    162         return result~ ");";         
     223        return invokeX87(tree); 
    163224    } else { 
    164225        return `static assert(0,` ~ wrapInQuotes("Cannot generate ASM for expression " ~ tree.expression) ~ `);`; 
    165226    }     
    166      
    167 
    168  
     227
    169228 
    170229char [] vectorize(char [] expr) 
  • trunk/blade/BladeDemo.d

    r121 r122  
    1616{ 
    1717     
    18     auto z = [3.4L, 565, 31.3]; 
     18    auto z = [3.4, 565, 31.3]; 
    1919    double [] a = new double[4]; 
    2020    double [] d = new double[4];