| 62 | | void SSEVECGEN(char [] rawexpr, Values...)(Values values) { |
|---|
void SSEVECGEN(char [] expr, Values...)(Values values) {
    // Rank of each member of the Values tuple, as computed by TupleRank.
    const ranks = TupleRank!(Values);
    // Build the code string once; it is both echoed at compile time
    // and mixed into this function body.
    const char [] generated = generateCodeForSSE2(ranks, expr);
    pragma(msg, generated);
    mixin(generated);
}
|---|
| | 74 | |
|---|
| | 75 | /** Function to implement BLAS1 operations using X87 assembler. |
|---|
| | 76 | * Every member of the Values tuple must only be real, float[], double [], or real[]. |
|---|
| | 77 | */ |
|---|
| | 78 | void X87VECGEN(char [] rawexpr, Values...)(Values values) { |
|---|
| 68 | | pragma(msg, generateCodeForSSE2(ranklist, expr, combineOp)); |
|---|
| 69 | | mixin(generateCodeForSSE2(ranklist, expr, combineOp)); |
|---|
| 70 | | } |
|---|
| 71 | | |
|---|
| 72 | | /** Function to implement BLAS1 operations using X87 assembler. |
|---|
| 73 | | */ |
|---|
| 74 | | template X87VECGEN(char [] rawexpr) { |
|---|
| 75 | | void X87VECGEN(Values...)(Values values) { |
|---|
| 76 | | const typelist = elementTupleToString!(Values); |
|---|
| 77 | | const ranklist = TupleRank!(Values); |
|---|
| 78 | | const char [] expr = rawexpr; |
|---|
| 79 | | const char combineOp = 0; |
|---|
| 80 | | |
|---|
| 81 | | pragma(msg, generateCodeForAsmX87(typelist, ranklist, expr, combineOp)); |
|---|
| 82 | | mixin(generateCodeForAsmX87(typelist, ranklist, expr, combineOp)); |
|---|
| 83 | | } |
|---|
| | 83 | pragma(msg, generateCodeForAsmX87(typelist, ranklist, expr)); |
|---|
| | 84 | mixin(generateCodeForAsmX87(typelist, ranklist, expr)); |
|---|
| | 85 | } |
|---|
| | 86 | |
|---|
| | 87 | // ------------------------------------ |
|---|
| | 88 | // return true if the first characters of x are y. |
|---|
| | 89 | bool startsWith(char [] x, char [] y) |
|---|
| | 90 | { |
|---|
| | 91 | return (x.length>=y.length && x[0..y.length]==y); |
|---|
| | 144 | |
|---|
| | 145 | //------------------------------------------------------- |
|---|
| | 146 | // Invoker functions |
|---|
| | 147 | //------------------------------------------------------- |
|---|
| | 148 | // These are CTFE functions which, when mixed in, will call |
|---|
| | 149 | // the BLAS function. They ensure that all types are converted into standard |
|---|
| | 150 | // simple forms, ensure that the vector lengths are equal, and pass in all |
|---|
| | 151 | // of the parameters. |
|---|
| | 152 | |
|---|
/// Generate code which will call the X87 function.
///
/// The returned string has the form
///     X87VECGEN!(<quoted expression>,<type>,...)(<value>,...);
/// where the expression is quoted by wrapInQuotes, and one type and one
/// value are emitted for each entry of tree.symbolTable.
char [] invokeX87(AbstractSyntaxTree tree)
{
    char [] result = "X87VECGEN!(" ~ wrapInQuotes(tree.expression);
    for (int i=0; i<tree.symbolTable.length; ++i) {
        char [] t = tree.symbolTable[i].type;
        if (tree.symbolTable[i].rank==0) {
            // Convert scalars into standard form.
            // long, ulong, and real must become real.
            // We convert everything else to double, since that uses less
            // FPU stack space.
            if (t=="real" || t=="double" || t=="float") result ~= "," ~ t;
            else if (t=="long" || t=="ulong") result ~= ",real";
            // Every template argument must be introduced by a comma; without
            // it the generated argument list is malformed.
            else result ~= ",double"; // Convert all other scalars into doubles.
        }
        else if (startsWith(t, "real[")) result ~= ",real[]";
        else if (startsWith(t, "float[")) result ~= ",float[]";
        else if (startsWith(t, "double[")) result ~= ",double[]";
        // else error.
        // NOTE(review): a non-scalar type matching none of the prefixes above
        // appends nothing, so the type list ends up shorter than the
        // argument list emitted below.
    }
    result ~= ")(";
    // Pass every symbol's value, comma-separated, in symbol table order.
    for (int i=0; i<tree.symbolTable.length; ++i) {
        if (i>0) result ~= ",";
        result ~= tree.symbolTable[i].value;
    }
    return result ~ ");";
}
|---|
| | 182 | |
|---|
/// Generate code which will call the SSE2 function.
///
/// The returned string consists of one length-equality assert for every
/// rank-1 symbol after the first, followed by a call of the form
///     SSEVECGEN!(<quoted expression>,<type>,...)(<value>,...);
/// where the expression is quoted by wrapInQuotes.
char [] invokeSSE2(AbstractSyntaxTree tree)
{
    char [] lengthChecks = "";  // assert statements placed before the call
    char [] typeList = "";      // ",double" or ",double[]" per symbol
    char [] argList = "";       // comma-separated symbol values
    int referenceVector = -1;   // index of the first rank-1 symbol, if any

    for (int n=0; n<tree.symbolTable.length; ++n) {
        if (tree.symbolTable[n].rank==1) {
            if (referenceVector!=-1) {
                // Each later vector must match the first vector's length.
                lengthChecks ~= "assert(" ~ tree.symbolTable[n].value
                    ~ ".length==" ~ tree.symbolTable[referenceVector].value
                    ~ ".length, `Vector length mismatch`);"\n;
            } else {
                referenceVector = n;
            }
        }

        // For SSE2, everything must be implicitly convertible to double.
        if (tree.symbolTable[n].rank==0) {
            typeList ~= ",double";
        } else {
            typeList ~= ",double[]";
        }

        if (n>0) argList ~= ",";
        argList ~= tree.symbolTable[n].value;
    }

    return lengthChecks ~ "SSEVECGEN!(" ~ wrapInQuotes(tree.expression)
        ~ typeList ~ ")(" ~ argList ~ ");";
}
|---|
| | 215 | |
|---|
| | 216 | // Categorise the expression, and dispatch to the appropriate code generator. |
|---|