Download Reference Manual
The Developer's Library for D
About Wiki Forums Source Search Contact

Changeset 3876

Show
Ignore:
Timestamp:
08/12/08 03:54:09 (4 months ago)
Author:
Don Clugston
Message:

Better memory efficiency in the asymmetric multiply case

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/tango/math/internal/BiguintCore.d

    r3874 r3876  
    55 * Authors:   Don Clugston 
    66 */ 
    7  
     7  
    88module tango.math.internal.BiguintCore; 
    99 
     
    161161/** General unsigned subtraction routine for bigints. 
    162162 *  Sets result = x - y. If the result is negative, negative will be true. 
    163  * 
    164  *  The length of y must not be larger than the length of x. 
    165163 */ 
    166164uint [] biguintSubtract(uint[] x, uint[] y, bool *negative) 
     
    203201uint [] biguintAdd(uint[] a, uint [] b) { 
    204202    uint [] x, y; 
    205     if (a.length<b.length) { x = b; y = a; } else {x = a; y = b; } 
     203    if (a.length<b.length) { x = b; y = a; } else { x = a; y = b; } 
    206204    // now we know x.length > y.length 
    207205    // create result. add 1 in case it overflows 
     
    239237} 
    240238 
     239/** Return x-y.. 
     240 *  x must be greater than y. 
     241 */   
     242uint [] biguintSubInt(uint[] x, ulong y) 
     243{ 
     244    uint hi = cast(uint)(y >>> 32); 
     245    uint lo = cast(uint)(y& 0xFFFF_FFFF); 
     246    uint [] result = new uint[x.length]; 
     247    result[] = x[]; 
     248    multibyteIncrementAssign!('-')(result[], lo); 
     249    if (hi) multibyteIncrementAssign!('-')(result[1..$], hi); 
     250    if (result[$-1]==0) return result[0..$-1]; 
     251    else return result;  
     252} 
     253 
    241254/** General unsigned multiply routine for bigints. 
    242255 *  Sets result = x*y. 
     
    291304        // For maximum performance, we want the ratio to be as close to  
    292305        // 1:1 as possible. To achieve this, we can either pad x or y. 
    293         // The best choice depends on the modulus x%y. 
    294         
     306        // The best choice depends on the modulus x%y.        
    295307        uint numchunks = x.length / y.length; 
    296308        uint chunksize = y.length; 
     
    298310        uint maxchunk = chunksize + extra; 
    299311        bool paddingY; // true = we're padding Y, false = we're padding X. 
    300         // padding Y will mean we have  
    301         if (extra*extra*2 < y.length*y.length) { 
     312        if (extra * extra * 2 < y.length*y.length) { 
    302313            // The leftover bit is small enough that it should be incorporated 
    303314            // in the existing chunks.             
     
    320331        } 
    321332        // We make the buffer a bit bigger so we have space for the partial sums. 
    322         uint [] scratchbuff = new uint[karatsubaRequiredBuffSize(maxchunk) + maxchunk * 2]; 
    323         uint [] partial = scratchbuff[$ - maxchunk * 2 .. $]; 
    324          
     333        uint [] scratchbuff = new uint[karatsubaRequiredBuffSize(maxchunk) + y.length]; 
     334        uint [] partial = scratchbuff[$ - y.length .. $]; 
    325335        uint done; // how much of X have we done so far? 
    326336        double residual = 0; 
    327337        if (paddingY) { 
    328338            // If the first chunk is bigger, do it first. We're padding y.  
    329           mulKaratsuba(result[0 .. y.length + chunksize + (extra>0?1:0)], x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff); 
    330           done = chunksize + (extra>0?1:0); 
     339          mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )],  
     340                        x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff); 
     341          done = chunksize + (extra > 0 ? 1 : 0); 
    331342          if (extra) --extra; 
    332343        } else { // Begin with the extra bit. 
     
    337348        auto basechunksize = chunksize; 
    338349        while (done < x.length) { 
    339             chunksize = basechunksize + (extra>0? 1:0); 
     350            chunksize = basechunksize + (extra > 0 ? 1 : 0); 
    340351            if (extra) --extra; 
    341              mulKaratsuba(partial[0 .. y.length + chunksize],  
     352            partial[] = result[done .. done+y.length]; 
     353            mulKaratsuba(result[done .. done + y.length + chunksize],  
    342354                       x[done .. done+chunksize], y, scratchbuff); 
    343             result[done + y.length .. done + y.length + chunksize]  
    344                 = partial[y.length .. y.length + chunksize]; 
    345             simpleAddAssign(result[done .. done + y.length+chunksize], partial[0 .. y.length]); 
     355            simpleAddAssign(result[done .. done + y.length + chunksize], partial); 
    346356            done += chunksize; 
    347357        } 
     
    384394in {     
    385395    assert(result.length == left.length + right.length); 
    386 //    assert(right.length>1); 
     396    assert(right.length>1); 
    387397} 
    388398body { 
    389399    result[left.length] = multibyteMul(result[0..left.length], left, right[0], 0); 
    390     if (right.length>1) 
    391         multibyteMultiplyAccumulate(result[1..$], left, right[1..$]); 
     400    multibyteMultiplyAccumulate(result[1..$], left, right[1..$]); 
    392401} 
    393402 
     
    407416        result[right.length..left.length] = left[right.length .. $];             
    408417        carry = multibyteIncrementAssign!('+')(result[right.length..$], carry); 
    409     } //else if (result.length==left.length+1) { result[$-1] = carry; carry=0; } 
     418    } 
    410419    return carry; 
    411420} 
     
    457466uint karatsubaRequiredBuffSize(uint xlen) 
    458467{ 
    459     uint half = (xlen >> 1) + (xlen & 1); 
    460     return xlen <= KARATSUBALIMIT ? 0 : 2*half+1 + karatsubaRequiredBuffSize(half); 
     468    return xlen <= KARATSUBALIMIT ? 0 : 2*xlen - KARATSUBALIMIT + 2*uint.sizeof*8; 
    461469} 
    462470 
     
    537545        } 
    538546    } else mulKaratsuba(resultHigh, x1, y1, newscratchbuff); 
     547     
    539548    mid.simpleSubAssign(resultHigh); 
    540549