 |
Changeset 3900
- Timestamp:
- 08/22/08 12:22:11
(3 months ago)
- Author:
- Don Clugston
- Message:
Added division. Bugfix for the return value of shl in asm. Numerous small additions to BigInt?, and many minor fixes.
-
Files:
-
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
| r3885 |
r3900 |
|
| 25 | 25 | { |
|---|
| 26 | 26 | private: |
|---|
| 27 | | uint[] data; // In D2 this would be invariant(uint)[] |
|---|
| | 27 | uint[] data = [0]; // In D2 this would be invariant(uint)[] |
|---|
| 28 | 28 | bool sign; |
|---|
| 29 | 29 | static uint [] ZERO = [0]; |
|---|
| … | … | |
| 73 | 73 | } |
|---|
| 74 | 74 | /// |
|---|
| 75 | | BigInt opSub(T:BigInt)(BigInt y) { |
|---|
| | 75 | BigInt opSub(T: BigInt)(T y) { |
|---|
| 76 | 76 | return addsubInternal(*this, y, this.sign == y.sign); |
|---|
| 77 | 77 | } |
|---|
| … | … | |
| 90 | 90 | *this = mulInternal(*this, cast(ulong)(y<0? -y: y), sign!=(y<0)); |
|---|
| 91 | 91 | return *this; |
|---|
| 92 | | } |
|---|
| | 92 | } |
|---|
| | 93 | /// |
|---|
| 93 | 94 | BigInt opMul(T:BigInt)(T y) { |
|---|
| 94 | 95 | return mulInternal(*this, y); |
|---|
| … | … | |
| 99 | 100 | return *this; |
|---|
| 100 | 101 | } |
|---|
| 101 | | |
|---|
| | 102 | /// |
|---|
| | 103 | BigInt opDivAssign(T: BigInt)(T y) { |
|---|
| | 104 | *this = divInternal(*this, y); |
|---|
| | 105 | return *this; |
|---|
| | 106 | } |
|---|
| | 107 | /// |
|---|
| | 108 | BigInt opDiv(T: BigInt)(T y) { |
|---|
| | 109 | *this = divInternal(*this, y); |
|---|
| | 110 | return *this; |
|---|
| | 111 | } |
|---|
| 102 | 112 | /// |
|---|
| 103 | 113 | BigInt opDiv(T:int)(T x) { |
|---|
| … | … | |
| 106 | 116 | uint u = x < 0 ? -x : x; |
|---|
| 107 | 117 | r.data = biguintDivInt(data, u); |
|---|
| 108 | | r.sign = r.isZero()? false : this.sign ^ (x<0); |
|---|
| | 118 | r.sign = r.isZero()? false : this.sign != (x<0); |
|---|
| 109 | 119 | return *this; |
|---|
| | 120 | } |
|---|
| | 121 | /// |
|---|
| | 122 | int opMod(T:int)(T y) { |
|---|
| | 123 | assert(y!=0); |
|---|
| | 124 | uint u = y < 0 ? -y : y; |
|---|
| | 125 | int rem = biguintModInt(data, u); |
|---|
| | 126 | // x%y always has the same sign as x. |
|---|
| | 127 | // This is not the same as mathematical mod. |
|---|
| | 128 | return sign? -rem : rem; |
|---|
| 110 | 129 | } |
|---|
| 111 | 130 | /// |
|---|
| … | … | |
| 121 | 140 | BigInt opNeg() { negate(); return *this; } |
|---|
| 122 | 141 | /// |
|---|
| 123 | | BigInt opPos() { return *this; } |
|---|
| 124 | | |
|---|
| | 142 | BigInt opPos() { return *this; } |
|---|
| | 143 | /// |
|---|
| | 144 | BigInt opPostInc() { |
|---|
| | 145 | BigInt old = *this; |
|---|
| | 146 | *this = addsubInternal(*this, 1, false); |
|---|
| | 147 | return old; |
|---|
| | 148 | } |
|---|
| | 149 | /// |
|---|
| | 150 | BigInt opPostDec() { |
|---|
| | 151 | BigInt old = *this; |
|---|
| | 152 | *this = addsubInternal(*this, 1, true); |
|---|
| | 153 | return old; |
|---|
| | 154 | } |
|---|
| 125 | 155 | /// |
|---|
| 126 | 156 | BigInt opShr(T:int)(T y) { |
|---|
| … | … | |
| 198 | 228 | return buff; |
|---|
| 199 | 229 | } |
|---|
| | 230 | invariant() { assert(data.length==1 || data[$-1]!=0); } |
|---|
| 200 | 231 | private: |
|---|
| 201 | 232 | void negate() { if (!isZero()) sign = !sign; } |
|---|
| | 233 | |
|---|
| 202 | 234 | bool isZero() { return data.length==1 && data[0]==0; } |
|---|
| 203 | 235 | bool isNegative() { return sign; } |
|---|
| … | … | |
| 257 | 289 | biguintMul(r.data, x.data, y.data); |
|---|
| 258 | 290 | } |
|---|
| 259 | | // the highest element could be zero, in which case we need to reduce the length |
|---|
| | 291 | // the highest element could be zero, |
|---|
| | 292 | // in which case we need to reduce the length |
|---|
| 260 | 293 | if (r.data.length > 1 && r.data[$-1] == 0) { |
|---|
| 261 | 294 | r.data = r.data[0..$-1]; |
|---|
| 262 | 295 | } |
|---|
| | 296 | return r; |
|---|
| | 297 | } |
|---|
| | 298 | static BigInt divInternal(BigInt x, BigInt y) { |
|---|
| | 299 | if (x.isZero()) return x; |
|---|
| | 300 | BigInt r; |
|---|
| | 301 | r.sign = x.sign ^ y.sign; |
|---|
| | 302 | r.data = biguintDiv(x.data, y.data); |
|---|
| 263 | 303 | return r; |
|---|
| 264 | 304 | } |
|---|
| … | … | |
| 273 | 313 | r.sign = negResult; |
|---|
| 274 | 314 | r.data = biguintMulInt(x.data, y); |
|---|
| | 315 | if (r.data.length > 1 && r.data[$-1] == 0) { |
|---|
| | 316 | r.data = r.data[0..$-1]; |
|---|
| | 317 | } |
|---|
| 275 | 318 | return r; |
|---|
| 276 | 319 | } |
|---|
| r3884 |
r3900 |
|
| 112 | 112 | } |
|---|
| 113 | 113 | |
|---|
| 114 | | enum LogicOp : byte { AND, OR, XOR }; |
|---|
| 115 | | |
|---|
| 116 | | /** Dest[] = src1[] op src2[] |
|---|
| 117 | | * where op == AND,OR, or XOR |
|---|
| 118 | | */ |
|---|
| 119 | | void multibyteLogical(LogicOp op)(uint [] dest, uint [] src1, uint [] src2) |
|---|
| 120 | | { |
|---|
| 121 | | for (int i=0; i<dest.length;++i) { |
|---|
| 122 | | static if (op == LogicOp.AND) dest[i] = src1[i] & src2[i]; |
|---|
| 123 | | else static if (op == LogicOp.OR) dest[i] = src1[i] | src2[i]; |
|---|
| 124 | | else dest[i] = src1[i] ^ src2[i]; |
|---|
| 125 | | } |
|---|
| 126 | | } |
|---|
| 127 | | |
|---|
| 128 | | unittest |
|---|
| 129 | | { |
|---|
| 130 | | uint [] bb = [0x0F0F_0F0F, 0xF0F0_F0F0, 0x0F0F_0F0F, 0xF0F0_F0F0]; |
|---|
| 131 | | for (int qqq=0; qqq<3; ++qqq) { |
|---|
| 132 | | uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE]; |
|---|
| 133 | | |
|---|
| 134 | | switch(qqq) { |
|---|
| 135 | | case 0: |
|---|
| 136 | | multibyteLogical!(LogicOp.AND)(aa[1..3], aa[1..3], bb[0..4]); |
|---|
| 137 | | assert(aa[1]==0x0202_0203 && aa[2]==0x4050_5050 && aa[3]== 0x8999_999A); |
|---|
| 138 | | break; |
|---|
| 139 | | case 1: |
|---|
| 140 | | multibyteLogical!(LogicOp.OR)(aa[1..2], aa[1..2], bb[0..3]); |
|---|
| 141 | | assert(aa[1]==0x1F2F_2F2F && aa[2]==0x4555_5556 && aa[3]== 0x8999_999A); |
|---|
| 142 | | break; |
|---|
| 143 | | case 2: |
|---|
| 144 | | multibyteLogical!(LogicOp.XOR)(aa[1..2], aa[1..2], bb[0..3]); |
|---|
| 145 | | assert(aa[1]==0x1D2D_2D2C && aa[2]==0x4555_5556 && aa[3]== 0x8999_999A); |
|---|
| 146 | | break; |
|---|
| 147 | | default: |
|---|
| 148 | | assert(0); |
|---|
| 149 | | } |
|---|
| 150 | | |
|---|
| 151 | | assert(aa[0]==0xF0FF_FFFF); |
|---|
| 152 | | } |
|---|
| 153 | | } |
|---|
| 154 | | |
|---|
| 155 | 114 | /** dest[] = src[] << numbits |
|---|
| 156 | 115 | * numbits must be in the range 1..31 |
|---|
| … | … | |
| 226 | 185 | * Returns carry out of MSB (0..FFFF_FFFF). |
|---|
| 227 | 186 | */ |
|---|
| 228 | | uint multibyteMulAdd(uint [] dest, uint[] src, uint multiplier, uint carry) |
|---|
| | 187 | uint multibyteMulAdd(char op)(uint [] dest, uint[] src, uint multiplier, uint carry) |
|---|
| 229 | 188 | { |
|---|
| 230 | 189 | assert(dest.length == src.length); |
|---|
| 231 | 190 | ulong c = carry; |
|---|
| 232 | 191 | for(int i = 0; i < src.length; ++i){ |
|---|
| 233 | | c += cast(ulong)(multiplier) * src[i] + dest[i]; |
|---|
| 234 | | dest[i] = cast(uint)c; |
|---|
| 235 | | c >>= 32; |
|---|
| | 192 | static if(op=='+') { |
|---|
| | 193 | c += cast(ulong)(multiplier) * src[i] + dest[i]; |
|---|
| | 194 | dest[i] = cast(uint)c; |
|---|
| | 195 | c >>= 32; |
|---|
| | 196 | } else { |
|---|
| | 197 | c += cast(ulong)multiplier * src[i]; |
|---|
| | 198 | ulong t = cast(ulong)dest[i] - cast(uint)c; |
|---|
| | 199 | dest[i] = cast(uint)t; |
|---|
| | 200 | c = cast(uint)((c>>32) - (t>>32)); |
|---|
| | 201 | } |
|---|
| 236 | 202 | } |
|---|
| 237 | 203 | return cast(uint)c; |
|---|
| … | … | |
| 242 | 208 | uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE]; |
|---|
| 243 | 209 | uint [] bb = [0x1234_1234, 0xF0F0_F0F0, 0x00C0_C0C0, 0xF0F0_F0F0, 0xC0C0_C0C0]; |
|---|
| 244 | | multibyteMulAdd(bb[1..$-1], aa[1..$-2], 16, 5); |
|---|
| | 210 | multibyteMulAdd!('+')(bb[1..$-1], aa[1..$-2], 16, 5); |
|---|
| 245 | 211 | assert(bb[0] == 0x1234_1234 && bb[4] == 0xC0C0_C0C0); |
|---|
| 246 | 212 | assert(bb[1] == 0x2222_2230 + 0xF0F0_F0F0+5 && bb[2] == 0x5555_5561+0x00C0_C0C0+1 |
|---|
| … | … | |
| 263 | 229 | { |
|---|
| 264 | 230 | for (int i = 0; i< right.length; ++i) { |
|---|
| 265 | | dest[left.length + i] = multibyteMulAdd(dest[i..left.length+i], |
|---|
| | 231 | dest[left.length + i] = multibyteMulAdd!('+')(dest[i..left.length+i], |
|---|
| 266 | 232 | left, right[i], 0); |
|---|
| 267 | 233 | } |
|---|
| r3884 |
r3900 |
|
| 22 | 22 | * uses the basic instruction set (doesn't use MMX, SSE, SSE2) |
|---|
| 23 | 23 | * Generally the code remains near-optimal for Core2, after translating |
|---|
| 24 | | * EAX-> RAX, etc, since all these CPUs use essentially the same pipeline. |
|---|
| 25 | | * The code uses techniques described in Agner Fog's superb manuals available |
|---|
| 26 | | * at www.agner.org. |
|---|
| | 24 | * EAX-> RAX, etc, since all these CPUs use essentially the same pipeline, and |
|---|
| | 25 | * are typically limited by memory access. |
|---|
| | 26 | * The code uses techniques described in Agner Fog's superb Pentium manuals |
|---|
| | 27 | * available at www.agner.org. |
|---|
| 27 | 28 | * Not optimal for AMD64, which can do two memory loads per cycle (Intel |
|---|
| 28 | 29 | * CPUs can only do one). Division is far from optimal. |
|---|
| … | … | |
| 31 | 32 | * PentiumM Core2 |
|---|
| 32 | 33 | * +,- 2.25 2.25 |
|---|
| 33 | | * &,|,^ 2.0 2.0 |
|---|
| 34 | 34 | * <<,>> 2.0 2.0 |
|---|
| 35 | 35 | * cmp 2.0 2.0 |
|---|
| … | … | |
| 37 | 37 | * mulAdd 5.4 |
|---|
| 38 | 38 | * div 18.0 |
|---|
| 39 | | * |
|---|
| 40 | 39 | */ |
|---|
| 41 | 40 | |
|---|
| … | … | |
| 228 | 227 | } |
|---|
| 229 | 228 | } |
|---|
| 230 | | |
|---|
| 231 | | enum LogicOp : byte { AND, OR, XOR }; |
|---|
| 232 | | |
|---|
| 233 | | /** Dest[#] = src1[#] op src2[#] |
|---|
| 234 | | * where op == AND,OR, or XOR |
|---|
| 235 | | */ |
|---|
| 236 | | void multibyteLogical(LogicOp op)(uint [] dest, uint [] src1, uint [] src2) |
|---|
| 237 | | { |
|---|
| 238 | | // PM: 2 cycles/operation. Limited by execution unit p2. |
|---|
| 239 | | // (AMD64 could reach 1.5 cycles/operation since it has TWO read ports. |
|---|
| 240 | | // On Core2, we could use SSE2 with 128-bit reads). |
|---|
| 241 | | enum { LASTPARAM = 3*4 } // 2* pushes + return address. |
|---|
| 242 | | asm { |
|---|
| 243 | | naked; |
|---|
| 244 | | push EDI; |
|---|
| 245 | | push ESI; |
|---|
| 246 | | mov EDI, [ESP + LASTPARAM + 4*5]; // dest.ptr |
|---|
| 247 | | mov ECX, [ESP + LASTPARAM + 4*4]; // dest.length; |
|---|
| 248 | | mov EDX, [ESP + LASTPARAM + 4*3]; // src1.ptr; |
|---|
| 249 | | mov ESI, [ESP + LASTPARAM + 4*1]; // src2.ptr |
|---|
| 250 | | lea EDI, [EDI + 4*ECX]; // EDI = end of dest. |
|---|
| 251 | | lea EDX, [EDX + 4*ECX]; // EDX = end of src1. |
|---|
| 252 | | lea ESI, [ESI + 4*ECX]; // ESI = end of src2. |
|---|
| 253 | | neg ECX; |
|---|
| 254 | | L1: |
|---|
| 255 | | mov EAX, [EDX+ECX*4]; |
|---|
| 256 | | } |
|---|
| 257 | | static if (op == LogicOp.AND) asm { and EAX, [ESI+ECX*4]; } |
|---|
| 258 | | else if (op == LogicOp.OR) asm { or EAX, [ESI+ECX*4]; } |
|---|
| 259 | | else if (op == LogicOp.XOR) asm { xor EAX, [ESI+ECX*4]; } |
|---|
| 260 | | asm { |
|---|
| 261 | | mov [EDI + ECX *4], EAX; |
|---|
| 262 | | add ECX, 1; |
|---|
| 263 | | jl L1; |
|---|
| 264 | | |
|---|
| 265 | | pop ESI; |
|---|
| 266 | | pop EDI; |
|---|
| 267 | | ret 6*4; |
|---|
| 268 | | } |
|---|
| 269 | | } |
|---|
| 270 | | |
|---|
| 271 | | unittest |
|---|
| 272 | | { |
|---|
| 273 | | uint [] bb = [0x0F0F_0F0F, 0xF0F0_F0F0, 0x0F0F_0F0F, 0xF0F0_F0F0]; |
|---|
| 274 | | for (int qqq=0; qqq<3; ++qqq) { |
|---|
| 275 | | uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE]; |
|---|
| 276 | | |
|---|
| 277 | | switch(qqq) { |
|---|
| 278 | | case 0: |
|---|
| 279 | | multibyteLogical!(LogicOp.AND)(aa[1..3], aa[1..3], bb[0..4]); |
|---|
| 280 | | assert(aa[1]==0x0202_0203 && aa[2]==0x4050_5050 && aa[3]== 0x8999_999A); |
|---|
| 281 | | break; |
|---|
| 282 | | case 1: |
|---|
| 283 | | multibyteLogical!(LogicOp.OR)(aa[1..2], aa[1..2], bb[0..3]); |
|---|
| 284 | | assert(aa[1]==0x1F2F_2F2F && aa[2]==0x4555_5556 && aa[3]== 0x8999_999A); |
|---|
| 285 | | break; |
|---|
| 286 | | case 2: |
|---|
| 287 | | multibyteLogical!(LogicOp.XOR)(aa[1..2], aa[1..2], bb[0..3]); |
|---|
| 288 | | assert(aa[1]==0x1D2D_2D2C && aa[2]==0x4555_5556 && aa[3]== 0x8999_999A); |
|---|
| 289 | | break; |
|---|
| 290 | | default: |
|---|
| 291 | | assert(0); |
|---|
| 292 | | } |
|---|
| 293 | | |
|---|
| 294 | | assert(aa[0]==0xF0FF_FFFF); |
|---|
| 295 | | } |
|---|
| 296 | | } |
|---|
| 297 | 229 | |
|---|
| 298 | 230 | /** dest[#] = src[#] << numbits |
|---|
| … | … | |
| 317 | 249 | |
|---|
| 318 | 250 | mov EAX, [-4+ESI + 4*EBX]; |
|---|
| | 251 | mov EDX, 0; |
|---|
| | 252 | shld EDX, EAX, CL; |
|---|
| | 253 | push EDX; // Save return value |
|---|
| 319 | 254 | cmp EBX, 1; |
|---|
| 320 | 255 | jz L_last; |
|---|
| … | … | |
| 334 | 269 | jg L_even; |
|---|
| 335 | 270 | L_last: |
|---|
| 336 | | mov EDX, 0; |
|---|
| 337 | | shld EAX, EDX, CL; |
|---|
| | 271 | shl EAX, CL; |
|---|
| 338 | 272 | mov [EDI], EAX; |
|---|
| 339 | | mov EAX, EDX; |
|---|
| | 273 | pop EAX; // pop return value |
|---|
| 340 | 274 | pop EBX; |
|---|
| 341 | 275 | pop EDI; |
|---|
| … | … | |
| 408 | 342 | |
|---|
| 409 | 343 | aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE]; |
|---|
| 410 | | multibyteShl(aa[1..4], aa[1..$], 4); |
|---|
| | 344 | uint r = multibyteShl(aa[1..4], aa[1..$], 4); |
|---|
| 411 | 345 | assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230 |
|---|
| 412 | | && aa[2]==0x5555_5561 && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD); |
|---|
| | 346 | && aa[2]==0x5555_5561); |
|---|
| | 347 | assert(aa[3]==0x9999_99A4 && aa[4]==0xBCCC_CCCD); |
|---|
| | 348 | assert(r==8); |
|---|
| 413 | 349 | } |
|---|
| 414 | 350 | |
|---|
| … | … | |
| 484 | 420 | |
|---|
| 485 | 421 | /** |
|---|
| 486 | | * dest[#] += src[#] * multiplier + carry(0..FFFF_FFFF). |
|---|
| | 422 | * dest[#] += src[#] * multiplier OP carry(0..FFFF_FFFF). |
|---|
| | 423 | * where op == '+' or '-' |
|---|
| 487 | 424 | * Returns carry out of MSB (0..FFFF_FFFF). |
|---|
| 488 | 425 | */ |
|---|
| 489 | | uint multibyteMulAdd(uint [] dest, uint[] src, uint multiplier, uint carry) |
|---|
| 490 | | { |
|---|
| | 426 | uint multibyteMulAdd(char op)(uint [] dest, uint[] src, uint multiplier, uint carry) |
|---|
| | 427 | { |
|---|
| | 428 | static if (op=='-') { |
|---|
| | 429 | /* This is equivalent to: |
|---|
| | 430 | --- |
|---|
| | 431 | uint [] tmp = new uint[src.length]; |
|---|
| | 432 | uint c = multibyteMul(tmp, src, multiplier, carry); |
|---|
| | 433 | return c + multibyteAddSub!('-')(dest, dest, tmp, 0); |
|---|
| | 434 | --- |
|---|
| | 435 | */ |
|---|
| | 436 | ulong c = carry; |
|---|
| | 437 | for (int i = 0; i < src.length; i++) { |
|---|
| | 438 | c += cast(ulong)multiplier * src[i]; |
|---|
| | 439 | ulong t = cast(ulong)dest[i] - cast(uint)c; |
|---|
| | 440 | dest[i] = cast(uint)t; |
|---|
| | 441 | c = cast(uint)((c>>32) - (t>>32)); |
|---|
| | 442 | } |
|---|
| | 443 | return cast(uint)c; |
|---|
| | 444 | } else { |
|---|
| | 445 | |
|---|
| | 446 | |
|---|
| 491 | 447 | // Timing: This is the most time-critical bignum function. |
|---|
| 492 | 448 | // Pentium M: 5.4 cycles/operation, still has 2 resource stalls + 1 load block/iteration |
|---|
| … | … | |
| 613 | 569 | |
|---|
| 614 | 570 | } |
|---|
| | 571 | }// op=='+' |
|---|
| 615 | 572 | } |
|---|
| 616 | 573 | |
|---|
| … | … | |
| 619 | 576 | uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE]; |
|---|
| 620 | 577 | uint [] bb = [0x1234_1234, 0xF0F0_F0F0, 0x00C0_C0C0, 0xF0F0_F0F0, 0xC0C0_C0C0]; |
|---|
| 621 | | multibyteMulAdd(bb[1..$-1], aa[1..$-2], 16, 5); |
|---|
| | 578 | multibyteMulAdd!('+')(bb[1..$-1], aa[1..$-2], 16, 5); |
|---|
| 622 | 579 | assert(bb[0] == 0x1234_1234 && bb[4] == 0xC0C0_C0C0); |
|---|
| 623 | 580 | assert(bb[1] == 0x2222_2230 + 0xF0F0_F0F0+5 && bb[2] == 0x5555_5561+0x00C0_C0C0+1 |
|---|
| r3884 |
r3900 |
|
| 20 | 20 | |
|---|
| 21 | 21 | private: |
|---|
| 22 | | uint [1] BIGINTZERO = [0]; |
|---|
| | 22 | /*invariant */ uint [1] BIGINTZERO = [0]; |
|---|
| 23 | 23 | public: |
|---|
| 24 | 24 | |
|---|
| … | … | |
| 26 | 26 | int biguintCompare(uint [] x, uint []y) |
|---|
| 27 | 27 | { |
|---|
| 28 | | uint k = lastDifferentDigit(x, y); |
|---|
| | 28 | uint k = highestDifferentDigit(x, y); |
|---|
| 29 | 29 | if (x[k] == y[k]) return 0; |
|---|
| 30 | 30 | return x[k] > y[k] ? 1 : -1; |
|---|
| … | … | |
| 69 | 69 | } |
|---|
| 70 | 70 | |
|---|
| 71 | | |
|---|
| 72 | 71 | /** General unsigned subtraction routine for bigints. |
|---|
| 73 | 72 | * Sets result = x - y. If the result is negative, negative will be true. |
|---|
| … | … | |
| 77 | 76 | if (x.length == y.length) { |
|---|
| 78 | 77 | // There's a possibility of cancellation, if x and y are almost equal. |
|---|
| 79 | | int last = lastDifferentDigit(x, y); |
|---|
| | 78 | int last = highestDifferentDigit(x, y); |
|---|
| 80 | 79 | uint [] result = new uint[last+1]; |
|---|
| 81 | 80 | if (x[last] < y[last]) { // we know result is negative |
|---|
| … | … | |
| 86 | 85 | *negative = false; |
|---|
| 87 | 86 | } |
|---|
| | 87 | if (result.length >1 && result[$-1]==0) return result[0..$-1]; |
|---|
| 88 | 88 | return result; |
|---|
| 89 | 89 | } |
|---|
| … | … | |
| 101 | 101 | uint [] result = new uint[large.length]; |
|---|
| 102 | 102 | uint carry = multibyteAddSub!('-')(result[0..small.length], large[0..small.length], small, 0); |
|---|
| 103 | | result[small.length..$-1] = large[small.length..$]; |
|---|
| | 103 | result[small.length..$] = large[small.length..$]; |
|---|
| 104 | 104 | if (carry) { |
|---|
| 105 | 105 | multibyteIncrementAssign!('-')(result[small.length..$-1], carry); |
|---|
| 106 | | if (result[$-1]==0) return result[0..$-1]; |
|---|
| 107 | | } |
|---|
| | 106 | } |
|---|
| | 107 | if (result.length >1 && result[$-1]==0) return result[0..$-1]; |
|---|
| 108 | 108 | return result; |
|---|
| 109 | 109 | } |
|---|
| … | … | |
| 173 | 173 | result[x.length] = multibyteMul(result[0..x.length], x, lo, 0); |
|---|
| 174 | 174 | if (hi!=0) { |
|---|
| 175 | | result[x.length+1] = multibyteMulAdd(result[1..x.length+1], x, hi, 0); |
|---|
| | 175 | result[x.length+1] = multibyteMulAdd!('+')(result[1..x.length+1], x, hi, 0); |
|---|
| 176 | 176 | } |
|---|
| 177 | 177 | return result; |
|---|
| … | … | |
| 295 | 295 | uint [] result = new uint[x.length]; |
|---|
| 296 | 296 | if ((y&(-y))==y) { |
|---|
| | 297 | assert(y!=0); |
|---|
| 297 | 298 | // perfect power of 2 |
|---|
| 298 | 299 | uint b = 0; |
|---|
| … | … | |
| 308 | 309 | return result[0..$-1]; |
|---|
| 309 | 310 | } else return result; |
|---|
| | 311 | } |
|---|
| | 312 | |
|---|
| | 313 | // return x%y |
|---|
| | 314 | uint biguintModInt(uint [] x, uint y) { |
|---|
| | 315 | assert(y!=0); |
|---|
| | 316 | if (y&(-y)==y) { // perfect power of 2 |
|---|
| | 317 | return x[0]&(y-1); |
|---|
| | 318 | } else { |
|---|
| | 319 | // horribly inefficient - malloc, copy, & store are unnecessary. |
|---|
| | 320 | uint [] wasteful = new uint[x.length]; |
|---|
| | 321 | wasteful[] = x[]; |
|---|
| | 322 | uint rem = multibyteDivAssign(wasteful, y, 0); |
|---|
| | 323 | delete wasteful; |
|---|
| | 324 | return rem; |
|---|
| | 325 | } |
|---|
| | 326 | } |
|---|
| | 327 | |
|---|
| | 328 | uint [] biguintDiv(uint [] x, uint [] y) |
|---|
| | 329 | { |
|---|
| | 330 | if (y.length > x.length) return BIGINTZERO; |
|---|
| | 331 | if (y.length == 1) return biguintDivInt(x, y[0]); |
|---|
| | 332 | uint [] result = new uint[x.length - y.length + 1]; |
|---|
| | 333 | schoolbookDivMod(result, null, x, y); |
|---|
| | 334 | if (result.length>1 && result[$-1]==0) result=result[0..$-1]; |
|---|
| | 335 | return result; |
|---|
| 310 | 336 | } |
|---|
| 311 | 337 | |
|---|
| … | … | |
| 386 | 412 | uint hi = 0; |
|---|
| 387 | 413 | data[0] = 0; // initially number is 0. |
|---|
| | 414 | data[1]=0; |
|---|
| 388 | 415 | |
|---|
| 389 | 416 | for (int i= (s[0]=='-' || s[0]=='+')? 1 : 0; i<s.length; ++i) { |
|---|
| … | … | |
| 414 | 441 | uint c = multibyteIncrementAssign!('+')(data[0..hi], cast(uint)(y&0xFFFF_FFFF)); |
|---|
| 415 | 442 | c += multibyteIncrementAssign!('+')(data[1..hi], cast(uint)(y>>32)); |
|---|
| 416 | | if (c!=0) { data[hi]=c; ++hi; } |
|---|
| | 443 | if (c!=0) { |
|---|
| | 444 | data[hi]=c; |
|---|
| | 445 | ++hi; |
|---|
| | 446 | } |
|---|
| 417 | 447 | y = 0; |
|---|
| 418 | 448 | lo = 0; |
|---|
| … | … | |
| 420 | 450 | } |
|---|
| 421 | 451 | // Now set y = all remaining digits. |
|---|
| 422 | | if (lo>=18) { |
|---|
| | 452 | if (lo>=18) { |
|---|
| 423 | 453 | } else if (lo>=9) { |
|---|
| 424 | 454 | for (int k=9; k<lo; ++k) y*=10; |
|---|
| … | … | |
| 429 | 459 | } |
|---|
| 430 | 460 | if (y!=0) { |
|---|
| 431 | | if (hi==0) { *cast(ulong *)(&data[hi]) = y; hi=2; } |
|---|
| 432 | | else { |
|---|
| 433 | | |
|---|
| 434 | | while (lo>0) { |
|---|
| 435 | | uint c = multibyteMul(data[0..hi], data[0..hi], 10, 0); |
|---|
| 436 | | if (c!=0) { data[hi]=c; ++hi; } |
|---|
| 437 | | --lo; |
|---|
| 438 | | } |
|---|
| | 461 | if (hi==0) { |
|---|
| | 462 | *cast(ulong *)(&data[hi]) = y; |
|---|
| | 463 | hi=2; |
|---|
| | 464 | } else { |
|---|
| | 465 | while (lo>0) { |
|---|
| | 466 | uint c = multibyteMul(data[0..hi], data[0..hi], 10, 0); |
|---|
| | 467 | if (c!=0) { data[hi]=c; ++hi; } |
|---|
| | 468 | --lo; |
|---|
| | 469 | } |
|---|
| 439 | 470 | uint c = multibyteIncrementAssign!('+')(data[0..hi], cast(uint)(y&0xFFFF_FFFF)); |
|---|
| 440 | 471 | if (y>0xFFFF_FFFFL) { |
|---|
| … | … | |
| 442 | 473 | } |
|---|
| 443 | 474 | if (c!=0) { data[hi]=c; ++hi; } |
|---|
| 444 | | hi+=2; |
|---|
| 445 | | } |
|---|
| 446 | | } |
|---|
| | 475 | // hi+=2; |
|---|
| | 476 | } |
|---|
| | 477 | } |
|---|
| | 478 | if (hi>1 && data[hi-1]==0) --hi; |
|---|
| 447 | 479 | return hi; |
|---|
| 448 | 480 | } |
|---|
| … | … | |
| 619 | 651 | } |
|---|
| 620 | 652 | |
|---|
| | 653 | import std.intrinsic; |
|---|
| | 654 | |
|---|
| | 655 | |
|---|
| | 656 | /* Knuth's Algorithm D, as presented in "Hacker's Delight" |
|---|
| | 657 | * given u and v, calculates quotient = u/v, remainder = u%v. |
|---|
| | 658 | */ |
|---|
| | 659 | //import tango.stdc.stdio; |
|---|
| | 660 | |
|---|
| | 661 | // given u and v, calculates quotient = u/v, remainder = u%v. |
|---|
| | 662 | void schoolbookDivMod(uint [] quotient, uint[] remainder, uint [] u, uint [] v) { |
|---|
| | 663 | assert(quotient.length == u.length - v.length + 1); |
|---|
| | 664 | assert(remainder==null || remainder.length == v.length); |
|---|
| | 665 | assert(v.length > 1); |
|---|
| | 666 | assert(u.length >= v.length); |
|---|
| | 667 | assert(v[$-1]!=0); |
|---|
| | 668 | |
|---|
| | 669 | // Normalize by shifting v left just enough so that |
|---|
| | 670 | // its high-order bit is on, and shift u left the |
|---|
| | 671 | // same amount. |
|---|
| | 672 | |
|---|
| | 673 | uint [] vn = new uint[v.length]; |
|---|
| | 674 | uint [] un = new uint[u.length + 1]; |
|---|
| | 675 | // How much to left shift v, so that its MSB is set. |
|---|
| | 676 | uint s = 31 - bsr(v[$-1]); |
|---|
| | 677 | multibyteShl(vn, v, s); |
|---|
| | 678 | un[$-1] = multibyteShl(un[0..$-1], u, s); |
|---|
| | 679 | for (int j = u.length - v.length; j >= 0; j--) { |
|---|
| | 680 | // Compute estimate qhat of quotient[j]. |
|---|
| | 681 | ulong bigqhat, rhat; |
|---|
| | 682 | bigqhat = ( (cast(ulong)(un[j+v.length])<<32) + un[j+v.length-1])/vn[$-1]; |
|---|
| | 683 | rhat = ((cast(ulong)(un[j+v.length])<<32) + un[j+v.length-1]) - bigqhat*vn[$-1]; |
|---|
| | 684 | again: |
|---|
| | 685 | if (bigqhat & 0xFFFF_FFFF_0000_0000L |
|---|
| | 686 | || bigqhat*vn[$-2] > 0x1_0000_0000L*rhat + un[j+v.length-2]) { |
|---|
| | 687 | --bigqhat; |
|---|
| | 688 | rhat += vn[$-1]; |
|---|
| | 689 | if (rhat < 0x1_0000_0000L) goto again; |
|---|
| | 690 | } |
|---|
| | 691 | assert(bigqhat < 0x1_0000_0000L); |
|---|
| | 692 | uint qhat = cast(uint)bigqhat; |
|---|
| | 693 | |
|---|
| | 694 | // Multiply and subtract. |
|---|
| | 695 | uint carry = multibyteMulAdd!('-')(un[j..j+v.length], vn, qhat, 0); |
|---|
| | 696 | |
|---|
| | 697 | if (un[j+v.length] < carry) { |
|---|
| | 698 | // If we subtracted too much, add back |
|---|
| | 699 | --qhat; |
|---|
| | 700 | carry -= multibyteAddSub!('+')(un[j..j+v.length],un[j..j+v.length], vn, 0); |
|---|
| | 701 | } |
|---|
| | 702 | quotient[j] = qhat; |
|---|
| | 703 | un[j+v.length] = un[j+v.length] - carry; |
|---|
| | 704 | assert(un[j+v.length] == 0); |
|---|
| | 705 | } |
|---|
| | 706 | // Unnormalize remainder, if required. |
|---|
| | 707 | if (remainder != null) { |
|---|
| | 708 | multibyteShr(remainder, un, s); |
|---|
| | 709 | } |
|---|
| | 710 | } |
|---|
| 621 | 711 | |
|---|
| 622 | 712 | private: |
|---|
| | 713 | // TODO: Replace with a library call |
|---|
| 623 | 714 | void itoaZeroPadded(char[] output, uint value, int radix = 10) { |
|---|
| 624 | 715 | int x = output.length - 1; |
|---|
| … | … | |
| 642 | 733 | // Returns the highest value of i for which left[i]!=right[i], |
|---|
| 643 | 734 | // or 0 if left[]==right[] |
|---|
| 644 | | int lastDifferentDigit(uint [] left, uint [] right) |
|---|
| | 735 | int highestDifferentDigit(uint [] left, uint [] right) |
|---|
| 645 | 736 | { |
|---|
| 646 | 737 | assert(left.length == right.length); |
|---|
Download in other formats:
|
 |