 |
Changeset 3911
- Timestamp: 08/25/08 02:53:21 (3 months ago)
- Author: Don Clugston
- Message: Asm implementation of subMul. This speeds up division enormously.
-
Files:
-
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
| r3900 |
r3911 |
|
| 426 | 426 | uint multibyteMulAdd(char op)(uint [] dest, uint[] src, uint multiplier, uint carry) |
|---|
| 427 | 427 | { |
|---|
| 428 | | static if (op=='-') { |
|---|
| 429 | | /* This is equivalent to: |
|---|
| 430 | | --- |
|---|
| 431 | | uint [] tmp = new uint[src.length]; |
|---|
| 432 | | uint c = multibyteMul(tmp, src, multiplier, carry); |
|---|
| 433 | | return c + multibyteAddSub!('-')(dest, dest, tmp, 0); |
|---|
| 434 | | --- |
|---|
| 435 | | */ |
|---|
| 436 | | ulong c = carry; |
|---|
| 437 | | for (int i = 0; i < src.length; i++) { |
|---|
| 438 | | c += cast(ulong)multiplier * src[i]; |
|---|
| 439 | | ulong t = cast(ulong)dest[i] - cast(uint)c; |
|---|
| 440 | | dest[i] = cast(uint)t; |
|---|
| 441 | | c = cast(uint)((c>>32) - (t>>32)); |
|---|
| 442 | | } |
|---|
| 443 | | return cast(uint)c; |
|---|
| 444 | | } else { |
|---|
| 445 | | |
|---|
| 446 | | |
|---|
| 447 | 428 | // Timing: This is the most time-critical bignum function. |
|---|
| 448 | 429 | // Pentium M: 5.4 cycles/operation, still has 2 resource stalls + 1 load block/iteration |
|---|
| … | … | |
| 467 | 448 | // ESI = src |
|---|
| 468 | 449 | |
|---|
| | 450 | const char [] OP = (op=='+')? "add" : "sub"; |
|---|
| 469 | 451 | version(D_PIC) { |
|---|
| 470 | 452 | enum { zero = 0 } |
|---|
| … | … | |
| 478 | 460 | |
|---|
| 479 | 461 | enum { LASTPARAM = 5*4 } // 4* pushes + return address. |
|---|
| | 462 | mixin(" |
|---|
| 480 | 463 | asm { |
|---|
| 481 | 464 | naked; |
|---|
| … | … | |
| 510 | 493 | |
|---|
| 511 | 494 | mul int ptr [ESP+LASTPARAM]; |
|---|
| 512 | | |
|---|
| 513 | | add [-4+EDI+4*EBX], EBP; |
|---|
| | 495 | " ~ OP ~ " [-4+EDI+4*EBX], EBP; |
|---|
| 514 | 496 | mov EBP, zero; |
|---|
| 515 | 497 | |
|---|
| … | … | |
| 523 | 505 | L1: |
|---|
| 524 | 506 | mul int ptr [ESP+LASTPARAM]; |
|---|
| 525 | | add [-8+EDI+4*EBX], ECX; |
|---|
| | 507 | " ~ OP ~ " [-8+EDI+4*EBX], ECX; |
|---|
| 526 | 508 | mov ECX, zero; |
|---|
| 527 | 509 | |
|---|
| … | … | |
| 538 | 520 | asm { |
|---|
| 539 | 521 | mul int ptr [ESP+LASTPARAM]; |
|---|
| 540 | | add [-4+EDI+4*EBX], EBP; |
|---|
| | 522 | " ~ OP ~ " [-4+EDI+4*EBX], EBP; |
|---|
| 541 | 523 | mov EBP, zero; |
|---|
| 542 | 524 | |
|---|
| … | … | |
| 547 | 529 | add EBX, 2; |
|---|
| 548 | 530 | jl L1; |
|---|
| 549 | | L_done: |
|---|
| 550 | | add [-8+EDI+4*EBX], ECX; |
|---|
| | 531 | L_done: " ~ OP ~ " [-8+EDI+4*EBX], ECX; |
|---|
| 551 | 532 | mov EAX, EBP; // get final carry |
|---|
| 552 | 533 | adc EAX, 0; |
|---|
| … | … | |
| 567 | 548 | jl L1; |
|---|
| 568 | 549 | jmp L_done; |
|---|
| 569 | | |
|---|
| 570 | | } |
|---|
| 571 | | }// op=='+' |
|---|
| | 550 | } "); |
|---|
| 572 | 551 | } |
|---|
| 573 | 552 | |
|---|
Download in other formats:
|
 |
 |
|
 |
Copyright © 2006-2008 Tango. All Rights Reserved. | Page Width:
Static or
Dynamic