Changeset 577
- Timestamp:
- 07/13/10 20:22:22 (14 years ago)
- Files:
-
- branches/dmd-1.x/src/backend/cdef.h (modified) (1 diff)
- branches/dmd-1.x/src/backend/cg87.c (modified) (14 diffs)
- branches/dmd-1.x/src/backend/cg.c (modified) (1 diff)
- branches/dmd-1.x/src/backend/cgcod.c (modified) (2 diffs)
- branches/dmd-1.x/src/backend/cgen.c (modified) (2 diffs)
- branches/dmd-1.x/src/backend/cod1.c (modified) (23 diffs)
- branches/dmd-1.x/src/backend/cod2.c (modified) (27 diffs)
- branches/dmd-1.x/src/backend/cod3.c (modified) (12 diffs)
- branches/dmd-1.x/src/backend/cod4.c (modified) (24 diffs)
- branches/dmd-1.x/src/backend/code.c (modified) (1 diff)
- branches/dmd-1.x/src/backend/code.h (modified) (7 diffs)
- branches/dmd-1.x/src/backend/iasm.h (modified) (1 diff)
- branches/dmd-1.x/src/backend/ptrntab.c (modified) (1 diff)
- branches/dmd-1.x/src/freebsd.mak (modified) (1 diff)
- branches/dmd-1.x/src/iasm.c (modified) (6 diffs)
- branches/dmd-1.x/src/linux.mak (modified) (1 diff)
- branches/dmd-1.x/src/osx.mak (modified) (1 diff)
- branches/dmd-1.x/src/solaris.mak (modified) (1 diff)
- trunk/src/backend/cdef.h (modified) (1 diff)
- trunk/src/backend/cg87.c (modified) (14 diffs)
- trunk/src/backend/cg.c (modified) (1 diff)
- trunk/src/backend/cgcod.c (modified) (2 diffs)
- trunk/src/backend/cgen.c (modified) (2 diffs)
- trunk/src/backend/cod1.c (modified) (23 diffs)
- trunk/src/backend/cod2.c (modified) (27 diffs)
- trunk/src/backend/cod3.c (modified) (12 diffs)
- trunk/src/backend/cod4.c (modified) (24 diffs)
- trunk/src/backend/code.c (modified) (1 diff)
- trunk/src/backend/code.h (modified) (7 diffs)
- trunk/src/backend/iasm.h (modified) (1 diff)
- trunk/src/backend/ptrntab.c (modified) (1 diff)
- trunk/src/freebsd.mak (modified) (1 diff)
- trunk/src/iasm.c (modified) (5 diffs)
- trunk/src/linux.mak (modified) (1 diff)
- trunk/src/osx.mak (modified) (1 diff)
- trunk/src/solaris.mak (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/dmd-1.x/src/backend/cdef.h
r569 r577 542 542 #define LARGECODE (config.memmodel & 5) 543 543 544 544 #define Smodel 0 /* 64k code, 64k data */ 545 545 #define Mmodel 1 /* large code, 64k data */ 546 546 #define Cmodel 2 /* 64k code, large data */ 547 547 #define Lmodel 3 /* large code, large data */ 548 548 #define Vmodel 4 /* large code, large data, vcm */ 549 549 #define MEMMODELS 5 /* number of memory models */ 550 550 #endif 551 551 552 552 /* Segments */ 553 553 #define CODE 1 /* code segment */ 554 554 #define DATA 2 /* initialized data */ 555 555 #define CDATA 3 /* constant data */ 556 556 #define UDATA 4 /* uninitialized data */ 557 557 #define UNKNOWN 0x7FFF // unknown segment 558 558 #define DGROUPIDX 1 /* group index of DGROUP */ 559 559 560 560 #define KEEPBITFIELDS 0 /* 0 means code generator cannot handle bit fields, */ 561 561 /* so replace them with shifts and masks */ 562 563 #if TARGET_OSX564 #define STACKALIGN 16565 #else566 #define STACKALIGN 0567 #endif568 562 569 563 #define REGMAX 29 // registers are numbered 0..10 570 564 571 565 typedef unsigned tym_t; // data type big enough for type masks 572 566 typedef int SYMIDX; // symbol table index 573 567 574 568 #if 0 575 569 #if defined(__DMC__) && __DMC__ < 0x81e 576 570 typedef int bool; 577 571 #endif 578 572 #define bool int 579 573 #endif 580 574 581 575 #define _chkstack() (void)0 582 576 583 577 /* For 32 bit compilations, we don't need far keyword */ 584 578 #if 1 585 579 #define far 586 580 #define _far 587 581 #define __far branches/dmd-1.x/src/backend/cg87.c
r569 r577 1 1 // Copyright (C) 1987-1995 by Symantec 2 // Copyright (C) 2000-20 09by Digital Mars2 // Copyright (C) 2000-2010 by Digital Mars 3 3 // All Rights Reserved 4 4 // http://www.digitalmars.com 5 5 // Written by Walter Bright 6 6 /* 7 7 * This source file is made available for personal use 8 8 * only. The license is in /dmd/src/dmd/backendlicense.txt 9 9 * or /dm/src/dmd/backendlicense.txt 10 10 * For any other uses, please contact Digital Mars. 11 11 */ 12 12 13 13 #if !SPP 14 14 15 15 #include <stdio.h> 16 16 #include <string.h> 17 17 #include <time.h> 18 18 #include <math.h> 19 19 #include "cc.h" 20 20 #include "el.h" 21 21 #include "oper.h" 22 22 #include "code.h" … … 80 80 /********************************** 81 81 * When we need to temporarilly save 8087 registers, we record information 82 82 * about the save into an array of NDP structs: 83 83 */ 84 84 85 85 NDP *NDP::save = NULL; 86 86 int NDP::savemax = 0; /* # of entries in NDP::save[] */ 87 87 int NDP::savetop = 0; /* # of entries used in NDP::save[] */ 88 88 89 89 #ifdef DEBUG 90 90 #define NDPSAVEINC 2 /* flush reallocation bugs */ 91 91 #else 92 92 #define NDPSAVEINC 8 /* allocation chunk sizes */ 93 93 #endif 94 94 95 95 /**************************************** 96 96 * Store/load to ndp save location i 97 97 */ 98 98 99 99 code *ndp_fstp(code *c, int i, tym_t ty) 100 { 100 { unsigned grex = I64 ? 
(REX_W << 16) : 0; 101 101 switch (tybasic(ty)) 102 102 { 103 103 case TYfloat: 104 104 case TYifloat: 105 105 case TYcfloat: 106 c = genc1(c,0xD9, modregrm(2,3,BPRM),FLndp,i); // FSTP m32real i[BP]106 c = genc1(c,0xD9,grex | modregrm(2,3,BPRM),FLndp,i); // FSTP m32real i[BP] 107 107 break; 108 108 109 109 case TYdouble: 110 110 case TYdouble_alias: 111 111 case TYidouble: 112 112 case TYcdouble: 113 c = genc1(c,0xDD, modregrm(2,3,BPRM),FLndp,i); // FSTP m64real i[BP]113 c = genc1(c,0xDD,grex | modregrm(2,3,BPRM),FLndp,i); // FSTP m64real i[BP] 114 114 break; 115 115 116 116 case TYldouble: 117 117 case TYildouble: 118 118 case TYcldouble: 119 c = genc1(c,0xDB, modregrm(2,7,BPRM),FLndp,i); // FSTP m80real i[BP]119 c = genc1(c,0xDB,grex | modregrm(2,7,BPRM),FLndp,i); // FSTP m80real i[BP] 120 120 break; 121 121 122 122 default: 123 123 assert(0); 124 124 } 125 125 return c; 126 126 } 127 127 128 128 code *ndp_fld(code *c, int i, tym_t ty) 129 { 129 { unsigned grex = I64 ? (REX_W << 16) : 0; 130 130 switch (tybasic(ty)) 131 131 { 132 132 case TYfloat: 133 133 case TYifloat: 134 134 case TYcfloat: 135 c = genc1(c,0xD9, modregrm(2,0,BPRM),FLndp,i);135 c = genc1(c,0xD9,grex | modregrm(2,0,BPRM),FLndp,i); 136 136 break; 137 137 138 138 case TYdouble: 139 139 case TYdouble_alias: 140 140 case TYidouble: 141 141 case TYcdouble: 142 c = genc1(c,0xDD, modregrm(2,0,BPRM),FLndp,i);142 c = genc1(c,0xDD,grex | modregrm(2,0,BPRM),FLndp,i); 143 143 break; 144 144 145 145 case TYldouble: 146 146 case TYildouble: 147 147 case TYcldouble: 148 c = genc1(c,0xDB, modregrm(2,5,BPRM),FLndp,i); // FLD m80real i[BP]148 c = genc1(c,0xDB,grex | modregrm(2,5,BPRM),FLndp,i); // FLD m80real i[BP] 149 149 break; 150 150 151 151 default: 152 152 assert(0); 153 153 } 154 154 return c; 155 155 } 156 156 157 157 /************************** 158 158 * Return index of empty slot in NDP::save[]. 
159 159 */ 160 160 161 161 STATIC int getemptyslot() 162 162 { int i; 163 163 164 164 for (i = 0; i < NDP::savemax; i++) 165 165 if (NDP::save[i].e == NULL) 166 166 goto L1; 167 167 /* Out of room, reallocate NDP::save[] */ 168 168 NDP::save = (NDP *)mem_realloc(NDP::save, … … 428 428 break; 429 429 } 430 430 if (_8087elems[i].e == e && 431 431 _8087elems[i].offset == offset) 432 432 { //printf("cse found %d\n",i); 433 433 //elem_print(e); 434 434 break; 435 435 } 436 436 } 437 437 return i; 438 438 } 439 439 440 440 /************************************* 441 441 * Reload common subexpression. 442 442 */ 443 443 444 444 code *comsub87(elem *e,regm_t *pretregs) 445 445 { code *c; 446 446 447 447 // Look on 8087 stack 448 int i; 449 450 i = cse_get(e, 0); 448 int i = cse_get(e, 0); 451 449 452 450 if (tycomplex(e->Ety)) 453 { unsigned sz; 454 int j; 455 456 sz = tysize(e->Ety); 457 j = cse_get(e, sz / 2); 451 { 452 unsigned sz = tysize(e->Ety); 453 int j = cse_get(e, sz / 2); 458 454 if (i >= 0 && j >= 0) 459 455 { 460 456 c = push87(); 461 457 c = cat(c, push87()); 462 458 c = genf2(c,0xD9,0xC0 + i); // FLD ST(i) 463 459 c = genf2(c,0xD9,0xC0 + j + 1); // FLD ST(j + 1) 464 460 c = cat(c,fixresult_complex87(e,mST01,pretregs)); 465 461 } 466 462 else 467 463 // Reload 468 464 c = loaddata(e,pretregs); 469 465 } 470 466 else 471 467 { 472 468 if (i >= 0) 473 469 { 474 470 c = push87(); 475 471 c = genf2(c,0xD9,0xC0 + i); // FLD ST(i) 476 472 c = cat(c,fixresult(e,mST0,pretregs)); 477 473 } 478 474 else 479 475 // Reload 480 476 c = loaddata(e,pretregs); 481 477 } 482 478 483 479 freenode(e); 484 480 return c; 485 481 } 486 482 487 483 488 484 /************************** 489 485 * Generate code to deal with floatreg. 
490 486 */ 491 487 492 488 code * genfltreg(code *c,unsigned opcode,unsigned reg,targ_size_t offset) 493 489 { 494 490 floatreg = TRUE; 495 491 reflocal = TRUE; 496 492 if ((opcode & 0xF8) == 0xD8) 497 493 c = genfwait(c); 498 return genc1(c,opcode,modregrm(2,reg,BPRM),FLfltreg,offset); 494 unsigned grex = I64 ? (REX_W << 16) : 0; 495 return genc1(c,opcode,grex | modregxrm(2,reg,BPRM),FLfltreg,offset); 499 496 } 500 497 501 498 /******************************* 502 499 * Decide if we need to gen an FWAIT. 503 500 */ 504 501 505 502 code *genfwait(code *c) 506 503 { 507 504 if (ADDFWAIT()) 508 505 c = gen1(c,FWAIT); 509 506 return c; 510 507 } 511 508 512 509 /*************************************** 513 510 * Generate floating point instruction. 514 511 */ 515 512 516 513 STATIC code * genf2(code *c,unsigned op,unsigned rm) 517 514 { 518 515 return gen2(genfwait(c),op,rm); … … 743 740 tym_t tym; 744 741 code *c1,*c2; 745 742 unsigned sz; 746 743 747 744 //printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs); 748 745 //printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs)); 749 746 assert(!*pretregs || retregs); 750 747 c1 = CNIL; 751 748 c2 = CNIL; 752 749 tym = tybasic(e->Ety); 753 750 sz = tysize[tym]; 754 751 //printf("tym = x%x, sz = %d\n", tym, sz); 755 752 756 753 if (*pretregs & mST01) 757 754 return fixresult_complex87(e, retregs, pretregs); 758 755 759 756 /* if retregs needs to be transferred into the 8087 */ 760 757 if (*pretregs & mST0 && retregs & (mBP | ALLREGS)) 761 758 { 762 759 assert(sz <= DOUBLESIZE); 763 if ( I32)760 if (!I16) 764 761 { 765 762 766 763 if (*pretregs & mPSW) 767 764 { // Set flags 768 regm_t r; 769 770 r = retregs | mPSW; 765 regm_t r = retregs | mPSW; 771 766 c1 = fixresult(e,retregs,&r); 772 767 } 773 768 c2 = push87(); 774 if (sz == REGSIZE) 775 { unsigned reg; 776 777 reg = findreg(retregs); 769 if (sz == REGSIZE || (I64 && sz == 4)) 770 { 771 
unsigned reg = findreg(retregs); 778 772 c2 = genfltreg(c2,0x89,reg,0); // MOV fltreg,reg 779 773 genfltreg(c2,0xD9,0,0); // FLD float ptr fltreg 780 774 } 781 775 else 782 776 { unsigned msreg,lsreg; 783 777 784 778 msreg = findregmsw(retregs); 785 779 lsreg = findreglsw(retregs); 786 780 c2 = genfltreg(c2,0x89,lsreg,0); // MOV fltreg,lsreg 787 781 genfltreg(c2,0x89,msreg,4); // MOV fltreg+4,msreg 788 782 genfltreg(c2,0xDD,0,0); // FLD double ptr fltreg 789 783 } 790 784 } 791 785 else 792 786 { 793 787 regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS; 794 788 regm |= *pretregs & mPSW; 795 789 c1 = fixresult(e,retregs,&regm); 796 790 regm = 0; // don't worry about result from CLIBxxx 797 791 c2 = callclib(e, 798 792 ((sz == FLOATSIZE) ? CLIBfltto87 : CLIBdblto87), 799 793 &regm,0); 800 794 } 801 795 } 802 796 else if (*pretregs & (mBP | ALLREGS) && retregs & mST0) 803 797 { unsigned mf; 804 798 unsigned reg; 805 799 806 800 assert(sz <= DOUBLESIZE); 807 801 mf = (sz == FLOATSIZE) ? MFfloat : MFdouble; 808 802 if (*pretregs & mPSW && !(retregs & mPSW)) 809 803 c1 = genftst(c1,e,0); 810 804 /* FSTP floatreg */ 811 805 pop87(); 812 806 c1 = genfltreg(c1,ESC(mf,1),3,0); 813 807 genfwait(c1); 814 808 c2 = allocreg(pretregs,&reg,(sz == FLOATSIZE) ? 
TYfloat : TYdouble); 815 809 if (sz == FLOATSIZE) 816 810 { 817 if ( I32)811 if (!I16) 818 812 c2 = genfltreg(c2,0x8B,reg,0); 819 813 else 820 814 { c2 = genfltreg(c2,0x8B,reg,REGSIZE); 821 815 genfltreg(c2,0x8B,findreglsw(*pretregs),0); 822 816 } 823 817 } 824 818 else 825 819 { 826 if ( I32)820 if (!I16) 827 821 { c2 = genfltreg(c2,0x8B,reg,REGSIZE); 828 822 genfltreg(c2,0x8B,findreglsw(*pretregs),0); 829 823 } 830 824 else 831 825 { c2 = genfltreg(c2,0x8B,AX,6); 832 826 genfltreg(c2,0x8B,BX,4); 833 827 genfltreg(c2,0x8B,CX,2); 834 828 genfltreg(c2,0x8B,DX,0); 835 829 } 836 830 } 837 831 } 838 832 else if (*pretregs == 0 && retregs == mST0) 839 833 { 840 834 c1 = genf2(c1,0xDD,modregrm(3,3,0)); // FPOP 841 835 pop87(); 842 836 } 843 837 else 844 838 { if (*pretregs & mPSW) 845 839 { if (!(retregs & mPSW)) 846 840 { assert(retregs & mST0); … … 1382 1376 (e2->E1->Eoper == OPs32_d || e2->E1->Eoper == OPs64_d || e2->E1->Eoper == OPs16_d) && 1383 1377 e2->E1->E1->Eoper == OPvar 1384 1378 ) || 1385 1379 ((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) && 1386 1380 e2->E1->Eoper == OPvar 1387 1381 ) 1388 1382 ) 1389 1383 ) 1390 1384 ) 1391 1385 { // Reverse order of evaluation 1392 1386 e1 = e->E2; 1393 1387 e2 = e->E1; 1394 1388 op = oprev[op + 1]; 1395 1389 reverse ^= 1; 1396 1390 } 1397 1391 1398 1392 c1 = codelem(e1,&retregs,FALSE); 1399 1393 note87(e1,0,0); 1400 1394 1401 1395 if (config.flags4 & CFG4fdivcall && e->Eoper == OPdiv) 1402 { regm_t retregs; 1403 1404 retregs = mST0; 1396 { 1397 regm_t retregs = mST0; 1405 1398 c2 = load87(e2,0,&retregs,e1,-1); 1406 1399 c2 = cat(c2,makesure87(e1,0,1,0)); 1407 1400 if (op == 7) // if reverse divide 1408 1401 c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 1409 1402 c2 = cat(c2,callclib(e,CLIBfdiv87,&retregs,0)); 1410 1403 pop87(); 1411 1404 resregm = mST0; 1412 1405 freenode(e2); 1413 1406 c4 = fixresult87(e,resregm,pretregs); 1414 1407 } 1415 1408 else if (e->Eoper == OPmod) 1416 1409 { 1417 1410 /* 1418 1411 
* fld tbyte ptr y 1419 1412 * fld tbyte ptr x // ST = x, ST1 = y 1420 1413 * FM1: // We don't use fprem1 because for some inexplicable 1421 1414 * // reason we get -5 when we do _modulo(15, 10) 1422 1415 * fprem // ST = ST % ST1 1423 1416 * fstsw word ptr sw 1424 1417 * fwait 1425 1418 * mov AH,byte ptr sw+1 // get msb of status word in AH 1426 1419 * sahf // transfer to flags 1427 1420 * jp FM1 // continue till ST < ST1 1428 1421 * fstp ST(1) // leave remainder on stack 1429 1422 */ 1430 regm_t retregs; 1431 1432 retregs = mST0; 1423 regm_t retregs = mST0; 1433 1424 c2 = load87(e2,0,&retregs,e1,-1); 1434 1425 c2 = cat(c2,makesure87(e1,0,1,0)); // now have x,y on stack; need y,x 1435 1426 if (!reverse) // if not reverse modulo 1436 1427 c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 1437 1428 1438 1429 c3 = gen2(NULL, 0xD9, 0xF8); // FM1: FPREM 1439 1430 c3 = cg87_87topsw(c3); 1440 1431 c3 = genjmp(c3, JP, FLcode, (block *)c3); // JP FM1 1441 1432 c3 = genf2(c3,0xDD,0xD8 + 1); // FSTP ST(1) 1442 1433 1443 1434 pop87(); 1444 1435 resregm = mST0; 1445 1436 freenode(e2); 1446 1437 c4 = fixresult87(e,resregm,pretregs); 1447 1438 } 1448 1439 else 1449 1440 { c2 = load87(e2,0,pretregs,e1,op); 1450 1441 freenode(e2); 1451 1442 } 1452 1443 if (*pretregs & mST0) … … 1593 1584 break; 1594 1585 } 1595 1586 note87(e,0,0); 1596 1587 } 1597 1588 break; 1598 1589 case OPd_f: 1599 1590 case OPf_d: 1600 1591 case OPd_ld: 1601 1592 mf1 = (tybasic(e->E1->Ety) == TYfloat || tybasic(e->E1->Ety) == TYifloat) 1602 1593 ? 
MFfloat : MFdouble; 1603 1594 if (op != -1 && stackused) 1604 1595 note87(eleft,eoffset,0); // don't trash this value 1605 1596 if (e->E1->Eoper == OPvar || e->E1->Eoper == OPind) 1606 1597 { 1607 1598 #if 1 1608 1599 L4: 1609 1600 c = getlvalue(&cs,e->E1,0); 1610 1601 cs.Iop = ESC(mf1,0); 1611 1602 if (ADDFWAIT()) 1612 1603 cs.Iflags |= CFwait; 1613 if ( I32)1604 if (!I16) 1614 1605 cs.Iflags &= ~CFopsize; 1615 1606 if (op != -1) 1616 1607 { cs.Irm |= modregrm(0,op,0); 1617 1608 c = cat(c,makesure87(eleft,eoffset,0,0)); 1618 1609 } 1619 1610 else 1620 1611 { cs.Iop |= 1; 1621 1612 c = cat(c,push87()); 1622 1613 } 1623 1614 c = gen(c,&cs); /* FLD / Fop */ 1624 1615 #else 1625 1616 c = loadea(e->E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e->E1 */ 1626 1617 #endif 1627 1618 /* Variable cannot be put into a register anymore */ 1628 1619 if (e->E1->Eoper == OPvar) 1629 1620 notreg(e->E1); 1630 1621 freenode(e->E1); 1631 1622 } 1632 1623 else 1633 1624 { 1634 1625 retregs = mST0; 1635 1626 c = codelem(e->E1,&retregs,FALSE); 1636 1627 if (op != -1) 1637 1628 { c = cat(c,makesure87(eleft,eoffset,1,0)); 1638 1629 c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP 1639 1630 pop87(); 1640 1631 } 1641 1632 } 1642 1633 break; 1643 1634 1644 1635 case OPs64_d: 1645 1636 if (e->E1->Eoper == OPvar || 1646 1637 (e->E1->Eoper == OPind && e->E1->Ecount == 0)) 1647 1638 { 1648 1639 c = getlvalue(&cs,e->E1,0); 1649 1640 cs.Iop = 0xDF; 1650 1641 if (ADDFWAIT()) 1651 1642 cs.Iflags |= CFwait; 1652 if ( I32)1643 if (!I16) 1653 1644 cs.Iflags &= ~CFopsize; 1654 1645 c = cat(c,push87()); 1655 1646 cs.Irm |= modregrm(0,5,0); 1656 1647 c = gen(c,&cs); // FILD m64 1657 1648 // Variable cannot be put into a register anymore 1658 1649 if (e->E1->Eoper == OPvar) 1659 1650 notreg(e->E1); 1660 1651 freenode(e->E1); 1661 1652 } 1662 1653 else 1663 1654 { 1664 1655 retregs = ALLREGS; 1665 1656 c = codelem(e->E1,&retregs,FALSE); 1666 1657 reg = findreglsw(retregs); 1667 1658 c = genfltreg(c,0x89,reg,0); // MOV 
floatreg,reglsw 1668 1659 reg = findregmsw(retregs); 1669 1660 c = genfltreg(c,0x89,reg,4); // MOV floatreg+4,regmsw 1670 1661 c = cat(c,push87()); 1671 1662 c = genfltreg(c,0xDF,5,0); // FILD long long ptr floatreg 1672 1663 } … … 1908 1899 if (I32) 1909 1900 cs.Iflags &= ~CFopsize; 1910 1901 else if (ADDFWAIT()) 1911 1902 cs.Iflags |= CFwait; 1912 1903 c2 = gen(c2, &cs); 1913 1904 #if LNGDBLSIZE == 12 1914 1905 /* This deals with the fact that 10 byte reals really 1915 1906 * occupy 12 bytes by zeroing the extra 2 bytes. 1916 1907 */ 1917 1908 if (op1 == 0xDB) 1918 1909 { 1919 1910 cs.Iop = 0xC7; // MOV EA+10,0 1920 1911 NEWREG(cs.Irm, 0); 1921 1912 cs.IEV1.sp.Voffset += 10; 1922 1913 cs.IFL2 = FLconst; 1923 1914 cs.IEV2.Vint = 0; 1924 1915 cs.Iflags |= CFopsize; 1925 1916 c2 = gen(c2, &cs); 1926 1917 } 1927 1918 #endif 1928 #if LNGDBLSIZE == 16 1919 if (tysize[TYldouble] == 16) 1920 { 1929 1921 /* This deals with the fact that 10 byte reals really 1930 1922 * occupy 16 bytes by zeroing the extra 6 bytes. 1931 1923 */ 1932 1924 if (op1 == 0xDB) 1933 1925 { 1934 1926 cs.Iop = 0xC7; // MOV EA+10,0 1935 1927 NEWREG(cs.Irm, 0); 1936 1928 cs.IEV1.sp.Voffset += 10; 1937 1929 cs.IFL2 = FLconst; 1938 1930 cs.IEV2.Vint = 0; 1939 1931 cs.Iflags |= CFopsize; 1940 1932 c2 = gen(c2, &cs); 1941 1933 1942 1934 cs.IEV1.sp.Voffset += 2; 1943 1935 cs.Iflags &= ~CFopsize; 1944 1936 c2 = gen(c2, &cs); 1945 1937 } 1946 #endif 1938 } 1947 1939 #endif 1948 1940 c2 = genfwait(c2); 1949 1941 freenode(e->E1); 1950 1942 c1 = cat3(c1,c2,fixresult87(e,mST0 | mPSW,pretregs)); 1951 1943 return c1; 1952 1944 } 1953 1945 1954 1946 /******************************* 1955 1947 * Perform an assignment to a long double/double/float. 
1956 1948 */ 1957 1949 1958 1950 code *complex_eq87(elem *e,regm_t *pretregs) 1959 1951 { 1960 1952 regm_t retregs; 1961 1953 code *c1,*c2; 1962 1954 code cs; 1963 1955 unsigned op1; 1964 1956 unsigned op2; 1965 1957 unsigned sz; 1966 1958 tym_t ty1; … … 2023 2015 if (fxch) 2024 2016 c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 2025 2017 cs.IEVoffset1 -= sz; 2026 2018 gen(c2, &cs); 2027 2019 if (fxch) 2028 2020 genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 2029 2021 #if LNGDBLSIZE == 12 2030 2022 if (op1 == 0xDB) 2031 2023 { 2032 2024 cs.Iop = 0xC7; // MOV EA+10,0 2033 2025 NEWREG(cs.Irm, 0); 2034 2026 cs.IEV1.sp.Voffset += 10; 2035 2027 cs.IFL2 = FLconst; 2036 2028 cs.IEV2.Vint = 0; 2037 2029 cs.Iflags |= CFopsize; 2038 2030 c2 = gen(c2, &cs); 2039 2031 cs.IEVoffset1 += 12; 2040 2032 c2 = gen(c2, &cs); // MOV EA+22,0 2041 2033 } 2042 2034 #endif 2043 #if LNGDBLSIZE == 16 2035 if (tysize[TYldouble] == 16) 2036 { 2044 2037 if (op1 == 0xDB) 2045 2038 { 2046 2039 cs.Iop = 0xC7; // MOV EA+10,0 2047 2040 NEWREG(cs.Irm, 0); 2048 2041 cs.IEV1.sp.Voffset += 10; 2049 2042 cs.IFL2 = FLconst; 2050 2043 cs.IEV2.Vint = 0; 2051 2044 cs.Iflags |= CFopsize; 2052 2045 c2 = gen(c2, &cs); 2053 2046 2054 2047 cs.IEV1.sp.Voffset += 2; 2055 2048 cs.Iflags &= ~CFopsize; 2056 2049 c2 = gen(c2, &cs); 2057 2050 2058 2051 cs.IEVoffset1 += 14; 2059 2052 cs.Iflags |= CFopsize; 2060 2053 c2 = gen(c2, &cs); 2061 2054 2062 2055 cs.IEV1.sp.Voffset += 2; 2063 2056 cs.Iflags &= ~CFopsize; 2064 2057 c2 = gen(c2, &cs); 2065 2058 } 2066 #endif 2059 } 2067 2060 c2 = genfwait(c2); 2068 2061 freenode(e->E1); 2069 2062 return cat3(c1,c2,fixresult_complex87(e,mST01 | mPSW,pretregs)); 2070 2063 } 2071 2064 2072 2065 /******************************* 2073 2066 * Perform an assignment while converting to integral type, 2074 2067 * i.e. 
handle (e1 = (int) e2) 2075 2068 */ 2076 2069 2077 2070 code *cnvteq87(elem *e,regm_t *pretregs) 2078 2071 { 2079 2072 regm_t retregs; 2080 2073 code *c1,*c2; 2081 2074 code cs; 2082 2075 unsigned op1; 2083 2076 unsigned op2; 2084 2077 tym_t ty1; 2085 2078 2086 2079 assert(e->Eoper == OPeq); … … 2281 2274 jp FM1 2282 2275 fxch ST(1) 2283 2276 fld E1.im 2284 2277 FM2: fprem 2285 2278 fstsw word ptr sw 2286 2279 fwait 2287 2280 mov AH, byte ptr sw+1 2288 2281 jp FM2 2289 2282 fstp ST(1) 2290 2283 */ 2291 2284 2292 2285 ty1 = tybasic(e->E1->Ety); 2293 2286 sz2 = tysize[ty1] / 2; 2294 2287 2295 2288 retregs = mST0; 2296 2289 cr = codelem(e->E2,&retregs,FALSE); // FLD E2 2297 2290 note87(e->E2,0,0); 2298 2291 cl = getlvalue(&cs,e->E1,0); 2299 2292 cl = cat(cl,makesure87(e->E2,0,0,0)); 2300 2293 cs.Iflags |= ADDFWAIT() ? CFwait : 0; 2301 if ( I32)2294 if (!I16) 2302 2295 cs.Iflags &= ~CFopsize; 2303 2296 2304 2297 c = push87(); 2305 2298 switch (ty1) 2306 2299 { 2307 2300 case TYcdouble: cs.Iop = ESC(MFdouble,1); break; 2308 2301 case TYcfloat: cs.Iop = ESC(MFfloat,1); break; 2309 2302 case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break; 2310 2303 default: 2311 2304 assert(0); 2312 2305 } 2313 2306 c = gen(c,&cs); // FLD E1.re 2314 2307 2315 2308 code *c1; 2316 2309 2317 2310 c1 = gen2(NULL, 0xD9, 0xF8); // FPREM 2318 2311 c1 = cg87_87topsw(c1); 2319 2312 c1 = genjmp(c1, JP, FLcode, (block *)c1); // JP FM1 2320 2313 c1 = genf2(c1, 0xD9, 0xC8 + 1); // FXCH ST(1) 2321 2314 c = cat(c,c1); … … 2393 2386 if (!tycomplex(e->E2->Ety) && 2394 2387 (e->Eoper == OPmulass || e->Eoper == OPdivass)) 2395 2388 { 2396 2389 retregs = mST0; 2397 2390 cr = codelem(e->E2, &retregs, FALSE); 2398 2391 note87(e->E2, 0, 0); 2399 2392 cl = getlvalue(&cs, e->E1, 0); 2400 2393 cl = cat(cl,makesure87(e->E2,0,0,0)); 2401 2394 cl = cat(cl,push87()); 2402 2395 cl = genf2(cl,0xD9,0xC0); // FLD ST(0) 2403 2396 goto L1; 2404 2397 } 2405 2398 else 2406 2399 { 2407 2400 cr = 
loadComplex(e->E2); 2408 2401 cl = getlvalue(&cs,e->E1,0); 2409 2402 cl = cat(cl,makesure87(e->E2,sz2,0,0)); 2410 2403 cl = cat(cl,makesure87(e->E2,0,1,0)); 2411 2404 } 2412 2405 cs.Iflags |= ADDFWAIT() ? CFwait : 0; 2413 if ( I32)2406 if (!I16) 2414 2407 cs.Iflags &= ~CFopsize; 2415 2408 2416 2409 switch (e->Eoper) 2417 2410 { 2418 2411 case OPpostinc: 2419 2412 case OPaddass: 2420 2413 case OPpostdec: 2421 2414 case OPminass: 2422 2415 L1: 2423 2416 if (ty1 == TYcldouble) 2424 2417 { 2425 2418 c = push87(); 2426 2419 c = cat(c, push87()); 2427 2420 cs.Iop = 0xDB; 2428 2421 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2429 2422 c = gen(c,&cs); // FLD e->E1.re 2430 2423 cs.IEVoffset1 += sz2; 2431 2424 gen(c,&cs); // FLD e->E1.im 2432 2425 genf2(c, 0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST 2433 2426 genf2(c, 0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST … … 2581 2574 cs.IEVoffset1 -= sz2; 2582 2575 gen(c, &cs); // FST mreal.re 2583 2576 retregs = mST01; 2584 2577 } 2585 2578 else 2586 2579 { 2587 2580 cs.Irm |= modregrm(0, 3, 0); 2588 2581 gen(c, &cs); // FSTP mreal.im 2589 2582 cs.IEVoffset1 -= sz2; 2590 2583 gen(c, &cs); // FSTP mreal.re 2591 2584 pop87(); 2592 2585 pop87(); 2593 2586 retregs = 0; 2594 2587 } 2595 2588 goto L3; 2596 2589 } 2597 2590 2598 2591 case OPdivass: 2599 2592 c = push87(); 2600 2593 c = cat(c, push87()); 2601 idxregs = idxregm( cs.Irm,cs.Isib);// mask of index regs used2594 idxregs = idxregm(&cs); // mask of index regs used 2602 2595 if (ty1 == TYcldouble) 2603 2596 { 2604 2597 cs.Iop = 0xDB; 2605 2598 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2606 2599 c = gen(c,&cs); // FLD e->E1.re 2607 2600 genf2(c,0xD9,0xC8 + 2); // FXCH ST(2) 2608 2601 cs.IEVoffset1 += sz2; 2609 2602 gen(c,&cs); // FLD e->E1.im 2610 2603 genf2(c,0xD9,0xC8 + 2); // FXCH ST(2) 2611 2604 retregs = mST01; 2612 2605 c = cat(c,callclib(e, CLIBcdiv, &retregs, idxregs)); 2613 2606 goto L2; 2614 2607 } 2615 2608 else 2616 2609 { 2617 2610 cs.Iop = (ty1 == TYcfloat) ? 
0xD9 : 0xDD; 2618 2611 cs.Irm |= modregrm(0, 0, 0); // FLD tbyte ptr ... 2619 2612 c = gen(c,&cs); // FLD e->E1.re 2620 2613 genf2(c,0xD9,0xC8 + 2); // FXCH ST(2) 2621 2614 cs.IEVoffset1 += sz2; … … 2652 2645 2653 2646 /************************** 2654 2647 * OPnegass 2655 2648 */ 2656 2649 2657 2650 code *cdnegass87(elem *e,regm_t *pretregs) 2658 2651 { regm_t retregs; 2659 2652 tym_t tyml; 2660 2653 unsigned op; 2661 2654 targ_long val; 2662 2655 code *cl,*cr,*c,cs; 2663 2656 elem *e1; 2664 2657 int sz; 2665 2658 2666 2659 //printf("cdnegass87(e = %p, *pretregs = x%x)\n", e, *pretregs); 2667 2660 e1 = e->E1; 2668 2661 tyml = tybasic(e1->Ety); // type of lvalue 2669 2662 sz = tysize[tyml]; 2670 2663 2671 2664 cl = getlvalue(&cs,e1,0); 2672 cr = modEA( cs.Irm);2665 cr = modEA(&cs); 2673 2666 cs.Irm |= modregrm(0,6,0); 2674 2667 cs.Iop = 0x80; 2668 cs.Irex = 0; 2675 2669 #if LNGDBLSIZE > 10 2676 2670 if (tyml == TYldouble || tyml == TYildouble) 2677 2671 cs.IEVoffset1 += 10 - 1; 2678 2672 else if (tyml == TYcldouble) 2679 2673 cs.IEVoffset1 += LNGDBLSIZE + 10 - 1; 2680 2674 else 2681 2675 #endif 2682 2676 cs.IEVoffset1 += sz - 1; 2683 2677 cs.IFL2 = FLconst; 2684 2678 cs.IEV2.Vuns = 0x80; 2685 2679 c = gen(NULL,&cs); // XOR 7[EA],0x80 2686 2680 if (tycomplex(tyml)) 2687 2681 { 2688 2682 cs.IEVoffset1 -= sz / 2; 2689 2683 gen(c,&cs); // XOR 7[EA],0x80 2690 2684 } 2691 2685 c = cat3(cl,cr,c); 2692 2686 2693 2687 if (*pretregs) 2694 2688 { … … 2718 2712 } 2719 2713 2720 2714 /************************ 2721 2715 * Take care of OPpostinc and OPpostdec. 2722 2716 */ 2723 2717 2724 2718 code *post87(elem *e,regm_t *pretregs) 2725 2719 { 2726 2720 regm_t retregs; 2727 2721 code *cl,*cr,*c; 2728 2722 code cs; 2729 2723 unsigned op; 2730 2724 unsigned op1; 2731 2725 unsigned reg; 2732 2726 tym_t ty1; 2733 2727 2734 2728 //printf("post87()\n"); 2735 2729 assert(*pretregs); 2736 2730 cl = getlvalue(&cs,e->E1,0); 2737 2731 cs.Iflags |= ADDFWAIT() ? 
CFwait : 0; 2738 if ( I32)2732 if (!I16) 2739 2733 cs.Iflags &= ~CFopsize; 2740 2734 ty1 = tybasic(e->E1->Ety); 2741 2735 switch (ty1) 2742 2736 { case TYdouble_alias: 2743 2737 case TYidouble: 2744 2738 case TYdouble: 2745 2739 case TYcdouble: op1 = ESC(MFdouble,1); reg = 0; break; 2746 2740 case TYifloat: 2747 2741 case TYfloat: 2748 2742 case TYcfloat: op1 = ESC(MFfloat,1); reg = 0; break; 2749 2743 case TYildouble: 2750 2744 case TYldouble: 2751 2745 case TYcldouble: op1 = 0xDB; reg = 5; break; 2752 2746 default: 2753 2747 assert(0); 2754 2748 } 2755 2749 NEWREG(cs.Irm, reg); 2756 2750 if (reg == 5) 2757 2751 reg = 7; 2758 2752 else … … 2805 2799 * OPd_s32 2806 2800 * OPd_u16 2807 2801 * OPd_s64 2808 2802 */ 2809 2803 2810 2804 code *cnvt87(elem *e,regm_t *pretregs) 2811 2805 { 2812 2806 regm_t retregs; 2813 2807 code *c1,*c2; 2814 2808 unsigned mf,rf,reg; 2815 2809 tym_t tym; 2816 2810 int clib; 2817 2811 int sz; 2818 2812 int szoff; 2819 2813 2820 2814 //printf("cnvt87(e = %p, *pretregs = x%x)\n", e, *pretregs); 2821 2815 assert(*pretregs); 2822 2816 tym = e->Ety; 2823 2817 sz = tysize(tym); 2824 2818 szoff = sz; 2819 unsigned grex = I64 ? REX_W << 16 : 0; 2825 2820 2826 2821 switch (e->Eoper) 2827 2822 { case OPd_s16: 2828 2823 clib = CLIBdblint87; 2829 2824 mf = ESC(MFword,1); 2830 2825 rf = 3; 2831 2826 break; 2832 2827 2833 2828 case OPd_u16: 2834 2829 szoff = 4; 2835 2830 case OPd_s32: 2836 2831 clib = CLIBdbllng87; 2837 2832 mf = ESC(MFlong,1); 2838 2833 rf = 3; 2839 2834 break; 2840 2835 2841 2836 case OPd_s64: 2842 2837 clib = CLIBdblllng; 2843 2838 mf = 0xDF; 2844 2839 rf = 7; 2845 2840 break; 2846 2841 2847 2842 default: 2848 2843 assert(0); 2849 2844 } 2850 2845 2851 if ( !I32) // C may change the default control word2846 if (I16) // C may change the default control word 2852 2847 { 2853 2848 if (clib == CLIBdblllng) 2854 2849 { retregs = I32 ? 
DOUBLEREGS_32 : DOUBLEREGS_16; 2855 2850 c1 = codelem(e->E1,&retregs,FALSE); 2856 2851 c2 = callclib(e,clib,pretregs,0); 2857 2852 } 2858 2853 else 2859 2854 { retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16; 2860 2855 c1 = codelem(e->E1,&retregs,FALSE); 2861 2856 c2 = callclib(e,clib,pretregs,0); 2862 2857 pop87(); 2863 2858 } 2864 2859 } 2865 2860 else if (1) 2866 2861 { // Generate: 2867 2862 // sub ESP,12 2868 2863 // fstcw 8[ESP] 2869 2864 // fldcw roundto0 2870 2865 // fistp long64 ptr [ESP] 2871 2866 // fldcw 8[ESP] 2872 2867 // pop lsw 2873 2868 // pop msw 2874 2869 // add ESP,4 2875 2870 2876 2871 unsigned szpush = szoff + 2; 2877 2872 if (config.flags3 & CFG3pic) 2878 2873 szpush += 2; 2879 2874 szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1); 2880 2875 2881 2876 retregs = mST0; 2882 2877 c1 = codelem(e->E1,&retregs,FALSE); 2883 2878 2884 2879 if (szpush == REGSIZE) 2885 2880 c1 = gen1(c1,0x50 + AX); // PUSH EAX 2886 2881 else 2887 c1 = genc2(c1,0x81, modregrm(3,5,SP), szpush); // SUB ESP,122882 c1 = genc2(c1,0x81,grex | modregrm(3,5,SP), szpush); // SUB ESP,12 2888 2883 c1 = genfwait(c1); 2889 genc1(c1,0xD9, modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP]2884 genc1(c1,0xD9,grex | modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP] 2890 2885 2891 2886 c1 = genfwait(c1); 2892 2887 2893 2888 if (config.flags3 & CFG3pic) 2894 2889 { 2895 genc(c1,0xC7, modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto02890 genc(c1,0xC7,grex | modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto0 2896 2891 code_orflag(c1, CFopsize); 2897 genc1(c1,0xD9, modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP]2892 genc1(c1,0xD9,grex | modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP] 2898 2893 } 2899 2894 else 2900 2895 c1 = genrnd(c1, CW_roundto0); // FLDCW 
roundto0 2901 2896 2902 2897 pop87(); 2903 2898 2904 2899 c1 = genfwait(c1); 2905 gen2sib(c1,mf, modregrm(0,rf,4),modregrm(0,4,SP)); // FISTP [ESP]2900 gen2sib(c1,mf,grex | modregrm(0,rf,4),modregrm(0,4,SP)); // FISTP [ESP] 2906 2901 2907 2902 retregs = *pretregs & (ALLREGS | mBP); 2908 2903 if (!retregs) 2909 2904 retregs = ALLREGS; 2910 2905 c2 = allocreg(&retregs,&reg,tym); 2911 2906 2912 2907 c2 = genfwait(c2); // FWAIT 2913 c2 = genc1(c2,0xD9, modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP]2908 c2 = genc1(c2,0xD9,grex | modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP] 2914 2909 2915 2910 if (szoff > REGSIZE) 2916 2911 { szpush -= REGSIZE; 2917 c2 = gen 1(c2,0x58 +findreglsw(retregs)); // POP lsw2912 c2 = genpop(c2,findreglsw(retregs)); // POP lsw 2918 2913 } 2919 2914 szpush -= REGSIZE; 2920 c2 = gen 1(c2,0x58 +reg); // POP reg2915 c2 = genpop(c2,reg); // POP reg 2921 2916 2922 2917 if (szpush) 2923 genc2(c2,0x81, modregrm(3,0,SP), szpush); // ADD ESP,42918 genc2(c2,0x81,grex | modregrm(3,0,SP), szpush); // ADD ESP,4 2924 2919 c2 = cat(c2,fixresult(e,retregs,pretregs)); 2925 2920 } 2926 2921 else 2927 2922 { 2928 2923 // This is incorrect. For -inf and nan, the 8087 returns the largest 2929 2924 // negative int (0x80000....). For -inf, 0x7FFFF... should be returned, 2930 2925 // and for nan, 0 should be returned. 2931 2926 retregs = mST0; 2932 2927 c1 = codelem(e->E1,&retregs,FALSE); 2933 2928 2934 2929 c1 = genfwait(c1); 2935 2930 c1 = genrnd(c1, CW_roundto0); // FLDCW roundto0 2936 2931 2937 2932 pop87(); 2938 2933 c1 = genfltreg(c1,mf,rf,0); // FISTP floatreg 2939 2934 retregs = *pretregs & (ALLREGS | mBP); 2940 2935 if (!retregs) 2941 2936 retregs = ALLREGS; 2942 2937 c2 = allocreg(&retregs,&reg,tym); 2943 2938 branches/dmd-1.x/src/backend/cg.c
r558 r577 36 36 // of the public name index of a COMDAT) 37 37 38 38 /* Stack offsets */ 39 39 targ_size_t localsize, /* amt subtracted from SP for local vars */ 40 40 Toff, /* base for temporaries */ 41 41 Poff,Aoff; // comsubexps, params, regs, autos 42 42 43 43 /* The following are initialized for the 8088. cod3_set386() will 44 44 * change them if generating code for the 386, cod3_set64() for 64 bit code. 45 45 */ 46 46 int BPRM = 6; /* R/M value for [BP] or [EBP] */ 47 47 regm_t fregsaved = mBP | mSI | mDI; // mask of registers saved across 48 48 // function calls 49 49 // (add in mBX for I32) 50 50 regm_t FLOATREGS = FLOATREGS_16; 51 51 regm_t FLOATREGS2 = FLOATREGS2_16; 52 52 regm_t DOUBLEREGS = DOUBLEREGS_16; 53 53 54 54 symbol *localgot; // reference to GOT for this function 55 55 symbol *tls_get_addr_sym; // function __tls_get_addr 56 57 #if TARGET_OSX 58 int STACKALIGN = 16; 59 #else 60 int STACKALIGN = 0; 61 #endif branches/dmd-1.x/src/backend/cgcod.c
r558 r577 19 19 20 20 #if __sun&&__SVR4 21 21 #include <alloca.h> 22 22 #endif 23 23 24 24 #include "cc.h" 25 25 #include "el.h" 26 26 #include "oper.h" 27 27 #include "code.h" 28 28 #include "global.h" 29 29 #include "type.h" 30 30 #include "exh.h" 31 31 32 32 static char __file__[] = __FILE__; /* for tassert.h */ 33 33 #include "tassert.h" 34 34 35 35 STATIC void resetEcomsub(elem *e); 36 36 STATIC code * loadcse(elem *,unsigned,regm_t); 37 37 STATIC void blcodgen(block *); 38 38 STATIC void cgcod_eh(); 39 STATIC int numbitsset(regm_t);40 39 STATIC code * cse_save(regm_t ms); 41 40 STATIC int cse_simple(elem *e,int i); 42 41 STATIC code * comsub(elem *,regm_t *); 43 42 44 43 bool floatreg; // !=0 if floating register is required 45 44 46 45 targ_size_t Aoffset; // offset of automatics and registers 47 46 targ_size_t Toffset; // offset of temporaries 48 47 targ_size_t EEoffset; // offset of SCstack variables from ESP 49 48 int Aalign; // alignment for Aoffset 50 49 51 50 CGstate cgstate; // state of code generator 52 51 53 52 /************************************ 54 53 * # of bytes that SP is beyond BP. 55 54 */ 56 55 57 56 unsigned stackpush; 58 57 59 58 int stackchanged; /* set to !=0 if any use of the stack … … 1561 1560 case TYcdouble: 1562 1561 if (I64) 1563 1562 return mXMM0 | mXMM1; 1564 1563 case TYcldouble: 1565 1564 return mST01; 1566 1565 1567 1566 default: 1568 1567 #if DEBUG 1569 1568 WRTYxx(tym); 1570 1569 #endif 1571 1570 assert(0); 1572 1571 return 0; 1573 1572 } 1574 1573 } 1575 1574 1576 1575 1577 1576 /****************************** 1578 1577 * Count the number of bits set in a register mask. 
1579 1578 */ 1580 1579 1581 STATICint numbitsset(regm_t regm)1580 int numbitsset(regm_t regm) 1582 1581 { int n; 1583 1582 1584 1583 n = 0; 1585 1584 if (regm) 1586 1585 do 1587 1586 n++; 1588 1587 while ((regm &= regm - 1) != 0); 1589 1588 return n; 1590 1589 } 1591 1590 1592 1591 /****************************** 1593 1592 * Given a register mask, find and return the number 1594 1593 * of the first register that fits. 1595 1594 */ 1596 1595 1597 1596 #undef findreg 1598 1597 1599 1598 unsigned findreg(regm_t regm 1600 1599 #ifdef DEBUG 1601 1600 ,int line,const char *file branches/dmd-1.x/src/backend/cgen.c
r569 r577 147 147 unsigned reg; 148 148 149 149 #ifdef DEBUG /* this is a high usage routine */ 150 150 assert(cs); 151 151 #endif 152 152 assert(I64 || cs->Irex == 0); 153 153 ce = code_calloc(); 154 154 *ce = *cs; 155 155 if (config.flags4 & CFG4optimized && 156 156 ce->IFL2 == FLconst && 157 157 (ce->Iop == 0x81 || ce->Iop == 0x80) && 158 158 reghasvalue((ce->Iop == 0x80) ? BYTEREGS : ALLREGS,ce->IEV2.Vlong,®) && 159 159 !(ce->Iflags & CFopsize && I16) 160 160 ) 161 161 { // See if we can replace immediate instruction with register instruction 162 162 static unsigned char regop[8] = 163 163 { 0x00,0x08,0x10,0x18,0x20,0x28,0x30,0x38 }; 164 164 165 165 //printf("replacing 0x%02x, val = x%lx\n",ce->Iop,ce->IEV2.Vlong); 166 166 ce->Iop = regop[(ce->Irm & modregrm(0,7,0)) >> 3] | (ce->Iop & 1); 167 c e->Irm = (ce->Irm & modregrm(3,0,7)) | modregrm(0,reg,0);167 code_newreg(ce, reg); 168 168 } 169 169 code_next(ce) = CNIL; 170 170 if (c) 171 171 { cstart = c; 172 172 while (code_next(c)) c = code_next(c); /* find end of list */ 173 173 code_next(c) = ce; /* link into list */ 174 174 return cstart; 175 175 } 176 176 return ce; 177 177 } 178 178 179 179 code *gen1(code *c,unsigned op) 180 180 { code *ce,*cstart; 181 181 182 182 ce = code_calloc(); 183 183 ce->Iop = op; 184 184 if (c) 185 185 { cstart = c; 186 186 while (code_next(c)) c = code_next(c); /* find end of list */ 187 187 code_next(c) = ce; /* link into list */ 188 188 return cstart; 189 189 } 190 190 return ce; 191 191 } 192 192 193 193 code *gen2(code *c,unsigned op,unsigned rm) 194 194 { code *ce,*cstart; 195 195 196 196 cstart = ce = code_calloc(); 197 197 /*cxcalloc++;*/ 198 198 if (op > 0xFF) 199 199 { ce->Iop = op >> 8; 200 200 ce->Iop2 = op & 0xFF; 201 201 } 202 202 else 203 203 ce->Iop = op; 204 ce->Irm = rm; 205 ce->Irex = rm >> 16; 204 ce->Iea = rm; 206 205 if (c) 207 206 { cstart = c; 208 207 while (code_next(c)) c = code_next(c); /* find end of list */ 209 208 code_next(c) = ce; /* link into list 
*/ 210 209 } 211 210 return cstart; 212 211 } 213 212 214 213 code *gen2sib(code *c,unsigned op,unsigned rm,unsigned sib) 215 214 { code *ce,*cstart; 216 215 217 216 cstart = ce = code_calloc(); 218 217 /*cxcalloc++;*/ 219 218 ce->Iop = op; 220 219 ce->Irm = rm; 221 220 ce->Isib = sib; 222 ce->Irex = rm >> 16; 221 ce->Irex = (rm | (sib & (REX_B << 16))) >> 16; 222 if (sib & (REX_R << 16)) 223 ce->Irex |= REX_X; 223 224 if (c) 224 225 { cstart = c; 225 226 while (code_next(c)) c = code_next(c); /* find end of list */ 226 227 code_next(c) = ce; /* link into list */ 227 228 } 228 229 return cstart; 229 230 } 230 231 231 232 code *genregs(code *c,unsigned op,unsigned dstreg,unsigned srcreg) 232 233 { return gen2(c,op,modregxrmx(3,dstreg,srcreg)); } 233 234 234 235 code *gentstreg(code *c,unsigned t) 235 236 { 236 237 c = gen2(c,0x85,modregxrmx(3,t,t)); // TEST t,t 237 238 code_orflag(c,CFpsw); 239 return c; 240 } 241 242 code *genpush(code *c, unsigned reg) 243 { 244 c = gen1(c, 0x50 + (reg & 7)); 245 if (reg & 8) 246 code_orrex(c, REX_B); 247 return c; 248 } 249 250 code *genpop(code *c, unsigned reg) 251 { 252 c = gen1(c, 0x58 + (reg & 7)); 253 if (reg & 8) 254 code_orrex(c, REX_B); 238 255 return c; 239 256 } 240 257 241 258 /******************************** 242 259 * Generate an ASM sequence. 
243 260 */ 244 261 245 262 code *genasm(code *c,char *s,unsigned slen) 246 263 { code *ce; 247 264 248 265 ce = code_calloc(); 249 266 ce->Iop = ASM; 250 267 ce->IFL1 = FLasm; 251 268 ce->IEV1.as.len = slen; 252 269 ce->IEV1.as.bytes = (char *) mem_malloc(slen); 253 270 memcpy(ce->IEV1.as.bytes,s,slen); 254 271 return cat(c,ce); 255 272 } 256 273 257 274 /************************** … … 310 327 gen(cj,&cs); 311 328 break; 312 329 case JNP << 8: 313 330 /* Do a JP around the jump instruction */ 314 331 cnop = gennop(CNIL); 315 332 c = genjmp(c,JP,FLcode,(block *) cnop); 316 333 cat(cj,cnop); 317 334 break; 318 335 case 1 << 8: /* toggled no jump */ 319 336 case 0 << 8: 320 337 break; 321 338 default: 322 339 #ifdef DEBUG 323 340 printf("jop = x%x\n",op); 324 341 #endif 325 342 assert(0); 326 343 } 327 344 return cat(c,cj); 328 345 } 329 346 330 code *gencs(code *c,unsigned op,unsigned rm,unsigned FL2,symbol *s)347 code *gencs(code *c,unsigned op,unsigned ea,unsigned FL2,symbol *s) 331 348 { code cs; 332 349 333 350 cs.Iop = op; 334 cs.I rm = rm;351 cs.Iea = ea; 335 352 cs.Iflags = 0; 336 cs.Irex = rm >> 16;337 353 cs.IFL2 = FL2; 338 354 cs.IEVsym2 = s; 339 355 cs.IEVoffset2 = 0; 340 356 341 357 return gen(c,&cs); 342 358 } 343 359 344 code *genc2(code *c,unsigned op,unsigned rm,targ_size_t EV2)360 code *genc2(code *c,unsigned op,unsigned ea,targ_size_t EV2) 345 361 { code cs; 346 362 347 363 if (op > 0xFF) 348 364 { cs.Iop = op >> 8; 349 365 cs.Iop2 = op & 0xFF; 350 366 } 351 367 else 352 368 cs.Iop = op; 353 cs.I rm = rm;369 cs.Iea = ea; 354 370 cs.Iflags = CFoff; 355 cs.Irex = rm >> 16;356 371 cs.IFL2 = FLconst; 357 372 cs.IEV2.Vsize_t = EV2; 358 373 return gen(c,&cs); 359 374 } 360 375 361 376 /***************** 362 377 * Generate code. 
363 378 */ 364 379 365 code *genc1(code *c,unsigned op,unsigned rm,unsigned FL1,targ_size_t EV1)380 code *genc1(code *c,unsigned op,unsigned ea,unsigned FL1,targ_size_t EV1) 366 381 { code cs; 367 382 368 383 assert(FL1 < FLMAX); 369 384 assert(op < 256); 370 385 cs.Iop = op; 371 cs.Irm = rm; 372 cs.Isib = rm >> 8; 373 cs.Irex = rm >> 16; 386 cs.Iea = ea; 374 387 cs.Iflags = CFoff; 375 388 cs.IFL1 = FL1; 376 389 cs.IEV1.Vsize_t = EV1; 377 390 return gen(c,&cs); 378 391 } 379 392 380 393 /***************** 381 394 * Generate code. 382 395 */ 383 396 384 code *genc(code *c,unsigned op,unsigned rm,unsigned FL1,targ_size_t EV1,unsigned FL2,targ_size_t EV2)397 code *genc(code *c,unsigned op,unsigned ea,unsigned FL1,targ_size_t EV1,unsigned FL2,targ_size_t EV2) 385 398 { code cs; 386 399 387 400 assert(FL1 < FLMAX); 388 401 assert(op < 256); 389 402 cs.Iop = op; 390 cs.Irm = rm; 391 cs.Isib = rm >> 8; 392 cs.Irex = rm >> 16; 403 cs.Iea = ea; 393 404 cs.Iflags = CFoff; 394 405 cs.IFL1 = FL1; 395 406 cs.IEV1.Vsize_t = EV1; 396 407 assert(FL2 < FLMAX); 397 408 cs.IFL2 = FL2; 398 409 cs.IEV2.Vsize_t = EV2; 399 410 return gen(c,&cs); 400 411 } 401 412 402 413 /*************************************** 403 414 * Generate immediate multiply instruction for r1=r2*imm. 404 415 * Optimize it into LEA's if we can. 405 416 */ 406 417 407 418 code *genmulimm(code *c,unsigned r1,unsigned r2,targ_int imm) 408 419 { code cs; 409 420 410 421 // These optimizations should probably be put into pinholeopt() 411 422 switch (imm) 412 423 { case 1: branches/dmd-1.x/src/backend/cod1.c
r552 r577 26 26 #include "tassert.h" 27 27 28 28 targ_size_t paramsize(elem *e,unsigned stackalign); 29 29 STATIC code * funccall (elem *,unsigned,unsigned,regm_t *,regm_t); 30 30 31 31 /* array to convert from index register to r/m field */ 32 32 /* AX CX DX BX SP BP SI DI */ 33 33 static const signed char regtorm32[8] = { 0, 1, 2, 3,-1, 5, 6, 7 }; 34 34 signed char regtorm [8] = { -1,-1,-1, 7,-1, 6, 4, 5 }; 35 35 36 36 /************************** 37 37 * Determine if e is a 32 bit scaled index addressing mode. 38 38 * Returns: 39 39 * 0 not a scaled index addressing mode 40 40 * !=0 the value for ss in the SIB byte 41 41 */ 42 42 43 43 int isscaledindex(elem *e) 44 44 { targ_uns ss; 45 45 46 assert( I32);46 assert(!I16); 47 47 while (e->Eoper == OPcomma) 48 48 e = e->E2; 49 49 if (!(e->Eoper == OPshl && !e->Ecount && 50 50 e->E2->Eoper == OPconst && 51 51 (ss = e->E2->EV.Vuns) <= 3 52 52 ) 53 53 ) 54 54 ss = 0; 55 55 return ss; 56 56 } 57 57 58 58 /********************************************* 59 59 * Generate code for which isscaledindex(e) returned a non-zero result. 60 60 */ 61 61 62 62 code *cdisscaledindex(elem *e,regm_t *pidxregs,regm_t keepmsk) 63 63 { code *c; 64 64 regm_t r; 65 65 66 66 // Load index register with result of e->E1 … … 132 132 { 133 133 if (ssindex_array[i].product == product) 134 134 return i; 135 135 } 136 136 return 0; 137 137 } 138 138 139 139 /*************************************** 140 140 * Build an EA of the form disp[base][index*scale]. 
141 141 * Input: 142 142 * c struct to fill in 143 143 * base base register (-1 if none) 144 144 * index index register (-1 if none) 145 145 * scale scale factor - 1,2,4,8 146 146 * disp displacement 147 147 */ 148 148 149 149 void buildEA(code *c,int base,int index,int scale,targ_size_t disp) 150 150 { unsigned char rm; 151 151 unsigned char sib; 152 unsigned char rex = 0; 152 153 153 154 sib = 0; 154 if ( I32)155 if (!I16) 155 156 { unsigned ss; 156 157 157 158 assert(index != SP); 158 159 159 160 switch (scale) 160 161 { case 1: ss = 0; break; 161 162 case 2: ss = 1; break; 162 163 case 4: ss = 2; break; 163 164 case 8: ss = 3; break; 164 165 default: assert(0); 165 166 } 166 167 167 168 if (base == -1) 168 169 { 169 170 if (index == -1) 170 171 rm = modregrm(0,0,5); 171 172 else 172 173 { 173 174 rm = modregrm(0,0,4); 174 sib = modregrm(ss,index,5); 175 sib = modregrm(ss,index & 7,5); 176 if (index & 8) 177 rex |= REX_X; 175 178 } 176 179 } 177 180 else if (index == -1) 178 181 { 179 182 if (base == SP) 180 183 { 181 184 rm = modregrm(2,0,4); 182 185 sib = modregrm(0,4,SP); 183 186 } 184 187 else 185 rm = modregrm(2,0,base); 188 { rm = modregrm(2,0,base & 7); 189 if (base & 8) 190 rex |= REX_B; 191 } 186 192 } 187 193 else 188 194 { 189 195 rm = modregrm(2,0,4); 190 sib = modregrm(ss,index,base); 196 sib = modregrm(ss,index & 7,base & 7); 197 if (index & 8) 198 rex |= REX_X; 199 if (base & 8) 200 rex |= REX_B; 191 201 } 192 202 } 193 203 else 194 204 { 195 205 // -1 AX CX DX BX SP BP SI DI 196 206 static unsigned char EA16rm[9][9] = 197 207 { 198 208 { 0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85, }, // -1 199 209 { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // AX 200 210 { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // CX 201 211 { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // DX 202 212 { 0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81, }, // BX 203 213 { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // SP 204 214 { 
0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83, }, // BP 205 215 { 0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09, }, // SI 206 216 { 0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09, } // DI 207 217 }; 208 218 209 219 assert(scale == 1); 210 220 rm = EA16rm[base + 1][index + 1]; 211 221 assert(rm != 9); 212 222 } 213 223 c->Irm = rm; 214 224 c->Isib = sib; 225 c->Irex = rex; 215 226 c->IFL1 = FLconst; 216 227 c->IEV1.Vuns = disp; 217 228 } 218 229 219 230 /************************** 220 231 * For elems in regcon that don't match regconsave, 221 232 * clear the corresponding bit in regcon.cse.mval. 222 233 * Do same for regcon.immed. 223 234 */ 224 235 225 236 void andregcon(con_t *pregconsave) 226 { int i; 227 regm_t m; 228 229 m = ~1; 230 for (i = 0; i < REGMAX; i++) 237 { 238 regm_t m = ~1; 239 for (int i = 0; i < REGMAX; i++) 231 240 { if (pregconsave->cse.value[i] != regcon.cse.value[i]) 232 241 regcon.cse.mval &= m; 233 242 if (pregconsave->immed.value[i] != regcon.immed.value[i]) 234 243 regcon.immed.mval &= m; 235 244 m <<= 1; 236 245 m |= 1; 237 246 } 238 247 //printf("regcon.cse.mval = x%x, regconsave->mval = x%x ",regcon.cse.mval,pregconsave->cse.mval); 239 248 regcon.used |= pregconsave->used; 240 249 regcon.cse.mval &= pregconsave->cse.mval; 241 250 regcon.immed.mval &= pregconsave->immed.mval; 242 251 regcon.params &= pregconsave->params; 243 252 //printf("regcon.cse.mval®con.cse.mops = x%x, regcon.cse.mops = x%x\n",regcon.cse.mval & regcon.cse.mops,regcon.cse.mops); 244 253 regcon.cse.mops &= regcon.cse.mval; 245 254 } 246 255 247 256 /********************************* 248 257 * Scan down comma-expressions. 
249 258 * Output: 250 259 * *pe = first elem down right side that is not an OPcomma … … 295 304 if (e) 296 305 { 297 306 unsigned stackpushsave; 298 307 int stackcleansave; 299 308 300 309 stackpushsave = stackpush; 301 310 stackcleansave = cgstate.stackclean; 302 311 cgstate.stackclean = 0; // defer cleaning of stack 303 312 c = cat(c,codelem(e,pretregs,constflag)); 304 313 assert(cgstate.stackclean == 0); 305 314 cgstate.stackclean = stackcleansave; 306 315 c = genstackclean(c,stackpush - stackpushsave,*pretregs); // do defered cleaning 307 316 } 308 317 return c; 309 318 } 310 319 311 320 /******************************************** 312 321 * Gen a save/restore sequence for mask of registers. 313 322 */ 314 323 315 void gensaverestore(regm_t regm,code **csave,code **crestore) 316 { code *cs1; 317 code *cs2; 318 int i; 319 320 cs1 = NULL; 321 cs2 = NULL; 324 void gensaverestore2(regm_t regm,code **csave,code **crestore) 325 { 326 code *cs1 = *csave; 327 code *cs2 = *crestore; 328 322 329 regm &= mBP | mES | ALLREGS; 323 for (i = 0; regm; i++)330 for (int i = 0; regm; i++) 324 331 { 325 332 if (regm & 1) 326 333 { 327 assert(i != ES); // fix later 328 cs1 = gen1(cs1,0x50 + i); 329 cs2 = cat(gen1(NULL,0x58 + i),cs2); 334 if (i == ES) 335 { 336 cs1 = gen1(cs1, 0x06); // PUSH ES 337 cs2 = cat(gen1(CNIL, 0x07),cs2); // POP ES 338 } 339 else 340 { 341 cs1 = gen1(cs1,0x50 + (i & 7)); // PUSH i 342 code *c = gen1(NULL, 0x58 + (i & 7)); // POP i 343 if (i & 8) 344 { code_orrex(cs1, REX_B); 345 code_orrex(c, REX_B); 346 } 347 cs2 = cat(c,cs2); 348 } 330 349 } 331 350 regm >>= 1; 332 351 } 333 352 *csave = cs1; 334 353 *crestore = cs2; 354 } 355 356 void gensaverestore(regm_t regm,code **csave,code **crestore) 357 { 358 *csave = NULL; 359 *crestore = NULL; 360 gensaverestore2(regm, csave, crestore); 335 361 } 336 362 337 363 /**************************************** 338 364 * Clean parameters off stack. 
339 365 * Input: 340 366 * numpara amount to adjust stack pointer 341 367 * keepmsk mask of registers to not destroy 342 368 */ 343 369 344 370 code *genstackclean(code *c,unsigned numpara,regm_t keepmsk) 345 371 { 346 372 //dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean); 347 373 if (numpara && (cgstate.stackclean || STACKALIGN == 16)) 348 374 { 349 375 #if 0 // won't work if operand of scodelem 350 376 if (numpara == stackpush && // if this is all those pushed 351 377 needframe && // and there will be a BP 352 378 !config.windows && 353 379 !(regcon.mvar & fregsaved) // and no registers will be pushed 354 380 ) 355 381 c = genregs(c,0x89,BP,SP); // MOV SP,BP 356 382 else 357 383 #endif 358 384 { regm_t scratchm = 0; 359 unsigned r;360 385 361 386 if (numpara == REGSIZE && config.flags4 & CFG4space) 362 387 { 363 388 scratchm = ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar; 364 389 } 365 390 366 391 if (scratchm) 367 { c = cat(c,allocreg(&scratchm,&r,TYint)); 392 { unsigned r; 393 c = cat(c,allocreg(&scratchm,&r,TYint)); 368 394 c = gen1(c,0x58 + r); // POP r 369 395 } 370 396 else 371 c = genc2(c,0x81,modregrm(3,0,SP),numpara); // ADD SP,numpara 397 { c = genc2(c,0x81,modregrm(3,0,SP),numpara); // ADD SP,numpara 398 if (I64) 399 code_orrex(c, REX_W); 400 } 372 401 } 373 402 stackpush -= numpara; 374 403 c = genadjesp(c,-numpara); 375 404 } 376 405 return c; 377 406 } 378 407 379 408 380 409 /********************************* 381 410 * Generate code for a logical expression. 
382 411 * Input: 383 412 * e elem 384 413 * jcond 385 414 * bit 1 if TRUE then goto jump address if e 386 415 * if FALSE then goto jump address if !e 387 416 * 2 don't call save87() 388 417 * fltarg FLcode or FLblock, flavor of target if e evaluates to jcond 389 418 * targ either code or block pointer to destination 390 419 */ 391 420 392 421 code *logexp(elem *e,int jcond,unsigned fltarg,code *targ) 393 { code *c c,*c,*ce,*cnop;422 { code *c,*ce,*cnop; 394 423 regm_t retregs; 395 424 unsigned op; 396 int no87;397 425 398 426 //printf("logexp(e = %p, jcond = %d)\n", e, jcond); 399 no87 = (jcond & 2) == 0;427 int no87 = (jcond & 2) == 0; 400 428 _chkstack(); 401 c c = docommas(&e); /* scan down commas */429 code *cc = docommas(&e); // scan down commas 402 430 cgstate.stackclean++; 403 431 404 432 if (EOP(e) && !e->Ecount) /* if operator and not common sub */ 405 433 { con_t regconsave; 406 434 407 435 switch (e->Eoper) 408 436 { case OPoror: 409 437 if (jcond & 1) 410 438 { c = logexp(e->E1,jcond,fltarg,targ); 411 439 regconsave = regcon; 412 440 ce = logexp(e->E2,jcond,fltarg,targ); 413 441 } 414 442 else 415 443 { cnop = gennop(CNIL); 416 444 c = logexp(e->E1,jcond | 1,FLcode,cnop); 417 445 regconsave = regcon; 418 446 ce = logexp(e->E2,jcond,fltarg,targ); 419 447 ce = cat(ce,cnop); 420 448 } 421 449 cnop = CNIL; … … 438 466 freenode(e); 439 467 c = cat4(cc,c,ce,cnop); 440 468 goto Lret; 441 469 442 470 case OPnot: 443 471 jcond ^= 1; 444 472 case OPbool: 445 473 case OPs8int: 446 474 case OPu8int: 447 475 case OPs16_32: 448 476 case OPu16_32: 449 477 case OPs32_64: 450 478 case OPu32_64: 451 479 case OPu32_d: 452 480 case OPd_ld: 453 481 c = logexp(e->E1,jcond,fltarg,targ); 454 482 freenode(e); 455 483 goto Lretc; 456 484 457 485 case OPcond: 458 { code *cnop2; 459 con_t regconold; 460 461 cnop2 = gennop(CNIL); /* addresses of start of leaves */ 486 { 487 code *cnop2 = gennop(CNIL); // addresses of start of leaves 462 488 cnop = gennop(CNIL); 463 489 c = 
logexp(e->E1,FALSE,FLcode,cnop2); /* eval condition */ 464 regconold = regcon;490 con_t regconold = regcon; 465 491 ce = logexp(e->E2->E1,jcond,fltarg,targ); 466 492 ce = genjmp(ce,JMP,FLcode,(block *) cnop); /* skip second leaf */ 467 493 468 494 regconsave = regcon; 469 495 regcon = regconold; 470 496 471 497 code_next(cnop2) = logexp(e->E2->E2,jcond,fltarg,targ); 472 498 andregcon(®conold); 473 499 andregcon(®consave); 474 500 freenode(e->E2); 475 501 freenode(e); 476 502 c = cat6(cc,c,NULL,ce,cnop2,cnop); 477 503 goto Lret; 478 504 } 479 505 } 480 506 } 481 507 482 /* Special code for signed long compare */ 508 /* Special code for signed long compare. 509 * Not necessary for I64 until we do cents. 510 */ 483 511 if (OTrel2(e->Eoper) && /* if < <= >= > */ 484 512 !e->Ecount && 485 ( ( !I32&& tybasic(e->E1->Ety) == TYlong && tybasic(e->E2->Ety) == TYlong) ||486 ( I32 && tybasic(e->E1->Ety) == TYllong && tybasic(e->E2->Ety) == TYllong))513 ( (I16 && tybasic(e->E1->Ety) == TYlong && tybasic(e->E2->Ety) == TYlong) || 514 (I32 && tybasic(e->E1->Ety) == TYllong && tybasic(e->E2->Ety) == TYllong)) 487 515 ) 488 516 { 489 517 c = longcmp(e,jcond,fltarg,targ); 490 518 goto Lretc; 491 519 } 492 520 493 521 retregs = mPSW; /* return result in flags */ 494 522 op = jmpopcode(e); /* get jump opcode */ 495 523 if (!(jcond & 1)) 496 524 op ^= 0x101; // toggle jump condition(s) 497 525 c = codelem(e,&retregs,TRUE); /* evaluate elem */ 498 526 if (no87) 499 527 c = cat(c,cse_flush(no87)); // flush CSE's to memory 500 528 genjmp(c,op,fltarg,(block *) targ); /* generate jmp instruction */ 501 529 Lretc: 502 530 c = cat(cc,c); 503 531 Lret: 504 532 cgstate.stackclean--; 505 533 return c; 506 534 } 507 535 508 536 509 537 /****************************** 510 538 * Routine to aid in setting things up for gen(). 511 539 * Look for common subexpression. 512 540 * Can handle indirection operators, but not if they're common subs. 
513 541 * Input: 514 542 * e -> elem where we get some of the data from 515 543 * cs -> partially filled code to add 516 544 * op = opcode 517 545 * reg = reg field of (mod reg r/m) 518 546 * offset = data to be added to Voffset field 519 547 * keepmsk = mask of registers we must not destroy 520 548 * desmsk = mask of registers destroyed by executing the instruction 521 549 * Returns: 522 550 * pointer to code generated 523 551 */ 524 552 525 code *loadea(elem *e,code __ss*cs,unsigned op,unsigned reg,targ_size_t offset,553 code *loadea(elem *e,code *cs,unsigned op,unsigned reg,targ_size_t offset, 526 554 regm_t keepmsk,regm_t desmsk) 527 { unsigned i; 528 regm_t rm; 529 tym_t tym; 555 { 530 556 code *c,*cg,*cd; 531 557 532 558 #ifdef DEBUG 533 559 if (debugw) 534 560 printf("loadea: e=%p cs=%p op=x%x reg=%d offset=%ld keepmsk=x%x desmsk=x%x\n", 535 561 e,cs,op,reg,offset,keepmsk,desmsk); 536 562 #endif 537 563 538 564 assert(e); 539 565 cs->Iflags = 0; 540 566 cs->Irex = 0; 541 567 cs->Iop = op; 542 if ( I32 && op >= 0x100) /* if 2 byte opcode */568 if (!I16 && op >= 0x100) // if 2 byte opcode 543 569 { cs->Iop = op >> 8; 544 570 cs->Iop2 = op; 545 571 } 546 tym = e->Ety; 572 tym_t tym = e->Ety; 573 int sz = tysize(tym); 547 574 548 575 /* Determine if location we want to get is in a register. If so, */ 549 576 /* substitute the register for the EA. */ 550 577 /* Note that operators don't go through this. CSE'd operators are */ 551 578 /* picked up by comsub(). 
*/ 552 579 if (e->Ecount && /* if cse */ 553 580 e->Ecount != e->Ecomsub && /* and cse was generated */ 554 581 op != 0x8D && op != 0xC4 && /* and not an LEA or LES */ 555 582 (op != 0xFF || reg != 3) && /* and not CALLF MEM16 */ 556 583 (op & 0xFFF8) != 0xD8) // and not 8087 opcode 557 { int sz; 558 584 { 559 585 assert(!EOP(e)); /* can't handle this */ 560 rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; /* possible regs */ 561 sz = tysize(tym); 562 if (sz > REGSIZE) 586 regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs 587 if (sz > REGSIZE) // value is in 2 or 4 registers 563 588 { 564 if ( !I32 && sz == 8)589 if (I16 && sz == 8) // value is in 4 registers 565 590 { static regm_t rmask[4] = { mDX,mCX,mBX,mAX }; 566 591 rm &= rmask[offset >> 1]; 567 592 } 568 593 569 594 else if (offset) 570 595 rm &= mMSW; /* only high words */ 571 596 else 572 597 rm &= mLSW; /* only low words */ 573 598 } 574 for ( i = 0; rm; i++)599 for (unsigned i = 0; rm; i++) 575 600 { if (mask[i] & rm) 576 601 { if (regcon.cse.value[i] == e && // if register has elem 577 602 /* watch out for a CWD destroying DX */ 578 603 !(i == DX && op == 0xF7 && desmsk & mDX)) 579 604 { 580 605 /* if ES, then it can only be a load */ 581 606 if (i == ES) 582 607 { if (op != 0x8B) 583 608 goto L1; /* not a load */ 584 609 cs->Iop = 0x8C; /* MOV reg,ES */ 585 cs->Irm = modregrm(3,0,reg); 610 cs->Irm = modregrm(3,0,reg & 7); 611 if (reg & 8) 612 code_orrex(cs, REX_B); 586 613 } 587 else /* XXX reg,i */ 588 cs->Irm = modregrm(3,reg,i); 614 else // XXX reg,i 615 { 616 cs->Irm = modregrm(3,reg & 7,i & 7); 617 if (reg & 8) 618 code_orrex(cs, REX_R); 619 if (i & 8) 620 code_orrex(cs, REX_B); 621 } 589 622 c = CNIL; 590 623 goto L2; 591 624 } 592 625 rm &= ~mask[i]; 593 626 } 594 627 } 595 628 } 596 629 597 630 L1: 598 631 c = getlvalue(cs,e,keepmsk); 599 632 if (offset == REGSIZE) 600 633 getlvalue_msw(cs); 601 634 else 602 635 cs->IEVoffset1 += offset; 603 c s->Irm |= 
modregrm(0,reg,0); /* OR in reg field */604 if ( I32)636 code_newreg(cs, reg); // OR in reg field 637 if (!I16) 605 638 { 606 639 if (reg == 6 && op == 0xFF || /* don't PUSH a word */ 607 640 op == 0x0FB7 || op == 0x0FBF || /* MOVZX/MOVSX */ 608 641 (op & 0xFFF8) == 0xD8 || /* 8087 instructions */ 609 642 op == 0x8D) /* LEA */ 610 643 cs->Iflags &= ~CFopsize; 611 644 } 612 645 else if ((op & 0xFFF8) == 0xD8 && ADDFWAIT()) 613 646 cs->Iflags |= CFwait; 614 647 L2: 615 648 cg = getregs(desmsk); /* save any regs we destroy */ 616 649 617 650 /* KLUDGE! fix up DX for divide instructions */ 618 651 cd = CNIL; 619 652 if (op == 0xF7 && desmsk == (mAX|mDX)) /* if we need to fix DX */ 620 653 { if (reg == 7) /* if IDIV */ 621 cd = gen1(cd,0x99); /* CWD */ 622 else if (reg == 6) /* if DIV */ 623 cd = genregs(cd,0x33,DX,DX); /* CLR DX */ 654 { cd = gen1(cd,0x99); // CWD 655 if (I64 && sz == 8) 656 code_orrex(cd, REX_W); 657 } 658 else if (reg == 6) // if DIV 659 { cd = genregs(cd,0x33,DX,DX); // XOR DX,DX 660 if (I64 && sz == 8) 661 code_orrex(cd, REX_W); 662 } 624 663 } 625 664 626 665 // Eliminate MOV reg,reg 627 666 if ((cs->Iop & 0xFC) == 0x88 && 628 (cs->Irm & 0xC7) == modregrm(3,0,reg)) 629 cs->Iop = NOP; 667 (cs->Irm & 0xC7) == modregrm(3,0,reg & 7)) 668 { 669 unsigned r = cs->Irm & 7; 670 if (cs->Irex & REX_B) 671 r |= 8; 672 if (r == reg) 673 cs->Iop = NOP; 674 } 630 675 631 676 return cat4(c,cg,cd,gen(CNIL,cs)); 632 677 } 633 678 634 679 635 680 /************************** 636 681 * Get addressing mode. 637 682 */ 638 683 639 684 unsigned getaddrmode(regm_t idxregs) 640 685 { 641 unsigned reg;642 685 unsigned mode; 643 686 644 if (I32) 645 { reg = findreg(idxregs & (ALLREGS | mBP)); 646 mode = modregrm(2,0,reg); 647 } 648 else 687 if (I16) 649 688 { 650 689 mode = (idxregs & mBX) ? modregrm(2,0,7) : /* [BX] */ 651 690 (idxregs & mDI) ? modregrm(2,0,5): /* [DI] */ 652 691 (idxregs & mSI) ? 
modregrm(2,0,4): /* [SI] */ 653 692 (assert(0),1); 654 693 } 694 else 695 { unsigned reg = findreg(idxregs & (ALLREGS | mBP)); 696 mode = modregrmx(2,0,reg); 697 } 655 698 return mode; 699 } 700 701 void setaddrmode(code *c, regm_t idxregs) 702 { 703 unsigned mode = getaddrmode(idxregs); 704 c->Irm = mode & 0xFF; 705 c->Irex &= ~REX_B; 706 c->Irex |= mode >> 16; 656 707 } 657 708 658 709 /********************************************** 659 710 */ 660 711 661 712 void getlvalue_msw(code *c) 662 713 { 663 714 if (c->IFL1 == FLreg) 664 { unsigned regmsw; 665 666 regmsw = c->IEVsym1->Sregmsw; 667 c->Irm = (c->Irm & ~7) | regmsw; 715 { 716 unsigned regmsw = c->IEVsym1->Sregmsw; 717 c->Irm = (c->Irm & ~7) | (regmsw & 7); 718 if (regmsw & 8) 719 c->Irex |= REX_B; 720 else 721 c->Irex &= ~REX_B; 668 722 } 669 723 else 670 724 c->IEVoffset1 += REGSIZE; 671 725 } 672 726 673 727 /********************************************** 674 728 */ 675 729 676 730 void getlvalue_lsw(code *c) 677 731 { 678 732 if (c->IFL1 == FLreg) 679 { unsigned reglsw; 680 681 reglsw = c->IEVsym1->Sreglsw; 682 c->Irm = (c->Irm & ~7) | reglsw; 733 { 734 unsigned reglsw = c->IEVsym1->Sreglsw; 735 c->Irm = (c->Irm & ~7) | (reglsw & 7); 736 if (reglsw & 8) 737 c->Irex |= REX_B; 738 else 739 c->Irex &= ~REX_B; 683 740 } 684 741 else 685 742 c->IEVoffset1 -= REGSIZE; 686 743 } 687 744 688 745 /****************** 689 746 * Compute addressing mode. 690 747 * Generate & return sequence of code (if any). 691 748 * Return in cs the info on it. 
692 749 * Input: 693 750 * pcs -> where to store data about addressing mode 694 751 * e -> the lvalue elem 695 752 * keepmsk mask of registers we must not destroy or use 696 753 * if (keepmsk & RMstore), this will be only a store operation 697 754 * into the lvalue 698 755 * if (keepmsk & RMload), this will be a read operation only 699 756 */ 700 757 701 code *getlvalue(code __ss*pcs,elem *e,regm_t keepmsk)758 code *getlvalue(code *pcs,elem *e,regm_t keepmsk) 702 759 { regm_t idxregs; 703 760 unsigned fl,f,opsave; 704 761 code *c; 705 762 elem *e1; 706 763 elem *e11; 707 764 elem *e12; 708 765 bool e1isadd,e1free; 709 766 unsigned reg; 710 tym_t ty;711 767 tym_t e1ty; 712 768 symbol *s; 713 unsigned sz;714 769 715 770 //printf("getlvalue(e = %p)\n",e); 716 771 //elem_print(e); 717 772 assert(e); 718 773 elem_debug(e); 719 774 if (e->Eoper == OPvar || e->Eoper == OPrelconst) 720 775 { s = e->EV.sp.Vsym; 721 776 fl = s->Sfl; 722 777 if (tyfloating(s->ty())) 723 778 obj_fltused(); 724 779 } 725 780 else 726 781 fl = FLoper; 727 782 pcs->IFL1 = fl; 728 783 pcs->Iflags = CFoff; /* only want offsets */ 729 784 pcs->Irex = 0; 730 785 pcs->IEVoffset1 = 0; 731 ty = e->Ety; 786 787 tym_t ty = e->Ety; 788 unsigned sz = tysize(ty); 732 789 if (tyfloating(ty)) 733 790 obj_fltused(); 734 sz = tysize(ty); 735 if (I32 && sz == SHORTSIZE) 791 else if (I64 && sz == 8) 792 pcs->Irex |= REX_W; 793 if (!I16 && sz == SHORTSIZE) 736 794 pcs->Iflags |= CFopsize; 737 795 if (ty & mTYvolatile) 738 796 pcs->Iflags |= CFvolatile; 739 797 c = CNIL; 740 798 switch (fl) 741 799 { 742 800 #if 0 && TARGET_LINUX 743 801 case FLgot: 744 802 case FLgotoff: 745 803 gotref = 1; 746 804 pcs->IEVsym1 = s; 747 805 pcs->IEVoffset1 = e->EV.sp.Voffset; 748 806 if (e->Eoper == OPvar && fl == FLgot) 749 807 { 750 808 code *c1; 751 809 int saveop = pcs->Iop; 752 810 idxregs = allregs & ~keepmsk; // get a scratch register 753 811 c = allocreg(&idxregs,®,TYptr); 754 812 pcs->Irm = modregrm(2,reg,BX); // BX has 
GOT 755 813 pcs->Isib = 0; … … 801 859 e11 = e1->E1; 802 860 } 803 861 804 862 /* First see if we can replace *(e+&v) with 805 863 * MOV idxreg,e 806 864 * EA = [ES:] &v+idxreg 807 865 */ 808 866 809 867 if (e1isadd && 810 868 e12->Eoper == OPrelconst && 811 869 (f = el_fl(e12)) != FLfardata && 812 870 e1->Ecount == e1->Ecomsub && 813 871 (!e1->Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) && 814 872 tysize(e11->Ety) == REGSIZE 815 873 ) 816 874 { unsigned char t; /* component of r/m field */ 817 875 int ss; 818 876 int ssi; 819 877 820 878 /*assert(datafl[f]);*/ /* what if addr of func? */ 821 if ( I32)879 if (!I16) 822 880 { /* Any register can be an index register */ 823 idxregs = allregs & ~keepmsk;881 regm_t idxregs = allregs & ~keepmsk; 824 882 assert(idxregs); 825 883 826 884 /* See if e1->E1 can be a scaled index */ 827 885 ss = isscaledindex(e11); 828 886 if (ss) 829 887 { 830 888 /* Load index register with result of e11->E1 */ 831 889 c = cdisscaledindex(e11,&idxregs,keepmsk); 832 890 reg = findreg(idxregs); 833 891 #if 0 && TARGET_LINUX 834 892 if (f == FLgot || f == FLgotoff) // config.flags3 & CFG3pic 835 893 { 836 894 gotref = 1; 837 895 pcs->Irm = modregrm(2,0,4); 838 896 pcs->Isib = modregrm(ss,reg,BX); 839 897 } 840 898 else 841 899 #endif 842 900 { 843 901 t = stackfl[f] ? 
2 : 0; 844 902 pcs->Irm = modregrm(t,0,4); 845 pcs->Isib = modregrm(ss,reg,5); 903 pcs->Isib = modregrm(ss,reg & 7,5); 904 if (reg & 8) 905 pcs->Irex |= REX_X; 846 906 } 847 907 } 848 908 else if ((e11->Eoper == OPmul || e11->Eoper == OPshl) && 849 909 !e11->Ecount && 850 910 e11->E2->Eoper == OPconst && 851 911 (ssi = ssindex(e11->Eoper,e11->E2->EV.Vuns)) != 0 852 912 ) 853 913 { 854 914 regm_t scratchm; 855 unsigned r;856 int ss1;857 915 int ss2; 858 char ssflags;859 916 860 917 #if 0 && TARGET_LINUX 861 918 assert(f != FLgot && f != FLgotoff); 862 919 #endif 863 ssflags = ssindex_array[ssi].ssflags;920 char ssflags = ssindex_array[ssi].ssflags; 864 921 if (ssflags & SSFLnobp && stackfl[f]) 865 922 goto L6; 866 923 867 924 // Load index register with result of e11->E1 868 925 c = scodelem(e11->E1,&idxregs,keepmsk,TRUE); 869 926 reg = findreg(idxregs); 870 927 871 ss1 = ssindex_array[ssi].ss1;928 int ss1 = ssindex_array[ssi].ss1; 872 929 if (ssflags & SSFLlea) 873 930 { 874 931 assert(!stackfl[f]); 875 932 pcs->Irm = modregrm(2,0,4); 876 pcs->Isib = modregrm(ss1,reg,reg); 933 pcs->Isib = modregrm(ss1,reg & 7,reg & 7); 934 if (reg & 8) 935 pcs->Irex |= REX_X | REX_B; 877 936 } 878 937 else 879 938 { int rbase; 939 unsigned r; 880 940 881 941 scratchm = ALLREGS & ~keepmsk; 882 942 c = cat(c,allocreg(&scratchm,&r,TYint)); 883 943 884 944 if (ssflags & SSFLnobase1) 885 945 { t = 0; 886 946 rbase = 5; 887 947 } 888 948 else 889 949 { t = 0; 890 950 rbase = reg; 891 951 if (rbase == BP) 892 952 { static unsigned imm32[4] = {1+1,2+1,4+1,8+1}; 893 953 894 954 // IMUL r,BP,imm32 895 c = genc2(c,0x69,modreg rm(3,r,BP),imm32[ss1]);955 c = genc2(c,0x69,modregxrm(3,r,BP),imm32[ss1]); 896 956 goto L7; 897 957 } 898 958 } 899 959 900 c = gen2sib(c,0x8D,modregrm(t,r,4),modregrm(ss1,reg,rbase)); 960 c = gen2sib(c,0x8D,modregxrm(t,r,4),modregrm(ss1,reg & 7,rbase & 7)); 961 if (reg & 8) 962 code_orrex(c, REX_X); 963 if (rbase & 8) 964 code_orrex(c, REX_B); 965 901 966 if (ssflags & 
SSFLnobase1) 902 967 { code_last(c)->IFL1 = FLconst; 903 968 code_last(c)->IEV1.Vuns = 0; 904 969 } 905 970 L7: 906 971 if (ssflags & SSFLnobase) 907 972 { t = stackfl[f] ? 2 : 0; 908 973 rbase = 5; 909 974 } 910 975 else 911 976 { t = 2; 912 977 rbase = r; 913 978 assert(rbase != BP); 914 979 } 915 980 pcs->Irm = modregrm(t,0,4); 916 pcs->Isib = modregrm(ssindex_array[ssi].ss2,r,rbase); 981 pcs->Isib = modregrm(ssindex_array[ssi].ss2,r & 7,rbase & 7); 982 if (r & 8) 983 code_orrex(pcs, REX_X); 984 if (rbase & 8) 985 code_orrex(pcs, REX_B); 917 986 } 918 987 freenode(e11->E2); 919 988 freenode(e11); 920 989 } 921 990 else 922 991 { 923 992 L6: 924 993 /* Load index register with result of e11 */ 925 994 c = scodelem(e11,&idxregs,keepmsk,TRUE); 926 pcs->Irm = getaddrmode(idxregs);995 setaddrmode(pcs, idxregs); 927 996 #if 0 && TARGET_LINUX 928 997 if (e12->EV.sp.Vsym->Sfl == FLgot || e12->EV.sp.Vsym->Sfl == FLgotoff) 929 998 { 930 999 gotref = 1; 931 1000 #if 1 932 1001 reg = findreg(idxregs & (ALLREGS | mBP)); 933 1002 pcs->Irm = modregrm(2,0,4); 934 1003 pcs->Isib = modregrm(0,reg,BX); 935 1004 #else 936 1005 pcs->Isib = modregrm(0,pcs->Irm,BX); 937 1006 pcs->Irm = modregrm(2,0,4); 938 1007 #endif 939 1008 } 940 1009 else 941 1010 #endif 942 1011 if (stackfl[f]) /* if we need [EBP] too */ 943 { 944 pcs->Isib = modregrm(0,pcs->Irm,BP); 1012 { unsigned idx = pcs->Irm & 7; 1013 if (pcs->Irex & REX_B) 1014 pcs->Irex = (pcs->Irex & ~REX_B) | REX_X; 1015 pcs->Isib = modregrm(0,idx,BP); 945 1016 pcs->Irm = modregrm(2,0,4); 946 1017 } 947 1018 } 948 1019 } 949 1020 else 950 1021 { 951 1022 idxregs = IDXREGS & ~keepmsk; /* only these can be index regs */ 952 1023 assert(idxregs); 953 1024 #if 0 && TARGET_LINUX 954 1025 assert(f != FLgot && f != FLgotoff); 955 1026 #endif 956 1027 if (stackfl[f]) /* if stack data type */ 957 1028 { idxregs &= mSI | mDI; /* BX can't index off stack */ 958 1029 if (!idxregs) goto L1; /* index regs aren't avail */ 959 1030 t = 6; /* 
[BP+SI+disp] */ 960 1031 } 961 1032 else 962 1033 t = 0; /* [SI + disp] */ 963 1034 c = scodelem(e11,&idxregs,keepmsk,TRUE); /* load idx reg */ 964 1035 pcs->Irm = getaddrmode(idxregs) ^ t; … … 981 1052 { unsigned flagsave; 982 1053 983 1054 idxregs = IDXREGS & ~keepmsk; 984 1055 c = cat(c,allocreg(&idxregs,&reg,TYoffset)); 985 1056 986 1057 /* If desired result is a far pointer, we'll have */ 987 1058 /* to load another register with the segment of v */ 988 1059 if (e1ty == TYfptr) 989 1060 { 990 1061 unsigned msreg; 991 1062 992 1063 idxregs |= mMSW & ALLREGS & ~keepmsk; 993 1064 c = cat(c,allocreg(&idxregs,&msreg,TYfptr)); 994 1065 msreg = findregmsw(idxregs); 995 1066 /* MOV msreg,segreg */ 996 1067 c = genregs(c,0x8C,segfl[f],msreg); 997 1068 } 998 1069 opsave = pcs->Iop; 999 1070 flagsave = pcs->Iflags; 1000 1071 pcs->Iop = 0x8D; 1001 pcs->Irm |= modregrm(0,reg,0);1002 if ( I32)1072 code_newreg(pcs, reg); 1073 if (!I16) 1003 1074 pcs->Iflags &= ~CFopsize; 1004 1075 c = gen(c,pcs); /* LEA idxreg,EA */ 1005 1076 cssave(e1,idxregs,TRUE); 1006 if ( I32)1077 if (!I16) 1007 1078 pcs->Iflags = flagsave; 1008 1079 if (stackfl[f] && (config.wflags & WFssneds)) // if pointer into stack 1009 1080 pcs->Iflags |= CFss; // add SS: override 1010 1081 pcs->Iop = opsave; 1011 1082 pcs->IFL1 = FLoffset; 1012 1083 pcs->IEV1.Vuns = 0; 1013 pcs->Irm = getaddrmode(idxregs);1084 setaddrmode(pcs, idxregs); 1014 1085 } 1015 1086 freenode(e12); 1016 1087 if (e1free) 1017 1088 freenode(e1); 1018 1089 goto Lptr; 1019 1090 } 1020 1091 1021 1092 L1: 1022 1093 1023 1094 /* The rest of the cases could be a far pointer */ 1024 1095 1025 idxregs = (I 32 ? allregs : IDXREGS) & ~keepmsk; /* only these can be index regs */1096 idxregs = (I16 ? 
IDXREGS : allregs) & ~keepmsk; // only these can be index regs 1026 1097 assert(idxregs); 1027 if (I32 && sz == REGSIZE && keepmsk & RMstore) 1098 if (!I16 && 1099 (sz == REGSIZE || (I64 && sz == 4)) && 1100 keepmsk & RMstore) 1028 1101 idxregs |= regcon.mvar; 1029 1102 1030 1103 #if !TARGET_FLAT 1031 1104 switch (e1ty) 1032 1105 { case TYfptr: /* if far pointer */ 1033 1106 case TYhptr: 1034 1107 idxregs = (mES | IDXREGS) & ~keepmsk; // need segment too 1035 1108 assert(idxregs & mES); 1036 1109 pcs->Iflags |= CFes; /* ES segment override */ 1037 1110 break; 1038 1111 case TYsptr: /* if pointer to stack */ 1039 1112 if (config.wflags & WFssneds) // if SS != DS 1040 1113 pcs->Iflags |= CFss; /* then need SS: override */ 1041 1114 break; 1042 1115 case TYcptr: /* if pointer to code */ 1043 1116 pcs->Iflags |= CFcs; /* then need CS: override */ 1044 1117 break; 1045 1118 } 1046 1119 #endif 1047 1120 pcs->IFL1 = FLoffset; … … 1054 1127 */ 1055 1128 1056 1129 if (e1isadd && e12->Eoper == OPconst && 1057 1130 tysize(e12->Ety) == REGSIZE && 1058 1131 (!e1->Ecount || !e1free) 1059 1132 ) 1060 1133 { int ss; 1061 1134 1062 1135 pcs->IEV1.Vuns = e12->EV.Vuns; 1063 1136 freenode(e12); 1064 1137 if (e1free) freenode(e1); 1065 1138 if (I32 && e11->Eoper == OPadd && !e11->Ecount && 1066 1139 tysize(e11->Ety) == REGSIZE) 1067 1140 { 1068 1141 e12 = e11->E2; 1069 1142 e11 = e11->E1; 1070 1143 e1 = e1->E1; 1071 1144 e1free = TRUE; 1072 1145 goto L4; 1073 1146 } 1074 if ( I32&& (ss = isscaledindex(e11)) != 0)1147 if (!I16 && (ss = isscaledindex(e11)) != 0) 1075 1148 { // (v * scale) + const 1076 1149 c = cdisscaledindex(e11,&idxregs,keepmsk); 1077 1150 reg = findreg(idxregs); 1078 1151 pcs->Irm = modregrm(0,0,4); 1079 pcs->Isib = modregrm(ss,reg,5); 1152 pcs->Isib = modregrm(ss,reg & 7,5); 1153 if (reg & 8) 1154 pcs->Irex |= REX_X; 1080 1155 } 1081 1156 else 1082 1157 { 1083 1158 c = scodelem(e11,&idxregs,keepmsk,TRUE); // load index reg 1084 pcs->Irm = getaddrmode(idxregs);1159 
setaddrmode(pcs, idxregs); 1085 1160 } 1086 1161 goto Lptr; 1087 1162 } 1088 1163 1089 1164 /* Look for *(v1 + v2) 1090 1165 * EA = [v1][v2] 1091 1166 */ 1092 1167 1093 if ( I32&& e1isadd && (!e1->Ecount || !e1free) &&1168 if (!I16 && e1isadd && (!e1->Ecount || !e1free) && 1094 1169 tysize[e1ty] == REGSIZE) 1095 1170 { code *c2; 1096 1171 regm_t idxregs2; 1097 1172 unsigned base,index; 1098 1173 int ss; 1099 1174 1100 1175 L4: 1101 1176 // Look for *(v1 + v2 << scale) 1102 1177 ss = isscaledindex(e12); 1103 1178 if (ss) 1104 1179 { 1105 1180 c = scodelem(e11,&idxregs,keepmsk,TRUE); 1106 1181 idxregs2 = allregs & ~(idxregs | keepmsk); 1107 1182 c2 = cdisscaledindex(e12,&idxregs2,keepmsk | idxregs); 1108 1183 } 1109 1184 1110 1185 // Look for *(v1 << scale + v2) 1111 1186 else if ((ss = isscaledindex(e11)) != 0) 1112 1187 { 1113 1188 idxregs2 = idxregs; … … 1122 1197 ) 1123 1198 { 1124 1199 pcs->IEV1.Vuns = e11->E2->EV.Vuns; 1125 1200 idxregs2 = idxregs; 1126 1201 c = cdisscaledindex(e11->E1,&idxregs2,keepmsk); 1127 1202 idxregs = allregs & ~(idxregs2 | keepmsk); 1128 1203 c2 = scodelem(e12,&idxregs,keepmsk | idxregs2,TRUE); 1129 1204 freenode(e11->E2); 1130 1205 freenode(e11); 1131 1206 } 1132 1207 else 1133 1208 { 1134 1209 c = scodelem(e11,&idxregs,keepmsk,TRUE); 1135 1210 idxregs2 = allregs & ~(idxregs | keepmsk); 1136 1211 c2 = scodelem(e12,&idxregs2,keepmsk | idxregs,TRUE); 1137 1212 } 1138 1213 c = cat(c,c2); 1139 1214 base = findreg(idxregs); 1140 1215 index = findreg(idxregs2); 1141 1216 pcs->Irm = modregrm(2,0,4); 1142 pcs->Isib = modregrm(ss,index,base); 1217 pcs->Isib = modregrm(ss,index & 7,base & 7); 1218 if (index & 8) 1219 pcs->Irex |= REX_X; 1220 if (base & 8) 1221 pcs->Irex |= REX_B; 1143 1222 if (e1free) freenode(e1); 1144 1223 goto Lptr; 1145 1224 } 1146 1225 1147 1226 /* give up and replace *e1 with 1148 1227 * MOV idxreg,e 1149 1228 * EA = 0[idxreg] 1150 1229 * pinholeopt() will usually correct the 0, we need it in case 1151 1230 * we have a 
pointer to a long and need an offset to the second 1152 1231 * word. 1153 1232 */ 1154 1233 1155 1234 assert(e1free); 1156 1235 c = scodelem(e1,&idxregs,keepmsk,TRUE); /* load index register */ 1157 pcs->Irm = getaddrmode(idxregs);1236 setaddrmode(pcs, idxregs); 1158 1237 Lptr: 1159 1238 if (config.flags3 & CFG3ptrchk) 1160 1239 cod3_ptrchk(&c,pcs,keepmsk); // validate pointer code 1161 1240 break; 1162 1241 case FLdatseg: 1163 1242 assert(0); 1164 1243 #if 0 1165 1244 pcs->Irm = modregrm(0,0,BPRM); 1166 1245 pcs->IEVpointer1 = e->EVpointer; 1167 1246 break; 1168 1247 #endif 1169 1248 case FLfltreg: 1170 1249 reflocal = TRUE; 1171 1250 pcs->Irm = modregrm(2,0,BPRM); 1172 1251 pcs->IEV1.Vint = 0; 1173 1252 break; 1174 1253 case FLreg: 1175 1254 goto L2; 1176 1255 case FLpara: 1177 1256 refparam = TRUE; 1178 1257 pcs->Irm = modregrm(2,0,BPRM); 1179 1258 goto L2; 1180 1259 1181 1260 case FLauto: 1182 1261 if (s->Sclass == SCfastpar && regcon.params & mask[s->Spreg]) 1183 1262 { 1184 1263 if (keepmsk & RMload) 1185 1264 { 1186 1265 if (sz == REGSIZE) // could this be (sz <= REGSIZE) ? 
1187 1266 { 1188 pcs->Irm = modregrm(3,0,s->Spreg); 1267 pcs->Irm = modregrm(3,0,s->Spreg & 7); 1268 if (s->Spreg & 8) 1269 pcs->Irex |= REX_B; 1189 1270 regcon.used |= mask[s->Spreg]; 1190 1271 break; 1191 1272 } 1192 1273 } 1193 1274 else 1194 1275 regcon.params &= ~mask[s->Spreg]; 1195 1276 } 1196 1277 case FLtmp: 1197 1278 case FLbprel: 1198 1279 reflocal = TRUE; 1199 1280 pcs->Irm = modregrm(2,0,BPRM); 1200 1281 goto L2; 1201 1282 case FLextern: 1202 1283 if (s->Sident[0] == '_' && memcmp(s->Sident + 1,"tls_array",10) == 0) 1203 1284 { 1204 1285 #if TARGET_LINUX || TARGET_FREEBSD || TARGET_SOLARIS 1205 1286 // Rewrite as GS:[0000] 1206 1287 pcs->Irm = modregrm(0, 0, BPRM); 1207 1288 pcs->IFL1 = FLconst; 1208 1289 pcs->IEV1.Vuns = 0; … … 1216 1297 goto L3; 1217 1298 case FLdata: 1218 1299 case FLudata: 1219 1300 case FLcsdata: 1220 1301 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1221 1302 case FLgot: 1222 1303 case FLgotoff: 1223 1304 case FLtlsdata: 1224 1305 #endif 1225 1306 L3: 1226 1307 pcs->Irm = modregrm(0,0,BPRM); 1227 1308 L2: 1228 1309 if (fl == FLreg) 1229 1310 { assert(s->Sregm & regcon.mvar); 1230 1311 if ( 1231 1312 s->Sclass == SCregpar || 1232 1313 s->Sclass == SCparameter) 1233 1314 { refparam = TRUE; 1234 1315 reflocal = TRUE; // kludge to set up prolog 1235 1316 } 1236 pcs->Irm = modregrm(3,0,s->Sreglsw); 1317 pcs->Irm = modregrm(3,0,s->Sreglsw & 7); 1318 if (s->Sreglsw & 8) 1319 pcs->Irex |= REX_B; 1237 1320 if (e->EV.sp.Voffset == 1 && sz == 1) 1238 1321 { assert(s->Sregm & BYTEREGS); 1322 assert(s->Sreglsw < 4); 1239 1323 pcs->Irm |= 4; // use 2nd byte of register 1240 1324 } 1241 1325 else 1242 1326 assert(!e->EV.sp.Voffset); 1243 1327 } 1244 1328 else if (s->ty() & mTYcs && !(fl == FLextern && LARGECODE)) 1245 1329 { 1246 1330 pcs->Iflags |= CFcs | CFoff; 1247 1331 } 1248 1332 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1249 1333 // if (fl == FLtlsdata || s->ty() & mTYthread) 1250 1334 // 
pcs->Iflags |= CFgs; 1251 1335 #endif 1252 1336 pcs->IEVsym1 = s; 1253 1337 pcs->IEVoffset1 = e->EV.sp.Voffset; 1254 1338 if (sz == 1) 1255 1339 { /* Don't use SI or DI for this variable */ 1256 1340 s->Sflags |= GTbyte; 1257 1341 if (e->EV.sp.Voffset > 1) 1258 1342 s->Sflags &= ~GTregcand; 1259 1343 } 1260 1344 else if (e->EV.sp.Voffset) 1261 1345 s->Sflags &= ~GTregcand; 1262 1346 if (!(keepmsk & RMstore)) // if not store only 1263 1347 { s->Sflags |= SFLread; // assume we are doing a read 1264 1348 } 1265 1349 break; 1266 1350 case FLpseudo: 1267 1351 #if MARS 1268 1352 assert(0); 1269 1353 #else 1270 { unsigned u; 1271 1272 u = s->Sreglsw; 1354 { 1355 unsigned u = s->Sreglsw; 1273 1356 c = getregs(pseudomask[u]); 1274 1357 pcs->Irm = modregrm(3,0,pseudoreg[u] & 7); 1275 1358 break; 1276 1359 } 1277 1360 #endif 1278 1361 case FLfardata: 1279 1362 assert(!TARGET_FLAT); 1280 1363 case FLfunc: /* reading from code seg */ 1281 1364 if (config.exe & EX_flat) 1282 1365 goto L3; 1283 1366 Lfardata: 1284 { regm_t regm; 1285 code *c1; 1286 1287 regm = ALLREGS & ~keepmsk; /* need scratch register */ 1288 c1 = allocreg(&regm,&reg,TYint); 1367 { 1368 regm_t regm = ALLREGS & ~keepmsk; // need scratch register 1369 code *c1 = allocreg(&regm,&reg,TYint); 1289 1370 /* MOV mreg,seg of symbol */ 1290 1371 c = gencs(CNIL,0xB8 + reg,0,FLextern,s); 1291 1372 c->Iflags = CFseg; 1292 c = gen2(c,0x8E,modregrm (3,0,reg)); /* MOV ES,reg */1373 c = gen2(c,0x8E,modregrmx(3,0,reg)); /* MOV ES,reg */ 1293 1374 c = cat3(c1,getregs(mES),c); 1294 1375 pcs->Iflags |= CFes | CFoff; /* ES segment override */ 1295 1376 goto L3; 1296 1377 } 1297 1378 1298 1379 case FLstack: 1299 assert( I32);1380 assert(!I16); 1300 1381 pcs->Irm = modregrm(2,0,4); 1301 1382 pcs->Isib = modregrm(0,4,SP); 1302 1383 pcs->IEVsym1 = s; 1303 1384 pcs->IEVoffset1 = e->EV.sp.Voffset; 1304 1385 break; 1305 1386 1306 1387 default: 1307 1388 #ifdef DEBUG 1308 1389 WRFL((enum FL)fl); 1309 1390 symbol_print(s); 1310 1391 #endif 1311 1392 
assert(0); 1312 1393 } 1313 1394 return c; 1314 1395 } 1315 1396 1316 1397 1317 1398 /******************************* 1318 1399 * Same as codelem(), but do not destroy the registers in keepmsk. 1319 1400 * Use scratch registers as much as possible, then use stack. … … 1432 1512 1433 1513 assert(i < REGMAX); 1434 1514 if (mi & tosave) /* i = register to save */ 1435 1515 { 1436 1516 if (touse) /* if any scratch registers */ 1437 1517 { for (j = 0; j < 8; j++) 1438 1518 { regm_t mj = mask[j]; 1439 1519 1440 1520 if (touse & mj) 1441 1521 { cs1 = genmovreg(cs1,j,i); 1442 1522 cs2 = cat(genmovreg(CNIL,i,j),cs2); 1443 1523 touse &= ~mj; 1444 1524 mfuncreg &= ~mj; 1445 1525 regcon.used |= mj; 1446 1526 break; 1447 1527 } 1448 1528 } 1449 1529 assert(j < 8); 1450 1530 } 1451 1531 else /* else use stack */ 1532 #if 0 1452 1533 { int push,pop; 1453 1534 1454 1535 stackchanged = 1; 1455 1536 adjesp += REGSIZE; 1456 1537 if (i == ES) 1457 1538 { push = 0x06; 1458 1539 pop = 0x07; 1459 1540 } 1460 1541 else 1461 1542 { push = 0x50 + i; 1462 1543 pop = push | 8; 1463 1544 } 1464 1545 cs1 = gen1(cs1,push); /* PUSH i */ 1465 1546 cs2 = cat(gen1(CNIL,pop),cs2); /* POP i */ 1466 1547 } 1548 #else 1549 { 1550 stackchanged = 1; 1551 adjesp += REGSIZE; 1552 gensaverestore2(mask[i], &cs1, &cs2); 1553 } 1554 #endif 1467 1555 cs3 = cat(getregs(mi),cs3); 1468 1556 tosave &= ~mi; 1469 1557 } 1470 1558 } 1471 1559 if (adjesp) 1472 1560 { 1473 1561 // If this is done an odd number of times, it 1474 1562 // will throw off the 8 byte stack alignment. 1475 1563 // We should *only* worry about this if a function 1476 1564 // was called in the code generation by codelem(). 
1477 1565 int sz; 1478 1566 if (STACKALIGN == 16) 1479 1567 sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1); 1480 1568 else 1481 1569 sz = -(adjesp & 7) & 7; 1482 if (calledafunc && I32&& sz && (STACKALIGN == 16 || config.flags4 & CFG4stackalign))1483 { code *cx;1484 1570 if (calledafunc && !I16 && sz && (STACKALIGN == 16 || config.flags4 & CFG4stackalign)) 1571 { 1572 unsigned grex = I64 ? REX_W << 16 : 0; 1485 1573 regm_t mval_save = regcon.immed.mval; 1486 1574 regcon.immed.mval = 0; // prevent reghasvalue() optimizations 1487 1575 // because c hasn't been executed yet 1488 cs1 = genc2(cs1,0x81, modregrm(3,5,SP),sz); // SUB ESP,sz1576 cs1 = genc2(cs1,0x81,grex | modregrm(3,5,SP),sz); // SUB ESP,sz 1489 1577 regcon.immed.mval = mval_save; 1490 1578 cs1 = genadjesp(cs1, sz); 1491 1579 1492 c x = genc2(CNIL,0x81,modregrm(3,0,SP),sz); // ADD ESP,sz1580 code *cx = genc2(CNIL,0x81,grex | modregrm(3,0,SP),sz); // ADD ESP,sz 1493 1581 cx = genadjesp(cx, -sz); 1494 1582 cs2 = cat(cx, cs2); 1495 1583 } 1496 1584 1497 1585 cs1 = genadjesp(cs1,adjesp); 1498 1586 cs2 = genadjesp(cs2,-adjesp); 1499 1587 } 1500 1588 1501 1589 calledafunc |= calledafuncsave; 1502 1590 msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save */ 1503 1591 mfuncreg &= oldmfuncreg; /* update original */ 1504 1592 #ifdef DEBUG 1505 1593 if (debugw) 1506 1594 printf("-scodelem(e=%p *pretregs=x%x keepmsk=x%x constflag=%d\n", 1507 1595 e,*pretregs,keepmsk,constflag); 1508 1596 #endif 1509 1597 return cat4(cs1,c,cs3,cs2); 1510 1598 } 1511 1599 1512 1600 1513 1601 /***************************** 1514 1602 * Given an opcode and EA in cs, generate code 1515 1603 * for each floating register in turn. 
1516 1604 * Input: 1517 1605 * tym either TYdouble or TYfloat 1518 1606 */ 1519 1607 1520 code *fltregs(code __ss*pcs,tym_t tym)1608 code *fltregs(code *pcs,tym_t tym) 1521 1609 { code *c; 1522 1610 1611 assert(!I64); 1523 1612 tym = tybasic(tym); 1524 1613 if (I32) 1525 1614 { 1526 1615 c = getregs((tym == TYfloat) ? mAX : mAX | mDX); 1527 1616 if (tym != TYfloat) 1528 1617 { 1529 1618 pcs->IEVoffset1 += REGSIZE; 1530 1619 NEWREG(pcs->Irm,DX); 1531 1620 c = gen(c,pcs); 1532 1621 pcs->IEVoffset1 -= REGSIZE; 1533 1622 } 1534 1623 NEWREG(pcs->Irm,AX); 1535 1624 c = gen(c,pcs); 1536 1625 } 1537 1626 else 1538 1627 { 1539 1628 c = getregs((tym == TYfloat) ? FLOATREGS_16 : DOUBLEREGS_16); 1540 1629 pcs->IEVoffset1 += (tym == TYfloat) ? 2 : 6; 1541 1630 if (tym == TYfloat) 1542 1631 NEWREG(pcs->Irm,DX); … … 1553 1642 { pcs->IEVoffset1 -= 2; 1554 1643 NEWREG(pcs->Irm,CX); 1555 1644 gen(c,pcs); 1556 1645 pcs->IEVoffset1 -= 2; /* note that exit is with Voffset unaltered */ 1557 1646 NEWREG(pcs->Irm,DX); 1558 1647 gen(c,pcs); 1559 1648 } 1560 1649 } 1561 1650 return c; 1562 1651 } 1563 1652 1564 1653 1565 1654 /***************************** 1566 1655 * Given a result in registers, test it for TRUE or FALSE. 1567 1656 * Will fail if TYfptr and the reg is ES! 1568 1657 * If saveflag is TRUE, preserve the contents of the 1569 1658 * registers. 
1570 1659 */ 1571 1660 1572 1661 code *tstresult(regm_t regm,tym_t tym,unsigned saveflag) 1573 { code *ce; 1574 unsigned reg; 1662 { 1575 1663 unsigned scrreg; /* scratch register */ 1576 unsigned sz;1577 1664 regm_t scrregm; 1578 1665 1579 1666 #ifdef DEBUG 1580 1667 if (!(regm & (mBP | ALLREGS))) 1581 1668 printf("tstresult(regm = x%x, tym = x%lx, saveflag = %d)\n", 1582 1669 regm,tym,saveflag); 1583 1670 #endif 1584 1671 assert(regm & (mBP | ALLREGS)); 1585 1672 tym = tybasic(tym); 1586 c e = CNIL;1587 reg = findreg(regm);1588 sz = tysize[tym];1673 code *ce = CNIL; 1674 unsigned reg = findreg(regm); 1675 unsigned sz = tysize[tym]; 1589 1676 if (sz == 1) 1590 1677 { assert(regm & BYTEREGS); 1591 1678 return genregs(ce,0x84,reg,reg); // TEST regL,regL 1592 1679 } 1593 1680 if (sz <= REGSIZE) 1594 1681 { 1595 if ( I32)1682 if (!I16) 1596 1683 { 1597 1684 if (tym == TYfloat) 1598 1685 { if (saveflag) 1599 1686 { 1600 1687 scrregm = allregs & ~regm; /* possible scratch regs */ 1601 1688 ce = allocreg(&scrregm,&scrreg,TYoffset); /* allocate scratch reg */ 1602 1689 ce = genmovreg(ce,scrreg,reg); /* MOV scrreg,msreg */ 1603 1690 reg = scrreg; 1604 1691 } 1605 1692 ce = cat(ce,getregs(mask[reg])); 1606 return gen2(ce,0xD1,modregrm (3,4,reg)); /* SHL reg,1 */1693 return gen2(ce,0xD1,modregrmx(3,4,reg)); // SHL reg,1 1607 1694 } 1608 1695 ce = gentstreg(ce,reg); // TEST reg,reg 1609 if ( tysize[tym]== SHORTSIZE)1696 if (sz == SHORTSIZE) 1610 1697 ce->Iflags |= CFopsize; /* 16 bit operands */ 1698 else if (sz == 8) 1699 code_orrex(ce, REX_W); 1611 1700 } 1612 1701 else 1613 1702 ce = gentstreg(ce,reg); // TEST reg,reg 1614 1703 return ce; 1615 1704 } 1616 1705 if (saveflag || tyfv(tym)) 1617 1706 { 1618 1707 scrregm = ALLREGS & ~regm; /* possible scratch regs */ 1619 1708 ce = allocreg(&scrregm,&scrreg,TYoffset); /* allocate scratch reg */ 1620 1709 if (I32 || sz == REGSIZE * 2) 1621 1710 { code *c; 1622 1711 1623 1712 assert(regm & mMSW && regm & mLSW); 1624 1713 1625 
1714 reg = findregmsw(regm); 1626 1715 if (I32) 1627 1716 { 1628 1717 if (tyfv(tym)) 1629 1718 { c = genregs(CNIL,0x0F,scrreg,reg); 1630 1719 c->Iop2 = 0xB7; /* MOVZX scrreg,msreg */ … … 1701 1790 1702 1791 // printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n", 1703 1792 // e,regm_str(retregs),regm_str(*pretregs)); 1704 1793 if (*pretregs == 0) return CNIL; /* if don't want result */ 1705 1794 assert(e && retregs); /* need something to work with */ 1706 1795 forccs = *pretregs & mPSW; 1707 1796 forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK); 1708 1797 tym = tybasic(e->Ety); 1709 1798 #if 0 1710 1799 if (tym == TYstruct) 1711 1800 // Hack to support cdstreq() 1712 1801 tym = TYfptr; 1713 1802 #else 1714 1803 if (tym == TYstruct) 1715 1804 // Hack to support cdstreq() 1716 1805 tym = (forregs & mMSW) ? TYfptr : TYnptr; 1717 1806 #endif 1718 1807 c = CNIL; 1719 1808 sz = tysize[tym]; 1720 1809 if (sz == 1) 1721 { unsigned reg; 1722 1810 { 1723 1811 assert(retregs & BYTEREGS); 1724 reg = findreg(retregs);1812 unsigned reg = findreg(retregs); 1725 1813 if (e->Eoper == OPvar && 1726 1814 e->EV.sp.Voffset == 1 && 1727 1815 e->EV.sp.Vsym->Sfl == FLreg) 1728 1816 { 1817 assert(reg < 4); 1729 1818 if (forccs) 1730 1819 c = gen2(c,0x84,modregrm(3,reg | 4,reg | 4)); // TEST regH,regH 1731 1820 forccs = 0; 1732 1821 } 1733 1822 } 1734 1823 if ((retregs & forregs) == retregs) /* if already in right registers */ 1735 1824 *pretregs = retregs; 1736 1825 else if (forregs) /* if return the result in registers */ 1737 { unsigned opsflag; 1738 1826 { 1739 1827 if (forregs & (mST01 | mST0)) 1740 1828 return fixresult87(e,retregs,pretregs); 1741 1829 ce = CNIL; 1742 opsflag = FALSE;1743 if ( !I32&& sz == 8)1830 unsigned opsflag = FALSE; 1831 if (I16 && sz == 8) 1744 1832 { if (forregs & mSTACK) 1745 1833 { assert(retregs == DOUBLEREGS_16); 1746 1834 /* Push floating regs */ 1747 1835 c = CNIL; 1748 1836 ce = gen1(ce,0x50 + AX); 1749 1837 gen1(ce,0x50 + BX); 
1750 1838 gen1(ce,0x50 + CX); 1751 1839 gen1(ce,0x50 + DX); 1752 1840 stackpush += DOUBLESIZE; 1753 1841 } 1754 1842 else if (retregs & mSTACK) 1755 1843 { assert(forregs == DOUBLEREGS_16); 1756 1844 /* Pop floating regs */ 1757 1845 c = getregs(forregs); 1758 1846 ce = gen1(ce,0x58 + DX); 1759 1847 gen1(ce,0x58 + CX); 1760 1848 gen1(ce,0x58 + BX); 1761 1849 gen1(ce,0x58 + AX); 1762 1850 stackpush -= DOUBLESIZE; 1763 1851 retregs = DOUBLEREGS_16; /* for tstresult() below */ … … 1796 1884 /* cuz we have real result in rreg */ 1797 1885 retregs = *pretregs & ~mPSW; 1798 1886 } 1799 1887 if (forccs) /* if return result in flags */ 1800 1888 c = cat(c,tstresult(retregs,tym,forregs)); 1801 1889 return c; 1802 1890 } 1803 1891 1804 1892 1805 1893 /******************************** 1806 1894 * Generate code sequence to call C runtime library support routine. 1807 1895 * clib = CLIBxxxx 1808 1896 * keepmask = mask of registers not to destroy. Currently can 1809 1897 * handle only 1. Should use a temporary rather than 1810 1898 * push/pop for speed. 
1811 1899 */ 1812 1900 1813 1901 int clib_inited = 0; // != 0 if initialized 1814 1902 1815 1903 code *callclib(elem *e,unsigned clib,regm_t *pretregs,regm_t keepmask) 1816 { code *c,*cpop; 1817 regm_t retregs; 1818 symbol *s; 1819 int i; 1820 1904 { 1821 1905 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1822 1906 static symbol lib[] = 1823 1907 { 1824 1908 /* Convert destroyed regs into saved regs */ 1825 1909 #define Z(desregs) (~(desregs) & (mBP| mES | ALLREGS)) 1826 1910 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1827 1911 #define N(name) "_" name 1828 1912 #else 1829 1913 #define N(name) name 1830 1914 #endif 1831 1915 1832 1916 /* Shorthand to map onto SYMBOLY() */ 1833 1917 #define Y(desregs,name) SYMBOLY(FLfunc,Z(desregs),N(name),0) 1834 1918 1835 1919 Y(0,"_LCMP__"), // CLIBlcmp 1836 1920 Y(mAX|mCX|mDX,"_LMUL__"), // CLIBlmul 1837 1921 #if 1 1838 1922 Y(mAX|mBX|mCX|mDX,"_LDIV__"), // CLIBldiv 1839 1923 Y(mAX|mBX|mCX|mDX,"_LDIV__"), // CLIBlmod 1840 1924 Y(mAX|mBX|mCX|mDX,"_ULDIV__"), // CLIBuldiv … … 2072 2156 {mAX,mAX,0,2}, // _DBLINT87@ dblint87 2073 2157 {mDX|mAX,mAX,0,2}, // _DBLLNG87@ dbllng87 2074 2158 {0,0,0,2}, // _FTST@ 2075 2159 {mPSW,mPSW,0,INFfloat,0,2}, // _FCOMPP@ 2076 2160 {mPSW,mPSW,0,2}, // _FTEST@ 2077 2161 {mPSW,mPSW,0,2}, // _FTEST0@ 2078 2162 {mST0,mST0,0,INFfloat,1,1}, // _FDIV@ 2079 2163 2080 2164 {mST01,mST01,0,INF32|INFfloat,3,5}, // _Cmul 2081 2165 {mST01,mST01,0,INF32|INFfloat,0,2}, // _Cdiv 2082 2166 {mPSW, mPSW, 0,INF32|INFfloat,0,4}, // _Ccmp 2083 2167 2084 2168 {mST0,mST0,0,INF32|INFfloat,2,1}, // _U64_LDBL 2085 2169 {0,mDX|mAX,0,INF32|INFfloat,1,2}, // __LDBLULLNG 2086 2170 }; 2087 2171 2088 2172 if (!clib_inited) /* if not initialized */ 2089 2173 { 2090 2174 assert(sizeof(lib) / sizeof(lib[0]) == CLIBMAX); 2091 2175 assert(sizeof(info) / sizeof(info[0]) == CLIBMAX); 2092 for (i = 0; i < CLIBMAX; i++)2176 for (int i = 0; i < CLIBMAX; i++) 2093 2177 { lib[i].Stype = tsclib; 2094 
2178 #if MARS 2095 2179 lib[i].Sxtrnnum = 0; 2096 2180 lib[i].Stypidx = 0; 2097 2181 #endif 2098 2182 } 2099 2183 2100 if ( I32)2184 if (!I16) 2101 2185 { /* Adjust table for 386 */ 2102 2186 lib[CLIBdbllng].Sregsaved = Z(DOUBLEREGS_32); 2103 2187 lib[CLIBlngdbl].Sregsaved = Z(DOUBLEREGS_32); 2104 2188 lib[CLIBdblint].Sregsaved = Z(DOUBLEREGS_32); 2105 2189 lib[CLIBintdbl].Sregsaved = Z(DOUBLEREGS_32); 2106 2190 #if TARGET_WINDOS 2107 2191 lib[CLIBfneg].Sregsaved = Z(FLOATREGS_32); 2108 2192 lib[CLIBdneg].Sregsaved = Z(DOUBLEREGS_32); 2109 2193 lib[CLIBdbluns].Sregsaved = Z(DOUBLEREGS_32); 2110 2194 lib[CLIBunsdbl].Sregsaved = Z(DOUBLEREGS_32); 2111 2195 lib[CLIBdblulng].Sregsaved = Z(DOUBLEREGS_32); 2112 2196 lib[CLIBulngdbl].Sregsaved = Z(DOUBLEREGS_32); 2113 2197 #endif 2114 2198 lib[CLIBdblflt].Sregsaved = Z(DOUBLEREGS_32); 2115 2199 lib[CLIBfltdbl].Sregsaved = Z(DOUBLEREGS_32); 2116 2200 2117 2201 lib[CLIBdblllng].Sregsaved = Z(DOUBLEREGS_32); 2118 2202 lib[CLIBllngdbl].Sregsaved = Z(DOUBLEREGS_32); 2119 2203 lib[CLIBdblullng].Sregsaved = Z(DOUBLEREGS_32); 2120 2204 lib[CLIBullngdbl].Sregsaved = Z(DOUBLEREGS_32); 2121 2205 } 2122 2206 clib_inited++; 2123 2207 } 2124 2208 #undef Z 2125 2209 2126 2210 assert(clib < CLIBMAX); 2127 s = &lib[clib];2211 symbol *s = &lib[clib]; 2128 2212 assert(I32 || !(info[clib].flags & INF32)); 2129 c pop = CNIL;2130 c = getregs((~s->Sregsaved & (mES | mBP | ALLREGS)) & ~keepmask); // mask of regs destroyed2213 code *cpop = CNIL; 2214 code *c = getregs((~s->Sregsaved & (mES | mBP | ALLREGS)) & ~keepmask); // mask of regs destroyed 2131 2215 keepmask &= ~s->Sregsaved; 2132 int npushed = 0; 2216 int npushed = numbitsset(keepmask); 2217 gensaverestore2(keepmask, &c, &cpop); 2218 #if 0 2133 2219 while (keepmask) 2134 2220 { unsigned keepreg; 2135 2221 2136 2222 if (keepmask & (mBP|ALLREGS)) 2137 2223 { keepreg = findreg(keepmask & (mBP|ALLREGS)); 2138 2224 c = gen1(c,0x50 + keepreg); /* PUSH keepreg */ 2139 2225 cpop = 
cat(gen1(CNIL,0x58 + keepreg),cpop); // POP keepreg 2140 2226 keepmask &= ~mask[keepreg]; 2141 2227 npushed++; 2142 2228 } 2143 2229 if (keepmask & mES) 2144 2230 { c = gen1(c,0x06); /* PUSH ES */ 2145 2231 cpop = cat(gen1(CNIL,0x07),cpop); /* POP ES */ 2146 2232 keepmask &= ~mES; 2147 2233 npushed++; 2148 2234 } 2149 2235 } 2236 #endif 2150 2237 2151 2238 c = cat(c, save87regs(info[clib].push87)); 2152 for (i = 0; i < info[clib].push87; i++)2239 for (int i = 0; i < info[clib].push87; i++) 2153 2240 c = cat(c, push87()); 2154 2241 2155 for (i = 0; i < info[clib].pop87; i++)2242 for (int i = 0; i < info[clib].pop87; i++) 2156 2243 pop87(); 2157 2244 2158 2245 if (config.target_cpu >= TARGET_80386 && clib == CLIBlmul && !I32) 2159 2246 { static char lmul[] = { 2160 2247 0x66,0xc1,0xe1,0x10, // shl ECX,16 2161 2248 0x8b,0xcb, // mov CX,BX ;ECX = CX,BX 2162 2249 0x66,0xc1,0xe0,0x10, // shl EAX,16 2163 2250 0x66,0x0f,0xac,0xd0,0x10, // shrd EAX,EDX,16 ;EAX = DX,AX 2164 2251 0x66,0xf7,0xe1, // mul ECX 2165 2252 0x66,0x0f,0xa4,0xc2,0x10, // shld EDX,EAX,16 ;DX,AX = EAX 2166 2253 }; 2167 2254 2168 2255 c = genasm(c,lmul,sizeof(lmul)); 2169 2256 } 2170 2257 else 2171 2258 { makeitextern(s); 2172 2259 int nalign = 0; 2173 2260 if (STACKALIGN == 16) 2174 2261 { // Align the stack (assume no args on stack) 2175 2262 int npush = npushed * REGSIZE + stackpush; 2176 2263 if (npush & (STACKALIGN - 1)) 2177 2264 { nalign = STACKALIGN - (npush & (STACKALIGN - 1)); 2178 2265 c = genc2(c,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign 2266 if (I64) 2267 code_orrex(c, REX_W); 2179 2268 } 2180 2269 } 2181 2270 c = gencs(c,(LARGECODE) ? 
0x9A : 0xE8,0,FLfunc,s); // CALL s 2182 2271 if (nalign) 2183 c = genc2(c,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign 2272 { c = genc2(c,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign 2273 if (I64) 2274 code_orrex(c, REX_W); 2275 } 2184 2276 calledafunc = 1; 2185 2277 2186 if ( !I32 && // bug in Optlink2278 if (I16 && // bug in Optlink for weak references 2187 2279 config.flags3 & CFG3wkfloat && 2188 2280 (info[clib].flags & (INFfloat | INFwkdone)) == INFfloat) 2189 2281 { info[clib].flags |= INFwkdone; 2190 2282 makeitextern(rtlsym[RTLSYM_INTONLY]); 2191 2283 obj_wkext(s,rtlsym[RTLSYM_INTONLY]); 2192 2284 } 2193 }2194 if (!I32)2285 } 2286 if (I16) 2195 2287 stackpush -= info[clib].pop; 2196 retregs = I32 ? info[clib].retregs32 : info[clib].retregs16;2197 return cat(cat(c,cpop),fixresult(e,retregs,pretregs));2288 regm_t retregs = I16 ? info[clib].retregs16 : info[clib].retregs32; 2289 return cat(cat(c,cpop),fixresult(e,retregs,pretregs)); 2198 2290 } 2199 2291 2200 2292 2201 2293 /******************************* 2202 2294 * Generate code sequence for function call. 
2203 2295 */ 2204 2296 2205 2297 code *cdfunc(elem *e,regm_t *pretregs) 2206 2298 { unsigned numpara = 0; 2207 2299 unsigned stackpushsave; 2208 2300 unsigned preg; 2209 2301 regm_t keepmsk; 2210 2302 unsigned numalign = 0; 2211 2303 code *c; 2212 2304 2213 2305 //printf("cdfunc()\n"); elem_print(e); 2214 2306 assert(e); 2215 2307 stackpushsave = stackpush; /* so we can compute # of parameters */ 2216 2308 cgstate.stackclean++; 2217 2309 c = CNIL; 2218 2310 keepmsk = 0; 2219 2311 if (OTbinary(e->Eoper)) // if parameters 2220 2312 { unsigned stackalign = REGSIZE; 2221 elem *ep;2222 2313 elem *en; 2223 2314 regm_t retregs; 2224 tym_t tyf; 2225 2226 if (I32) 2315 2316 if (!I16) 2227 2317 { 2228 ty f = tybasic(e->E1->Ety);2318 tym_t tyf = tybasic(e->E1->Ety); 2229 2319 2230 2320 // First compute numpara, the total pushed on the stack 2231 2321 switch (tyf) 2232 2322 { case TYf16func: 2233 2323 stackalign = 2; 2234 2324 goto Ldefault; 2235 2325 case TYmfunc: 2236 2326 case TYjfunc: 2237 2327 // last parameter goes into register 2328 elem *ep; 2238 2329 for (ep = e->E2; ep->Eoper == OPparam; ep = ep->E2) 2239 2330 { 2240 2331 numpara += paramsize(ep->E1,stackalign); 2241 2332 } 2242 2333 if (tyf == TYjfunc && 2243 2334 // This must match type_jparam() 2244 2335 !(tyjparam(ep->Ety) || 2245 2336 ((tybasic(ep->Ety) == TYstruct || tybasic(ep->Ety) == TYarray) && ep->Enumbytes <= intsize && ep->Enumbytes != 3 && ep->Enumbytes) 2246 2337 ) 2247 2338 ) 2248 2339 { 2249 2340 numpara += paramsize(ep,stackalign); 2250 2341 } 2251 2342 break; 2252 2343 default: 2253 2344 Ldefault: 2254 2345 numpara += paramsize(e->E2,stackalign); 2255 2346 break; 2256 2347 } 2257 2348 assert((numpara & (REGSIZE - 1)) == 0); 2258 2349 assert((stackpush & (REGSIZE - 1)) == 0); 2259 2350 2260 2351 /* Adjust start of the stack so after all args are pushed, 2261 2352 * the stack will be aligned. 
2262 2353 */ 2263 2354 if (STACKALIGN == 16 && (numpara + stackpush) & (STACKALIGN - 1)) 2264 2355 { 2265 2356 numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1)); 2266 2357 c = genc2(NULL,0x81,modregrm(3,5,SP),numalign); // SUB ESP,numalign 2358 if (I64) 2359 code_orrex(c, REX_W); 2267 2360 c = genadjesp(c, numalign); 2268 2361 stackpush += numalign; 2269 2362 stackpushsave += numalign; 2270 2363 } 2271 2364 2272 2365 switch (tyf) 2273 2366 { case TYf16func: 2274 2367 stackalign = 2; 2275 2368 break; 2276 2369 case TYmfunc: // last parameter goes into ECX 2277 2370 preg = CX; 2278 2371 goto L1; 2279 2372 case TYjfunc: // last parameter goes into EAX 2280 2373 preg = AX; 2281 2374 goto L1; 2282 2375 2283 2376 L1: 2377 elem *ep; 2284 2378 for (ep = e->E2; ep->Eoper == OPparam; ep = en) 2285 2379 { 2286 2380 c = cat(c,params(ep->E1,stackalign)); 2287 2381 en = ep->E2; 2288 2382 freenode(ep); 2289 2383 ep = en; 2290 2384 } 2291 2385 if (tyf == TYjfunc && 2292 2386 // This must match type_jparam() 2293 2387 !(tyjparam(ep->Ety) || 2294 2388 ((tybasic(ep->Ety) == TYstruct || tybasic(ep->Ety) == TYarray) && ep->Enumbytes <= intsize && ep->Enumbytes != 3 && ep->Enumbytes) 2295 2389 ) 2296 2390 ) 2297 2391 { 2298 2392 c = cat(c,params(ep,stackalign)); 2299 2393 goto Lret; 2300 2394 } 2301 keepmsk = mask[preg]; 2395 // preg is the register to put the parameter ep in 2396 keepmsk = mask[preg]; // don't change preg when evaluating func address 2302 2397 retregs = keepmsk; 2303 2398 if (ep->Eoper == OPstrthis) 2304 { code *c1; 2305 code *c2; 2306 unsigned np; 2307 2308 c1 = getregs(retregs); 2399 { code *c2; 2400 2401 code *c1 = getregs(retregs); 2309 2402 // LEA preg,np[ESP] 2310 np = stackpush - ep->EV.Vuns; // stack delta to parameter 2311 c2 = genc1(CNIL,0x8D,modregrm(2,preg,4),FLconst,np); 2312 c2->Isib = modregrm(0,4,SP); 2403 unsigned np = stackpush - ep->EV.Vuns; // stack delta to parameter 2404 c2 = genc1(CNIL,0x8D,(modregrm(0,4,SP) << 8) | 
modregrm(2,preg,4),FLconst,np); 2405 if (I64) 2406 code_orrex(c2, REX_W); 2313 2407 c = cat3(c,c1,c2); 2314 2408 } 2315 2409 else 2316 2410 { code *cp = codelem(ep,&retregs,FALSE); 2317 2411 c = cat(c,cp); 2318 2412 } 2319 2413 goto Lret; 2320 2414 } 2321 2415 } 2322 2416 c = cat(c, params(e->E2,stackalign)); // push parameters 2323 2417 } 2324 2418 else 2325 2419 { 2326 2420 /* Adjust start of the stack so 2327 2421 * the stack will be aligned. 2328 2422 */ 2329 2423 if (STACKALIGN == 16 && (stackpush) & (STACKALIGN - 1)) 2330 2424 { 2331 2425 numalign = STACKALIGN - ((stackpush) & (STACKALIGN - 1)); 2332 2426 c = genc2(NULL,0x81,modregrm(3,5,SP),numalign); // SUB ESP,numalign 2427 if (I64) 2428 code_orrex(c, REX_W); 2333 2429 c = genadjesp(c, numalign); 2334 2430 stackpush += numalign; 2335 2431 stackpushsave += numalign; 2336 2432 } 2337 2433 2338 2434 } 2339 2435 Lret: 2340 2436 cgstate.stackclean--; 2341 if ( I32)2437 if (!I16) 2342 2438 { 2343 2439 if (numpara != stackpush - stackpushsave) 2344 2440 printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave); 2345 2441 assert(numpara == stackpush - stackpushsave); 2346 2442 } 2347 2443 else 2348 2444 numpara = stackpush - stackpushsave; 2349 2445 return cat(c,funccall(e,numpara,numalign,pretregs,keepmsk)); 2350 2446 } 2351 2447 2352 2448 /*********************************** 2353 2449 */ 2354 2450 2355 2451 code *cdstrthis(elem *e,regm_t *pretregs) 2356 2452 { 2357 2453 code *c1; 2358 2454 code *c2; 2359 unsigned np;2360 unsigned reg;2361 2455 2362 2456 assert(tysize(e->Ety) == REGSIZE); 2363 reg = findreg(*pretregs & allregs);2457 unsigned reg = findreg(*pretregs & allregs); 2364 2458 c1 = getregs(mask[reg]); 2365 2459 // LEA reg,np[ESP] 2366 np = stackpush - e->EV.Vuns; // stack delta to parameter 2367 c2 = genc1(CNIL,0x8D,modregrm(2,reg,4),FLconst,np); 2368 c2->Isib = modregrm(0,4,SP); 2460 unsigned np = stackpush - e->EV.Vuns; // stack delta to parameter 2461 c2 = 
genc1(CNIL,0x8D,(modregrm(0,4,SP) << 8) | modregxrm(2,reg,4),FLconst,np); 2462 if (I64) 2463 code_orrex(c2, REX_W); 2369 2464 return cat3(c1,c2,fixresult(e,mask[reg],pretregs)); 2370 2465 } 2371 2466 2372 2467 /****************************** 2373 2468 * Call function. All parameters are pushed onto the stack, numpara gives 2374 2469 * the size of them all. 2375 2470 */ 2376 2471 2377 2472 STATIC code * funccall(elem *e,unsigned numpara,unsigned numalign,regm_t *pretregs,regm_t keepmsk) 2378 2473 { 2379 2474 elem *e1; 2380 2475 code *c,*ce,cs; 2381 2476 tym_t tym1; 2382 2477 char farfunc; 2383 2478 regm_t retregs; 2384 2479 symbol *s; 2385 2480 2386 2481 //printf("funccall(e = %p, *pretregs = x%x, numpara = %d, numalign = %d)\n",e,*pretregs,numpara,numalign); 2387 2482 calledafunc = 1; 2388 2483 /* Determine if we need frame for function prolog/epilog */ … … 2408 2503 if (s->Sflags & SFLexit) 2409 2504 c = NULL; 2410 2505 else 2411 2506 c = save87(); // assume 8087 regs are all trashed 2412 2507 if (s->Sflags & SFLexit) 2413 2508 // Function doesn't return, so don't worry about registers 2414 2509 // it may use 2415 2510 c1 = NULL; 2416 2511 else if (!tyfunc(s->ty()) || !(config.flags4 & CFG4optimized)) 2417 2512 // so we can replace func at runtime 2418 2513 c1 = getregs(~fregsaved & (mBP | ALLREGS | mES)); 2419 2514 else 2420 2515 c1 = getregs(~s->Sregsaved & (mBP | ALLREGS | mES)); 2421 2516 if (strcmp(s->Sident,"alloca") == 0) 2422 2517 { 2423 2518 #if 1 2424 2519 s = rtlsym[RTLSYM_ALLOCA]; 2425 2520 makeitextern(s); 2426 2521 c1 = cat(c1,getregs(mCX)); 2427 2522 c1 = genc(c1,0x8D,modregrm(2,CX,BPRM),FLallocatmp,0,0,0); // LEA CX,&localsize[BP] 2523 if (I64) 2524 code_orrex(c1, REX_W); 2428 2525 usedalloca = 2; // new way 2429 2526 #else 2430 2527 usedalloca = 1; // old way 2431 2528 #endif 2432 2529 } 2433 2530 if (sytab[s->Sclass] & SCSS) // if function is on stack (!) 
2434 2531 { 2435 2532 retregs = allregs & ~keepmsk; 2436 2533 s->Sflags &= ~GTregcand; 2437 2534 s->Sflags |= SFLread; 2438 2535 ce = cat(c1,cdrelconst(e1,&retregs)); 2439 2536 if (farfunc) 2440 2537 goto LF1; 2441 2538 else 2442 2539 goto LF2; 2443 2540 } 2444 2541 else 2445 2542 { int fl; 2446 2543 2447 2544 fl = FLfunc; … … 2462 2559 ce = gen1(ce, 0x90); // NOP 2463 2560 code_orflag(ce, CFvolatile); // don't schedule it 2464 2561 } 2465 2562 #endif 2466 2563 } 2467 2564 ce = cat(c1,ce); 2468 2565 } 2469 2566 else 2470 2567 { /* Call function via pointer */ 2471 2568 elem *e11; 2472 2569 tym_t e11ty; 2473 2570 2474 2571 #ifdef DEBUG 2475 2572 if (e1->Eoper != OPind 2476 2573 ) { WRFL((enum FL)el_fl(e1)); WROP(e1->Eoper); } 2477 2574 #endif 2478 2575 c = save87(); // assume 8087 regs are all trashed 2479 2576 assert(e1->Eoper == OPind); 2480 2577 e11 = e1->E1; 2481 2578 e11ty = tybasic(e11->Ety); 2482 assert( I32|| (e11ty == (farfunc ? TYfptr : TYnptr)));2579 assert(!I16 || (e11ty == (farfunc ? 
TYfptr : TYnptr))); 2483 2580 2484 2581 /* if we can't use loadea() */ 2485 2582 if ((EOP(e11) || e11->Eoper == OPconst) && 2486 2583 (e11->Eoper != OPind || e11->Ecount)) 2487 2584 { 2488 2585 unsigned reg; 2489 2586 2490 2587 retregs = allregs & ~keepmsk; 2491 2588 cgstate.stackclean++; 2492 2589 ce = scodelem(e11,&retregs,keepmsk,TRUE); 2493 2590 cgstate.stackclean--; 2494 2591 /* Kill registers destroyed by an arbitrary function call */ 2495 2592 ce = cat(ce,getregs((mBP | ALLREGS | mES) & ~fregsaved)); 2496 2593 if (e11ty == TYfptr) 2497 2594 { unsigned lsreg; 2498 2595 LF1: 2499 2596 reg = findregmsw(retregs); 2500 2597 lsreg = findreglsw(retregs); 2501 2598 floatreg = TRUE; /* use float register */ 2502 2599 reflocal = TRUE; 2503 2600 ce = genc1(ce,0x89, /* MOV floatreg+2,reg */ 2504 2601 modregrm(2,reg,BPRM),FLfltreg,REGSIZE); 2505 2602 genc1(ce,0x89, /* MOV floatreg,lsreg */ 2506 2603 modregrm(2,lsreg,BPRM),FLfltreg,0); 2507 2604 if (tym1 == TYifunc) 2508 2605 gen1(ce,0x9C); // PUSHF 2509 2606 genc1(ce,0xFF, /* CALL [floatreg] */ 2510 2607 modregrm(2,3,BPRM),FLfltreg,0); 2511 2608 } 2512 2609 else 2513 2610 { 2514 2611 LF2: 2515 2612 reg = findreg(retregs); 2516 ce = gen2(ce,0xFF,modregrm (3,2,reg)); /* CALL reg */2613 ce = gen2(ce,0xFF,modregrmx(3,2,reg)); /* CALL reg */ 2517 2614 } 2518 2615 } 2519 2616 else 2520 2617 { 2521 2618 if (tym1 == TYifunc) 2522 2619 c = gen1(c,0x9C); // PUSHF 2523 2620 // CALL [function] 2524 2621 cs.Iflags = 0; 2525 2622 cgstate.stackclean++; 2526 2623 ce = loadea(e11,&cs,0xFF,farfunc ? 3 : 2,0,keepmsk,(ALLREGS|mES|mBP) & ~fregsaved); 2527 2624 cgstate.stackclean--; 2528 2625 freenode(e11); 2529 2626 } 2530 2627 s = NULL; 2531 2628 } 2532 2629 c = cat(c,ce); 2533 2630 freenode(e1); 2534 2631 2535 2632 /* See if we will need the frame pointer. 2536 2633 Calculate it here so we can possibly use BP to fix the stack. 
… … 2678 2775 symbol *s; 2679 2776 int fl; 2680 2777 2681 2778 //printf("params(e = %p, stackalign = %d)\n", e, stackalign); 2682 2779 cp = NULL; 2683 2780 stackchanged = 1; 2684 2781 assert(e); 2685 2782 while (e->Eoper == OPparam) /* if more params */ 2686 2783 { 2687 2784 e2 = e->E2; 2688 2785 cp = cat(cp,params(e->E1,stackalign)); // push them backwards 2689 2786 freenode(e); 2690 2787 e = e2; 2691 2788 } 2692 2789 //printf("params()\n"); elem_print(e); 2693 2790 2694 2791 tym = tybasic(e->Ety); 2695 2792 if (tyfloating(tym)) 2696 2793 obj_fltused(); 2697 2794 2795 int grex = I64 ? REX_W << 16 : 0; 2796 2698 2797 /* sz = number of bytes pushed */ 2699 2798 if (tyscalar(tym)) 2700 2799 szb = size(tym); 2701 2800 else if (tym == TYstruct) 2702 2801 szb = e->Enumbytes; 2703 2802 else 2704 2803 { 2705 2804 #ifdef DEBUG 2706 2805 WRTYxx(tym); 2707 2806 #endif 2708 2807 assert(0); 2709 2808 } 2710 2809 sz = align(stackalign,szb); /* align on word stack boundary */ 2711 2810 assert((sz & (stackalign - 1)) == 0); /* ensure that alignment worked */ 2712 2811 assert((sz & (REGSIZE - 1)) == 0); 2713 2812 2714 2813 c = CNIL; 2715 2814 cs.Iflags = 0; 2716 2815 cs.Irex = 0; 2717 2816 switch (e->Eoper) 2718 2817 { 2719 2818 #if SCPP 2720 2819 case OPstrctor: 2721 2820 { 2722 2821 e1 = e->E1; 2723 2822 c = docommas(&e1); /* skip over any comma expressions */ 2724 2823 2725 c = genc2(c,0x81, modregrm(3,5,SP),sz); /* SUB SP,sizeof(struct) */2824 c = genc2(c,0x81,grex | modregrm(3,5,SP),sz); // SUB SP,sizeof(struct) 2726 2825 stackpush += sz; 2727 2826 genadjesp(c,sz); 2728 2827 2729 2828 // Find OPstrthis and set it to stackpush 2730 2829 exp2_setstrthis(e1,NULL,stackpush,NULL); 2731 2830 2732 2831 retregs = 0; 2733 2832 ce = codelem(e1,&retregs,TRUE); 2734 2833 goto L2; 2735 2834 } 2736 2835 case OPstrthis: 2737 2836 // This is the parameter for the 'this' pointer corresponding to 2738 2837 // OPstrctor. 
We push a pointer to an object that was already 2739 2838 // allocated on the stack by OPstrctor. 2740 2839 { unsigned np; 2741 2840 2742 2841 retregs = allregs; 2743 2842 c = allocreg(&retregs,®,TYoffset); 2744 2843 c = genregs(c,0x89,SP,reg); // MOV reg,SP 2844 if (I64) 2845 code_orrex(c, REX_W); 2745 2846 np = stackpush - e->EV.Vuns; // stack delta to parameter 2746 c = genc2(c,0x81, modregrm(3,0,reg),np); // ADD reg,np2847 c = genc2(c,0x81,grex | modregrmx(3,0,reg),np); // ADD reg,np 2747 2848 if (sz > REGSIZE) 2748 2849 { c = gen1(c,0x16); // PUSH SS 2749 2850 stackpush += REGSIZE; 2750 2851 } 2751 c = gen1(c,0x50 + reg); // PUSH reg 2852 c = gen1(c,0x50 + (reg & 7)); // PUSH reg 2853 if (reg & 8) 2854 code_orrex(c, REX_B); 2752 2855 stackpush += REGSIZE; 2753 2856 genadjesp(c,sz); 2754 2857 ce = CNIL; 2755 2858 goto L2; 2756 2859 } 2757 2860 #endif 2758 2861 case OPstrpar: 2759 2862 { code *cc,*c1,*c2,*c3; 2760 2863 unsigned rm; 2761 2864 unsigned seg; // segment override prefix flags 2762 2865 bool doneoff; 2763 2866 unsigned pushsize = REGSIZE; 2764 2867 unsigned op16 = 0; 2765 2868 unsigned npushes; 2766 2869 2767 2870 e1 = e->E1; 2768 2871 if (sz == 0) 2769 2872 { 2770 2873 ce = docommas(&e1); /* skip over any commas */ 2771 2874 goto L2; 2772 2875 } 2773 2876 if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1->Eoper == OPvar) 2774 2877 { freenode(e); 2775 2878 e = e1; 2776 2879 goto L1; 2777 2880 } 2778 2881 cc = docommas(&e1); /* skip over any commas */ 2779 2882 seg = 0; /* assume no seg override */ 2780 2883 retregs = sz ? 
IDXREGS : 0; 2781 2884 doneoff = FALSE; 2782 if ( I32 && sz & 2) /* if odd number of words to push */2885 if (!I16 && sz & 2) // if odd number of words to push 2783 2886 { pushsize = 2; 2784 2887 op16 = 1; 2785 2888 } 2786 else if ( !I32&& config.target_cpu >= TARGET_80386 && (sz & 3) == 0)2889 else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0) 2787 2890 { pushsize = 4; // push DWORDs at a time 2788 2891 op16 = 1; 2789 2892 } 2790 2893 npushes = sz / pushsize; 2791 2894 switch (e1->Eoper) 2792 2895 { case OPind: 2793 2896 if (sz) 2794 2897 { switch (tybasic(e1->E1->Ety)) 2795 2898 { 2796 2899 case TYfptr: 2797 2900 case TYhptr: 2798 2901 seg = CFes; 2799 2902 retregs |= mES; 2800 2903 break; 2801 2904 case TYsptr: 2802 2905 if (config.wflags & WFssneds) 2803 2906 seg = CFss; 2804 2907 break; 2805 2908 case TYcptr: 2806 2909 seg = CFcs; … … 2850 2953 /* Reverse the effect of the previous add */ 2851 2954 if (doneoff) 2852 2955 e1->EV.sp.Voffset -= sz - pushsize; 2853 2956 freenode(e1); 2854 2957 break; 2855 2958 case OPstreq: 2856 2959 //case OPcond: 2857 2960 if (!(config.exe & EX_flat)) 2858 2961 { seg = CFes; 2859 2962 retregs |= mES; 2860 2963 } 2861 2964 c1 = codelem(e1,&retregs,FALSE); 2862 2965 break; 2863 2966 default: 2864 2967 #ifdef DEBUG 2865 2968 elem_print(e1); 2866 2969 #endif 2867 2970 assert(0); 2868 2971 } 2869 2972 reg = findreglsw(retregs); 2870 rm = I 32 ? regtorm32[reg] : regtorm[reg];2973 rm = I16 ? 
regtorm[reg] : regtorm32[reg]; 2871 2974 if (op16) 2872 2975 seg |= CFopsize; // operand size 2873 2976 if (npushes <= 4) 2874 2977 { 2875 2978 assert(!doneoff); 2876 2979 for (c2 = CNIL; npushes > 1; npushes--) 2877 { c2 = genc1(c2,0xFF,modregrm (2,6,rm),FLconst,pushsize * (npushes - 1)); // PUSH [reg]2980 { c2 = genc1(c2,0xFF,modregrmx(2,6,rm),FLconst,pushsize * (npushes - 1)); // PUSH [reg] 2878 2981 code_orflag(c2,seg); 2879 2982 genadjesp(c2,pushsize); 2880 2983 } 2881 c3 = gen2(CNIL,0xFF,modregrm (0,6,rm));// PUSH [reg]2984 c3 = gen2(CNIL,0xFF,modregrmx(0,6,rm)); // PUSH [reg] 2882 2985 c3->Iflags |= seg; 2883 2986 genadjesp(c3,pushsize); 2884 2987 ce = cat4(cc,c1,c2,c3); 2885 2988 } 2886 2989 else if (sz) 2887 2990 { int size; 2888 2991 2889 2992 c2 = getregs_imm(mCX | retregs); 2890 2993 /* MOV CX,sz/2 */ 2891 2994 c2 = movregconst(c2,CX,npushes,0); 2892 2995 if (!doneoff) 2893 2996 { /* This disgusting thing should be done when */ 2894 2997 /* reg is loaded. Too lazy to fix it now. */ 2895 2998 /* ADD reg,sz-2 */ 2896 c2 = genc2(c2,0x81, modregrm(3,0,reg),sz-pushsize);2999 c2 = genc2(c2,0x81,grex | modregrmx(3,0,reg),sz-pushsize); 2897 3000 } 2898 c3 = gen2(CNIL,0xFF,modregrm (0,6,rm)); // PUSH [reg]3001 c3 = gen2(CNIL,0xFF,modregrmx(0,6,rm)); // PUSH [reg] 2899 3002 c3->Iflags |= seg | CFtarg2; 2900 genc2(c3,0x81, modregrm(3,5,reg),pushsize); // SUB reg,23003 genc2(c3,0x81,grex | modregrmx(3,5,reg),pushsize); // SUB reg,2 2901 3004 size = ((seg & CFSEG) ? 
-8 : -7) - op16; 2902 3005 if (code_next(c3)->Iop != 0x81) 2903 3006 size++; 2904 3007 //genc2(c3,0xE2,0,size); // LOOP .-7 or .-8 2905 3008 genjmp(c3,0xE2,FLcode,(block *)c3); // LOOP c3 2906 3009 regimmed_set(CX,0); 2907 3010 genadjesp(c3,sz); 2908 3011 ce = cat4(cc,c1,c2,c3); 2909 3012 } 2910 3013 else 2911 3014 ce = cat(cc,c1); 2912 3015 stackpush += sz; 2913 3016 goto L2; 2914 3017 } 2915 3018 case OPind: 2916 3019 if (!e->Ecount) /* if *e1 */ 2917 3020 { if (sz <= REGSIZE) 2918 3021 { // Watch out for single byte quantities being up 2919 3022 // against the end of a segment or in memory-mapped I/O 2920 3023 if (!(config.exe & EX_flat) && szb == 1) … … 2991 3094 if (tysize[tym] == tysize[TYfptr] && 2992 3095 (fl = s->Sfl) != FLfardata && 2993 3096 /* not a function that CS might not be the segment of */ 2994 3097 (!((fl == FLfunc || s->ty() & mTYcs) && 2995 3098 (s->Sclass == SCcomdat || s->Sclass == SCextern || s->Sclass == SCinline || config.wflags & WFthunk)) || 2996 3099 (fl == FLfunc && config.exe == EX_DOSX) 2997 3100 ) 2998 3101 ) 2999 3102 { 3000 3103 stackpush += sz; 3001 3104 c = gen1(c,0x06 + /* PUSH SEGREG */ 3002 3105 (((fl == FLfunc || s->ty() & mTYcs) ? 
1 : segfl[fl]) << 3)); 3003 3106 c = genadjesp(c,REGSIZE); 3004 3107 3005 3108 if (config.target_cpu >= TARGET_80286 && !e->Ecount) 3006 3109 { ce = getoffset(e,STACK); 3007 3110 goto L2; 3008 3111 } 3009 3112 else 3010 3113 { c = cat(c,offsetinreg(e,&retregs)); 3011 c = gen1(c,0x50+findreg(retregs)); /* PUSH reg */ 3114 unsigned reg = findreg(retregs); 3115 c = genpush(c,reg); // PUSH reg 3012 3116 genadjesp(c,REGSIZE); 3013 3117 } 3014 3118 goto ret; 3015 3119 } 3016 3120 if (config.target_cpu >= TARGET_80286 && !e->Ecount) 3017 3121 { 3018 3122 stackpush += sz; 3019 3123 if (tysize[tym] == tysize[TYfptr]) 3020 { code *c1; 3021 3124 { 3022 3125 /* PUSH SEG e */ 3023 c 1 = gencs(CNIL,0x68,0,FLextern,s);3126 code *c1 = gencs(CNIL,0x68,0,FLextern,s); 3024 3127 c1->Iflags = CFseg; 3025 3128 genadjesp(c1,REGSIZE); 3026 3129 c = cat(c,c1); 3027 3130 } 3028 3131 ce = getoffset(e,STACK); 3029 3132 goto L2; 3030 3133 } 3031 3134 #endif 3032 3135 break; /* else must evaluate expression */ 3033 3136 case OPvar: 3034 3137 L1: 3035 3138 if (0 && I32 && sz == 2) 3036 3139 { /* 32 bit code, but pushing 16 bit values anyway */ 3037 3140 ce = loadea(e,&cs,0xFF,6,0,0,0); /* PUSH EA */ 3038 3141 // BUG: 0x66 fails with scheduler 3039 3142 ce = cat(gen1(CNIL,0x66),ce); /* 16 bit override */ 3040 3143 stackpush += sz; 3041 3144 genadjesp(ce,sz); 3042 3145 } 3043 3146 else if (config.flags4 & CFG4speed && 3044 3147 (config.target_cpu >= TARGET_80486 && 3045 3148 config.target_cpu <= TARGET_PentiumMMX) && 3046 3149 sz <= 2 * REGSIZE && 3047 3150 !tyfloating(tym)) 3048 3151 { // Avoid PUSH MEM on the Pentium when optimizing for speed 3049 3152 break; 3050 3153 } 3051 3154 else 3052 3155 { int regsize = REGSIZE; 3053 3156 unsigned flag = 0; 3054 3157 3055 if ( !I32&& config.target_cpu >= TARGET_80386 && sz > 2 &&3158 if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 && 3056 3159 !e->Ecount) 3057 3160 { regsize = 4; 3058 3161 flag |= CFopsize; 3059 3162 } 3060 3163 ce = 
loadea(e,&cs,0xFF,6,sz - regsize,RMload,0); // PUSH EA+sz-2 3061 3164 code_orflag(ce,flag); 3062 3165 ce = genadjesp(ce,REGSIZE); 3063 3166 stackpush += sz; 3064 3167 while ((targ_int)(sz -= regsize) > 0) 3065 3168 { ce = cat(ce,loadea(e,&cs,0xFF,6,sz - regsize,RMload,0)); 3066 3169 code_orflag(ce,flag); 3067 3170 ce = genadjesp(ce,REGSIZE); 3068 3171 } 3069 3172 } 3070 3173 L2: 3071 3174 freenode(e); 3072 3175 c = cat(c,ce); 3073 3176 goto ret; 3074 3177 case OPconst: 3075 3178 { targ_int *pi; … … 3091 3194 ce = genadjesp(NULL,sz); 3092 3195 for (i = 2; i >= 0; i--) 3093 3196 { 3094 3197 if (reghasvalue(allregs, value, ®)) 3095 3198 ce = gen1(ce,0x50 + reg); // PUSH reg 3096 3199 else 3097 3200 ce = genc2(ce,0x68,0,value); // PUSH value 3098 3201 value = ((unsigned *)&e->EV.Vldouble)[i - 1]; 3099 3202 } 3100 3203 goto L2; 3101 3204 } 3102 3205 3103 3206 assert(sz <= LNGDBLSIZE); 3104 3207 i = sz; 3105 3208 if (I32 && i == 2) 3106 3209 flag = CFopsize; 3107 3210 3108 3211 if (config.target_cpu >= TARGET_80286) 3109 3212 // && (e->Ecount == 0 || e->Ecount != e->Ecomsub)) 3110 3213 { pushi = 1; 3111 if ( !I32&& config.target_cpu >= TARGET_80386 && i >= 4)3214 if (I16 && config.target_cpu >= TARGET_80386 && i >= 4) 3112 3215 { regsize = 4; 3113 3216 flag = CFopsize; 3114 3217 } 3115 3218 } 3116 3219 else if (i == REGSIZE) 3117 3220 break; 3118 3221 3119 3222 stackpush += sz; 3120 3223 ce = genadjesp(NULL,sz); 3121 3224 pi = (targ_long *) &e->EV.Vdouble; 3122 3225 ps = (targ_short *) pi; 3123 3226 i /= regsize; 3124 3227 do 3125 3228 { code *cp; 3126 3229 3127 3230 if (i) /* be careful not to go negative */ 3128 3231 i--; 3129 3232 value = (regsize == 4) ? pi[i] : ps[i]; 3130 3233 if (pushi) 3131 3234 { 3132 3235 if (regsize == REGSIZE && reghasvalue(allregs,value,®)) 3133 3236 goto Preg; 3134 3237 ce = genc2(ce,(szb == 1) ? 
0x6A : 0x68,0,value); // PUSH value 3135 3238 } 3136 3239 else 3137 3240 { 3138 3241 ce = regwithvalue(ce,allregs,value,®,0); 3139 3242 Preg: 3140 ce = gen 1(ce,0x50 + reg); /* PUSH reg */3243 ce = genpush(ce,reg); // PUSH reg 3141 3244 } 3142 3245 code_orflag(ce,flag); /* operand size */ 3143 3246 } while (i); 3144 3247 goto L2; 3145 3248 } 3146 3249 default: 3147 3250 break; 3148 3251 } 3149 3252 retregs = tybyte(tym) ? BYTEREGS : allregs; 3150 3253 if (tyfloating(tym)) 3151 3254 { if (config.inline8087) 3152 3255 { code *c1,*c2; 3153 3256 unsigned op; 3154 3257 unsigned r; 3155 3258 3156 3259 retregs = tycomplex(tym) ? mST01 : mST0; 3157 3260 c = cat(c,codelem(e,&retregs,FALSE)); 3158 3261 stackpush += sz; 3159 3262 c = genadjesp(c,sz); 3160 c = genc2(c,0x81, modregrm(3,5,SP),sz); /* SUB SP,sz */3263 c = genc2(c,0x81,grex | modregrm(3,5,SP),sz); // SUB SP,sz 3161 3264 switch (tym) 3162 3265 { 3163 3266 case TYfloat: 3164 3267 case TYifloat: 3165 3268 case TYcfloat: 3166 3269 op = 0xD9; 3167 3270 r = 3; 3168 3271 break; 3169 3272 3170 3273 case TYdouble: 3171 3274 case TYidouble: 3172 3275 case TYdouble_alias: 3173 3276 case TYcdouble: 3174 3277 op = 0xDD; 3175 3278 r = 3; 3176 3279 break; 3177 3280 3178 3281 case TYldouble: 3179 3282 case TYildouble: 3180 3283 case TYcldouble: 3181 3284 op = 0xDB; 3182 3285 r = 7; 3183 3286 break; 3184 3287 3185 3288 default: 3186 3289 assert(0); 3187 3290 } 3188 if ( I32)3291 if (!I16) 3189 3292 { 3190 3293 c1 = NULL; 3191 3294 c2 = NULL; 3192 3295 if (tycomplex(tym)) 3193 3296 { 3194 3297 // FSTP sz/2[ESP] 3195 c2 = genc1(CNIL,op,modregrm(2,r,4),FLconst,sz/2); 3196 c2->Isib = modregrm(0,4,SP); 3298 c2 = genc1(CNIL,op,(modregrm(0,4,SP) << 8) | modregxrm(2,r,4),FLconst,sz/2); 3197 3299 pop87(); 3198 3300 } 3199 3301 pop87(); 3200 3302 c2 = gen2sib(c2,op,modregrm(0,r,4),modregrm(0,4,SP)); // FSTP [ESP] 3201 3303 } 3202 3304 else 3203 3305 { 3204 3306 retregs = IDXREGS; /* get an index reg */ 3205 3307 c1 = 
allocreg(&retregs,®,TYoffset); 3206 3308 c1 = genregs(c1,0x89,SP,reg); /* MOV reg,SP */ 3207 3309 pop87(); 3208 3310 c2 = gen2(CNIL,op,modregrm(0,r,regtorm[reg])); // FSTP [reg] 3209 3311 } 3210 3312 if (LARGEDATA) 3211 3313 c2->Iflags |= CFss; /* want to store into stack */ 3212 3314 genfwait(c2); // FWAIT 3213 3315 c = cat3(c,c1,c2); 3214 3316 goto ret; 3215 3317 } 3216 3318 else if (!I32 && (tym == TYdouble || tym == TYdouble_alias)) 3217 3319 retregs = mSTACK; 3218 3320 } 3219 3321 #if LONGLONG 3220 3322 else if (!I32 && sz == 8) // if long long 3221 3323 retregs = mSTACK; 3222 3324 #endif 3223 3325 c = cat(c,scodelem(e,&retregs,0,TRUE)); 3224 3326 if (retregs != mSTACK) /* if stackpush not already inc'd */ 3225 3327 stackpush += sz; 3226 3328 if (sz <= REGSIZE) 3227 3329 { 3228 c = gen 1(c,0x50+findreg(retregs)); /* PUSH reg */3330 c = genpush(c,findreg(retregs)); // PUSH reg 3229 3331 genadjesp(c,REGSIZE); 3230 3332 } 3231 3333 else if (sz == REGSIZE * 2) 3232 { c = gen 1(c,0x50+findregmsw(retregs)); /* PUSH msreg */3233 gen 1(c,0x50+findreglsw(retregs)); /* PUSH lsreg */3334 { c = genpush(c,findregmsw(retregs)); // PUSH msreg 3335 genpush(c,findreglsw(retregs)); // PUSH lsreg 3234 3336 genadjesp(c,sz); 3235 3337 } 3236 3338 ret: 3237 3339 return cat(cp,c); 3238 3340 } 3239 3341 3240 3342 3241 3343 /******************************* 3242 3344 * Get offset portion of e, and store it in an index 3243 3345 * register. Return mask of index register in *pretregs. 
3244 3346 */ 3245 3347 3246 3348 code *offsetinreg( elem *e, regm_t *pretregs) 3247 3349 { regm_t retregs; 3248 3350 code *c; 3249 3351 unsigned reg; 3250 3352 3251 3353 retregs = mLSW; /* want only offset */ 3252 3354 if (e->Ecount && e->Ecount != e->Ecomsub) 3253 3355 { unsigned i; … … 3303 3405 if (config.inline8087) 3304 3406 { if (*pretregs & mST0) 3305 3407 return load87(e,0,pretregs,NULL,-1); 3306 3408 else if (tycomplex(tym)) 3307 3409 return cload87(e, pretregs); 3308 3410 } 3309 3411 } 3310 3412 sz = tysize[tym]; 3311 3413 cs.Iflags = 0; 3312 3414 cs.Irex = 0; 3313 3415 if (*pretregs == mPSW) 3314 3416 { 3315 3417 regm = allregs; 3316 3418 if (e->Eoper == OPconst) 3317 3419 { /* TRUE: OR SP,SP (SP is never 0) */ 3318 3420 /* FALSE: CMP SP,SP (always equal) */ 3319 3421 c = genregs(CNIL,(boolres(e)) ? 0x09 : 0x39,SP,SP); 3320 3422 } 3321 3423 else if (sz <= REGSIZE) 3322 3424 { 3323 if ( I32&& (tym == TYfloat || tym == TYifloat))3425 if (!I16 && (tym == TYfloat || tym == TYifloat)) 3324 3426 { c = allocreg(®m,®,TYoffset); /* get a register */ 3325 3427 ce = loadea(e,&cs,0x8B,reg,0,0,0); // MOV reg,data 3326 3428 c = cat(c,ce); 3327 ce = gen2(CNIL,0xD1,modregrm (3,4,reg)); /* SHL reg,1 */3429 ce = gen2(CNIL,0xD1,modregrmx(3,4,reg)); /* SHL reg,1 */ 3328 3430 c = cat(c,ce); 3329 3431 } 3330 3432 else 3331 3433 { cs.IFL2 = FLconst; 3332 3434 cs.IEV2.Vint = 0; 3333 3435 op = (sz == 1) ? 
0x80 : 0x81; 3334 3436 c = loadea(e,&cs,op,7,0,0,0); /* CMP EA,0 */ 3335 3437 3336 3438 // Convert to TEST instruction if EA is a register 3337 3439 // (to avoid register contention on Pentium) 3338 3440 if ((c->Iop & 0xFE) == 0x38 && 3339 3441 (c->Irm & modregrm(3,0,0)) == modregrm(3,0,0) 3340 3442 ) 3341 3443 { c->Iop = (c->Iop & 1) | 0x84; 3342 c->Irm = (c->Irm & modregrm(3,0,7)) | modregrm(0,c->Irm & 7,0); 3444 code_newreg(c, c->Irm & 7); 3445 if (c->Irex & REX_B) 3446 c->Irex = (c->Irex & ~REX_B) | REX_R; 3343 3447 } 3344 3448 } 3345 3449 } 3346 3450 else if (sz < 8) 3347 3451 { 3348 3452 c = allocreg(®m,®,TYoffset); /* get a register */ 3349 3453 if (I32) // it's a 48 bit pointer 3350 3454 ce = loadea(e,&cs,0x0FB7,reg,REGSIZE,0,0); /* MOVZX reg,data+4 */ 3351 3455 else 3352 3456 { ce = loadea(e,&cs,0x8B,reg,REGSIZE,0,0); /* MOV reg,data+2 */ 3353 3457 if (tym == TYfloat || tym == TYifloat) // dump sign bit 3354 3458 gen2(ce,0xD1,modregrm(3,4,reg)); /* SHL reg,1 */ 3355 3459 } 3356 3460 c = cat(c,ce); 3357 3461 ce = loadea(e,&cs,0x0B,reg,0,regm,0); /* OR reg,data */ 3358 3462 c = cat(c,ce); 3359 3463 } 3360 3464 else if (sz == 8) 3361 3465 { code *c1; 3362 3466 int i; … … 3383 3487 return c; 3384 3488 } 3385 3489 /* not for flags only */ 3386 3490 flags = *pretregs & mPSW; /* save original */ 3387 3491 forregs = *pretregs & (mBP | ALLREGS | mES); 3388 3492 if (*pretregs & mSTACK) 3389 3493 forregs |= DOUBLEREGS; 3390 3494 if (e->Eoper == OPconst) 3391 3495 { regm_t save; 3392 3496 3393 3497 if (sz == REGSIZE && reghasvalue(forregs,e->EV.Vint,®)) 3394 3498 forregs = mask[reg]; 3395 3499 3396 3500 save = regcon.immed.mval; 3397 3501 c = allocreg(&forregs,®,tym); /* allocate registers */ 3398 3502 regcon.immed.mval = save; // KLUDGE! 
3399 3503 if (sz <= REGSIZE) 3400 3504 { 3401 3505 if (sz == 1) 3402 3506 flags |= 1; 3403 else if ( I32&& sz == SHORTSIZE &&3507 else if (!I16 && sz == SHORTSIZE && 3404 3508 !(mask[reg] & regcon.mvar) && 3405 3509 !(config.flags4 & CFG4speed) 3406 3510 ) 3407 3511 flags |= 2; 3512 if (sz == 8) 3513 flags |= 64; 3408 3514 ce = movregconst(CNIL,reg,e->EV.Vint,flags); 3409 3515 flags = 0; // flags are already set 3410 3516 } 3411 3517 else if (sz < 8) // far pointers, longs for 16 bit targets 3412 3518 { 3413 3519 targ_int msw,lsw; 3414 3520 regm_t mswflags; 3415 3521 3416 3522 msw = I32 ? e->EV.Vfp.Vseg 3417 3523 : (e->EV.Vulong >> 16); 3418 3524 lsw = e->EV.Vfp.Voff; 3419 3525 mswflags = 0; 3420 3526 if (forregs & mES) 3421 3527 { 3422 3528 ce = movregconst(CNIL,reg,msw,0); // MOV reg,segment 3423 3529 genregs(ce,0x8E,0,reg); // MOV ES,reg 3424 3530 msw = lsw; // MOV reg,offset 3425 3531 } 3426 3532 else 3427 3533 { … … 3467 3573 { 3468 3574 reg = e->EV.sp.Vsym->Spreg; 3469 3575 forregs = mask[reg]; 3470 3576 mfuncreg &= ~forregs; 3471 3577 regcon.used |= forregs; 3472 3578 return fixresult(e,forregs,pretregs); 3473 3579 } 3474 3580 3475 3581 c = allocreg(&forregs,®,tym); /* allocate registers */ 3476 3582 3477 3583 if (sz == 1) 3478 3584 { regm_t nregm; 3479 3585 3480 3586 #ifdef DEBUG 3481 3587 if (!(forregs & BYTEREGS)) 3482 3588 { elem_print(e); 3483 3589 printf("forregs = x%x\n",forregs); 3484 3590 } 3485 3591 #endif 3486 3592 assert(forregs & BYTEREGS); 3487 if ( I32)3593 if (!I16) 3488 3594 c = cat(c,loadea(e,&cs,0x8A,reg,0,0,0)); // MOV regL,data 3489 3595 else 3490 3596 { nregm = tyuns(tym) ? 
BYTEREGS : mAX; 3491 3597 if (*pretregs & nregm) 3492 3598 nreg = reg; /* already allocated */ 3493 3599 else 3494 3600 c = cat(c,allocreg(&nregm,&nreg,tym)); 3495 3601 ce = loadea(e,&cs,0x8A,nreg,0,0,0); /* MOV nregL,data */ 3496 3602 c = cat(c,ce); 3497 3603 if (reg != nreg) 3498 3604 { genmovreg(c,reg,nreg); /* MOV reg,nreg */ 3499 3605 cssave(e,mask[nreg],FALSE); 3500 3606 } 3501 3607 } 3502 3608 } 3503 3609 else if (sz <= REGSIZE) 3504 3610 { 3505 3611 ce = loadea(e,&cs,0x8B,reg,0,RMload,0); // MOV reg,data 3506 3612 c = cat(c,ce); 3507 3613 } branches/dmd-1.x/src/backend/cod2.c
r552 r577 19 19 #include "oper.h" 20 20 #include "el.h" 21 21 #include "code.h" 22 22 #include "global.h" 23 23 #include "type.h" 24 24 #if SCPP 25 25 #include "exh.h" 26 26 #endif 27 27 28 28 static char __file__[] = __FILE__; /* for tassert.h */ 29 29 #include "tassert.h" 30 30 31 31 int cdcmp_flag; 32 32 extern signed char regtorm[8]; 33 33 34 34 /******************************** 35 35 * Return mask of index registers used by addressing mode. 36 36 * Index is rm of modregrm field. 37 37 */ 38 38 39 regm_t idxregm( unsigned rm,unsigned sib)39 regm_t idxregm(code *c) 40 40 { 41 41 static const unsigned char idxsib[8] = { mAX,mCX,mDX,mBX,0,mBP,mSI,mDI }; 42 42 static const unsigned char idxrm[8] = {mBX|mSI,mBX|mDI,mSI,mDI,mSI,mDI,0,mBX}; 43 regm_t idxm; 44 45 idxm = 0;43 44 unsigned rm = c->Irm; 45 regm_t idxm = 0; 46 46 if ((rm & 0xC0) != 0xC0) /* if register is not the destination */ 47 47 { 48 if (I32) 48 if (I16) 49 idxm = idxrm[rm & 7]; 50 else 49 51 { 50 52 if ((rm & 7) == 4) /* if sib byte */ 51 53 { 52 idxm = idxsib[(sib >> 3) & 7]; /* scaled index reg */ 54 unsigned sib = c->Isib; 55 unsigned idxreg = (sib >> 3) & 7; 56 if (c->Irex & REX_X) 57 { idxreg |= 8; 58 idxm = mask[idxreg]; // scaled index reg 59 } 60 else 61 idxm = idxsib[idxreg]; // scaled index reg 53 62 if ((sib & 7) == 5 && (rm & 0xC0) == 0) 54 63 ; 55 64 else 56 idxm |= idxsib[sib & 7]; 65 { unsigned base = sib & 7; 66 if (c->Irex & REX_B) 67 idxm |= mask[base | 8]; 68 else 69 idxm |= idxsib[base]; 70 } 57 71 } 58 72 else 59 idxm |= idxsib[rm & 7]; 60 } 61 else 62 idxm = idxrm[rm & 7]; 73 { unsigned base = rm & 7; 74 if (c->Irex & REX_B) 75 idxm |= mask[base | 8]; 76 else 77 idxm |= idxsib[base]; 78 } 79 } 63 80 } 64 81 return idxm; 65 82 } 66 83 67 84 #if TARGET_WINDOS 68 85 /*************************** 69 86 * Gen code for call to floating point routine. 
70 87 */ 71 88 72 89 code *opdouble(elem *e,regm_t *pretregs,unsigned clib) 73 90 { 74 91 regm_t retregs1,retregs2; 75 92 code *cl, *cr, *c; 76 93 77 94 if (config.inline8087) 78 95 return orth87(e,pretregs); 79 96 80 97 if (tybasic(e->E1->Ety) == TYfloat) 81 98 { 82 99 clib += CLIBfadd - CLIBdadd; /* convert to float operation */ … … 95 113 } 96 114 } 97 115 cl = codelem(e->E1, &retregs1,FALSE); 98 116 if (retregs1 & mSTACK) 99 117 cgstate.stackclean++; 100 118 cr = scodelem(e->E2, &retregs2, retregs1 & ~mSTACK, FALSE); 101 119 if (retregs1 & mSTACK) 102 120 cgstate.stackclean--; 103 121 c = callclib(e, clib, pretregs, 0); 104 122 return cat3(cl, cr, c); 105 123 } 106 124 #endif 107 125 108 126 109 127 /***************************** 110 128 * Handle operators which are more or less orthogonal 111 129 * ( + - & | ^ ) 112 130 */ 113 131 114 132 code *cdorth(elem *e,regm_t *pretregs) 115 { tym_t ty ,ty1,ty2;133 { tym_t ty1; 116 134 regm_t retregs,rretregs,posregs; 117 unsigned reg,rreg,op1,op2,mode ,test,byte;135 unsigned reg,rreg,op1,op2,mode; 118 136 int rval; 119 code *c,*cg,*cl ,*cr,cs;120 targ_ int i;137 code *c,*cg,*cl; 138 targ_size_t i; 121 139 elem *e1,*e2; 122 140 int numwords; /* # of words to be operated on */ 123 unsigned char word; /* if word operands */124 int e2oper;125 unsigned sz;126 141 static int nest; 127 142 128 143 //printf("cdorth(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); 129 144 e1 = e->E1; 130 145 e2 = e->E2; 131 146 if (*pretregs == 0) /* if don't want result */ 132 147 { c = codelem(e1,pretregs,FALSE); /* eval left leaf */ 133 148 *pretregs = 0; /* in case they got set */ 134 149 return cat(c,codelem(e2,pretregs,FALSE)); 135 150 } 136 151 137 152 ty1 = tybasic(e1->Ety); 138 153 if (tyfloating(ty1)) 139 154 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 140 155 return orth87(e,pretregs); 141 156 #else 142 157 return opdouble(e,pretregs,(e->Eoper == OPadd) ? 
CLIBdadd 143 158 : CLIBdsub); 144 159 #endif 145 ty2 = tybasic(e2->Ety); 146 e2oper = e2->Eoper; 147 ty = tybasic(e->Ety); 148 sz = tysize[ty]; 149 byte = (sz == 1); 150 word = (I32 && sz == SHORTSIZE) ? CFopsize : 0; 160 tym_t ty2 = tybasic(e2->Ety); 161 int e2oper = e2->Eoper; 162 tym_t ty = tybasic(e->Ety); 163 unsigned sz = tysize[ty]; 164 unsigned byte = (sz == 1); 165 unsigned char word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0; 166 unsigned test = FALSE; // assume we destroyed lvalue 167 code cs; 151 168 cs.Iflags = 0; 152 169 cs.Irex = 0; 153 test = FALSE; /* assume we destroyed lvalue */ 154 cr = CNIL; /* initialize */ 170 code *cr = CNIL; 155 171 156 172 switch (e->Eoper) 157 173 { case OPadd: mode = 0; 158 174 op1 = 0x03; op2 = 0x13; break; /* ADD, ADC */ 159 175 case OPmin: mode = 5; 160 176 op1 = 0x2B; op2 = 0x1B; break; /* SUB, SBB */ 161 177 case OPor: mode = 1; 162 178 op1 = 0x0B; op2 = 0x0B; break; /* OR , OR */ 163 179 case OPxor: mode = 6; 164 180 op1 = 0x33; op2 = 0x33; break; /* XOR, XOR */ 165 181 case OPand: mode = 4; 166 182 op1 = 0x23; op2 = 0x23; /* AND, AND */ 167 183 if (tyreg(ty1) && 168 184 *pretregs == mPSW) /* if flags only */ 169 185 { test = TRUE; 170 186 op1 = 0x85; /* TEST */ 171 187 mode = 0; 172 188 } 173 189 break; 174 190 default: 175 191 assert(0); 176 192 } 177 193 op1 ^= byte; /* if byte operation */ 178 194 179 195 /* Compute number of words to operate on. */ 180 196 numwords = 1; 181 if ( I32)197 if (!I16) 182 198 { /* Cannot operate on longs and then do a 'paint' to a far */ 183 199 /* pointer, because far pointers are 48 bits and longs are 32. */ 184 200 /* Therefore, numwords can never be 2. */ 185 201 assert(!(tyfv(ty1) && tyfv(ty2))); 186 202 if (sz == 2 * REGSIZE) 187 203 { 188 204 numwords++; 189 205 } 190 206 } 191 207 else 192 208 { /* If ty is a TYfptr, but both operands are long, treat the */ 193 209 /* operation as a long. 
*/ 194 210 if ((tylong(ty1) || ty1 == TYhptr) && 195 211 (tylong(ty2) || ty2 == TYhptr)) 196 212 numwords++; 197 213 } 198 214 199 215 // Special cases where only flags are set 200 216 if (test && tysize[ty1] <= REGSIZE && 201 217 (e1->Eoper == OPvar || (e1->Eoper == OPind && !e1->Ecount))) 202 218 { 203 219 // Handle the case of (var & const) 204 220 if (e2->Eoper == OPconst) 205 { targ_int value; 206 221 { 207 222 c = getlvalue(&cs,e1,0); 208 value = e2->EV.Vint;223 targ_size_t value = e2->EV.Vpointer; 209 224 if (sz == 2) 210 225 value &= 0xFFFF; 226 else if (sz == 4) 227 value &= 0xFFFFFFFF; 211 228 if (reghasvalue(byte ? BYTEREGS : ALLREGS,value,®)) 212 229 goto L11; 230 if (sz == 8) 231 { 232 assert(value == (int)value); // sign extend imm32 233 } 213 234 op1 = 0xF7; 214 235 cs.IEV2.Vint = value; 215 236 cs.IFL2 = FLconst; 216 237 goto L10; 217 238 } 218 239 219 240 // Handle (exp & reg) 220 241 if (isregvar(e2,&retregs,®)) 221 242 { 222 243 c = getlvalue(&cs,e1,0); 223 244 L11: 224 c s.Irm |= modregrm(0,reg,0);245 code_newreg(&cs, reg); 225 246 L10: 226 247 cs.Iop = op1 ^ byte; 227 248 cs.Iflags |= word | CFpsw; 228 249 freenode(e1); 229 250 freenode(e2); 230 251 return gen(c,&cs); 231 252 } 232 253 } 233 254 234 255 // Look for possible uses of LEA 235 256 if (e->Eoper == OPadd && 236 257 !(*pretregs & mPSW) && /* flags aren't set by LEA */ 237 258 !nest && // could cause infinite recursion if e->Ecount 238 sz == REGSIZE) // far pointers aren't handled 239 { int e1oper; 240 259 (sz == REGSIZE || (I64 && sz == 4))) // far pointers aren't handled 260 { 241 261 // Handle the case of (e + &var) 242 e1oper = e1->Eoper;262 int e1oper = e1->Eoper; 243 263 if ((e2oper == OPrelconst && (config.target_cpu >= TARGET_Pentium || (!e2->Ecount && stackfl[el_fl(e2)]))) 244 264 || // LEA costs too much for simple EAs on older CPUs 245 265 (e2oper == OPconst && (e1->Eoper == OPcall || e1->Eoper == OPcallns) && !(*pretregs & mAX)) || 246 ( I32&& (isscaledindex(e1) || 
isscaledindex(e2))) ||247 ( I32&& e1oper == OPvar && e1->EV.sp.Vsym->Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2->EV.sp.Vsym->Sfl == FLreg))) ||266 (!I16 && (isscaledindex(e1) || isscaledindex(e2))) || 267 (!I16 && e1oper == OPvar && e1->EV.sp.Vsym->Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2->EV.sp.Vsym->Sfl == FLreg))) || 248 268 (e2oper == OPconst && e1oper == OPeq && e1->E1->Eoper == OPvar) || 249 ( I32&& e2oper == OPrelconst && !e1->Ecount &&269 (!I16 && e2oper == OPrelconst && !e1->Ecount && 250 270 (e1oper == OPmul || e1oper == OPshl) && 251 271 e1->E2->Eoper == OPconst && 252 272 ssindex(e1oper,e1->E2->EV.Vuns) 253 273 ) || 254 ( I32&& e1->Ecount)274 (!I16 && e1->Ecount) 255 275 ) 256 { int inc; 257 258 inc = e->Ecount != 0; 276 { 277 int inc = e->Ecount != 0; 259 278 nest += inc; 260 279 c = getlvalue(&cs,e,0); 261 280 nest -= inc; 281 unsigned reg; 262 282 c = cat(c,allocreg(pretregs,®,ty)); 263 283 cs.Iop = 0x8D; 264 c s.Irm |= modregrm(0,reg,0);284 code_newreg(&cs, reg); 265 285 return gen(c,&cs); /* LEA reg,EA */ 266 286 } 267 287 268 288 // Handle the case of ((e + c) + e2) 269 if ( I32&&289 if (!I16 && 270 290 e1oper == OPadd && 271 291 (e1->E2->Eoper == OPconst || e2oper == OPconst) && 272 292 !e1->Ecount 273 293 ) 274 294 { elem *e11; 275 295 elem *ebase; 276 296 elem *edisp; 277 297 int ss; 278 298 int ss2; 279 299 unsigned reg1,reg2; 280 300 code *c1,*c2,*c3; 281 301 282 302 if (e2oper == OPconst) 283 303 { edisp = e2; 284 304 ebase = e1->E2; 285 305 } 286 306 else 287 307 { edisp = e1->E2; 288 308 ebase = e2; 289 309 } 290 310 291 311 e11 = e1->E1; 292 312 retregs = *pretregs & ALLREGS; 293 313 if (!retregs) 294 314 retregs = ALLREGS; 295 315 ss = 0; 296 316 ss2 = 0; 297 317 298 318 // Handle the case of (((e * c1) + c2) + e2) 299 319 // Handle the case of (((e << c1) + c2) + e2) 300 320 if ((e11->Eoper == OPmul || e11->Eoper == OPshl) && 301 321 e11->E2->Eoper == OPconst && 302 322 !e11->Ecount 303 323 ) 304 { 
targ_size_t co1; 305 306 co1 = el_tolong(e11->E2); 324 { 325 targ_size_t co1 = el_tolong(e11->E2); 307 326 if (e11->Eoper == OPshl) 308 327 { 309 328 if (co1 > 3) 310 329 goto L13; 311 330 ss = co1; 312 331 } 313 332 else 314 333 { 315 334 ss2 = 1; 316 335 switch (co1) 317 336 { 318 337 case 6: ss = 1; break; 319 338 case 12: ss = 1; ss2 = 2; break; 320 339 case 24: ss = 1; ss2 = 3; break; 321 340 case 10: ss = 2; break; 322 341 case 20: ss = 2; ss2 = 2; break; 323 342 case 40: ss = 2; ss2 = 3; break; 324 343 case 18: ss = 3; break; 325 344 case 36: ss = 3; ss2 = 2; break; 326 345 case 72: ss = 3; ss2 = 3; break; 327 346 default: 328 347 ss2 = 0; 329 348 goto L13; 330 349 } 331 350 } 332 351 freenode(e11->E2); 333 352 freenode(e11); 334 353 e11 = e11->E1; 335 354 goto L13; 336 355 } 337 356 else 338 { regm_t regm;357 { 339 358 L13: 359 regm_t regm; 340 360 if (e11->Eoper == OPvar && isregvar(e11,®m,®1)) 341 361 { 342 362 retregs = regm; 343 363 c1 = NULL; 344 364 freenode(e11); 345 365 } 346 366 else 347 367 c1 = codelem(e11,&retregs,FALSE); 348 368 } 349 369 rretregs = ALLREGS & ~retregs; 350 370 c2 = scodelem(ebase,&rretregs,retregs,TRUE); 351 { regm_t sregs; 352 353 sregs = *pretregs & ~rretregs; 371 { 372 regm_t sregs = *pretregs & ~rretregs; 354 373 if (!sregs) 355 374 sregs = ALLREGS & ~rretregs; 356 375 c3 = allocreg(&sregs,®,ty); 357 376 } 358 377 359 378 reg1 = findreg(retregs); 360 379 reg2 = findreg(rretregs); 361 380 362 381 if (ss2) 363 382 { 364 383 assert(reg != reg2); 365 384 if (reg1 == BP) 366 385 { static unsigned imm32[4] = {1+1,2+1,4+1,8+1}; 367 386 368 387 // IMUL reg,imm32 369 c = genc2(CNIL,0x69,modreg rm(3,reg,reg1),imm32[ss]);388 c = genc2(CNIL,0x69,modregxrm(3,reg,BP),imm32[ss]); 370 389 } 371 390 else 372 391 { // LEA reg,[reg1*ss][reg1] 373 c = gen2sib(CNIL,0x8D,modregrm(0,reg,4),modregrm(ss,reg1,reg1)); 392 c = gen2sib(CNIL,0x8D,modregxrm(0,reg,4),modregrm(ss,reg1 & 7,reg1 & 7)); 393 if (reg1 & 8) 394 code_orrex(c, REX_X | REX_B); 374 
395 } 375 396 reg1 = reg; 376 397 ss = ss2; // use *2 for scale 377 398 } 378 399 else 379 400 c = NULL; 380 401 c = cat4(c1,c2,c3,c); 381 402 382 403 cs.Iop = 0x8D; // LEA reg,c[reg1*ss][reg2] 383 cs.Irm = modregrm(2,reg ,4);384 cs.Isib = modregrm(ss,reg1 ,reg2);404 cs.Irm = modregrm(2,reg & 7,4); 405 cs.Isib = modregrm(ss,reg1 & 7,reg2 & 7); 385 406 cs.Iflags = CFoff; 386 407 cs.Irex = 0; 408 if (reg & 8) 409 cs.Irex |= REX_R; 410 if (reg1 & 8) 411 cs.Irex |= REX_X; 412 if (reg2 & 8) 413 cs.Irex |= REX_B; 387 414 cs.IFL1 = FLconst; 388 415 cs.IEV1.Vuns = edisp->EV.Vuns; 389 416 390 417 freenode(edisp); 391 418 freenode(e1); 392 419 c = gen(c,&cs); 393 420 return cat(c,fixresult(e,mask[reg],pretregs)); 394 421 } 395 422 } 396 423 397 424 posregs = (byte) ? BYTEREGS : (mES | ALLREGS | mBP); 398 425 retregs = *pretregs & posregs; 399 426 if (retregs == 0) /* if no return regs speced */ 400 427 /* (like if wanted flags only) */ 401 428 retregs = ALLREGS & posregs; // give us some 402 429 403 430 if (tysize[ty1] > REGSIZE && numwords == 1) 404 431 { /* The only possibilities are (TYfptr + tyword) or (TYfptr - tyword) */ 405 432 #if DEBUG 406 433 if (tysize[ty2] != REGSIZE) … … 489 516 490 517 /* if retregs doesn't have any regs in it that aren't reg vars */ 491 518 if ((retregs & ~regcon.mvar) == 0) 492 519 retregs |= mAX; 493 520 } 494 521 else if (numwords == 2 && retregs & mES) 495 522 retregs = (retregs | mMSW) & ALLREGS; 496 523 497 524 // Determine if we should swap operands, because 498 525 // mov EAX,x 499 526 // add EAX,reg 500 527 // is faster than: 501 528 // mov EAX,reg 502 529 // add EAX,x 503 530 else if (e2oper == OPvar && 504 531 e1->Eoper == OPvar && 505 532 e->Eoper != OPmin && 506 533 isregvar(e1,®m,NULL) && 507 534 regm != retregs && 508 535 tysize[ty1] == tysize[ty2]) 509 { elem *es; 510 511 es = e1; 536 { 537 elem *es = e1; 512 538 e1 = e2; 513 539 e2 = es; 514 540 } 515 541 cl = codelem(e1,&retregs,test); /* eval left leaf */ 516 542 reg = 
findreg(retregs); 517 543 } 518 544 switch (e2oper) 519 545 { 520 546 case OPind: /* if addressing mode */ 521 547 if (!e2->Ecount) /* if not CSE */ 522 548 goto L1; /* try OP reg,EA */ 523 549 /* FALL-THROUGH */ 524 550 default: /* operator node */ 525 551 L2: 526 552 rretregs = ALLREGS & ~retregs; 527 553 /* Be careful not to do arithmetic on ES */ 528 554 if (tysize[ty1] == REGSIZE && tysize[ty2] > REGSIZE && *pretregs != mPSW) 529 555 rretregs = *pretregs & (mES | ALLREGS | mBP) & ~retregs; 530 556 else if (byte) 531 557 rretregs &= BYTEREGS; 532 558 533 559 cr = scodelem(e2,&rretregs,retregs,TRUE); /* get rvalue */ 534 560 rreg = (tysize[ty2] > REGSIZE) ? findreglsw(rretregs) : findreg(rretregs); 535 561 c = CNIL; 536 562 if (numwords == 1) /* ADD reg,rreg */ 537 563 { 538 564 /* reverse operands to avoid moving around the segment value */ 539 565 if (tysize[ty2] > REGSIZE) 540 566 { c = cat(c,getregs(rretregs)); 541 567 c = genregs(c,op1,rreg,reg); 542 568 retregs = rretregs; /* reverse operands */ 543 569 } 544 570 else 545 571 { c = genregs(c,op1,reg,rreg); 546 if ( I32&& *pretregs & mPSW)572 if (!I16 && *pretregs & mPSW) 547 573 c->Iflags |= word; 548 574 } 575 if (I64 && sz == 8) 576 code_orrex(c, REX_W); 549 577 } 550 578 else /* numwords == 2 */ /* ADD lsreg,lsrreg */ 551 579 { 552 580 reg = findreglsw(retregs); 553 581 rreg = findreglsw(rretregs); 554 582 c = genregs(c,op1,reg,rreg); 555 583 if (e->Eoper == OPadd || e->Eoper == OPmin) 556 584 code_orflag(c,CFpsw); 557 585 reg = findregmsw(retregs); 558 586 rreg = findregmsw(rretregs); 559 587 if (!(e2oper == OPu16_32 && // if second operand is 0 560 588 (op2 == 0x0B || op2 == 0x33)) // and OR or XOR 561 589 ) 562 590 genregs(c,op2,reg,rreg); // ADC msreg,msrreg 563 591 } 564 592 break; 565 593 566 594 case OPrelconst: 567 595 if (sz != REGSIZE) 568 596 goto L2; 569 597 if (segfl[el_fl(e2)] != 3) /* if not in data segment */ 570 598 goto L2; 571 599 if (evalinregister(e2)) 572 600 goto L2; 573 601 
cs.IEVoffset2 = e2->EV.sp.Voffset; 574 602 cs.IEVsym2 = e2->EV.sp.Vsym; 575 603 cs.Iflags |= CFoff; 576 604 i = 0; /* no INC or DEC opcode */ 577 605 rval = 0; 578 606 goto L3; 579 607 580 608 case OPconst: 581 609 if (tyfv(ty2)) 582 610 goto L2; 583 611 if (numwords == 1) 584 612 { 585 i = e2->EV.V int;613 i = e2->EV.Vpointer; 586 614 if (word) 587 615 { 588 616 if (!(*pretregs & mPSW) && 589 617 config.flags4 & CFG4speed && 590 618 (e->Eoper == OPor || e->Eoper == OPxor || test || 591 619 (e1->Eoper != OPvar && e1->Eoper != OPind))) 592 620 { word = 0; 593 621 i &= 0xFFFF; 594 622 } 595 623 } 596 624 rval = reghasvalue(byte ? BYTEREGS : ALLREGS,i,&rreg); 597 625 cs.IEV2.Vint = i; 598 626 L3: 599 627 op1 ^= byte; 600 628 cs.Iflags |= word; 601 629 if (rval) 602 630 { cs.Iop = op1 ^ 2; 603 631 mode = rreg; 604 632 } 605 633 else 606 634 cs.Iop = 0x81; 607 cs.Irm = modregrm(3,mode,reg); 635 cs.Irm = modregrm(3,mode&7,reg&7); 636 if (mode & 8) 637 cs.Irex |= REX_R; 638 if (reg & 8) 639 cs.Irex |= REX_B; 608 640 cs.IFL2 = (e2->Eoper == OPconst) ? FLconst : el_fl(e2); 609 641 /* Modify instruction for special cases */ 610 642 switch (e->Eoper) 611 643 { case OPadd: 612 644 { int iop; 613 645 614 646 if (i == 1) 615 647 iop = 0; /* INC reg */ 616 648 else if (i == -1) 617 649 iop = 8; /* DEC reg */ 618 650 else 619 651 break; 620 652 cs.Iop = (0x40 | iop | reg) ^ byte; 621 if ( byte && *pretregs & mPSW)622 { cs.Irm = modregrm(3,0,reg ) | iop;653 if ((byte && *pretregs & mPSW) || I64) 654 { cs.Irm = modregrm(3,0,reg & 7) | iop; 623 655 cs.Iop = 0xFF; 624 656 } 625 657 break; 626 658 } 627 659 case OPand: 628 660 if (test) 629 661 cs.Iop = rval ? 
op1 : 0xF7; // TEST 630 662 break; 631 663 } 632 664 if (*pretregs & mPSW) 633 665 cs.Iflags |= CFpsw; 634 666 cs.Iop ^= byte; 635 667 c = gen(CNIL,&cs); 636 668 cs.Iflags &= ~CFpsw; 637 669 } 638 670 else if (numwords == 2) 639 671 { unsigned lsreg; 640 672 targ_int msw; 641 673 642 674 c = getregs(retregs); … … 657 689 cs.Iflags &= ~CFpsw; 658 690 659 691 cs.Irm = (cs.Irm & modregrm(3,7,0)) | reg; 660 692 cs.IEV2.Vint = msw; 661 693 if (e->Eoper == OPadd) 662 694 cs.Irm |= modregrm(0,2,0); /* ADC */ 663 695 c = gen(c,&cs); 664 696 } 665 697 else 666 698 assert(0); 667 699 freenode(e2); 668 700 break; 669 701 670 702 case OPvar: 671 703 L1: 672 704 if (tyfv(ty2)) 673 705 goto L2; 674 706 c = loadea(e2,&cs,op1, 675 707 ((numwords == 2) ? findreglsw(retregs) : reg), 676 708 0,retregs,retregs); 677 if ( I32&& word)709 if (!I16 && word) 678 710 { if (*pretregs & mPSW) 679 711 code_orflag(c,word); 680 712 else 681 { code *ce; 682 683 ce = code_last(c); 713 { 714 code *ce = code_last(c); 684 715 ce->Iflags &= ~word; 685 716 } 686 717 } 687 718 else if (numwords == 2) 688 719 { 689 720 if (e->Eoper == OPadd || e->Eoper == OPmin) 690 721 code_orflag(c,CFpsw); 691 722 reg = findregmsw(retregs); 692 723 if (EOP(e2)) 693 724 { getlvalue_msw(&cs); 694 725 cs.Iop = op2; 695 726 NEWREG(cs.Irm,reg); 696 727 c = gen(c,&cs); /* ADC reg,data+2 */ 697 728 } 698 729 else 699 730 c = cat(c,loadea(e2,&cs,op2,reg,REGSIZE,retregs,0)); 700 731 } 701 732 freenode(e2); 702 733 break; 703 734 } … … 733 764 int opunslng; 734 765 int pow2; 735 766 736 767 if (*pretregs == 0) // if don't want result 737 768 { c = codelem(e->E1,pretregs,FALSE); // eval left leaf 738 769 *pretregs = 0; // in case they got set 739 770 return cat(c,codelem(e->E2,pretregs,FALSE)); 740 771 } 741 772 742 773 keepregs = 0; 743 774 cs.Iflags = 0; 744 775 cs.Irex = 0; 745 776 c = cg = cr = CNIL; // initialize 746 777 e2 = e->E2; 747 778 e1 = e->E1; 748 779 tyml = tybasic(e1->Ety); 749 780 sz = tysize[tyml]; 750 781 byte 
= tybyte(e->Ety) != 0; 751 782 uns = tyuns(tyml) || tyuns(e2->Ety); 752 783 oper = e->Eoper; 784 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 785 unsigned grex = rex << 16; 753 786 754 787 if (tyfloating(tyml)) 755 788 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 756 789 return orth87(e,pretregs); 757 790 #else 758 791 return opdouble(e,pretregs,(oper == OPmul) ? CLIBdmul : CLIBddiv); 759 792 #endif 760 793 761 opunslng = I 32 ? OPu32_64 : OPu16_32;794 opunslng = I16 ? OPu16_32 : OPu32_64; 762 795 switch (oper) 763 796 { 764 797 case OPmul: 765 798 resreg = mAX; 766 799 op = 5 - uns; 767 800 lib = CLIBlmul; 768 801 break; 769 802 770 803 case OPdiv: 771 804 resreg = mAX; 772 805 op = 7 - uns; 773 806 lib = uns ? CLIBuldiv : CLIBldiv; 774 807 if (I32) 775 808 keepregs |= mSI | mDI; 776 809 break; 777 810 778 811 case OPmod: 779 812 resreg = mDX; 780 813 op = 7 - uns; 781 814 lib = uns ? CLIBulmod : CLIBlmod; … … 815 848 case OPulngllng: 816 849 case OPlngllng: 817 850 if (sz != 2 * REGSIZE || oper != OPmul || e1->Eoper != e2->Eoper || 818 851 e1->Ecount || e2->Ecount) 819 852 goto L2; 820 853 op = (e2->Eoper == opunslng) ? 
4 : 5; 821 854 retregs = mAX; 822 855 cl = codelem(e1->E1,&retregs,FALSE); /* eval left leaf */ 823 856 if (e2->E1->Eoper == OPvar || 824 857 (e2->E1->Eoper == OPind && !e2->E1->Ecount) 825 858 ) 826 859 { 827 860 cr = loadea(e2->E1,&cs,0xF7,op,0,mAX,mAX | mDX); 828 861 } 829 862 else 830 863 { 831 864 rretregs = ALLREGS & ~mAX; 832 865 cr = scodelem(e2->E1,&rretregs,retregs,TRUE); // get rvalue 833 866 cg = getregs(mAX | mDX); 834 867 rreg = findreg(rretregs); 835 cg = gen2(cg,0xF7, modregrm(3,op,rreg)); // OP AX,rreg868 cg = gen2(cg,0xF7,grex | modregrmx(3,op,rreg)); // OP AX,rreg 836 869 } 837 870 freenode(e->E1); 838 871 freenode(e2); 839 872 c = fixresult(e,mAX | mDX,pretregs); 840 873 break; 841 874 842 875 case OPconst: 843 876 e2factor = el_tolong(e2); 844 877 845 878 if (oper == OPmul && I32 && sz == REGSIZE * 2) 846 879 { targ_int msw,lsw; 847 880 regm_t scratch; 848 881 unsigned reg; 849 882 targ_llong e2factor; 850 883 851 884 cl = codelem(e1,&retregs,FALSE); // eval left leaf 852 885 /* IMUL EDX,EDX,lsw 853 886 IMUL reg,EAX,msw 854 887 ADD reg,EDX 855 888 MOV EDX,lsw … … 871 904 msw = e2factor >> (REGSIZE * 8); 872 905 873 906 if (msw) 874 907 { cg = genmulimm(cg,DX,DX,lsw); 875 908 cg = genmulimm(cg,reg,AX,msw); 876 909 cg = gen2(cg,0x03,modregrm(3,reg,DX)); 877 910 } 878 911 else 879 912 cg = genmulimm(cg,reg,DX,lsw); 880 913 881 914 cg = movregconst(cg,DX,lsw,0); // MOV EDX,lsw 882 915 cg = cat(cg,getregs(mDX)); 883 916 cg = gen2(cg,0xF7,modregrm(3,4,DX)); // MUL EDX 884 917 gen2(cg,0x03,modregrm(3,DX,reg)); // ADD EDX,reg 885 918 886 919 resreg = mDX | mAX; 887 920 freenode(e2); 888 921 goto L3; 889 922 } 890 923 891 if (oper != OPmul && e2factor == 10 && sz == REGSIZE && 924 if (oper != OPmul && e2factor == 10 && 925 (!I16 && sz == 4) && 892 926 config.flags4 & CFG4speed && !uns) 893 927 { 894 928 /* R1 / 10 895 929 * 896 930 * MOV EAX,0x66666667 897 931 * IMUL R1 898 932 * MOV EAX,R1 899 933 * SAR EAX,31 900 934 * SAR EDX,2 901 935 * SUB EDX,EAX 
902 936 * IMUL EAX,EDX,10 903 937 * SUB R1,EAX 904 938 * 905 939 * EDX = quotient 906 940 * R1 = remainder 907 941 */ 908 942 regm_t regm; 909 943 unsigned reg; 910 944 911 945 regm = allregs & ~(mAX | mDX); 912 946 cl = codelem(e1,®m,FALSE); // eval left leaf 913 947 reg = findreg(regm); 914 948 cg = getregs(regm | mDX | mAX); 915 949 916 950 cg = movregconst(cg, AX, 0x66666667, 0); // MOV EAX,0x66666667 917 cg = gen2(cg,0xF7,modregrm (3,5,reg));// IMUL R1951 cg = gen2(cg,0xF7,modregrmx(3,5,reg)); // IMUL R1 918 952 genmovreg(cg, AX, reg); // MOV EAX,R1 919 953 genc2(cg,0xC1,modregrm(3,7,AX),31); // SAR EAX,31 920 954 genc2(cg,0xC1,modregrm(3,7,DX),2); // SAR EDX,2 921 955 gen2(cg,0x2B,modregrm(3,DX,AX)); // SUB EDX,EAX 922 956 923 957 switch (oper) 924 958 { case OPdiv: 925 959 resreg = mDX; 926 960 break; 927 961 928 962 case OPmod: 929 963 genmulimm(cg,AX,DX,10); // IMUL EAX,EDX,10 930 gen2(cg,0x2B,modreg rm(3,reg,AX));// SUB R1,EAX964 gen2(cg,0x2B,modregxrm(3,reg,AX)); // SUB R1,EAX 931 965 resreg = regm; 932 966 break; 933 967 934 968 case OPremquo: 935 969 genmulimm(cg,AX,DX,10); // IMUL EAX,EDX,10 936 gen2(cg,0x2B,modreg rm(3,reg,AX));// SUB R1,EAX970 gen2(cg,0x2B,modregxrm(3,reg,AX)); // SUB R1,EAX 937 971 genmovreg(cg, AX, DX); // MOV EAX,EDX 938 972 genmovreg(cg, DX, reg); // MOV EDX,R1 939 973 resreg = mDX | mAX; 940 974 break; 941 975 942 976 default: 943 977 assert(0); 944 978 } 945 979 freenode(e2); 946 980 goto L3; 947 981 } 948 982 949 983 if (sz > REGSIZE) 950 984 goto L2; 951 985 952 986 if (oper == OPmul && config.target_cpu >= TARGET_80286) 953 987 { unsigned reg; 954 988 int ss; 955 989 956 990 freenode(e2); 957 991 retregs = byte ? 
BYTEREGS : ALLREGS; 958 992 resreg = *pretregs & (ALLREGS | mBP); 959 993 if (!resreg) 960 994 resreg = retregs; 961 995 962 if ( I32)996 if (!I16) 963 997 { // See if we can use an LEA instruction 964 998 int ss2 = 0; 965 999 int shift; 966 1000 967 1001 switch (e2factor) 968 1002 { 969 1003 case 12: ss = 1; ss2 = 2; goto L4; 970 1004 case 24: ss = 1; ss2 = 3; goto L4; 971 1005 972 1006 case 6: 973 1007 case 3: ss = 1; goto L4; 974 1008 975 1009 case 20: ss = 2; ss2 = 2; goto L4; 976 1010 case 40: ss = 2; ss2 = 3; goto L4; 977 1011 978 1012 case 10: 979 1013 case 5: ss = 2; goto L4; 980 1014 981 1015 case 36: ss = 3; ss2 = 2; goto L4; 982 1016 case 72: ss = 3; ss2 = 3; goto L4; 983 1017 984 1018 case 18: 985 1019 case 9: ss = 3; goto L4; 986 1020 987 1021 L4: 988 1022 { 989 1023 #if 1 990 regm_t regm; 991 int r; 992 993 regm = byte ? BYTEREGS : ALLREGS; // don't use EBP 1024 regm_t regm = byte ? BYTEREGS : ALLREGS; // don't use EBP 994 1025 cl = codelem(e->E1,®m,TRUE); 995 r = findreg(regm);1026 unsigned r = findreg(regm); 996 1027 997 1028 if (ss2) 998 1029 { // Don't use EBP 999 1030 resreg &= ~mBP; 1000 1031 if (!resreg) 1001 1032 resreg = retregs; 1002 1033 } 1003 1034 cg = allocreg(&resreg,®,tyml); 1004 1035 1005 c = gen2sib(CNIL,0x8D, modregrm(0,reg,4),1006 modreg rm(ss,r,r));1036 c = gen2sib(CNIL,0x8D,grex | modregxrm(0,reg,4), 1037 modregxrmx(ss,r,r)); 1007 1038 if (ss2) 1008 1039 { 1009 gen2sib(c,0x8D, modregrm(0,reg,4),1010 modreg rm(ss2,reg,5));1040 gen2sib(c,0x8D,grex | modregxrm(0,reg,4), 1041 modregxrm(ss2,reg,5)); 1011 1042 code_last(c)->IFL1 = FLconst; 1012 1043 code_last(c)->IEV1.Vint = 0; 1013 1044 } 1014 1045 else if (!(e2factor & 1)) // if even factor 1015 genregs(c,0x03,reg,reg); // ADD reg,reg 1046 { genregs(c,0x03,reg,reg); // ADD reg,reg 1047 code_orrex(c,rex); 1048 } 1016 1049 cg = cat(cg,c); 1017 1050 goto L3; 1018 1051 #else 1019 1052 1020 1053 // Don't use EBP 1021 1054 resreg &= ~mBP; 1022 1055 if (!resreg) 1023 1056 resreg = retregs; 
1024 1057 1025 1058 cl = codelem(e->E1,&resreg,FALSE); 1026 1059 reg = findreg(resreg); 1027 1060 cg = getregs(resreg); 1028 1061 c = gen2sib(CNIL,0x8D,modregrm(0,reg,4), 1029 1062 modregrm(ss,reg,reg)); 1030 1063 if (ss2) 1031 1064 { 1032 1065 gen2sib(c,0x8D,modregrm(0,reg,4), 1033 1066 modregrm(ss2,reg,5)); 1034 1067 code_last(c)->IFL1 = FLconst; 1035 1068 code_last(c)->IEV1.Vint = 0; 1036 1069 } 1037 1070 else if (!(e2factor & 1)) // if even factor 1038 1071 genregs(c,0x03,reg,reg); // ADD reg,reg 1039 1072 cg = cat(cg,c); 1040 1073 goto L3; 1041 1074 #endif 1042 1075 } 1043 1076 case 37: 1044 1077 case 74: shift = 2; 1045 1078 goto L5; 1046 1079 case 13: 1047 1080 case 26: shift = 0; 1048 1081 goto L5; 1049 1082 L5: 1050 { regm_t sregm; 1051 unsigned sreg; 1052 1083 { 1053 1084 // Don't use EBP 1054 1085 resreg &= ~mBP; 1055 1086 if (!resreg) 1056 1087 resreg = retregs; 1057 1088 cl = allocreg(&resreg,®,TYint); 1058 1089 1059 sregm = ALLREGS & ~resreg;1090 regm_t sregm = ALLREGS & ~resreg; 1060 1091 cl = cat(cl,codelem(e->E1,&sregm,FALSE)); 1061 sreg = findreg(sregm);1092 unsigned sreg = findreg(sregm); 1062 1093 cg = getregs(resreg | sregm); 1063 1094 // LEA reg,[sreg * 4][sreg] 1064 1095 // SHL sreg,shift 1065 1096 // LEA reg,[sreg * 8][reg] 1066 c = gen2sib(CNIL,0x8D, modregrm(0,reg,4),1067 modreg rm(2,sreg,sreg));1097 c = gen2sib(CNIL,0x8D,grex | modregxrm(0,reg,4), 1098 modregxrmx(2,sreg,sreg)); 1068 1099 if (shift) 1069 genc2(c,0xC1, modregrm(3,4,sreg),shift);1070 gen2sib(c,0x8D, modregrm(0,reg,4),1071 modreg rm(3,sreg,reg));1100 genc2(c,0xC1,grex | modregrmx(3,4,sreg),shift); 1101 gen2sib(c,0x8D,grex | modregxrm(0,reg,4), 1102 modregxrmx(3,sreg,reg)); 1072 1103 if (!(e2factor & 1)) // if even factor 1073 genregs(c,0x03,reg,reg); // ADD reg,reg 1104 { genregs(c,0x03,reg,reg); // ADD reg,reg 1105 code_orrex(c,rex); 1106 } 1074 1107 cg = cat(cg,c); 1075 1108 goto L3; 1076 1109 } 1077 1110 } 1078 1111 } 1079 1112 1080 1113 cl = 
scodelem(e->E1,&retregs,0,TRUE); // eval left leaf 1081 1114 reg = findreg(retregs); 1082 1115 cg = allocreg(&resreg,&rreg,e->Ety); 1083 1116 1084 1117 /* IMUL reg,imm16 */ 1085 cg = genc2(cg,0x69, modregrm(3,rreg,reg),e2factor);1118 cg = genc2(cg,0x69,grex | modregxrmx(3,rreg,reg),e2factor); 1086 1119 goto L3; 1087 1120 } 1088 1121 1089 1122 // Special code for signed divide or modulo by power of 2 1090 if (sz == REGSIZE && (oper == OPdiv || oper == OPmod) && !uns && 1123 if ((sz == REGSIZE || (I64 && sz == 4)) && 1124 (oper == OPdiv || oper == OPmod) && !uns && 1091 1125 (pow2 = ispow2(e2factor)) != -1 && 1092 1126 !(config.target_cpu < TARGET_80286 && pow2 != 1 && oper == OPdiv) 1093 1127 ) 1094 1128 { 1095 1129 if (pow2 == 1 && oper == OPdiv && config.target_cpu > TARGET_80386) 1096 1130 { 1097 1131 // test eax,eax 1098 1132 // jns L1 1099 1133 // add eax,1 1100 1134 // L1: sar eax,1 1101 1135 1102 1136 code *cnop; 1103 unsigned reg;1104 1137 1105 1138 retregs = allregs; 1106 1139 cl = codelem(e->E1,&retregs,FALSE); // eval left leaf 1107 reg = findreg(retregs);1140 unsigned reg = findreg(retregs); 1108 1141 freenode(e2); 1109 1142 cg = getregs(retregs); 1110 1143 cg = gentstreg(cg,reg); // TEST reg,reg 1144 code_orrex(cg, rex); 1111 1145 cnop = gennop(CNIL); 1112 1146 genjmp(cg,JNS,FLcode,(block *)cnop); // JNS cnop 1113 gen1(cg,0x40 + reg); // INC reg 1147 if (I64) 1148 { 1149 gen2(cg,0xFF,modregrmx(3,0,reg)); // INC reg 1150 code_orrex(cg,rex); 1151 } 1152 else 1153 gen1(cg,0x40 + reg); // INC reg 1114 1154 cg = cat(cg,cnop); 1115 gen2(cg,0xD1, modregrm(3,7,reg)); // SAR reg,11155 gen2(cg,0xD1,grex | modregrmx(3,7,reg)); // SAR reg,1 1116 1156 resreg = retregs; 1117 1157 goto L3; 1118 1158 } 1119 1159 cl = codelem(e->E1,&retregs,FALSE); // eval left leaf 1120 1160 freenode(e2); 1121 1161 cg = getregs(mAX | mDX); // trash these regs 1122 1162 cg = gen1(cg,0x99); // CWD 1163 code_orrex(cg, rex); 1123 1164 if (pow2 == 1) 1124 1165 { 1125 1166 if (oper == OPdiv) 
1126 { gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1127 gen2(cg,0xD1, modregrm(3,7,AX)); // SAR AX,11167 { gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1168 gen2(cg,0xD1,grex | modregrm(3,7,AX)); // SAR AX,1 1128 1169 } 1129 1170 else // OPmod 1130 { gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1131 genc2(cg,0x81, modregrm(3,4,AX),1); // AND AX,11132 gen2(cg,0x03, modregrm(3,DX,AX)); // ADD DX,AX1171 { gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1172 genc2(cg,0x81,grex | modregrm(3,4,AX),1); // AND AX,1 1173 gen2(cg,0x03,grex | modregrm(3,DX,AX)); // ADD DX,AX 1133 1174 } 1134 1175 } 1135 1176 else 1136 1177 { targ_ulong m; 1137 1178 1138 1179 m = (1 << pow2) - 1; 1139 1180 if (oper == OPdiv) 1140 { genc2(cg,0x81, modregrm(3,4,DX),m); // AND DX,m1141 gen2(cg,0x03, modregrm(3,AX,DX)); // ADD AX,DX1181 { genc2(cg,0x81,grex | modregrm(3,4,DX),m); // AND DX,m 1182 gen2(cg,0x03,grex | modregrm(3,AX,DX)); // ADD AX,DX 1142 1183 // Be careful not to generate this for 8088 1143 1184 assert(config.target_cpu >= TARGET_80286); 1144 genc2(cg,0xC1, modregrm(3,7,AX),pow2); // SAR AX,pow21185 genc2(cg,0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2 1145 1186 } 1146 1187 else // OPmod 1147 { gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1148 gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1149 genc2(cg,0x81, modregrm(3,4,AX),m); // AND AX,mask1150 gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1151 gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1188 { gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1189 gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1190 genc2(cg,0x81,grex | modregrm(3,4,AX),m); // AND AX,mask 1191 gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1192 gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1152 1193 resreg = mAX; 1153 1194 } 1154 1195 } 1155 1196 goto L3; 1156 1197 } 1157 1198 goto L2; 1158 1199 case OPind: 1159 1200 if (!e2->Ecount) /* if not CSE */ 1160 1201 goto L1; /* try OP reg,EA */ 1161 1202 goto L2; 1162 
1203 default: /* OPconst and operators */ 1163 1204 L2: 1164 1205 cl = codelem(e1,&retregs,FALSE); /* eval left leaf */ 1165 1206 cr = scodelem(e2,&rretregs,retregs,TRUE); /* get rvalue */ 1166 1207 if (sz <= REGSIZE) 1167 1208 { cg = getregs(mAX | mDX); /* trash these regs */ 1168 1209 if (op == 7) /* signed divide */ 1169 cg = gen1(cg,0x99); /* CWD */ 1210 { cg = gen1(cg,0x99); // CWD 1211 code_orrex(cg,rex); 1212 } 1170 1213 else if (op == 6) /* unsigned divide */ 1171 { cg = movregconst(cg,DX,0,0); // MOV DX,0 1214 { 1215 cg = movregconst(cg,DX,0,(sz == 8) ? 64 : 0); // MOV DX,0 1172 1216 cg = cat(cg,getregs(mDX)); 1173 1217 } 1174 1218 rreg = findreg(rretregs); 1175 cg = gen2(cg,0xF7 ^ byte, modregrm(3,op,rreg)); /* OP AX,rreg */1219 cg = gen2(cg,0xF7 ^ byte,grex | modregrmx(3,op,rreg)); // OP AX,rreg 1176 1220 L3: 1177 1221 c = fixresult(e,resreg,pretregs); 1178 1222 } 1179 1223 else if (sz == 2 * REGSIZE) 1180 1224 { 1181 1225 if (config.target_cpu >= TARGET_PentiumPro && oper == OPmul) 1182 1226 { 1183 1227 /* IMUL ECX,EAX 1184 1228 IMUL EDX,EBX 1185 1229 ADD ECX,EDX 1186 1230 MUL EBX 1187 1231 ADD EDX,ECX 1188 1232 */ 1189 1233 cg = getregs(mAX|mDX|mCX); 1190 1234 cg = gen2(cg,0x0FAF,modregrm(3,CX,AX)); 1191 1235 gen2(cg,0x0FAF,modregrm(3,DX,BX)); 1192 1236 gen2(cg,0x03,modregrm(3,CX,DX)); 1193 1237 gen2(cg,0xF7,modregrm(3,4,BX)); 1194 1238 gen2(cg,0x03,modregrm(3,DX,CX)); 1195 1239 c = fixresult(e,mDX|mAX,pretregs); 1196 1240 } 1197 1241 else 1198 1242 c = callclib(e,lib,pretregs,keepregs); 1199 1243 } 1200 1244 else 1201 1245 assert(0); 1202 1246 break; 1203 1247 case OPvar: 1204 1248 L1: 1205 if ( I32&& sz <= REGSIZE)1249 if (!I16 && sz <= REGSIZE) 1206 1250 { 1207 1251 if (oper == OPmul && sz > 1) /* no byte version */ 1208 1252 { 1209 1253 /* Generate IMUL r32,r/m32 */ 1210 1254 retregs = *pretregs & (ALLREGS | mBP); 1211 1255 if (!retregs) 1212 1256 retregs = ALLREGS; 1213 1257 cl = codelem(e1,&retregs,FALSE); /* eval left leaf */ 1214 1258 resreg = 
retregs; 1215 1259 cr = loadea(e2,&cs,0x0FAF,findreg(resreg),0,retregs,retregs); 1216 1260 freenode(e2); 1217 1261 goto L3; 1218 1262 } 1219 1263 } 1220 1264 else 1221 1265 { 1222 1266 if (sz == 2 * REGSIZE) 1223 1267 { int reg; 1224 1268 1225 1269 if (oper != OPmul || e->E1->Eoper != opunslng || … … 1278 1322 code *cdnot(elem *e,regm_t *pretregs) 1279 1323 { unsigned reg; 1280 1324 tym_t forflags; 1281 1325 code *c1,*c,*cfalse,*ctrue,*cnop; 1282 1326 unsigned sz; 1283 1327 regm_t retregs; 1284 1328 elem *e1; 1285 1329 int op; 1286 1330 1287 1331 e1 = e->E1; 1288 1332 if (*pretregs == 0) 1289 1333 goto L1; 1290 1334 if (*pretregs == mPSW) 1291 1335 { /*assert(e->Eoper != OPnot && e->Eoper != OPbool);*/ /* should've been optimized */ 1292 1336 L1: 1293 1337 return codelem(e1,pretregs,FALSE); /* evaluate e1 for cc */ 1294 1338 } 1295 1339 1296 1340 op = e->Eoper; 1297 1341 sz = tysize(e1->Ety); 1342 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 1343 unsigned grex = rex << 16; 1298 1344 if (!tyfloating(e1->Ety)) 1299 1345 { 1300 1346 if (sz <= REGSIZE && e1->Eoper == OPvar) 1301 1347 { code cs; 1302 1348 1303 1349 c = getlvalue(&cs,e1,0); 1304 1350 freenode(e1); 1305 if ( I32&& sz == 2)1351 if (!I16 && sz == 2) 1306 1352 cs.Iflags |= CFopsize; 1307 1353 1308 1354 retregs = *pretregs & (ALLREGS | mBP); 1309 1355 if (config.target_cpu >= TARGET_80486 && 1310 1356 tysize(e->Ety) == 1) 1311 1357 { 1312 1358 if (reghasvalue((sz == 1) ? 
BYTEREGS : ALLREGS,0,®)) 1313 1359 cs.Iop = 0x39; 1314 1360 else 1315 1361 { cs.Iop = 0x81; 1316 1362 reg = 7; 1317 1363 cs.IFL2 = FLconst; 1318 1364 cs.IEV2.Vint = 0; 1319 1365 } 1320 1366 cs.Iop ^= (sz == 1); 1321 c s.Irm |= modregrm(0,reg,0);1367 code_newreg(&cs,reg); 1322 1368 c = gen(c,&cs); // CMP e1,0 1323 1369 1324 1370 retregs &= BYTEREGS; 1325 1371 if (!retregs) 1326 1372 retregs = BYTEREGS; 1327 1373 c1 = allocreg(&retregs,®,TYint); 1328 1374 1329 1375 int iop; 1330 1376 if (op == OPbool) 1331 1377 { 1332 1378 iop = 0x0F95; // SETNZ rm8 1333 1379 } 1334 1380 else 1335 1381 { 1336 1382 iop = 0x0F94; // SETZ rm8 1337 1383 } 1338 c1 = gen2(c1,iop, modregrm(3,0,reg));1384 c1 = gen2(c1,iop,grex | modregrmx(3,0,reg)); 1339 1385 if (op == OPbool) 1340 1386 *pretregs &= ~mPSW; 1341 1387 goto L4; 1342 1388 } 1343 1389 1344 1390 if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,1,®)) 1345 1391 cs.Iop = 0x39; 1346 1392 else 1347 1393 { cs.Iop = 0x81; 1348 1394 reg = 7; 1349 1395 cs.IFL2 = FLconst; 1350 1396 cs.IEV2.Vint = 1; 1351 1397 } 1352 1398 cs.Iop ^= (sz == 1); 1353 c s.Irm |= modregrm(0,reg,0);1399 code_newreg(&cs,reg); 1354 1400 c = gen(c,&cs); // CMP e1,1 1355 1401 1356 1402 c1 = allocreg(&retregs,®,TYint); 1357 1403 op ^= (OPbool ^ OPnot); // switch operators 1358 1404 goto L2; 1359 1405 } 1360 1406 else if (sz <= REGSIZE && 1361 1407 // NEG bytereg is too expensive 1362 1408 (sz != 1 || config.target_cpu < TARGET_PentiumPro)) 1363 1409 { 1364 1410 retregs = *pretregs & (ALLREGS | mBP); 1365 1411 if (sz == 1 && !(retregs &= BYTEREGS)) 1366 1412 retregs = BYTEREGS; 1367 1413 c = codelem(e->E1,&retregs,FALSE); 1368 1414 reg = findreg(retregs); 1369 1415 c1 = getregs(retregs); 1370 c1 = gen2(c1,0xF7 ^ (sz == 1), modregrm(3,3,reg));// NEG reg1416 c1 = gen2(c1,0xF7 ^ (sz == 1),grex | modregrmx(3,3,reg)); // NEG reg 1371 1417 code_orflag(c1,CFpsw); 1372 1418 if (I32 && sz == SHORTSIZE) 1373 1419 code_orflag(c1,CFopsize); 1374 1420 L2: 1375 1421 c1 = 
genregs(c1,0x19,reg,reg); // SBB reg,reg 1422 code_orrex(c1, rex); 1376 1423 // At this point, reg==0 if e1==0, reg==-1 if e1!=0 1377 1424 if (op == OPnot) 1378 gen1(c1,0x40 + reg); // INC reg 1425 { 1426 if (I64) 1427 gen2(c1,0xFF,grex | modregrmx(3,0,reg)); // INC reg 1428 else 1429 gen1(c1,0x40 + reg); // INC reg 1430 } 1379 1431 else 1380 gen2(c1,0xF7,modregrm(3,3,reg)); // NEG reg 1432 gen2(c1,0xF7,grex | modregrmx(3,3,reg)); // NEG reg 1381 1433 if (*pretregs & mPSW) 1382 1434 { code_orflag(c1,CFpsw); 1383 1435 *pretregs &= ~mPSW; // flags are always set anyway 1384 1436 } 1385 1437 L4: 1386 1438 return cat3(c,c1,fixresult(e,retregs,pretregs)); 1387 1439 } 1388 1440 } 1389 1441 cnop = gennop(CNIL); 1390 1442 ctrue = gennop(CNIL); 1391 1443 c = logexp(e->E1,(op == OPnot) ? FALSE : TRUE,FLcode,ctrue); 1392 1444 forflags = *pretregs & mPSW; 1445 if (I64 && sz == 8) 1446 forflags |= 64; 1393 1447 assert(tysize(e->Ety) <= REGSIZE); // result better be int 1394 1448 cfalse = allocreg(pretregs,&reg,e->Ety); // allocate reg for result 1395 1449 for (c1 = cfalse; c1; c1 = code_next(c1)) 1396 1450 gen(ctrue,c1); // duplicate reg save code 1397 1451 cfalse = movregconst(cfalse,reg,0,forflags); // mov 0 into reg 1398 1452 regcon.immed.mval &= ~mask[reg]; // mark reg as unavail 1399 1453 ctrue = movregconst(ctrue,reg,1,forflags); // mov 1 into reg 1400 1454 regcon.immed.mval &= ~mask[reg]; // mark reg as unavail 1401 1455 genjmp(cfalse,JMP,FLcode,(block *) cnop); // skip over ctrue 1402 1456 c = cat4(c,cfalse,ctrue,cnop); 1403 1457 return c; 1404 1458 } 1405 1459 1406 1460 1407 1461 /************************ 1408 1462 * Complement operator 1409 1463 */ 1410 1464 1411 1465 code *cdcom(elem *e,regm_t *pretregs) 1412 1466 { unsigned reg,op; 1413 1467 regm_t retregs,possregs; 1414 1468 code *c,*c1,*cg; 1415 1469 tym_t tym; 1416 1470 int sz; 1417 1471 1418 1472 if (*pretregs == 0) 1419 1473 return codelem(e->E1,pretregs,FALSE); 1420 1474 tym = tybasic(e->Ety); 1421 1475 sz =
tysize[tym]; 1476 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 1477 unsigned grex = rex << 16; 1422 1478 possregs = (sz == 1) ? BYTEREGS : allregs; 1423 1479 retregs = *pretregs & possregs; 1424 1480 if (retregs == 0) 1425 1481 retregs = possregs; 1426 1482 c1 = codelem(e->E1,&retregs,FALSE); 1427 1483 cg = getregs(retregs); /* retregs will be destroyed */ 1428 1484 #if 0 1429 1485 if (sz == 4 * REGSIZE) 1430 1486 { 1431 1487 c = gen2(CNIL,0xF7,modregrm(3,2,AX)); // NOT AX 1432 1488 gen2(c,0xF7,modregrm(3,2,BX)); // NOT BX 1433 1489 gen2(c,0xF7,modregrm(3,2,CX)); // NOT CX 1434 1490 gen2(c,0xF7,modregrm(3,2,DX)); // NOT DX 1435 1491 } 1436 1492 else 1437 1493 #endif 1438 1494 { 1439 1495 reg = (sz <= REGSIZE) ? findreg(retregs) : findregmsw(retregs); 1440 1496 op = (sz == 1) ? 0xF6 : 0xF7; 1441 1497 c = genregs(CNIL,op,2,reg); // NOT reg 1498 code_orrex(c, rex); 1442 1499 if (sz == 2 * REGSIZE) 1443 1500 { reg = findreglsw(retregs); 1444 1501 genregs(c,op,2,reg); // NOT reg+1 1445 1502 } 1446 1503 } 1447 1504 return cat4(c1,cg,c,fixresult(e,retregs,pretregs)); 1448 1505 } 1449 1506 1450 1507 /************************ 1451 1508 * Bswap operator 1452 1509 */ 1453 1510 1454 1511 code *cdbswap(elem *e,regm_t *pretregs) 1455 1512 { unsigned reg,op; 1456 1513 regm_t retregs; 1457 1514 code *c,*c1,*cg; 1458 1515 tym_t tym; 1459 1516 int sz; 1460 1517 1461 1518 if (*pretregs == 0) 1462 1519 return codelem(e->E1,pretregs,FALSE); 1463 1520 1464 1521 tym = tybasic(e->Ety); 1465 1522 assert(tysize[tym] == 4); 1466 1523 retregs = *pretregs & allregs; 1467 1524 if (retregs == 0) 1468 1525 retregs = allregs; 1469 1526 c1 = codelem(e->E1,&retregs,FALSE); 1470 1527 cg = getregs(retregs); // retregs will be destroyed 1471 1528 reg = findreg(retregs); 1472 c = gen2(CNIL,0x0FC8 + reg,0); // BSWAP reg 1529 c = gen2(CNIL,0x0FC8 + (reg & 7),0); // BSWAP reg 1530 if (reg & 8) 1531 code_orrex(c, REX_B); 1473 1532 return cat4(c1,cg,c,fixresult(e,retregs,pretregs)); 1474 1533 } 1475 1534 
1476 1535 /************************* 1477 1536 * ?: operator 1478 1537 */ 1479 1538 1480 1539 code *cdcond(elem *e,regm_t *pretregs) 1481 1540 { regm_t psw; 1482 1541 code *cc,*c,*c1,*cnop1,*c2,*cnop2; 1483 1542 con_t regconold,regconsave; 1484 1543 unsigned stackpushold,stackpushsave; 1485 1544 int ehindexold,ehindexsave; 1486 1545 unsigned jop; 1487 1546 unsigned op1; 1488 1547 unsigned sz1; 1489 1548 unsigned sz2; 1490 1549 elem *e1; 1491 1550 elem *e2; 1492 1551 elem *e21; 1493 1552 elem *e22; 1494 1553 1495 1554 /* vars to save state of 8087 */ 1496 1555 int stackusedold,stackusedsave; 1497 1556 NDP _8087old[arraysize(_8087elems)]; 1498 1557 NDP _8087save[arraysize(_8087elems)]; 1499 1558 1500 1559 _chkstack(); 1501 1560 1502 1561 //dbg_printf("cdcond(e = %p, *pretregs = x%x)\n",e,*pretregs); 1503 1562 e1 = e->E1; 1504 1563 e2 = e->E2; 1505 1564 e21 = e2->E1; 1506 1565 e22 = e2->E2; 1507 1566 cc = docommas(&e1); 1508 1567 cgstate.stackclean++; 1509 1568 psw = *pretregs & mPSW; /* save PSW bit */ 1510 1569 op1 = e1->Eoper; 1511 1570 sz1 = tysize(e1->Ety); 1571 unsigned rex = (I64 && sz1 == 8) ? REX_W : 0; 1572 unsigned grex = rex << 16; 1512 1573 jop = jmpopcode(e1); 1513 1574 1514 1575 if (!OTrel(op1) && e1 == e21 && 1515 1576 sz1 <= REGSIZE && !tyfloating(e1->Ety)) 1516 1577 { // Recognize (e ? 
e : f) 1517 1578 regm_t retregs; 1518 1579 1519 1580 cnop1 = gennop(CNIL); 1520 1581 retregs = *pretregs | mPSW; 1521 1582 c = codelem(e1,&retregs,FALSE); 1522 1583 1523 1584 c = cat(c,cse_flush(1)); // flush CSEs to memory 1524 1585 c = genjmp(c,jop,FLcode,(block *)cnop1); 1525 1586 freenode(e21); 1526 1587 1527 1588 regconsave = regcon; 1528 1589 stackpushsave = stackpush; 1529 1590 1530 1591 retregs |= psw; 1531 1592 if (retregs & (mBP | ALLREGS)) … … 1561 1622 reg = findreg(retregs); 1562 1623 v1 = e21->EV.Vlong; 1563 1624 v2 = e22->EV.Vlong; 1564 1625 if (jop == JNC) 1565 1626 { v1 = v2; 1566 1627 v2 = e21->EV.Vlong; 1567 1628 } 1568 1629 1569 1630 opcode = 0x81; 1570 1631 switch (sz2) 1571 1632 { case 1: opcode--; 1572 1633 v1 = (signed char) v1; 1573 1634 v2 = (signed char) v2; 1574 1635 break; 1575 1636 case 2: v1 = (short) v1; 1576 1637 v2 = (short) v2; 1577 1638 break; 1578 1639 } 1579 1640 1580 1641 if (v1 == 0 && v2 == -1L) 1581 c = gen2(c,0xF6 + (opcode & 1), modregrm(3,2,reg)); // NOT reg1642 c = gen2(c,0xF6 + (opcode & 1),grex | modregrmx(3,2,reg)); // NOT reg 1582 1643 else 1583 1644 { 1584 1645 v1 -= v2; 1585 c = genc2(c,opcode, modregrm(3,4,reg),v1); // AND reg,v1-v21586 if (v2 == 1 )1646 c = genc2(c,opcode,grex | modregrmx(3,4,reg),v1); // AND reg,v1-v2 1647 if (v2 == 1 && !I64) 1587 1648 gen1(c,0x40 + reg); // INC reg 1588 else if (v2 == -1L )1649 else if (v2 == -1L && !I64) 1589 1650 gen1(c,0x48 + reg); // DEC reg 1590 1651 else 1591 genc2(c,opcode, modregrm(3,0,reg),v2); // ADD reg,v21652 genc2(c,opcode,grex | modregrmx(3,0,reg),v2); // ADD reg,v2 1592 1653 } 1593 1654 1594 1655 freenode(e21); 1595 1656 freenode(e22); 1596 1657 freenode(e2); 1597 1658 1598 1659 c = cat(c,fixresult(e,retregs,pretregs)); 1599 1660 goto Lret; 1600 1661 } 1601 1662 1602 1663 if (op1 != OPcond && op1 != OPandand && op1 != OPoror && 1603 1664 op1 != OPnot && op1 != OPbool && 1604 1665 e21->Eoper == OPconst && 1605 1666 sz1 <= REGSIZE && 1606 1667 *pretregs & (mBP | 
ALLREGS) && 1607 1668 tysize(e21->Ety) <= REGSIZE && !tyfloating(e21->Ety)) 1608 1669 { // Recognize (e ? c : f) 1609 1670 unsigned reg; 1610 1671 regm_t retregs; 1611 1672 1612 1673 cnop1 = gennop(CNIL); 1613 1674 retregs = mPSW; 1614 1675 jop = jmpopcode(e1); // get jmp condition 1615 1676 c = codelem(e1,&retregs,FALSE); 1616 1677 1617 1678 // Set the register with e21 without affecting the flags 1618 1679 retregs = *pretregs & (ALLREGS | mBP); 1619 1680 if (retregs & ~regcon.mvar) 1620 1681 retregs &= ~regcon.mvar; // don't disturb register variables 1621 c = regwithvalue(c,retregs,e21->EV.Vint,&reg,8); 1682 c = regwithvalue(c,retregs,e21->EV.Vint,&reg,sz1 == 8 ? 64|8 : 8); 1622 1683 retregs = mask[reg]; 1623 1684 1624 1685 c = cat(c,cse_flush(1)); // flush CSE's to memory 1625 1686 c = genjmp(c,jop,FLcode,(block *)cnop1); 1626 1687 freenode(e21); 1627 1688 1628 1689 regconsave = regcon; 1629 1690 stackpushsave = stackpush; 1630 1691 1631 1692 c2 = codelem(e22,&retregs,FALSE); 1632 1693 1633 1694 andregcon(&regconsave); 1634 1695 assert(stackpushsave == stackpush); 1635 1696 1636 1697 freenode(e2); 1637 1698 c = cat6(cc,c,c2,cnop1,fixresult(e,retregs,pretregs),NULL); 1638 1699 goto Lret; 1639 1700 } 1640 1701 1641 1702 cnop1 = gennop(CNIL); … … 1716 1777 * cg: [save reg code] ;if we must preserve reg 1717 1778 * CLR reg ;FALSE result (set Z also) 1718 1779 * JMP cnop2 1719 1780 * 1720 1781 * cnop1: NOP ;if e1 evaluates to TRUE 1721 1782 * [save reg code] ;preserve reg 1722 1783 * 1723 1784 * MOV reg,1 ;TRUE result 1724 1785 * or 1725 1786 * CLR reg ;if return result in flags 1726 1787 * INC reg 1727 1788 * 1728 1789 * cnop2: NOP ;mark end of code 1729 1790 */ 1730 1791 1731 1792 code *cdloglog(elem *e,regm_t *pretregs) 1732 1793 { regm_t retregs; 1733 1794 unsigned reg; 1734 1795 code *c; 1735 1796 code *cl,*cr,*cg,*cnop1,*cnop2,*cnop3; 1736 register code *c1; 1797 code *c1; 1737 1798 con_t regconsave; 1738 1799 unsigned stackpushsave; 1739 1800 int jcond; 1740 1801 elem
*e2; 1802 unsigned sz = tysize(e->Ety); 1741 1803 1742 1804 /* We can trip the assert with the following: */ 1743 1805 /* if ( (b<=a) ? (c<b || a<=c) : c>=a ) */ 1744 1806 /* We'll generate ugly code for it, but it's too obscure a case */ 1745 1807 /* to expend much effort on it. */ 1746 1808 /*assert(*pretregs != mPSW);*/ 1747 1809 1748 1810 cgstate.stackclean++; 1749 1811 cnop1 = gennop(CNIL); 1750 1812 cnop3 = gennop(CNIL); 1751 1813 jcond = 0; 1752 1814 e2 = e->E2; 1753 1815 cl = (e->Eoper == OPoror) 1754 1816 ? logexp(e->E1,jcond | 1,FLcode,cnop1) 1755 1817 : logexp(e->E1,jcond,FLcode,cnop3); 1756 1818 regconsave = regcon; 1757 1819 stackpushsave = stackpush; 1758 1820 if (*pretregs == 0) /* if don't want result */ 1759 1821 { int noreturn = el_noreturn(e2); 1760 1822 1761 1823 cr = codelem(e2,pretregs,FALSE); 1762 1824 if (noreturn) 1763 1825 { 1764 1826 regconsave.used |= regcon.used; 1765 1827 regcon = regconsave; 1766 1828 } 1767 1829 else 1768 1830 andregcon(&regconsave); 1769 1831 assert(stackpush == stackpushsave); 1770 1832 c = cat4(cl,cr,cnop3,cnop1); // eval code, throw away result 1771 1833 goto Lret; 1772 1834 } 1773 1835 cnop2 = gennop(CNIL); 1774 1836 if (tybasic(e2->Ety) == TYbool && 1775 tysize(e->Ety) == tysize(e2->Ety) && 1837 sz == tysize(e2->Ety) && 1776 1838 !(*pretregs & mPSW) && 1777 1839 e2->Eoper == OPcall) 1778 1840 { 1779 1841 cr = codelem(e2,pretregs,FALSE); 1780 1842 1781 1843 andregcon(&regconsave); 1782 1844 1783 1845 // stack depth should not change when evaluating E2 1784 1846 assert(stackpush == stackpushsave); 1785 1847 1786 assert(tysize(e->Ety) <= REGSIZE); // result better be int 1848 assert(sz <= 4); // result better be int 1787 1849 retregs = *pretregs & allregs; 1788 1850 cnop1 = cat(cnop1,allocreg(&retregs,&reg,TYint)); // allocate reg for result 1789 1851 cg = genjmp(NULL,JMP,FLcode,(block *) cnop2); // JMP cnop2 1790 1852 cnop1 = movregconst(cnop1,reg,e->Eoper == OPoror,0); // reg = 1 1791 1853 regcon.immed.mval &= ~mask[reg];
// mark reg as unavail 1792 1854 *pretregs = retregs; 1793 1855 if (e->Eoper == OPoror) 1794 1856 c = cat6(cl,cr,cnop3,cg,cnop1,cnop2); 1795 1857 else 1796 1858 c = cat6(cl,cr,cg,cnop3,cnop1,cnop2); 1797 1859 1798 1860 goto Lret; 1799 1861 } 1800 1862 cr = logexp(e2,1,FLcode,cnop1); 1801 1863 andregcon(&regconsave); 1802 1864 1803 1865 /* stack depth should not change when evaluating E2 */ 1804 1866 assert(stackpush == stackpushsave); 1805 1867 1806 assert(tysize(e->Ety) <= REGSIZE); // result better be int 1868 assert(sz <= 4); // result better be int 1807 1869 retregs = *pretregs & (ALLREGS | mBP); 1808 1870 if (!retregs) retregs = ALLREGS; // if mPSW only 1809 1871 cg = allocreg(&retregs,&reg,TYint); // allocate reg for result 1810 1872 for (c1 = cg; c1; c1 = code_next(c1)) // for each instruction 1811 1873 gen(cnop1,c1); // duplicate it 1812 1874 cg = movregconst(cg,reg,0,*pretregs & mPSW); // MOV reg,0 1813 1875 regcon.immed.mval &= ~mask[reg]; // mark reg as unavail 1814 1876 genjmp(cg,JMP,FLcode,(block *) cnop2); // JMP cnop2 1815 1877 cnop1 = movregconst(cnop1,reg,1,*pretregs & mPSW); // reg = 1 1816 1878 regcon.immed.mval &= ~mask[reg]; // mark reg as unavail 1817 1879 *pretregs = retregs; 1818 1880 c = cat6(cl,cr,cnop3,cg,cnop1,cnop2); 1819 1881 Lret: 1820 1882 cgstate.stackclean--; 1821 1883 return c; 1822 1884 } 1823 1885 1824 1886 1825 1887 /********************* 1826 1888 * Generate code for shift left or shift right (OPshl,OPshr,OPashr).
… … 1834 1896 regm_t retregs,rretregs; 1835 1897 code *cg,*cl,*cr; 1836 1898 code *c; 1837 1899 elem *e1; 1838 1900 elem *e2; 1839 1901 regm_t forccs,forregs; 1840 1902 bool e2isconst; 1841 1903 1842 1904 e1 = e->E1; 1843 1905 if (*pretregs == 0) // if don't want result 1844 1906 { c = codelem(e1,pretregs,FALSE); // eval left leaf 1845 1907 *pretregs = 0; // in case they got set 1846 1908 return cat(c,codelem(e->E2,pretregs,FALSE)); 1847 1909 } 1848 1910 1849 1911 tyml = tybasic(e1->Ety); 1850 1912 sz = tysize[tyml]; 1851 1913 assert(!tyfloating(tyml)); 1852 1914 uns = tyuns(tyml); 1853 1915 oper = e->Eoper; 1916 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 1917 unsigned grex = rex << 16; 1854 1918 1855 1919 #if SCPP 1856 1920 // Do this until the rest of the compiler does OPshr/OPashr correctly 1857 1921 if (oper == OPshr) 1858 1922 oper = (uns) ? OPshr : OPashr; 1859 1923 #endif 1860 1924 1861 1925 switch (oper) 1862 1926 { case OPshl: 1863 1927 s1 = 4; // SHL 1864 1928 s2 = 2; // RCL 1865 1929 break; 1866 1930 case OPshr: 1867 1931 s1 = 5; // SHR 1868 1932 s2 = 3; // RCR 1869 1933 break; 1870 1934 case OPashr: 1871 1935 s1 = 7; // SAR 1872 1936 s2 = 3; // RCR 1873 1937 break; 1874 1938 default: 1875 1939 assert(0); 1876 1940 } 1877 1941 1878 1942 c = cg = cr = CNIL; /* initialize */ 1879 1943 e2 = e->E2; 1880 1944 forccs = *pretregs & mPSW; /* if return result in CCs */ 1881 1945 forregs = *pretregs & (ALLREGS | mBP); // mask of possible return regs 1882 1946 e2isconst = FALSE; /* assume for the moment */ 1883 1947 byte = (sz == 1); 1884 1948 switch (e2->Eoper) 1885 1949 { 1886 1950 case OPconst: 1887 1951 e2isconst = TRUE; /* e2 is a constant */ 1888 1952 shiftcnt = e2->EV.Vint; /* get shift count */ 1889 if (( I32&& sz <= REGSIZE) ||1953 if ((!I16 && sz <= REGSIZE) || 1890 1954 shiftcnt <= 4 || /* if sequence of shifts */ 1891 1955 (sz == 2 && 1892 1956 (shiftcnt == 8 || config.target_cpu >= TARGET_80286)) || 1893 1957 (sz == 2 * REGSIZE && shiftcnt == 8 * 
REGSIZE) 1894 1958 ) 1895 1959 { retregs = (forregs) ? forregs 1896 1960 : ALLREGS; 1897 1961 if (byte) 1898 1962 { retregs &= BYTEREGS; 1899 1963 if (!retregs) 1900 1964 retregs = BYTEREGS; 1901 1965 } 1902 1966 else if (sz > REGSIZE && sz <= 2 * REGSIZE && 1903 1967 !(retregs & mMSW)) 1904 1968 retregs |= mMSW & ALLREGS; 1905 1969 if (s1 == 7) /* if arithmetic right shift */ 1906 1970 { 1907 1971 if (shiftcnt == 8) 1908 1972 retregs = mAX; 1909 1973 else if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE) … … 1918 1982 ) 1919 1983 { // Handle (shtlng)s << 16 1920 1984 regm_t r; 1921 1985 1922 1986 r = retregs & mMSW; 1923 1987 cl = codelem(e1->E1,&r,FALSE); // eval left leaf 1924 1988 cl = regwithvalue(cl,retregs & mLSW,0,&resreg,0); 1925 1989 cg = getregs(r); 1926 1990 retregs = r | mask[resreg]; 1927 1991 if (forccs) 1928 1992 { sreg = findreg(r); 1929 1993 cg = gentstreg(cg,sreg); 1930 1994 *pretregs &= ~mPSW; // already set 1931 1995 } 1932 1996 freenode(e1); 1933 1997 freenode(e2); 1934 1998 break; 1935 1999 } 1936 2000 1937 2001 // See if we should use LEA reg,xxx instead of shift 1938 if (I32 && shiftcnt >= 1 && shiftcnt <= 3 && 1939 sz == REGSIZE && oper == OPshl && 2002 if (!I16 && shiftcnt >= 1 && shiftcnt <= 3 && 2003 (sz == REGSIZE || (I64 && sz == 4)) && 2004 oper == OPshl && 1940 2005 e1->Eoper == OPvar && 1941 2006 !(*pretregs & mPSW) && 1942 2007 config.flags4 & CFG4speed 1943 2008 ) 1944 2009 { Symbol *s1 = e1->EV.sp.Vsym; 1945 2010 unsigned reg; 1946 2011 regm_t regm; 1947 2012 code cs; 1948 2013 1949 2014 if (isregvar(e1,&regm,&reg) && !(regm & retregs)) 1950 2015 { 1951 2016 cl = allocreg(&retregs,&resreg,e->Ety); 1952 2017 buildEA(&cs,-1,reg,1 << shiftcnt,0); 1953 2018 cs.Iop = 0x8D; 1954 cs.Irm |= modregrm(0,resreg,0); 2019 code_newreg(&cs,resreg); 1955 2020 cs.Iflags = 0; 1956 2021 cg = gen(NULL,&cs); // LEA resreg,[reg * ss] 1957 2022 freenode(e1); 1958 2023 freenode(e2); 1959 2024 break; 1960 2025 } 1961 2026 } 1962 2027 1963 2028 cl =
codelem(e1,&retregs,FALSE); // eval left leaf 1964 2029 //assert((retregs & regcon.mvar) == 0); 1965 2030 cg = getregs(retregs); // trash these regs 1966 2031 1967 2032 { 1968 2033 if (sz == 2 * REGSIZE) 1969 2034 { resreg = findregmsw(retregs); 1970 2035 sreg = findreglsw(retregs); 1971 2036 } 1972 2037 else 1973 2038 resreg = findreg(retregs); 1974 2039 if (config.target_cpu >= TARGET_80286 && 1975 2040 sz <= REGSIZE) 1976 2041 { 1977 2042 /* SHL resreg,shiftcnt */ 1978 2043 assert(!(sz == 1 && (mask[resreg] & ~BYTEREGS))); 1979 c = genc2(CNIL,0xC1 ^ byte, modregrm(3,s1,resreg),shiftcnt);2044 c = genc2(CNIL,0xC1 ^ byte,grex | modregxrmx(3,s1,resreg),shiftcnt); 1980 2045 if (shiftcnt == 1) 1981 2046 c->Iop += 0x10; /* short form of shift */ 1982 2047 // See if we need operand size prefix 1983 if ( I32&& oper != OPshl && sz == 2)2048 if (!I16 && oper != OPshl && sz == 2) 1984 2049 c->Iflags |= CFopsize; 1985 2050 if (forccs) 1986 2051 c->Iflags |= CFpsw; // need flags result 1987 2052 } 1988 2053 else if (shiftcnt == 8) 1989 { if (!(retregs & BYTEREGS) )2054 { if (!(retregs & BYTEREGS) || resreg >= 4) 1990 2055 { 1991 2056 cl = cat(cl,cg); 1992 2057 goto L1; 1993 2058 } 1994 2059 1995 2060 if (pass != PASSfinal && (!forregs || forregs & (mSI | mDI))) 1996 2061 { 1997 2062 // e1 might get into SI or DI in a later pass, 1998 2063 // so don't put CX into a register 1999 2064 cg = cat(cg, getregs(mCX)); 2000 2065 } 2001 2066 2002 2067 if (oper == OPshl) 2003 2068 { /* MOV regH,regL XOR regL,regL */ 2069 assert(resreg < 4 && !rex); 2004 2070 c = genregs(CNIL,0x8A,resreg+4,resreg); 2005 2071 genregs(c,0x32,resreg,resreg); 2006 2072 } 2007 2073 else // OPshr/OPashr 2008 2074 { 2009 2075 /* MOV regL,regH */ 2010 2076 c = genregs(CNIL,0x8A,resreg,resreg+4); 2011 2077 if (oper == OPashr) 2012 2078 gen1(c,0x98); /* CBW */ 2013 2079 else 2014 2080 genregs(c,0x32,resreg+4,resreg+4); /* CLR regH */ 2015 2081 } 2016 2082 if (forccs) 2017 2083 gentstreg(c,resreg); 2018 2084 } 2019 
2085 else if (shiftcnt == REGSIZE * 8) // it's an lword 2020 2086 { 2021 2087 if (oper == OPshl) 2022 2088 swap((int *) &resreg,(int *) &sreg); 2023 2089 c = genmovreg(CNIL,sreg,resreg); // MOV sreg,resreg … … 2066 2132 else 2067 2133 { 2068 2134 if (!(retregs & mMSW)) 2069 2135 retregs = ALLREGS & ~mCX; 2070 2136 } 2071 2137 cl = codelem(e->E1,&retregs,FALSE); /* eval left leaf */ 2072 2138 2073 2139 if (sz <= REGSIZE) 2074 2140 resreg = findreg(retregs); 2075 2141 else 2076 2142 { 2077 2143 resreg = findregmsw(retregs); 2078 2144 sreg = findreglsw(retregs); 2079 2145 } 2080 2146 L1: 2081 2147 rretregs = mCX; /* CX is shift count */ 2082 2148 if (sz <= REGSIZE) 2083 2149 { 2084 2150 cr = scodelem(e2,&rretregs,retregs,FALSE); /* get rvalue */ 2085 2151 cg = getregs(retregs); /* trash these regs */ 2086 c = gen2(CNIL,0xD3 ^ byte, modregrm(3,s1,resreg)); /* Sxx resreg,CX */2152 c = gen2(CNIL,0xD3 ^ byte,grex | modregrmx(3,s1,resreg)); /* Sxx resreg,CX */ 2087 2153 2088 2154 // Note that a shift by CL does not set the flags if 2089 2155 // CL == 0. If e2 is a constant, we know it isn't 0 2090 2156 // (it would have been optimized out). 2091 2157 if (e2isconst) 2092 2158 *pretregs &= mBP | ALLREGS; // flags already set with result 2093 2159 } 2094 2160 else if (sz == 2 * REGSIZE && 2095 2161 config.target_cpu >= TARGET_80386) 2096 2162 { unsigned hreg,lreg; 2097 2163 2098 2164 hreg = resreg; 2099 2165 lreg = sreg; 2100 2166 if (e2isconst) 2101 2167 { 2102 2168 cr = NULL; 2103 2169 cg = getregs(retregs); 2104 2170 if (shiftcnt & (REGSIZE * 8)) 2105 2171 { 2106 2172 if (oper == OPshr) … … 2309 2375 return cdind87(e, pretregs); 2310 2376 } 2311 2377 } 2312 2378 2313 2379 e1 = e->E1; 2314 2380 assert(e1); 2315 2381 switch (tym) 2316 2382 { case TYstruct: 2317 2383 case TYarray: 2318 2384 // This case should never happen, why is it here? 
2319 2385 tym = TYnptr; // don't confuse allocreg() 2320 2386 #if !TARGET_FLAT 2321 2387 if (*pretregs & (mES | mCX) || e->Ety & mTYfar) 2322 2388 tym = TYfptr; 2323 2389 #endif 2324 2390 2325 2391 #if 0 2326 2392 c = getlvalue(&cs,e,RMload); // get addressing mode 2327 2393 if (*pretregs == 0) 2328 2394 return c; 2329 idxregs = idxregm(cs.Irm,cs.Isib); /* mask of index regs used */ 2395 idxregs = idxregm(&cs); // mask of index regs used 2330 2396 c = cat(c,fixresult(e,idxregs,pretregs)); 2331 2397 return c; 2332 2398 #endif 2333 2399 break; 2334 2400 } 2335 2401 sz = tysize[tym]; 2336 2402 byte = tybyte(tym) != 0; 2337 2403 2338 2404 c = getlvalue(&cs,e,RMload); // get addressing mode 2339 2405 /*fprintf(stderr,"cd2 :\n"); WRcodlst(c);*/ 2340 2406 if (*pretregs == 0) 2341 2407 return c; 2342 2408 2343 idxregs = idxregm(cs.Irm,cs.Isib); /* mask of index regs used */ 2409 idxregs = idxregm(&cs); // mask of index regs used 2344 2410 2345 2411 if (*pretregs == mPSW) 2346 2412 { 2347 if (I32 && tym == TYfloat) 2413 if (!I16 && tym == TYfloat) 2348 2414 { retregs = ALLREGS & ~idxregs; 2349 2415 c = cat(c,allocreg(&retregs,&reg,TYfloat)); 2350 2416 cs.Iop = 0x8B; 2351 cs.Irm |= modregrm(0,reg,0); 2352 ce = gen(CNIL,&cs); /* MOV reg,lsw */ 2353 gen2(ce,0xD1,modregrm(3,4,reg)); /* SHL reg,1 */ 2417 code_newreg(&cs,reg); 2418 ce = gen(CNIL,&cs); // MOV reg,lsw 2419 gen2(ce,0xD1,modregrmx(3,4,reg)); // SHL reg,1 2354 2420 } 2355 2421 else if (sz <= REGSIZE) 2356 2422 { 2357 2423 cs.Iop = 0x81 ^ byte; 2358 2424 cs.Irm |= modregrm(0,7,0); 2359 2425 cs.IFL2 = FLconst; 2360 2426 cs.IEV2.Vint = 0; 2361 2427 ce = gen(CNIL,&cs); /* CMP [idx],0 */ 2362 2428 } 2363 else if (I32 && sz == REGSIZE + 2) // if far pointer 2429 else if (!I16 && sz == REGSIZE + 2) // if far pointer 2364 2430 { retregs = ALLREGS & ~idxregs; 2365 2431 c = cat(c,allocreg(&retregs,&reg,TYint)); 2366 2432 cs.Iop = 0x0F; 2367 2433 cs.Iop2 = 0xB7; 2368 2434 cs.Irm |= modregrm(0,reg,0); 2369 2435 getlvalue_msw(&cs); 2370
2436 ce = gen(CNIL,&cs); /* MOVZX reg,msw */ 2371 2437 goto L4; 2372 2438 } 2373 2439 else if (sz <= 2 * REGSIZE) 2374 2440 { retregs = ALLREGS & ~idxregs; 2375 2441 c = cat(c,allocreg(&retregs,&reg,TYint)); 2376 2442 cs.Iop = 0x8B; 2377 2443 cs.Irm |= modregrm(0,reg,0); 2378 2444 getlvalue_msw(&cs); 2379 2445 ce = gen(CNIL,&cs); /* MOV reg,msw */ 2380 2446 if (I32) 2381 2447 { if (tym == TYdouble || tym == TYdouble_alias) 2382 2448 gen2(ce,0xD1,modregrm(3,4,reg)); // SHL reg,1 2383 2449 } … … 2407 2473 2408 2474 /* Optimizer should not CSE these, as the result is worse code! */ 2409 2475 assert(!e->Ecount); 2410 2476 2411 2477 cs.Iop = 0xFF; 2412 2478 cs.Irm |= modregrm(0,6,0); 2413 2479 cs.IEVoffset1 += 8 - REGSIZE; 2414 2480 stackchanged = 1; 2415 2481 i = 8 - REGSIZE; 2416 2482 do 2417 2483 { 2418 2484 c = gen(c,&cs); /* PUSH EA+i */ 2419 2485 c = genadjesp(c,REGSIZE); 2420 2486 cs.IEVoffset1 -= REGSIZE; 2421 2487 stackpush += REGSIZE; 2422 2488 i -= REGSIZE; 2423 2489 } 2424 2490 while (i >= 0); 2425 2491 goto L3; 2426 2492 } 2427 if (!I32 && sz == 8) 2493 if (I16 && sz == 8) 2428 2494 retregs = DOUBLEREGS_16; 2429 2495 2430 2496 /* Watch out for loading an lptr from an lptr! We must have */ 2431 2497 /* the offset loaded into a different register.
*/ 2432 2498 /*if (retregs & mES && (cs.Iflags & CFSEG) == CFes) 2433 2499 retregs = ALLREGS;*/ 2434 2500 2435 2501 { 2436 2502 assert(!byte || retregs & BYTEREGS); 2437 2503 c = cat(c,allocreg(&retregs,&reg,tym)); /* alloc registers */ 2438 2504 } 2439 2505 if (sz <= REGSIZE) 2440 2506 { 2441 2507 cs.Iop = 0x8B ^ byte; 2442 L2: cs.Irm |= modregrm(0,reg,0); 2508 L2: code_newreg(&cs,reg); 2443 2509 ce = gen(CNIL,&cs); /* MOV reg,[idx] */ 2444 2510 } 2445 2511 else if ((tym == TYfptr || tym == TYhptr) && retregs & mES) 2446 2512 { 2447 2513 cs.Iop = 0xC4; /* LES reg,[idx] */ 2448 2514 goto L2; 2449 2515 } 2450 2516 else if (sz <= 2 * REGSIZE) 2451 2517 { unsigned lsreg; 2452 2518 2453 2519 cs.Iop = 0x8B; 2454 2520 /* Be careful not to interfere with index registers */ 2455 2521 if (I32) 2456 2522 { 2457 2523 /* Can't handle if both result registers are used in */ 2458 2524 /* the addressing mode. */ 2459 2525 if ((retregs & idxregs) == retregs) 2460 2526 { 2461 2527 retregs = mMSW & allregs & ~idxregs; 2462 2528 if (!retregs) … … 2501 2567 ce = gen(CNIL,&cs); // MOV reg,msw 2502 2568 if (sz == REGSIZE + 2) 2503 2569 ce->Iflags |= CFopsize; 2504 2570 getlvalue_lsw(&cs); // MOV lsreg,lsw 2505 2571 } 2506 2572 NEWREG(cs.Irm,lsreg); 2507 2573 gen(ce,&cs); 2508 2574 } 2509 2575 else 2510 2576 { 2511 2577 /* Index registers are always the lsw!
*/ 2512 2578 cs.Irm |= modregrm(0,reg,0); 2513 2579 getlvalue_msw(&cs); 2514 2580 ce = gen(CNIL,&cs); /* MOV reg,msw */ 2515 2581 lsreg = findreglsw(retregs); 2516 2582 NEWREG(cs.Irm,lsreg); 2517 2583 getlvalue_lsw(&cs); /* MOV lsreg,lsw */ 2518 2584 gen(ce,&cs); 2519 2585 } 2520 2586 } 2521 else if ( !I32&& sz == 8)2587 else if (I16 && sz == 8) 2522 2588 { 2523 2589 assert(reg == AX); 2524 2590 cs.Iop = 0x8B; 2525 2591 cs.IEVoffset1 += 6; 2526 2592 ce = gen(CNIL,&cs); /* MOV AX,EA+6 */ 2527 2593 cs.Irm |= modregrm(0,CX,0); 2528 2594 cs.IEVoffset1 -= 4; 2529 2595 gen(ce,&cs); /* MOV CX,EA+2 */ 2530 2596 NEWREG(cs.Irm,DX); 2531 2597 cs.IEVoffset1 -= 2; 2532 2598 gen(ce,&cs); /* MOV DX,EA */ 2533 2599 cs.IEVoffset1 += 4; 2534 2600 NEWREG(cs.Irm,BX); 2535 2601 gen(ce,&cs); /* MOV BX,EA+4 */ 2536 2602 } 2537 2603 else 2538 2604 assert(0); 2539 2605 c = cat(c,ce); 2540 2606 L3: 2541 2607 c = cat(c,fixresult(e,retregs,pretregs)); 2542 2608 } 2543 2609 /*fprintf(stderr,"cdafter :\n"); WRcodlst(c);*/ 2544 2610 return c; 2545 2611 } 2546 2612 2547 2613 2548 2614 2549 2615 #if TARGET_FLAT 2550 2616 #define cod2_setES(ty) NULL 2551 2617 #else 2552 2618 /******************************** 2553 2619 * Generate code to load ES with the right segment value, 2554 2620 * do nothing if e is a far pointer. 
2555 2621 */ 2556 2622 2557 STATIC code * cod2_setES(tym_t ty)2623 STATIC code *cod2_setES(tym_t ty) 2558 2624 { code *c2; 2559 2625 int push; 2560 2626 2561 2627 c2 = CNIL; 2562 2628 switch (tybasic(ty)) 2563 2629 { 2564 2630 case TYnptr: 2565 2631 if (!(config.flags3 & CFG3eseqds)) 2566 2632 { push = 0x1E; /* PUSH DS */ 2567 2633 goto L1; 2568 2634 } 2569 2635 break; 2570 2636 case TYcptr: 2571 2637 push = 0x0E; /* PUSH CS */ 2572 2638 goto L1; 2573 2639 case TYsptr: 2574 2640 if ((config.wflags & WFssneds) || !(config.flags3 & CFG3eseqds)) 2575 2641 { push = 0x16; /* PUSH SS */ 2576 2642 L1: 2577 2643 /* Must load ES */ 2578 2644 c2 = getregs(mES); 2579 2645 c2 = gen1(c2,push); 2580 2646 gen1(c2,0x07); /* POP ES */ 2581 2647 } 2582 2648 break; 2583 2649 } 2584 2650 return c2; 2585 2651 } 2586 2652 #endif 2587 2653 2588 2654 /******************************** 2589 2655 * Generate code for intrinsic strlen(). 2590 2656 */ 2591 2657 2592 2658 code *cdstrlen( elem *e, regm_t *pretregs) 2593 2659 { code *c1,*c2,*c3,*c4; 2594 regm_t retregs;2595 tym_t ty1;2596 2660 2597 2661 /* Generate strlen in CX: 2598 2662 LES DI,e1 2599 2663 CLR AX ;scan for 0 2600 2664 MOV CX,-1 ;largest possible string 2601 2665 REPNE SCASB 2602 2666 NOT CX 2603 2667 DEC CX 2604 2668 */ 2605 2669 2606 re tregs = mDI;2607 ty 1 = e->E1->Ety;2670 regm_t retregs = mDI; 2671 tym_t ty1 = e->E1->Ety; 2608 2672 if (!tyreg(ty1)) 2609 2673 retregs |= mES; 2610 2674 c1 = codelem(e->E1,&retregs,FALSE); 2611 2675 2612 2676 /* Make sure ES contains proper segment value */ 2613 2677 c2 = cod2_setES(ty1); 2614 2678 2679 unsigned char rex = I64 ? REX_W : 0; 2680 2615 2681 c3 = getregs_imm(mAX | mCX); 2616 2682 c3 = movregconst(c3,AX,0,1); /* MOV AL,0 */ 2617 c3 = movregconst(c3,CX,-1 ,0); /* MOV CX,-1 */2683 c3 = movregconst(c3,CX,-1LL,I64 ? 
64 : 0); // MOV CX,-1 2618 2684 c3 = cat(c3,getregs(mDI|mCX)); 2619 2685 c3 = gen1(c3,0xF2); /* REPNE */ 2620 2686 gen1(c3,0xAE); /* SCASB */ 2621 2687 genregs(c3,0xF7,2,CX); /* NOT CX */ 2622 c4 = gen1(CNIL,0x48 + CX); /* DEC CX */ 2688 code_orrex(c3,rex); 2689 if (I64) 2690 c4 = gen2(CNIL,0xFF,(rex << 16) | modregrm(3,1,CX)); // DEC reg 2691 else 2692 c4 = gen1(CNIL,0x48 + CX); // DEC CX 2623 2693 2624 2694 if (*pretregs & mPSW) 2625 2695 { 2626 2696 c4->Iflags |= CFpsw; 2627 2697 *pretregs &= ~mPSW; 2628 2698 } 2629 2699 return cat6(c1,c2,c3,c4,fixresult(e,mCX,pretregs),CNIL); 2630 2700 } 2631 2701 2632 2702 2633 2703 /********************************* 2634 2704 * Generate code for strcmp(s1,s2) intrinsic. 2635 2705 */ 2636 2706 2637 2707 code *cdstrcmp( elem *e, regm_t *pretregs) 2638 2708 { code *c1,*c1a,*c2,*c3,*c4; 2639 regm_t retregs1;2640 regm_t retregs;2641 tym_t ty1,ty2;2642 2709 char need_DS; 2643 2710 int segreg; 2644 2711 2645 2712 /* 2646 2713 MOV SI,s1 ;get destination pointer (s1) 2647 2714 MOV CX,s1+2 2648 2715 LES DI,s2 ;get source pointer (s2) 2649 2716 PUSH DS 2650 2717 MOV DS,CX 2651 2718 CLR AX ;scan for 0 2652 2719 MOV CX,-1 ;largest possible string 2653 2720 REPNE SCASB 2654 2721 NOT CX ;CX = string length of s2 2655 2722 SUB DI,CX ;point DI back to beginning 2656 2723 REPE CMPSB ;compare string 2657 2724 POP DS 2658 2725 JE L1 ;strings are equal 2659 2726 SBB AX,AX 2660 2727 SBB AX,-1 2661 2728 L1: 2662 2729 */ 2663 2730 2664 re tregs1 = mSI;2665 ty 1 = e->E1->Ety;2731 regm_t retregs1 = mSI; 2732 tym_t ty1 = e->E1->Ety; 2666 2733 if (!tyreg(ty1)) 2667 2734 retregs1 |= mCX; 2668 2735 c1 = codelem(e->E1,&retregs1,FALSE); 2669 2736 2670 re tregs = mDI;2671 ty 2 = e->E2->Ety;2737 regm_t retregs = mDI; 2738 tym_t ty2 = e->E2->Ety; 2672 2739 if (!tyreg(ty2)) 2673 2740 retregs |= mES; 2674 2741 c1 = cat(c1,scodelem(e->E2,&retregs,retregs1,FALSE)); 2675 2742 2676 2743 /* Make sure ES contains proper segment value */ 2677 2744 c2 = cod2_setES(ty2); 
2678 2745 c3 = getregs_imm(mAX | mCX); 2746 2747 unsigned char rex = I64 ? REX_W : 0; 2679 2748 2680 2749 /* Load DS with right value */ 2681 2750 switch (tybasic(ty1)) 2682 2751 { 2683 2752 case TYnptr: 2684 2753 need_DS = FALSE; 2685 2754 break; 2686 2755 case TYsptr: 2687 2756 if (config.wflags & WFssneds) /* if sptr can't use DS segment */ 2688 2757 segreg = SEG_SS; 2689 2758 else 2690 2759 segreg = SEG_DS; 2691 2760 goto L1; 2692 2761 case TYcptr: 2693 2762 segreg = SEG_CS; 2694 2763 L1: 2695 2764 c3 = gen1(c3,0x1E); /* PUSH DS */ 2696 2765 gen1(c3,0x06 + (segreg << 3)); /* PUSH segreg */ 2697 2766 gen1(c3,0x1F); /* POP DS */ 2698 2767 need_DS = TRUE; 2699 2768 break; 2700 2769 case TYfptr: 2701 2770 case TYvptr: 2702 2771 case TYhptr: 2703 2772 c3 = gen1(c3,0x1E); /* PUSH DS */ 2704 2773 gen2(c3,0x8E,modregrm(3,SEG_DS,CX)); /* MOV DS,CX */ 2705 2774 need_DS = TRUE; 2706 2775 break; 2707 2776 default: 2708 2777 assert(0); 2709 2778 } 2710 2779 2711 2780 c3 = movregconst(c3,AX,0,0); /* MOV AX,0 */ 2712 c3 = movregconst(c3,CX,-1 ,0); /* MOV CX,-1 */2781 c3 = movregconst(c3,CX,-1LL,I64 ? 
64 : 0); // MOV CX,-1 2713 2782 c3 = cat(c3,getregs(mSI|mDI|mCX)); 2714 2783 c3 = gen1(c3,0xF2); /* REPNE */ 2715 2784 gen1(c3,0xAE); /* SCASB */ 2716 2785 genregs(c3,0xF7,2,CX); /* NOT CX */ 2786 code_orrex(c3,rex); 2717 2787 genregs(c3,0x2B,DI,CX); /* SUB DI,CX */ 2788 code_orrex(c3,rex); 2718 2789 gen1(c3,0xF3); /* REPE */ 2719 2790 gen1(c3,0xA6); /* CMPSB */ 2720 2791 if (need_DS) 2721 2792 gen1(c3,0x1F); /* POP DS */ 2722 2793 c4 = gennop(CNIL); 2723 2794 if (*pretregs != mPSW) /* if not flags only */ 2724 2795 { 2725 2796 genjmp(c3,JE,FLcode,(block *) c4); /* JE L1 */ 2726 2797 c3 = cat(c3,getregs(mAX)); 2727 2798 genregs(c3,0x1B,AX,AX); /* SBB AX,AX */ 2728 genc2(c3,0x81,modregrm(3,3,AX),(targ_uns)-1); /* SBB AX,-1 */ 2799 code_orrex(c3,rex); 2800 genc2(c3,0x81,(rex << 16) | modregrm(3,3,AX),(targ_uns)-1); // SBB AX,-1 2729 2801 } 2730 2802 2731 2803 *pretregs &= ~mPSW; 2732 2804 return cat6(c1,c2,c3,c4,fixresult(e,mAX,pretregs),CNIL); 2733 2805 } 2734 2806 2735 2807 /********************************* 2736 2808 * Generate code for memcmp(s1,s2,n) intrinsic. 
2737 2809 */ 2738 2810 2739 2811 code *cdmemcmp(elem *e,regm_t *pretregs) 2740 2812 { code *c1,*c2,*c3,*c4; 2741 regm_t retregs1;2742 regm_t retregs;2743 regm_t retregs3;2744 tym_t ty1,ty2;2745 2813 char need_DS; 2746 2814 int segreg; 2747 elem *e1;2748 2815 2749 2816 /* 2750 2817 MOV SI,s1 ;get destination pointer (s1) 2751 2818 MOV DX,s1+2 2752 2819 LES DI,s2 ;get source pointer (s2) 2753 2820 MOV CX,n ;get number of bytes to compare 2754 2821 PUSH DS 2755 2822 MOV DS,DX 2756 2823 XOR AX,AX 2757 2824 REPE CMPSB ;compare string 2758 2825 POP DS 2759 2826 JE L1 ;strings are equal 2760 2827 SBB AX,AX 2761 2828 SBB AX,-1 2762 2829 L1: 2763 2830 */ 2764 2831 2765 e 1 = e->E1;2832 elem *e1 = e->E1; 2766 2833 assert(e1->Eoper == OPparam); 2767 2834 2768 2835 // Get s1 into DX:SI 2769 re tregs1 = mSI;2770 ty 1 = e1->E1->Ety;2836 regm_t retregs1 = mSI; 2837 tym_t ty1 = e1->E1->Ety; 2771 2838 if (!tyreg(ty1)) 2772 2839 retregs1 |= mDX; 2773 2840 c1 = codelem(e1->E1,&retregs1,FALSE); 2774 2841 2775 2842 // Get s2 into ES:DI 2776 re tregs = mDI;2777 ty 2 = e1->E2->Ety;2843 regm_t retregs = mDI; 2844 tym_t ty2 = e1->E2->Ety; 2778 2845 if (!tyreg(ty2)) 2779 2846 retregs |= mES; 2780 2847 c1 = cat(c1,scodelem(e1->E2,&retregs,retregs1,FALSE)); 2781 2848 freenode(e1); 2782 2849 2783 2850 // Get nbytes into CX 2784 re tregs3 = mCX;2851 regm_t retregs3 = mCX; 2785 2852 c1 = cat(c1,scodelem(e->E2,&retregs3,retregs | retregs1,FALSE)); 2786 2853 2787 2854 /* Make sure ES contains proper segment value */ 2788 2855 c2 = cod2_setES(ty2); 2789 2856 2790 2857 /* Load DS with right value */ 2791 2858 c3 = NULL; 2792 2859 switch (tybasic(ty1)) 2793 2860 { 2794 2861 case TYnptr: 2795 2862 need_DS = FALSE; 2796 2863 break; 2797 2864 case TYsptr: 2798 2865 if (config.wflags & WFssneds) /* if sptr can't use DS segment */ 2799 2866 segreg = SEG_SS; 2800 2867 else 2801 2868 segreg = SEG_DS; 2802 2869 goto L1; 2803 2870 case TYcptr: 2804 2871 segreg = SEG_CS; … … 2863 2930 CLR AX ;scan for 0 2864 
2931 MOV CX,-1 ;largest possible string 2865 2932 REPNE SCASB ;find end of s2 2866 2933 NOT CX ;CX = strlen(s2) + 1 (for EOS) 2867 2934 SUB DI,CX 2868 2935 MOV SI,DI 2869 2936 PUSH DS 2870 2937 PUSH ES 2871 2938 LES DI,s1 2872 2939 POP DS 2873 2940 MOV AX,DI ;return value is s1 2874 2941 REP MOVSB 2875 2942 POP DS 2876 2943 */ 2877 2944 2878 2945 stackchanged = 1; 2879 2946 retregs = mDI; 2880 2947 ty2 = tybasic(e->E2->Ety); 2881 2948 if (!tyreg(ty2)) 2882 2949 retregs |= mES; 2950 unsigned char rex = I64 ? REX_W : 0; 2883 2951 c1 = codelem(e->E2,&retregs,FALSE); 2884 2952 2885 2953 /* Make sure ES contains proper segment value */ 2886 2954 c2 = cod2_setES(ty2); 2887 2955 c3 = getregs_imm(mAX | mCX); 2888 2956 c3 = movregconst(c3,AX,0,1); /* MOV AL,0 */ 2889 c3 = movregconst(c3,CX,-1, 0); /* MOV CX,-1 */2957 c3 = movregconst(c3,CX,-1,I64?64:0); // MOV CX,-1 2890 2958 c3 = cat(c3,getregs(mAX|mCX|mSI|mDI)); 2891 2959 c3 = gen1(c3,0xF2); /* REPNE */ 2892 2960 gen1(c3,0xAE); /* SCASB */ 2893 2961 genregs(c3,0xF7,2,CX); /* NOT CX */ 2962 code_orrex(c3,rex); 2894 2963 genregs(c3,0x2B,DI,CX); /* SUB DI,CX */ 2964 code_orrex(c3,rex); 2895 2965 genmovreg(c3,SI,DI); /* MOV SI,DI */ 2966 code_orrex(c3,rex); 2896 2967 2897 2968 /* Load DS with right value */ 2898 2969 switch (ty2) 2899 2970 { 2900 2971 case TYnptr: 2901 2972 need_DS = FALSE; 2902 2973 break; 2903 2974 case TYsptr: 2904 2975 if (config.wflags & WFssneds) /* if sptr can't use DS segment */ 2905 2976 segreg = SEG_SS; 2906 2977 else 2907 2978 segreg = SEG_DS; 2908 2979 goto L1; 2909 2980 case TYcptr: 2910 2981 segreg = SEG_CS; 2911 2982 L1: 2912 2983 c3 = gen1(c3,0x1E); /* PUSH DS */ 2913 2984 gen1(c3,0x06 + (segreg << 3)); /* PUSH segreg */ 2914 2985 genadjesp(c3,REGSIZE * 2); 2915 2986 need_DS = TRUE; … … 2923 2994 default: 2924 2995 assert(0); 2925 2996 } 2926 2997 2927 2998 retregs = mDI; 2928 2999 ty1 = tybasic(e->E1->Ety); 2929 3000 if (!tyreg(ty1)) 2930 3001 retregs |= mES; 2931 3002 c3 = 
cat(c3,scodelem(e->E1,&retregs,mCX|mSI,FALSE)); 2932 3003 c3 = cat(c3,getregs(mAX|mCX|mSI|mDI)); 2933 3004 2934 3005 /* Make sure ES contains proper segment value */ 2935 3006 if (ty2 != TYnptr || ty1 != ty2) 2936 3007 c4 = cod2_setES(ty1); 2937 3008 else 2938 3009 c4 = CNIL; /* ES is already same as DS */ 2939 3010 2940 3011 if (need_DS) 2941 3012 c4 = gen1(c4,0x1F); /* POP DS */ 2942 3013 if (*pretregs) 2943 c4 = genmovreg(c4,AX,DI); /* MOV AX,DI */ 3014 { c4 = genmovreg(c4,AX,DI); /* MOV AX,DI */ 3015 code_orrex(c4,rex); 3016 } 2944 3017 c4 = gen1(c4,0xF3); /* REP */ 2945 3018 gen1(c4,0xA4); /* MOVSB */ 2946 3019 2947 3020 if (need_DS) 2948 3021 { gen1(c4,0x1F); /* POP DS */ 2949 3022 genadjesp(c4,-(REGSIZE * 2)); 2950 3023 } 2951 3024 return cat6(c1,c2,c3,c4,fixresult(e,mAX | mES,pretregs),CNIL); 2952 3025 } 2953 3026 2954 3027 /********************************* 2955 3028 * Generate code for memcpy(s1,s2,n) intrinsic. 2956 3029 * OPmemcpy 2957 3030 * / \ 2958 3031 * s1 OPparam 2959 3032 * / \ 2960 3033 * s2 n 2961 3034 */ 2962 3035 2963 3036 code *cdmemcpy(elem *e,regm_t *pretregs) … … 2986 3059 assert(e2->Eoper == OPparam); 2987 3060 2988 3061 // Get s2 into DX:SI 2989 3062 retregs2 = mSI; 2990 3063 ty2 = e2->E1->Ety; 2991 3064 if (!tyreg(ty2)) 2992 3065 retregs2 |= mDX; 2993 3066 c1 = codelem(e2->E1,&retregs2,FALSE); 2994 3067 2995 3068 // Get nbytes into CX 2996 3069 retregs3 = mCX; 2997 3070 c1 = cat(c1,scodelem(e2->E2,&retregs3,retregs2,FALSE)); 2998 3071 freenode(e2); 2999 3072 3000 3073 // Get s1 into ES:DI 3001 3074 retregs1 = mDI; 3002 3075 ty1 = e->E1->Ety; 3003 3076 if (!tyreg(ty1)) 3004 3077 retregs1 |= mES; 3005 3078 c1 = cat(c1,scodelem(e->E1,&retregs1,retregs2 | retregs3,FALSE)); 3079 3080 unsigned char rex = I64 ? 
REX_W : 0; 3006 3081 3007 3082 /* Make sure ES contains proper segment value */ 3008 3083 c2 = cod2_setES(ty1); 3009 3084 3010 3085 /* Load DS with right value */ 3011 3086 c3 = NULL; 3012 3087 switch (tybasic(ty2)) 3013 3088 { 3014 3089 case TYnptr: 3015 3090 need_DS = FALSE; 3016 3091 break; 3017 3092 case TYsptr: 3018 3093 if (config.wflags & WFssneds) /* if sptr can't use DS segment */ 3019 3094 segreg = SEG_SS; 3020 3095 else 3021 3096 segreg = SEG_DS; 3022 3097 goto L1; 3023 3098 case TYcptr: 3024 3099 segreg = SEG_CS; 3025 3100 L1: 3026 3101 c3 = gen1(c3,0x1E); /* PUSH DS */ 3027 3102 gen1(c3,0x06 + (segreg << 3)); /* PUSH segreg */ 3028 3103 gen1(c3,0x1F); /* POP DS */ 3029 3104 need_DS = TRUE; 3030 3105 break; 3031 3106 case TYfptr: 3032 3107 case TYvptr: 3033 3108 case TYhptr: 3034 3109 c3 = gen1(c3,0x1E); /* PUSH DS */ 3035 3110 gen2(c3,0x8E,modregrm(3,SEG_DS,DX)); /* MOV DS,DX */ 3036 3111 need_DS = TRUE; 3037 3112 break; 3038 3113 default: 3039 3114 assert(0); 3040 3115 } 3041 3116 3042 3117 if (*pretregs) // if need return value 3043 3118 { c3 = cat(c3,getregs(mAX)); 3044 3119 c3 = genmovreg(c3,AX,DI); 3120 code_orrex(c3, rex); 3045 3121 } 3046 3122 3047 3123 if (0 && I32 && config.flags4 & CFG4speed) 3048 3124 { 3049 3125 /* This is only faster if the memory is dword aligned, if not 3050 3126 * it is significantly slower than just a rep movsb. 
3051 3127 */ 3052 3128 /* mov EDX,ECX 3053 3129 * shr ECX,2 3054 3130 * jz L1 3055 3131 * repe movsd 3056 3132 * L1: and EDX,3 3057 3133 * jz L2 3058 3134 * mov ECX,EDX 3059 3135 * repe movsb 3060 3136 * L2: nop 3061 3137 */ 3062 3138 c3 = cat(c3,getregs(mSI | mDI | mCX | mDX)); 3063 3139 c3 = genmovreg(c3,DX,CX); // MOV EDX,ECX 3064 3140 c3 = genc2(c3,0xC1,modregrm(3,5,CX),2); // SHR ECX,2 3065 3141 code *cx = genc2(CNIL, 0x81, modregrm(3,4,DX),3); // AND EDX,3 3066 3142 genjmp(c3, JE, FLcode, (block *)cx); // JZ L1 3067 3143 gen1(c3,0xF3); // REPE 3068 3144 gen1(c3,0xA5); // MOVSW 3069 3145 c3 = cat(c3,cx); 3070 3146 3071 3147 code *cnop = gennop(CNIL); 3072 3148 genjmp(c3, JE, FLcode, (block *)cnop); // JZ L2 3073 3149 genmovreg(c3,CX,DX); // MOV ECX,EDX 3074 3150 gen1(c3,0xF3); // REPE 3075 3151 gen1(c3,0xA4); // MOVSB 3076 3152 c3 = cat(c3, cnop); 3077 3153 } 3078 3154 else 3079 3155 { 3080 3156 c3 = cat(c3,getregs(mSI | mDI | mCX)); 3081 3157 if (!I32 && config.flags4 & CFG4speed) // if speed optimization 3082 { c3 = gen2(c3,0xD1, modregrm(3,5,CX)); // SHR CX,13158 { c3 = gen2(c3,0xD1,(rex << 16) | modregrm(3,5,CX)); // SHR CX,1 3083 3159 gen1(c3,0xF3); // REPE 3084 3160 gen1(c3,0xA5); // MOVSW 3085 gen2(c3,0x11, modregrm(3,CX,CX)); // ADC CX,CX3161 gen2(c3,0x11,(rex << 16) | modregrm(3,CX,CX)); // ADC CX,CX 3086 3162 } 3087 3163 c3 = gen1(c3,0xF3); // REPE 3088 3164 gen1(c3,0xA4); // MOVSB 3089 3165 if (need_DS) 3090 3166 gen1(c3,0x1F); // POP DS 3091 3167 } 3092 3168 return cat4(c1,c2,c3,fixresult(e,mES|mAX,pretregs)); 3093 3169 } 3094 3170 3095 3171 3096 3172 /********************************* 3097 3173 * Generate code for memset(s,val,n) intrinsic. 
3098 3174 * (s OPmemset (n OPparam val)) 3099 3175 */ 3100 3176 3101 3177 #if 1 3102 3178 code *cdmemset(elem *e,regm_t *pretregs) 3103 3179 { code *c1,*c2,*c3 = NULL,*c4; 3104 3180 regm_t retregs1; 3105 3181 regm_t retregs2; 3106 3182 regm_t retregs3; 3107 3183 unsigned reg,vreg; 3108 3184 tym_t ty1; 3109 3185 elem *e2,*e1; 3110 3186 int segreg; 3111 3187 unsigned remainder; 3112 3188 targ_uns numbytes,numwords; 3113 3189 int op; 3114 targ_ unsvalue;3190 targ_size_t value; 3115 3191 3116 3192 //printf("cdmemset(*pretregs = x%x)\n", *pretregs); 3117 3193 e1 = e->E1; 3118 3194 e2 = e->E2; 3119 3195 assert(e2->Eoper == OPparam); 3196 3197 unsigned char rex = I64 ? REX_W : 0; 3120 3198 3121 3199 if (e2->E2->Eoper == OPconst) 3122 3200 { 3123 3201 value = el_tolong(e2->E2); 3124 3202 value &= 0xFF; 3125 3203 value |= value << 8; 3126 3204 value |= value << 16; 3205 value |= value << 32; 3127 3206 } 3128 3207 3129 3208 if (e2->E1->Eoper == OPconst) 3130 3209 { 3131 3210 numbytes = el_tolong(e2->E1); 3132 if (numbytes <= REP_THRESHOLD && I32 && // doesn't work for 16 bits 3211 if (numbytes <= REP_THRESHOLD && 3212 !I16 && // doesn't work for 16 bits 3133 3213 e2->E2->Eoper == OPconst) 3134 3214 { 3135 3215 targ_uns offset = 0; 3136 3216 retregs1 = *pretregs; 3137 3217 if (!retregs1) 3138 3218 retregs1 = ALLREGS; 3139 3219 c1 = codelem(e->E1,&retregs1,FALSE); 3140 3220 reg = findreg(retregs1); 3141 3221 if (e2->E2->Eoper == OPconst) 3142 3222 { 3143 3223 switch (numbytes) 3144 3224 { 3145 3225 case 4: // MOV [reg],imm32 3146 c3 = genc2(CNIL,0xC7,modregrm (0,0,reg),value);3226 c3 = genc2(CNIL,0xC7,modregrmx(0,0,reg),value); 3147 3227 goto fixres; 3148 3228 case 2: // MOV [reg],imm16 3149 c3 = genc2(CNIL,0xC7,modregrm (0,0,reg),value);3229 c3 = genc2(CNIL,0xC7,modregrmx(0,0,reg),value); 3150 3230 c3->Iflags = CFopsize; 3151 3231 goto fixres; 3152 3232 case 1: // MOV [reg],imm8 3153 c3 = genc2(CNIL,0xC6,modregrm (0,0,reg),value);3233 c3 = 
genc2(CNIL,0xC6,modregrmx(0,0,reg),value); 3154 3234 goto fixres; 3155 3235 } 3156 3236 } 3157 3237 3158 c1 = regwithvalue(c1, BYTEREGS & ~retregs1, value, &vreg, 0);3238 c1 = regwithvalue(c1, BYTEREGS & ~retregs1, value, &vreg, I64 ? 64 : 0); 3159 3239 freenode(e2->E2); 3160 3240 freenode(e2); 3161 3241 3162 3242 while (numbytes >= REGSIZE) 3163 3243 { // MOV dword ptr offset[reg],vreg 3164 c2 = gen2(CNIL,0x89, modregrm(2,vreg,reg));3244 c2 = gen2(CNIL,0x89,(rex << 16) | modregxrmx(2,vreg,reg)); 3165 3245 c2->IEVoffset1 = offset; 3166 3246 c2->IFL1 = FLconst; 3167 3247 numbytes -= REGSIZE; 3168 3248 offset += REGSIZE; 3169 3249 c3 = cat(c3,c2); 3170 3250 } 3251 if (numbytes & 4) 3252 { // MOV dword ptr offset[reg],vreg 3253 c2 = gen2(CNIL,0x89,modregxrmx(2,vreg,reg)); 3254 c2->IEVoffset1 = offset; 3255 c2->IFL1 = FLconst; 3256 offset += 4; 3257 c3 = cat(c3,c2); 3258 } 3171 3259 if (numbytes & 2) 3172 3260 { // MOV word ptr offset[reg],vreg 3173 c2 = gen2(CNIL,0x89,modreg rm(2,vreg,reg));3261 c2 = gen2(CNIL,0x89,modregxrmx(2,vreg,reg)); 3174 3262 c2->IEVoffset1 = offset; 3175 3263 c2->IFL1 = FLconst; 3176 3264 c2->Iflags = CFopsize; 3177 3265 offset += 2; 3178 3266 c3 = cat(c3,c2); 3179 3267 } 3180 3268 if (numbytes & 1) 3181 3269 { // MOV byte ptr offset[reg],vreg 3182 c2 = gen2(CNIL,0x88,modreg rm(2,vreg,reg));3270 c2 = gen2(CNIL,0x88,modregxrmx(2,vreg,reg)); 3183 3271 c2->IEVoffset1 = offset; 3184 3272 c2->IFL1 = FLconst; 3185 3273 c3 = cat(c3,c2); 3186 3274 } 3187 3275 fixres: 3188 3276 return cat3(c1,c3,fixresult(e,retregs1,pretregs)); 3189 3277 } 3190 3278 } 3191 3279 3192 3280 // Get nbytes into CX 3193 3281 retregs2 = mCX; 3194 if ( I32&& e2->E1->Eoper == OPconst && e2->E2->Eoper == OPconst)3282 if (!I16 && e2->E1->Eoper == OPconst && e2->E2->Eoper == OPconst) 3195 3283 { 3196 3284 remainder = numbytes & (REGSIZE - 1); 3197 3285 numwords = numbytes / REGSIZE; // number of words 3198 3286 op = 0xAB; // moving by words 3199 3287 c1 = getregs(mCX); 3200 c1 = 
movregconst(c1,CX,numwords, 0); // # of bytes/words3288 c1 = movregconst(c1,CX,numwords,I64?64:0); // # of bytes/words 3201 3289 } 3202 3290 else 3203 3291 { 3204 3292 remainder = 0; 3205 3293 op = 0xAA; // must move by bytes 3206 3294 c1 = codelem(e2->E1,&retregs2,FALSE); 3207 3295 } 3208 3296 3209 3297 // Get val into AX 3210 3298 3211 3299 retregs3 = mAX; 3212 if ( I32&& e2->E2->Eoper == OPconst)3300 if (!I16 && e2->E2->Eoper == OPconst) 3213 3301 { 3214 c1 = regwithvalue(c1, mAX, value, NULL, 0);3302 c1 = regwithvalue(c1, mAX, value, NULL, I64?64:0); 3215 3303 freenode(e2->E2); 3216 3304 } 3217 3305 else 3218 3306 { 3219 3307 c1 = cat(c1,scodelem(e2->E2,&retregs3,retregs2,FALSE)); 3220 3308 #if 0 3221 3309 if (I32) 3222 3310 { 3223 3311 c1 = gen2(c1,0x8A,modregrm(3,AH,AL)); // MOV AH,AL 3224 3312 c1 = genc2(c1,0xC1,modregrm(3,4,AX),8); // SHL EAX,8 3225 3313 c1 = gen2(c1,0x8A,modregrm(3,AL,AH)); // MOV AL,AH 3226 3314 c1 = genc2(c1,0xC1,modregrm(3,4,AX),8); // SHL EAX,8 3227 3315 c1 = gen2(c1,0x8A,modregrm(3,AL,AH)); // MOV AL,AH 3228 3316 } 3229 3317 #endif 3230 3318 } 3231 3319 freenode(e2); 3232 3320 3233 3321 // Get s into ES:DI 3234 3322 retregs1 = mDI; 3235 3323 ty1 = e->E1->Ety; 3236 3324 if (!tyreg(ty1)) 3237 3325 retregs1 |= mES; 3238 3326 c1 = cat(c1,scodelem(e->E1,&retregs1,retregs2 | retregs3,FALSE)); 3239 3327 reg = DI; //findreg(retregs1); 3240 3328 3241 3329 // Make sure ES contains proper segment value 3242 3330 c2 = cod2_setES(ty1); 3243 3331 3244 3332 c3 = NULL; 3245 3333 if (*pretregs) // if need return value 3246 3334 { c3 = getregs(mBX); 3247 3335 c3 = genmovreg(c3,BX,DI); 3336 code_orrex(c3,rex); 3248 3337 } 3249 3338 3250 3339 c3 = cat(c3,getregs(mDI | mCX)); 3251 if ( !I32&& config.flags4 & CFG4speed) // if speed optimization3340 if (I16 && config.flags4 & CFG4speed) // if speed optimization 3252 3341 { 3253 3342 c3 = cat(c3,getregs(mAX)); 3254 3343 c3 = gen2(c3,0x8A,modregrm(3,AH,AL)); // MOV AH,AL 3255 3344 
gen2(c3,0xD1,modregrm(3,5,CX)); // SHR CX,1 3256 3345 gen1(c3,0xF3); // REP 3257 3346 gen1(c3,0xAB); // STOSW 3258 3347 gen2(c3,0x11,modregrm(3,CX,CX)); // ADC CX,CX 3259 3348 op = 0xAA; 3260 3349 } 3261 3350 3262 3351 c3 = gen1(c3,0xF3); // REP 3263 3352 gen1(c3,op); // STOSD 3353 if (remainder & 4) 3354 { 3355 code *ctmp; 3356 ctmp = gen2(CNIL,0x89,modregrmx(2,AX,reg)); 3357 ctmp->IFL1 = FLconst; 3358 c3 = cat(c3,ctmp); 3359 } 3264 3360 if (remainder & 2) 3265 3361 { 3266 3362 code *ctmp; 3267 ctmp = gen2(CNIL,0x89,modregrm (2,AX,reg));3363 ctmp = gen2(CNIL,0x89,modregrmx(2,AX,reg)); 3268 3364 ctmp->Iflags = CFopsize; 3365 ctmp->IEVoffset1 = remainder & 4; 3269 3366 ctmp->IFL1 = FLconst; 3270 3367 c3 = cat(c3,ctmp); 3271 3368 } 3272 3369 if (remainder & 1) 3273 3370 { 3274 3371 code *ctmp; 3275 ctmp = gen2(CNIL,0x88,modregrm (2,AX,reg));3276 ctmp->IEVoffset1 = (remainder & 2) ? 2 : 0;3372 ctmp = gen2(CNIL,0x88,modregrmx(2,AX,reg)); 3373 ctmp->IEVoffset1 = remainder & ~1; 3277 3374 ctmp->IFL1 = FLconst; 3278 3375 c3 = cat(c3,ctmp); 3279 3376 } 3280 3377 regimmed_set(CX,0); 3281 3378 return cat4(c1,c2,c3,fixresult(e,mES|mBX,pretregs)); 3282 3379 } 3283 3380 #else 3284 3381 // BUG: Pat made many improvements in the linux version, I need 3285 3382 // to verify they work for 16 bits and fold them in. -Walter 3286 3383 3287 3384 code *cdmemset(elem *e,regm_t *pretregs) 3288 3385 { code *c1,*c2,*c3 = NULL,*c4; 3289 3386 regm_t retregs1; 3290 3387 regm_t retregs2; 3291 3388 regm_t retregs3; 3292 3389 tym_t ty1; 3293 3390 elem *e2; 3294 3391 targ_size_t value; 3295 3392 3296 3393 /* … … 3349 3445 return cat4(c1,c2,c3,fixresult(e,mES|mBX,pretregs)); 3350 3446 } 3351 3447 #endif 3352 3448 3353 3449 3354 3450 /********************** 3355 3451 * Do structure assignments. 3356 3452 * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2). 3357 3453 * Mebbe call cdstreq() for double assignments??? 
3358 3454 */ 3359 3455 3360 3456 code *cdstreq(elem *e,regm_t *pretregs) 3361 3457 { code *c1,*c2,*c3; 3362 3458 code *c1a; 3363 3459 regm_t srcregs,dstregs; /* source & destination reg masks */ 3364 3460 targ_uns numbytes; 3365 3461 char need_DS = FALSE; 3366 3462 elem *e1 = e->E1,*e2 = e->E2; 3367 3463 int segreg; 3368 3464 3369 numbytes = e->Enumbytes; /* # of bytes in structure/union */ 3465 numbytes = e->Enumbytes; // # of bytes in structure/union 3466 unsigned char rex = I64 ? REX_W : 0; 3370 3467 3371 3468 //printf("cdstreq(e = %p, *pretregs = x%x)\n", e, *pretregs); 3372 3469 3373 3470 /* First, load pointer to rvalue into SI */ 3374 3471 srcregs = mSI; /* source is DS:SI */ 3375 3472 c1 = docommas(&e2); 3376 3473 if (e2->Eoper == OPind) /* if (.. = *p) */ 3377 3474 { elem *e21 = e2->E1; 3378 3475 3379 3476 segreg = SEG_DS; 3380 3477 switch (tybasic(e21->Ety)) 3381 3478 { 3382 3479 case TYsptr: 3383 3480 if (config.wflags & WFssneds) /* if sptr can't use DS segment */ 3384 3481 segreg = SEG_SS; 3385 3482 break; 3386 3483 case TYcptr: 3387 3484 if (!(config.exe & EX_flat)) 3388 3485 segreg = SEG_CS; 3389 3486 break; … … 3445 3542 if (e1->Eoper == OPind) /* if (*p = ..) 
*/ 3446 3543 { 3447 3544 if (tyreg(e1->E1->Ety)) 3448 3545 dstregs = mDI; 3449 3546 c2 = cod2_setES(e1->E1->Ety); 3450 3547 c2 = cat(c2,scodelem(e1->E1,&dstregs,srcregs,FALSE)); 3451 3548 } 3452 3549 else 3453 3550 c2 = cdrelconst(e1,&dstregs); 3454 3551 freenode(e1); 3455 3552 3456 3553 c3 = getregs((srcregs | dstregs) & (mLSW | mDI)); 3457 3554 if (need_DS) 3458 3555 { assert(!(config.exe & EX_flat)); 3459 3556 c3 = gen1(c3,0x1E); /* PUSH DS */ 3460 3557 gen2(c3,0x8E,modregrm(3,SEG_DS,CX)); /* MOV DS,CX */ 3461 3558 } 3462 3559 if (numbytes <= REGSIZE * (6 + (REGSIZE == 4))) 3463 3560 { while (numbytes >= REGSIZE) 3464 3561 { c3 = gen1(c3,0xA5); /* MOVSW */ 3562 code_orrex(c3, rex); 3465 3563 numbytes -= REGSIZE; 3466 3564 } 3467 3565 //if (numbytes) 3468 3566 // printf("cdstreq numbytes %d\n",numbytes); 3469 3567 while (numbytes--) 3470 3568 c3 = gen1(c3,0xA4); /* MOVSB */ 3471 3569 } 3472 3570 else 3473 3571 { 3474 3572 #if 1 3475 3573 unsigned remainder; 3476 3574 3477 3575 remainder = numbytes & (REGSIZE - 1); 3478 3576 numbytes /= REGSIZE; // number of words 3479 3577 c3 = cat(c3,getregs_imm(mCX)); 3480 3578 c3 = movregconst(c3,CX,numbytes,0); // # of bytes/words 3481 3579 gen1(c3,0xF3); // REP 3482 3580 gen1(c3,0xA5); // REP MOVSD 3483 3581 regimmed_set(CX,0); // note that CX == 0 3484 3582 for (; remainder; remainder--) … … 3491 3589 if (numbytes & (REGSIZE - 1)) /* if odd */ 3492 3590 movs = 0xA4; /* MOVSB */ 3493 3591 else 3494 3592 { movs = 0xA5; /* MOVSW */ 3495 3593 numbytes /= REGSIZE; /* # of words */ 3496 3594 } 3497 3595 c3 = cat(c3,getregs_imm(mCX)); 3498 3596 c3 = movregconst(c3,CX,numbytes,0); /* # of bytes/words */ 3499 3597 gen1(c3,0xF3); /* REP */ 3500 3598 gen1(c3,movs); 3501 3599 regimmed_set(CX,0); /* note that CX == 0 */ 3502 3600 #endif 3503 3601 } 3504 3602 if (need_DS) 3505 3603 gen1(c3,0x1F); // POP DS 3506 3604 assert(!(*pretregs & mPSW)); 3507 3605 if (*pretregs) 3508 3606 { /* ES:DI points past what we want */ 3509 3607 regm_t 
retregs; 3510 3608 3511 genc2(c3,0x81, modregrm(3,5,DI),e->Enumbytes); /* SUB DI,numbytes */3609 genc2(c3,0x81,(rex << 16) | modregrm(3,5,DI),e->Enumbytes); // SUB DI,numbytes 3512 3610 retregs = mDI; 3513 3611 if (*pretregs & mMSW && !(config.exe & EX_flat)) 3514 3612 retregs |= mES; 3515 3613 c3 = cat(c3,fixresult(e,retregs,pretregs)); 3516 3614 } 3517 3615 return cat3(c1,c2,c3); 3518 3616 } 3519 3617 3520 3618 3521 3619 /********************** 3522 3620 * Get the address of. 3523 3621 * Is also called by cdstreq() to set up pointer to a structure. 3524 3622 */ 3525 3623 3526 3624 code *cdrelconst(elem *e,regm_t *pretregs) 3527 3625 { code *c,*c1; 3528 3626 enum SC sclass; 3529 3627 unsigned mreg, /* segment of the address (TYfptrs only) */ 3530 3628 lreg; /* offset of the address */ 3531 3629 tym_t tym; … … 3620 3718 fl = s->Sfl; 3621 3719 if (s->ty() & mTYcs) 3622 3720 fl = FLcsdata; 3623 3721 c = gen2(c,0x8C, /* MOV mreg,SEG REGISTER */ 3624 3722 modregrm(3,segfl[fl],mreg)); 3625 3723 } 3626 3724 if (*pretregs & mES) 3627 3725 gen2(c,0x8E,modregrm(3,0,mreg)); /* MOV ES,mreg */ 3628 3726 } 3629 3727 return cat(c,getoffset(e,lreg)); 3630 3728 } 3631 3729 3632 3730 /********************************* 3633 3731 * Load the offset portion of the address represented by e into 3634 3732 * reg. 3635 3733 */ 3636 3734 3637 3735 code *getoffset(elem *e,unsigned reg) 3638 3736 { code cs; 3639 3737 code *c; 3640 enum FL fl;3641 3738 3642 3739 cs.Iflags = 0; 3643 cs.Irex = 0; 3740 unsigned char rex = I64 ? 
REX_W : 0; 3741 cs.Irex = rex; 3644 3742 assert(e->Eoper == OPvar || e->Eoper == OPrelconst); 3645 fl = el_fl(e);3743 enum FL fl = el_fl(e); 3646 3744 switch (fl) 3647 3745 { 3648 3746 case FLdatseg: 3649 3747 cs.IEV2._EP.Vpointer = e->EV.Vpointer; 3650 3748 goto L3; 3651 3749 3652 3750 case FLfardata: 3653 3751 assert(!TARGET_FLAT); 3654 3752 goto L4; 3655 3753 3656 3754 case FLtlsdata: 3657 3755 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 3658 3756 { /* Generate: 3659 3757 * MOV reg,GS:[00000000] 3660 3758 * ADD reg, offset s@TLS_LE 3661 3759 * for locals, and for globals: 3662 3760 * MOV reg,GS:[00000000] 3663 3761 * ADD reg, s@TLS_IE 3664 3762 * note different fixup 3665 3763 */ 3666 3764 L5: 3667 3765 int stack = 0; 3668 3766 c = NULL; 3669 3767 if (reg == STACK) 3670 3768 { regm_t retregs = ALLREGS; 3671 3769 3672 3770 c = allocreg(&retregs,®,TYoffset); 3673 3771 reg = findreg(retregs); 3674 3772 stack = 1; 3675 3773 } 3676 3774 3677 3775 code css; 3776 css.Irex = rex; 3678 3777 css.Iop = 0x8B; 3679 css.Irm = modregrm(0, reg, BPRM); 3778 css.Irm = modregrm(0, 0, BPRM); 3779 code_newreg(&css, reg); 3680 3780 css.Iflags = CFgs; 3681 css.Irex = 0;3682 3781 css.IFL1 = FLconst; 3683 3782 css.IEV1.Vuns = 0; 3684 3783 c = gen(c, &css); // MOV reg,GS:[00000000] 3685 3784 3686 3785 if (e->EV.sp.Vsym->Sclass == SCstatic || e->EV.sp.Vsym->Sclass == SClocstat) 3687 3786 { // ADD reg, offset s 3787 cs.Irex = rex; 3688 3788 cs.Iop = 0x81; 3689 cs.Irm = modregrm(3,0,reg); 3789 cs.Irm = modregrm(3,0,reg & 7); 3790 if (reg & 8) 3791 cs.Irex |= REX_B; 3690 3792 cs.Iflags = CFoff; 3691 css.Irex = 0;3692 3793 cs.IFL2 = fl; 3693 3794 cs.IEVsym2 = e->EV.sp.Vsym; 3694 3795 cs.IEVoffset2 = e->EV.sp.Voffset; 3695 3796 } 3696 3797 else 3697 3798 { // ADD reg, s 3799 cs.Irex = rex; 3698 3800 cs.Iop = 0x03; 3699 cs.Irm = modregrm(0,reg,BPRM); 3801 cs.Irm = modregrm(0,0,BPRM); 3802 code_newreg(&cs, reg); 3700 3803 cs.Iflags = CFoff; 3701 css.Irex = 0;3702 3804 
cs.IFL1 = fl; 3703 3805 cs.IEVsym1 = e->EV.sp.Vsym; 3704 3806 cs.IEVoffset1 = e->EV.sp.Voffset; 3705 3807 } 3706 3808 c = gen(c, &cs); // ADD reg, xxxx 3707 3809 3708 3810 if (stack) 3709 3811 { 3710 c = gen1(c,0x50 + reg); /* PUSH reg */ 3812 c = gen1(c,0x50 + (reg & 7)); // PUSH reg 3813 if (reg & 8) 3814 code_orrex(c, REX_B); 3711 3815 c = genadjesp(c,REGSIZE); 3712 3816 stackchanged = 1; 3713 3817 } 3714 3818 break; 3715 3819 } 3716 3820 #else 3717 3821 goto L4; 3718 3822 #endif 3719 3823 3720 3824 case FLfunc: 3721 3825 fl = FLextern; /* don't want PC relative addresses */ 3722 3826 goto L4; 3723 3827 3724 3828 case FLextern: 3725 3829 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 3726 3830 if (e->EV.sp.Vsym->ty() & mTYthread) 3727 3831 goto L5; 3728 3832 #endif 3729 3833 case FLdata: 3730 3834 case FLudata: 3731 3835 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 3732 3836 case FLgot: 3733 3837 case FLgotoff: 3734 3838 #endif 3735 3839 case FLcsdata: 3736 3840 L4: 3737 3841 cs.IEVsym2 = e->EV.sp.Vsym; 3738 3842 cs.IEVoffset2 = e->EV.sp.Voffset; 3739 3843 L3: 3740 3844 if (reg == STACK) 3741 3845 { stackchanged = 1; 3742 3846 cs.Iop = 0x68; /* PUSH immed16 */ 3743 3847 c = genadjesp(NULL,REGSIZE); 3744 3848 } 3745 3849 else 3746 { cs.Iop = 0xB8 + reg; /* MOV reg,immed16 */ 3850 { cs.Iop = 0xB8 + (reg & 7); // MOV reg,immed16 3851 if (reg & 8) 3852 cs.Irex |= REX_B; 3747 3853 c = NULL; 3748 3854 } 3749 3855 cs.Iflags = CFoff; /* want offset only */ 3750 3856 cs.IFL2 = fl; 3751 3857 c = gen(c,&cs); 3752 3858 break; 3753 3859 3754 3860 #if 0 && TARGET_LINUX 3755 3861 case FLgot: 3756 3862 case FLgotoff: 3757 3863 { 3758 3864 gotref = 1; 3759 3865 symbol *s = e->EV.sp.Vsym; 3760 3866 // When using 8B (MOV), indicating that rm is used 3761 3867 // rm operands are always placed in IEV1 not IEV2 3762 3868 cs.IEVsym1 = s; 3763 3869 cs.IEVoffset1 = e->EV.sp.Voffset; 3764 3870 cs.Irm = modregrm(2,reg,BX); // reg,disp32[EBX] 3765 
3871 cs.IFL1 = fl; 3766 3872 cs.Iop = (fl == FLgotoff) … … 3774 3880 3775 3881 case FLreg: 3776 3882 /* Allow this since the tree optimizer puts & in front of */ 3777 3883 /* register doubles. */ 3778 3884 goto L2; 3779 3885 case FLauto: 3780 3886 case FLtmp: 3781 3887 case FLbprel: 3782 3888 case FLfltreg: 3783 3889 reflocal = TRUE; 3784 3890 goto L2; 3785 3891 case FLpara: 3786 3892 refparam = TRUE; 3787 3893 L2: 3788 3894 if (reg == STACK) 3789 3895 { regm_t retregs = ALLREGS; 3790 3896 3791 3897 c = allocreg(&retregs,®,TYoffset); 3792 3898 reg = findreg(retregs); 3793 3899 c = cat(c,loadea(e,&cs,0x8D,reg,0,0,0)); /* LEA reg,EA */ 3794 c = gen1(c,0x50 + reg); /* PUSH reg */ 3900 c = gen1(c,0x50 + (reg & 7)); // PUSH reg 3901 if (reg & 8) 3902 code_orrex(c, REX_B); 3795 3903 c = genadjesp(c,REGSIZE); 3796 3904 stackchanged = 1; 3797 3905 } 3798 3906 else 3799 3907 c = loadea(e,&cs,0x8D,reg,0,0,0); /* LEA reg,EA */ 3800 3908 break; 3801 3909 default: 3802 3910 #ifdef DEBUG 3803 3911 elem_print(e); 3804 3912 debugx(WRFL(fl)); 3805 3913 #endif 3806 3914 assert(0); 3807 3915 } 3808 3916 return c; 3809 3917 } 3810 3918 3811 3919 3812 3920 /****************** 3813 3921 * Negate, sqrt operator 3814 3922 */ 3815 3923 3816 3924 code *cdneg(elem *e,regm_t *pretregs) 3817 3925 { unsigned byte; 3818 3926 regm_t retregs,possregs; 3819 3927 int reg; 3820 3928 int sz; 3821 3929 tym_t tyml; 3822 3930 code *c,*c1,*cg; 3823 3931 3824 3932 //printf("cdneg()\n"); 3825 3933 //elem_print(e); 3826 3934 if (*pretregs == 0) 3827 3935 return codelem(e->E1,pretregs,FALSE); 3828 3936 tyml = tybasic(e->E1->Ety); 3829 3937 sz = tysize[tyml]; 3830 3938 if (tyfloating(tyml)) 3831 3939 { if (tycomplex(tyml)) 3832 3940 return neg_complex87(e, pretregs); 3833 if (config.inline8087 && ((*pretregs & (ALLREGS | mBP)) == 0 || e->Eoper == OPsqrt)) 3941 if (config.inline8087 && 3942 ((*pretregs & (ALLREGS | mBP)) == 0 || e->Eoper == OPsqrt || I64)) 3834 3943 return neg87(e,pretregs); 3835 retregs = ( 
!I32&& sz == 8) ? DOUBLEREGS_16 : ALLREGS;3944 retregs = (I16 && sz == 8) ? DOUBLEREGS_16 : ALLREGS; 3836 3945 c1 = codelem(e->E1,&retregs,FALSE); 3837 3946 c1 = cat(c1,getregs(retregs)); 3838 3947 if (I32) 3839 3948 { reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs); 3840 3949 c1 = genc2(c1,0x81,modregrm(3,6,reg),0x80000000); /* XOR EDX,sign bit */ 3841 3950 } 3842 3951 else 3843 3952 { reg = (sz == 8) ? AX : findregmsw(retregs); 3844 3953 c1 = genc2(c1,0x81,modregrm(3,6,reg),0x8000); /* XOR AX,0x8000 */ 3845 3954 } 3846 3955 return cat(c1,fixresult(e,retregs,pretregs)); 3847 3956 } 3848 3957 3849 3958 byte = sz == 1; 3850 3959 possregs = (byte) ? BYTEREGS : allregs; 3851 3960 retregs = *pretregs & possregs; 3852 3961 if (retregs == 0) 3853 3962 retregs = possregs; 3854 3963 c1 = codelem(e->E1,&retregs,FALSE); 3855 3964 cg = getregs(retregs); /* retregs will be destroyed */ 3856 3965 if (sz <= REGSIZE) 3857 { unsigned reg;3858 3859 reg = findreg(retregs);3860 c = gen2(CNIL,0xF7 ^ byte, modregrm(3,3,reg)); /* NEG reg */3861 if ( I32&& tysize[tyml] == SHORTSIZE && *pretregs & mPSW)3966 { 3967 unsigned reg = findreg(retregs); 3968 unsigned rex = (I64 && sz == 8) ? 
REX_W : 0; 3969 c = gen2(CNIL,0xF7 ^ byte,(rex << 16) | modregrmx(3,3,reg)); // NEG reg 3970 if (!I16 && tysize[tyml] == SHORTSIZE && *pretregs & mPSW) 3862 3971 c->Iflags |= CFopsize | CFpsw; 3863 3972 *pretregs &= mBP | ALLREGS; // flags already set 3864 3973 } 3865 3974 else if (sz == 2 * REGSIZE) 3866 3975 { unsigned msreg,lsreg; 3867 3976 3868 3977 msreg = findregmsw(retregs); 3869 3978 c = gen2(CNIL,0xF7,modregrm(3,3,msreg)); /* NEG msreg */ 3870 3979 lsreg = findreglsw(retregs); 3871 3980 gen2(c,0xF7,modregrm(3,3,lsreg)); /* NEG lsreg */ 3872 3981 genc2(c,0x81,modregrm(3,3,msreg),0); /* SBB msreg,0 */ 3873 3982 } 3874 3983 else 3875 3984 assert(0); 3876 3985 return cat4(c1,cg,c,fixresult(e,retregs,pretregs)); 3877 3986 } 3878 3987 3879 3988 3880 3989 /****************** 3881 3990 * Absolute value operator 3882 3991 */ 3883 3992 3884 3993 code *cdabs( elem *e, regm_t *pretregs) 3885 3994 { unsigned byte; 3886 3995 regm_t retregs,possregs; 3887 3996 int reg; 3888 3997 tym_t tyml; 3889 3998 code *c,*c1,*cg; 3890 int sz;3891 3999 3892 4000 if (*pretregs == 0) 3893 4001 return codelem(e->E1,pretregs,FALSE); 3894 4002 tyml = tybasic(e->E1->Ety); 3895 sz = tysize[tyml]; 4003 int sz = tysize[tyml]; 4004 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 3896 4005 if (tyfloating(tyml)) 3897 { if (config.inline8087 && ( *pretregs & (ALLREGS | mBP)) == 0)4006 { if (config.inline8087 && ((*pretregs & (ALLREGS | mBP)) == 0 || I64)) 3898 4007 return neg87(e,pretregs); 3899 4008 retregs = (!I32 && sz == 8) ? DOUBLEREGS_16 : ALLREGS; 3900 4009 c1 = codelem(e->E1,&retregs,FALSE); 3901 4010 /*cg = callclib(e,CLIBdneg,pretregs,0);*/ 3902 4011 c1 = cat(c1,getregs(retregs)); 3903 4012 if (I32) 3904 4013 { reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs); 3905 4014 c1 = genc2(c1,0x81,modregrm(3,4,reg),0x7FFFFFFF); /* AND EDX,~sign bit */ 3906 4015 } 3907 4016 else 3908 4017 { reg = (sz == 8) ? 
AX : findregmsw(retregs); 3909 4018 c1 = genc2(c1,0x81,modregrm(3,4,reg),0x7FFF); /* AND AX,0x7FFF */ 3910 4019 } 3911 4020 return cat(c1,fixresult(e,retregs,pretregs)); 3912 4021 } 3913 4022 3914 4023 byte = sz == 1; 3915 4024 assert(byte == 0); 3916 4025 byte = 0; 3917 4026 possregs = (sz <= REGSIZE) ? mAX : allregs; 3918 4027 retregs = *pretregs & possregs; 3919 4028 if (retregs == 0) 3920 4029 retregs = possregs; 3921 4030 c1 = codelem(e->E1,&retregs,FALSE); 3922 4031 cg = getregs(retregs); /* retregs will be destroyed */ 3923 4032 if (sz <= REGSIZE) 3924 4033 { unsigned reg; 3925 4034 code *c2; 3926 4035 3927 4036 /* cwd 3928 4037 xor AX,DX 3929 4038 sub AX,DX 3930 4039 */ 3931 4040 3932 4041 cg = cat(cg,getregs(mDX)); 3933 4042 reg = findreg(retregs); 3934 if ( I32&& sz == SHORTSIZE)4043 if (!I16 && sz == SHORTSIZE) 3935 4044 cg = gen1(cg,0x98); // CWDE 3936 4045 cg = gen1(cg,0x99); // CWD 3937 gen2(cg,0x33 ^ byte,modregrm(3,AX,DX)); // XOR EAX,EDX 3938 c = gen2(CNIL,0x2B ^ byte,modregrm(3,AX,DX)); // SUB EAX,EDX 3939 if (I32 && sz == SHORTSIZE && *pretregs & mPSW) 4046 code_orrex(cg, rex); 4047 gen2(cg,0x33 ^ byte,(rex << 16) | modregrm(3,AX,DX)); // XOR EAX,EDX 4048 c = gen2(CNIL,0x2B ^ byte,(rex << 16) | modregrm(3,AX,DX)); // SUB EAX,EDX 4049 if (!I16 && sz == SHORTSIZE && *pretregs & mPSW) 3940 4050 c->Iflags |= CFopsize | CFpsw; 3941 4051 if (*pretregs & mPSW) 3942 4052 c->Iflags |= CFpsw; 3943 4053 *pretregs &= ~mPSW; // flags already set 3944 4054 } 3945 4055 else if (sz == 2 * REGSIZE) 3946 4056 { unsigned msreg,lsreg; 3947 4057 code *cnop; 3948 4058 3949 4059 /* tst DX 3950 4060 jns L2 3951 4061 neg DX 3952 4062 neg AX 3953 4063 sbb DX,0 3954 4064 L2: 3955 4065 */ 3956 4066 3957 4067 cnop = gennop(CNIL); 3958 4068 msreg = findregmsw(retregs); 3959 4069 lsreg = findreglsw(retregs); … … 3975 4084 */ 3976 4085 3977 4086 code *cdpost(elem *e,regm_t *pretregs) 3978 4087 { code cs,*c1,*c2,*c3,*c4,*c5,*c6; 3979 4088 unsigned reg,op,byte; 3980 4089 tym_t 
tyml; 3981 4090 regm_t retregs,possregs,idxregs; 3982 4091 targ_int n; 3983 4092 elem *e2; 3984 4093 int sz; 3985 4094 int stackpushsave; 3986 4095 3987 4096 retregs = *pretregs; 3988 4097 op = e->Eoper; /* OPxxxx */ 3989 4098 if (retregs == 0) /* if nothing to return */ 3990 4099 return cdaddass(e,pretregs); 3991 4100 c4 = c5 = CNIL; 3992 4101 tyml = tybasic(e->E1->Ety); 3993 4102 sz = tysize[tyml]; 3994 4103 e2 = e->E2; 4104 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 3995 4105 3996 4106 if (tyfloating(tyml)) 3997 4107 { 3998 4108 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 3999 4109 return post87(e,pretregs); 4000 4110 #else 4001 4111 if (config.inline8087) 4002 4112 return post87(e,pretregs); 4003 4113 assert(sz <= 8); 4004 4114 c1 = getlvalue(&cs,e->E1,DOUBLEREGS); 4005 4115 freenode(e->E1); 4006 idxregs = idxregm( cs.Irm,cs.Isib); /* mask of index regs used */4116 idxregs = idxregm(&cs); // mask of index regs used 4007 4117 cs.Iop = 0x8B; /* MOV DOUBLEREGS,EA */ 4008 4118 c2 = fltregs(&cs,tyml); 4009 4119 stackchanged = 1; 4010 4120 stackpushsave = stackpush; 4011 4121 if (sz == 8) 4012 4122 { 4013 4123 if (I32) 4014 4124 { 4015 4125 gen1(c2,0x50 + DX); /* PUSH DOUBLEREGS */ 4016 4126 gen1(c2,0x50 + AX); 4017 4127 stackpush += DOUBLESIZE; 4018 4128 retregs = DOUBLEREGS2_32; 4019 4129 } 4020 4130 else 4021 4131 { 4022 4132 gen1(c2,0x50 + AX); 4023 4133 gen1(c2,0x50 + BX); 4024 4134 gen1(c2,0x50 + CX); 4025 4135 gen1(c2,0x50 + DX); /* PUSH DOUBLEREGS */ 4026 4136 stackpush += DOUBLESIZE + DOUBLESIZE; … … 4081 4191 } 4082 4192 } 4083 4193 else 4084 4194 { gen1(c5,0x58 + AX); 4085 4195 if (!I32) 4086 4196 gen1(c5,0x58 + DX); 4087 4197 stackpush -= FLOATSIZE; 4088 4198 retregs = FLOATREGS; 4089 4199 } 4090 4200 c5 = genadjesp(c5,stackpush - stackpushsave); 4091 4201 c6 = fixresult(e,retregs,pretregs); 4092 4202 return cat6(c1,c2,c3,c4,c5,c6); 4093 4203 #endif 4094 4204 } 4095 4205 4096 4206 assert(e2->Eoper == OPconst); 4097 4207 byte = (sz 
== 1); 4098 4208 possregs = byte ? BYTEREGS : allregs; 4099 4209 c1 = getlvalue(&cs,e->E1,0); 4100 4210 freenode(e->E1); 4101 idxregs = idxregm( cs.Irm,cs.Isib); /* mask of index regs used */4211 idxregs = idxregm(&cs); // mask of index regs used 4102 4212 if (sz <= REGSIZE && *pretregs == mPSW && (cs.Irm & 0xC0) == 0xC0 && 4103 ( I32|| (idxregs & (mBX | mSI | mDI | mBP))))4213 (!I16 || (idxregs & (mBX | mSI | mDI | mBP)))) 4104 4214 { // Generate: 4105 4215 // TEST reg,reg 4106 4216 // LEA reg,n[reg] // don't affect flags 4107 4217 int rm; 4108 4218 4109 4219 reg = cs.Irm & 7; 4220 if (cs.Irex & REX_B) 4221 reg |= 8; 4110 4222 cs.Iop = 0x85 ^ byte; 4111 c s.Irm |= modregrm(0,reg,0);4223 code_newreg(&cs, reg); 4112 4224 cs.Iflags |= CFpsw; 4113 4225 c2 = gen(NULL,&cs); // TEST reg,reg 4114 4226 4115 4227 // If lvalue is a register variable, we must mark it as modified 4116 c3 = modEA( cs.Irm);4228 c3 = modEA(&cs); 4117 4229 4118 4230 n = e2->EV.Vint; 4119 4231 if (op == OPpostdec) 4120 4232 n = -n; 4121 4233 rm = reg; 4122 if ( !I32)4234 if (I16) 4123 4235 rm = regtorm[reg]; 4124 c4 = genc1(NULL,0x8D, modregrm(2,reg,rm),FLconst,n);// LEA reg,n[reg]4236 c4 = genc1(NULL,0x8D,(rex << 16) | modregxrmx(2,reg,rm),FLconst,n); // LEA reg,n[reg] 4125 4237 return cat4(c1,c2,c3,c4); 4126 4238 } 4127 4239 else if (sz <= REGSIZE || tyfv(tyml)) 4128 4240 { code cs2; 4129 4241 4130 4242 cs.Iop = 0x8B ^ byte; 4131 4243 retregs = possregs & ~idxregs & *pretregs; 4132 4244 if (!tyfv(tyml)) 4133 4245 { if (retregs == 0) 4134 4246 retregs = possregs & ~idxregs; 4135 4247 } 4136 4248 else /* tyfv(tyml) */ 4137 4249 { if ((retregs &= mLSW) == 0) 4138 4250 retregs = mLSW & ~idxregs; 4139 4251 /* Can't use LES if the EA uses ES as a seg override */ 4140 4252 if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes) 4141 4253 { cs.Iop = 0xC4; /* LES */ 4142 4254 c1 = cat(c1,getregs(mES)); /* allocate ES */ 4143 4255 } 4144 4256 } 4145 4257 c2 = allocreg(&retregs,®,TYint); 4146 c s.Irm |= 
modregrm(0,reg,0);4258 code_newreg(&cs, reg); 4147 4259 c3 = gen(CNIL,&cs); /* MOV reg,EA */ 4148 4260 cs2 = cs; 4149 4261 4150 4262 /* If lvalue is a register variable, we must mark it as modified */ 4151 c3 = cat(c3,modEA( cs.Irm));4263 c3 = cat(c3,modEA(&cs)); 4152 4264 4153 4265 cs.Iop = 0x81 ^ byte; 4154 4266 cs.Irm &= ~modregrm(0,7,0); /* reg field = 0 */ 4267 cs.Irex &= ~REX_R; 4155 4268 if (op == OPpostdec) 4156 4269 cs.Irm |= modregrm(0,5,0); /* SUB */ 4157 4270 cs.IFL2 = FLconst; 4158 4271 n = e2->EV.Vint; 4159 4272 cs.IEV2.Vint = n; 4160 4273 if (n == 1) /* can use INC or DEC */ 4161 4274 { cs.Iop |= 0xFE; /* xFE is dec byte, xFF is word */ 4162 4275 if (op == OPpostdec) 4163 4276 NEWREG(cs.Irm,1); // DEC EA 4164 4277 else 4165 4278 NEWREG(cs.Irm,0); // INC EA 4166 4279 } 4167 4280 else if (n == -1) // can use INC or DEC 4168 4281 { cs.Iop |= 0xFE; // xFE is dec byte, xFF is word 4169 4282 if (op == OPpostinc) 4170 4283 NEWREG(cs.Irm,1); // DEC EA 4171 4284 else 4172 4285 NEWREG(cs.Irm,0); // INC EA 4173 4286 } 4174 4287 4175 4288 // For scheduling purposes, we wish to replace: 4176 4289 // MOV reg,EA 4177 4290 // OP EA 4178 4291 // with: 4179 4292 // MOV reg,EA 4180 4293 // OP reg 4181 4294 // MOV EA,reg 4182 4295 // ~OP reg 4183 4296 if (sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 && 4184 4297 config.target_cpu >= TARGET_Pentium && 4185 4298 config.flags4 & CFG4speed) 4186 4299 { 4187 4300 // Replace EA in cs with reg 4188 cs.Irm = (cs.Irm & ~modregrm(3,0,7)) | modregrm(3,0,reg); 4301 cs.Irm = (cs.Irm & ~modregrm(3,0,7)) | modregrm(3,0,reg & 7); 4302 if (reg & 8) 4303 { cs.Irex &= ~REX_R; 4304 cs.Irex |= REX_B; 4305 } 4189 4306 gen(c3,&cs); // ADD/SUB reg,const 4190 4307 4191 4308 // Reverse MOV direction 4192 4309 cs2.Iop ^= 2; 4193 4310 gen(c3,&cs2); // MOV EA,reg 4194 4311 4195 4312 // Toggle INC <-> DEC, ADD <-> SUB 4196 4313 cs.Irm ^= (n == 1 || n == -1) ? 
modregrm(0,1,0) : modregrm(0,5,0); 4197 4314 gen(c3,&cs); 4198 4315 4199 4316 if (*pretregs & mPSW) 4200 4317 { *pretregs &= ~mPSW; // flags already set 4201 4318 code_orflag(c3,CFpsw); 4202 4319 } 4203 4320 } 4204 4321 else 4205 4322 gen(c3,&cs); // ADD/SUB EA,const 4206 4323 4207 4324 freenode(e2); 4208 4325 if (tyfv(tyml)) … … 4496 4613 /***************************************** 4497 4614 */ 4498 4615 4499 4616 code *cdhalt(elem *e,regm_t *pretregs) 4500 4617 { 4501 4618 assert(*pretregs == 0); 4502 4619 return gen1(NULL, 0xF4); // HLT 4503 4620 } 4504 4621 4505 4622 /**************************************** 4506 4623 * Check to see if pointer is NULL. 4507 4624 */ 4508 4625 4509 4626 code *cdnullcheck(elem *e,regm_t *pretregs) 4510 4627 { regm_t retregs; 4511 4628 regm_t scratch; 4512 4629 unsigned reg; 4513 4630 code *c; 4514 4631 code *cs; 4515 4632 4516 assert( I32);4633 assert(!I16); 4517 4634 retregs = *pretregs; 4518 4635 if ((retregs & allregs) == 0) 4519 4636 retregs |= allregs; 4520 4637 c = codelem(e->E1,&retregs,FALSE); 4521 4638 scratch = allregs & ~retregs; 4522 4639 cs = allocreg(&scratch,®,TYint); 4523 cs = genc1(cs,0x8B,modregrm(2,reg,findreg(retregs)),FLconst,0); // MOV reg,0[e] 4640 unsigned rex = I64 ? REX_W : 0; 4641 cs = genc1(cs,0x8B,(rex << 16) | modregxrmx(2,reg,findreg(retregs)),FLconst,0); // MOV reg,0[e] 4524 4642 return cat3(c,cs,fixresult(e,retregs,pretregs)); 4525 4643 } 4526 4644 4527 4645 #endif // !SPP branches/dmd-1.x/src/backend/cod3.c
r569 r577 269 269 DOUBLEREGS = DOUBLEREGS_16; 270 270 } 271 271 #endif 272 272 } 273 273 274 274 /******************************** 275 275 * Fix global variables for I64. 276 276 */ 277 277 278 278 void cod3_set64() 279 279 { 280 280 inssize[0xA0] = T|5; // MOV AL,mem 281 281 inssize[0xA1] = T|5; // MOV RAX,mem 282 282 inssize[0xA2] = T|5; // MOV mem,AL 283 283 inssize[0xA3] = T|5; // MOV mem,RAX 284 284 BPRM = 5; // [RBP] addressing mode 285 285 fregsaved = mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES; // saved across function calls 286 286 FLOATREGS = FLOATREGS_64; 287 287 FLOATREGS2 = FLOATREGS2_64; 288 288 DOUBLEREGS = DOUBLEREGS_64; 289 STACKALIGN = 16; 289 290 290 291 for (unsigned i = 0x80; i < 0x90; i++) 291 292 inssize2[i] = W|T|6; 292 293 } 293 294 294 295 /********************************* 295 296 * Word or dword align start of function. 296 297 */ 297 298 298 299 void cod3_align() 299 300 { 300 301 static char nops[7] = { 0x90,0x90,0x90,0x90,0x90,0x90,0x90 }; 301 302 unsigned nbytes; 302 303 #if OMFOBJ 303 304 if (config.flags4 & CFG4speed) // if optimized for speed 304 305 { 305 306 // Pick alignment based on CPU target 306 307 if (config.target_cpu == TARGET_80486 || 307 308 config.target_cpu >= TARGET_PentiumPro) 308 309 { // 486 does reads on 16 byte boundaries, so if we are near … … 353 354 354 355 e = b->Belem; 355 356 elem_debug(e); 356 357 cc = docommas(&e); 357 358 cgstate.stackclean++; 358 359 tys = tybasic(e->Ety); 359 360 sz = tysize[tys]; 360 361 dword = (sz == 2 * REGSIZE); 361 362 mswsame = 1; // assume all msw's are the same 362 363 p = b->BS.Bswitch; /* pointer to case data */ 363 364 assert(p); 364 365 ncases = *p++; /* number of cases */ 365 366 366 367 vmax = MINLL; // smallest possible llong 367 368 vmin = MAXLL; // largest possible llong 368 369 for (n = 0; n < ncases; n++) // find max and min case values 369 370 { val = *p++; 370 371 if (val > vmax) vmax = val; 371 372 if (val < vmin) vmin = val; 372 373 if (REGSIZE == 2) 373 { 
unsigned short ms; 374 375 #if __DMC__ 376 ms = ((unsigned short *)&val)[1]; 377 #else 378 ms = (val >> 16) & 0xFFFF; 379 #endif 374 { 375 unsigned short ms = (val >> 16) & 0xFFFF; 380 376 if (n == 0) 381 377 msw = ms; 382 378 else if (msw != ms) 383 379 mswsame = 0; 384 380 } 385 381 else // REGSIZE == 4 386 { targ_ulong ms; 387 388 #if __DMC__ 389 /* This statement generates garbage for ms under g++, 390 * I don't know why. 391 */ 392 ms = ((targ_ulong *)&val)[1]; 393 #else 394 ms = (val >> 32) & 0xFFFFFFFF; 395 #endif 382 { 383 targ_ulong ms = (val >> 32) & 0xFFFFFFFF; 396 384 if (n == 0) 397 385 msw = ms; 398 386 else if (msw != ms) 399 387 mswsame = 0; 400 388 } 401 389 } 402 390 p -= ncases; 403 391 //dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin); 404 392 flags = (config.flags & CFGromable) ? CFcs : 0; // table is in code seg 405 393 394 if (I64) 395 { // For now, just generate basic if-then sequence to get us running 396 retregs = ALLREGS; 397 b->BC = BCifthen; 398 c = scodelem(e,&retregs,0,TRUE); 399 assert(!dword); // 128 bit switches not supported 400 reg = findreg(retregs); // reg that result is in 401 bl = b->Bsucc; 402 for (n = 0; n < ncases; n++) 403 { code *cx; 404 val = *p; 405 if (sz == 4) 406 cx = genc2(CNIL,0x81,modregrmx(3,7,reg),val); // CMP reg,val 407 else if (sz == 8) 408 { 409 if (val == (int)val) // if val is a 64 bit value sign-extended from 32 bits 410 { 411 cx = genc2(CNIL,0x81,modregrmx(3,7,reg),val); // CMP reg,value32 412 cx->Irex |= REX_W; // 64 bit operand 413 } 414 else 415 { unsigned sreg; 416 // MOV sreg,value64 417 cx = regwithvalue(CNIL, ALLREGS & ~mask[reg], val, &sreg, 64); 418 cx = genregs(cx,0x3B,reg,sreg); // CMP reg,sreg 419 code_orrex(cx, REX_W); 420 } 421 } 422 else 423 assert(0); 424 bl = list_next(bl); 425 genjmp(cx,JE,FLblock,list_block(bl)); // JE caseaddr 426 c = cat(c,cx); 427 p++; 428 } 429 if (list_block(b->Bsucc) != b->Bnext) /* if default is not next block */ 430 c = 
cat(c,genjmp(CNIL,JMP,FLblock,list_block(b->Bsucc))); 431 ce = NULL; 432 } 406 433 // Need to do research on MACHOBJ to see about better methods 407 if (MACHOBJ || ncases <= 3) // generate if-then sequence408 { 434 else if (MACHOBJ || ncases <= 3) 435 { // generate if-then sequence 409 436 retregs = ALLREGS; 410 437 L1: 411 438 b->BC = BCifthen; 412 439 c = scodelem(e,&retregs,0,TRUE); 413 440 if (dword) 414 441 { reg = findreglsw(retregs); 415 442 reg2 = findregmsw(retregs); 416 443 } 417 444 else 418 445 reg = findreg(retregs); /* reg that result is in */ 419 446 bl = b->Bsucc; 420 447 if (dword && mswsame) 421 448 { /* CMP reg2,MSW */ 422 449 c = genc2(c,0x81,modregrm(3,7,reg2),msw); 423 450 genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */ 424 451 } 425 452 for (n = 0; n < ncases; n++) 426 453 { code *cnext = CNIL; 427 454 /* CMP reg,casevalue */ 428 455 c = cat(c,ce = genc2(CNIL,0x81,modregrm(3,7,reg),(targ_int)*p)); … … 833 860 L1: 834 861 #if DEBUG 835 862 if ((jp & 0xF0) != 0x70) 836 863 WROP(op), 837 864 printf("i %d zero %d op x%x jp x%x\n",i,zero,op,jp); 838 865 #endif 839 866 assert((jp & 0xF0) == 0x70); 840 867 return jp; 841 868 } 842 869 843 870 844 871 /********************************** 845 872 * Append code to *pc which validates pointer described by 846 873 * addressing mode in *pcs. Modify addressing mode in *pcs. 
847 874 * Input: 848 875 * keepmsk mask of registers we must not destroy or use 849 876 * if (keepmsk & RMstore), this will be only a store operation 850 877 * into the lvalue 851 878 */ 852 879 853 void cod3_ptrchk(code * __ss *pc,code __ss*pcs,regm_t keepmsk)880 void cod3_ptrchk(code **pc,code *pcs,regm_t keepmsk) 854 881 { code *c; 855 882 code *cs2; 856 883 unsigned char rm,sib; 857 884 unsigned reg; 858 885 unsigned flagsave; 859 886 unsigned opsave; 860 887 regm_t idxregs; 861 888 regm_t tosave; 862 889 regm_t used; 863 890 int i; 864 891 892 assert(!I64); 865 893 if (!I16 && pcs->Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs)) 866 894 return; // not designed to deal with 48 bit far pointers 867 895 868 896 c = *pc; 869 897 870 898 rm = pcs->Irm; 871 899 assert(!(rm & 0x40)); // no disp8 or reg addressing modes 872 900 873 901 // If the addressing mode is already a register 874 902 reg = rm & 7; 875 903 if (I16) 876 904 { static const unsigned char imode[8] = { BP,BP,BP,BP,SI,DI,BP,BX }; 877 905 878 906 reg = imode[reg]; // convert [SI] to SI, etc. 
879 907 } 880 908 idxregs = mask[reg]; 881 909 if ((rm & 0x80 && (pcs->IFL1 != FLoffset || pcs->IEV1.Vuns)) || 882 910 !(idxregs & ALLREGS) 883 911 ) 884 912 { … … 937 965 case CFcs: segreg = 0x0E; break; 938 966 case 0: segreg = 0x1E; break; // DS 939 967 default: 940 968 assert(0); 941 969 } 942 970 943 971 // See if we should default to SS: 944 972 // (Happens when BP is part of the addressing mode) 945 973 if (segreg == 0x1E && (rm & 0xC0) != 0xC0 && 946 974 rm & 2 && (rm & 7) != 7) 947 975 { segreg = 0x16; 948 976 if (config.wflags & WFssneds) 949 977 pcs->Iflags |= CFss; // because BP won't be there anymore 950 978 } 951 979 c = gen1(c,segreg); // PUSH segreg 952 980 } 953 981 954 982 c = gen1(c,0x50 + reg); // PUSH reg 955 983 956 984 // Rewrite the addressing mode in *pcs so it is just 0[reg] 957 pcs->Irm = getaddrmode(idxregs);985 setaddrmode(pcs, idxregs); 958 986 pcs->IFL1 = FLoffset; 959 987 pcs->IEV1.Vuns = 0; 960 988 961 989 // Call the validation function 962 990 { 963 991 makeitextern(rtlsym[RTLSYM_PTRCHK]); 964 992 965 993 used &= ~(keepmsk | idxregs); // regs destroyed by this exercise 966 994 c = cat(c,getregs(used)); 967 995 // CALL __ptrchk 968 996 gencs(c,(LARGECODE) ? 0x9A : 0xE8,0,FLfunc,rtlsym[RTLSYM_PTRCHK]); 969 997 } 970 998 971 999 *pc = cat(c,cs2); 972 1000 } 973 1001 974 1002 975 1003 976 1004 /*********************************** 977 1005 * Determine if BP can be used as a general purpose register. 
… … 1017 1045 config.flags & CFGstack || 1018 1046 localsize >= 0x100 || // arbitrary value < 0x1000 1019 1047 (usednteh & ~NTEHjmonitor) || 1020 1048 usedalloca 1021 1049 ) 1022 1050 goto Lcant; 1023 1051 } 1024 1052 Lcan: 1025 1053 return mBP; 1026 1054 1027 1055 Lcant: 1028 1056 return 0; 1029 1057 } 1030 1058 1031 1059 /*************************************** 1032 1060 * Gen code for OPframeptr 1033 1061 */ 1034 1062 1035 1063 code *cdframeptr(elem *e, regm_t *pretregs) 1036 1064 { 1037 regm_t retregs;1038 1065 unsigned reg; 1039 code *cg;1040 code *c1;1041 1066 code cs; 1042 1067 1043 re tregs = *pretregs & allregs;1068 regm_t retregs = *pretregs & allregs; 1044 1069 if (!retregs) 1045 1070 retregs = allregs; 1046 cg = allocreg(&retregs, ®, TYint); 1047 //c1 = genmovreg(cg, reg, BP); 1071 code *cg = allocreg(&retregs, ®, TYint); 1048 1072 1049 1073 cs.Iop = ESCAPE; 1050 1074 cs.Iop2 = ESCframeptr; 1051 1075 cs.Iflags = 0; 1052 1076 cs.Irex = 0; 1053 1077 cs.Irm = reg; 1054 c 1= gen(cg,&cs);1055 1056 return cat(c 1,fixresult(e,retregs,pretregs));1078 cg = gen(cg,&cs); 1079 1080 return cat(cg,fixresult(e,retregs,pretregs)); 1057 1081 } 1058 1082 1059 1083 /*************************************** 1060 1084 * Gen code for load of _GLOBAL_OFFSET_TABLE_. 1061 1085 * This value gets cached in the local variable 'localgot'. 
1062 1086 */ 1063 1087 1064 1088 code *cdgot(elem *e, regm_t *pretregs) 1065 1089 { 1066 1090 #if TARGET_OSX 1067 1091 regm_t retregs; 1068 1092 unsigned reg; 1069 1093 code *c; 1070 1094 1071 1095 retregs = *pretregs & allregs; 1072 1096 if (!retregs) 1073 1097 retregs = allregs; 1074 1098 c = allocreg(&retregs, ®, TYnptr); 1075 1099 1076 1100 c = genc(c,0xE8,0,0,0,FLgot,0); // CALL L1 … … 1843 1867 static unsigned char ops0[] = { 0x07,0x1F,0x5F,0x5E, 1844 1868 0x5D,0x5B,0x5B,0x5A, 1845 1869 0x59,0x58,0xCF,0 }; 1846 1870 unsigned char *p; 1847 1871 1848 1872 c = genregs(c,0x8B,SP,BP); // MOV SP,BP 1849 1873 p = (config.target_cpu >= TARGET_80286) ? ops2 : ops0; 1850 1874 do 1851 1875 gen1(c,*p); 1852 1876 while (*++p); 1853 1877 goto Lopt; 1854 1878 } 1855 1879 1856 1880 if (config.flags & CFGtrace && 1857 1881 (!(config.flags4 & CFG4allcomdat) || 1858 1882 funcsym_p->Sclass == SCcomdat || 1859 1883 funcsym_p->Sclass == SCglobal || 1860 1884 (config.flags2 & CFG2comdat && SymInline(funcsym_p)) 1861 1885 ) 1862 1886 ) 1863 { symbol *s; 1864 1865 s = rtlsym[farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N]; 1887 { 1888 symbol *s = rtlsym[farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N]; 1866 1889 makeitextern(s); 1867 1890 c = gencs(c,I16 ? 0x9A : 0xE8,0,FLfunc,s); // CALLF _trace 1868 1891 if (!I16) 1869 1892 code_orflag(c,CFoff | CFselfrel); 1870 1893 useregs((ALLREGS | mBP | mES) & ~s->Sregsaved); 1871 1894 } 1872 1895 1873 1896 if (usednteh & ~NTEHjmonitor && (config.exe == EX_NT || MARS)) 1874 1897 c = cat(c,nteh_epilog()); 1875 1898 1876 1899 cpopds = CNIL; 1877 1900 if (tyf & mTYloadds) 1878 1901 { cpopds = gen1(cpopds,0x1F); // POP DS 1879 1902 c = cat(c,cpopds); 1880 1903 spoff += intsize; 1881 1904 } 1882 1905 1883 reg = 7; 1884 regm = 1 << 7; 1906 /* Pop all the general purpose registers saved on the stack 1907 * by the prolog code. Remember to do them in the reverse 1908 * order they were pushed. 1909 */ 1910 reg = I64 ? 
R15 : DI; 1911 regm = 1 << reg; 1885 1912 topop = fregsaved & ~mfuncreg; 1886 1913 #ifdef DEBUG 1887 if (topop & ~0xFF )1914 if (topop & ~0xFFFF) 1888 1915 printf("fregsaved = x%x, mfuncreg = x%x\n",fregsaved,mfuncreg); 1889 1916 #endif 1890 assert(!(topop & ~0xFF ));1917 assert(!(topop & ~0xFFFF)); 1891 1918 while (topop) 1892 1919 { if (topop & regm) 1893 { c = gen1(c,0x58 + reg); /* POP reg */1894 if (reg & 8)1895 code_orrex(c, REX_B);1896 topop &= ~regm;1897 spoff += intsize;1920 { c = gen1(c,0x58 + reg); // POP reg 1921 if (reg & 8) 1922 code_orrex(c, REX_B); 1923 topop &= ~regm; 1924 spoff += intsize; 1898 1925 } 1899 1926 regm >>= 1; 1900 1927 reg--; 1901 1928 } 1902 1929 1903 1930 #if MARS 1904 1931 if (usednteh & NTEHjmonitor) 1905 1932 { 1906 1933 regm_t retregs = 0; 1907 1934 if (b->BC == BCretexp) 1908 1935 retregs = regmask(b->Belem->Ety, tym); 1909 1936 code *cn = nteh_monitor_epilog(retregs); 1910 1937 c = cat(c,cn); 1911 1938 xlocalsize += 8; 1912 1939 } 1913 1940 #endif 1914 1941 1915 1942 if (config.wflags & WFwindows && farfunc) 1916 1943 { 1917 1944 int wflags = config.wflags; … … 1983 2010 op = tyfarfunc(tym) ? 0xCA : 0xC2; 1984 2011 if (tym == TYhfunc) 1985 2012 { 1986 2013 c = genc2(c,0xC2,0,4); // RET 4 1987 2014 } 1988 2015 else if (!typfunc(tym) || Poffset == 0) 1989 2016 { op++; // to a regular RET 1990 2017 c = gen1(c,op); 1991 2018 } 1992 2019 else 1993 2020 { // Stack is always aligned on register size boundary 1994 2021 Poffset = (Poffset + (REGSIZE - 1)) & ~(REGSIZE - 1); 1995 2022 c = genc2(c,op,0,Poffset); // RET Poffset 1996 2023 } 1997 2024 } 1998 2025 1999 2026 Lopt: 2000 2027 // If last instruction in ce is ADD SP,imm, and first instruction 2001 2028 // in c sets SP, we can dump the ADD. 
2002 2029 cr = code_last(ce); 2003 if (cr && c )2030 if (cr && c && !I64) 2004 2031 { 2005 2032 if (cr->Iop == 0x81 && cr->Irm == modregrm(3,0,SP)) // if ADD SP,imm 2006 2033 { 2007 2034 if ( 2008 2035 c->Iop == 0xC9 || // LEAVE 2009 2036 (c->Iop == 0x8B && c->Irm == modregrm(3,SP,BP)) || // MOV SP,BP 2010 2037 (c->Iop == 0x8D && c->Irm == modregrm(1,SP,6)) // LEA SP,-imm[BP] 2011 2038 ) 2012 2039 cr->Iop = NOP; 2013 2040 else if (c->Iop == 0x58 + BP) // if POP BP 2014 2041 { cr->Iop = 0x8B; 2015 2042 cr->Irm = modregrm(3,SP,BP); // MOV SP,BP 2016 2043 } 2017 2044 } 2018 2045 #if 0 // These optimizations don't work if the called function 2019 2046 // cleans off the stack. 2020 2047 else if (c->Iop == 0xC3 && cr->Iop == 0xE8) // CALL near 2021 2048 { cr->Iop = 0xE9; // JMP near 2022 2049 c->Iop = NOP; 2023 2050 } … … 2375 2401 break; 2376 2402 2377 2403 default: 2378 2404 goto L3; 2379 2405 } 2380 2406 2381 2407 if (disp == 0) // bra to next instruction 2382 2408 { bytesaved += csize; 2383 2409 c->Iop = NOP; // del branch instruction 2384 2410 c->IEV2.Vcode = NULL; 2385 2411 c = cn; 2386 2412 if (!c) 2387 2413 break; 2388 2414 continue; 2389 2415 } 2390 2416 else if ((targ_size_t)(targ_schar)(disp - 2) == (disp - 2) && 2391 2417 (targ_size_t)(targ_schar)disp == disp) 2392 2418 { 2393 2419 if (op == JMP) 2394 2420 { c->Iop = JMPS; // JMP SHORT 2395 bytesaved += I 32 ? 3 : 1;2421 bytesaved += I16 ? 1 : 3; 2396 2422 } 2397 2423 else // else Jcond 2398 2424 { c->Iflags &= ~CFjmp16; // a branch is ok 2399 bytesaved += I 32 ? 4 : 3;2425 bytesaved += I16 ? 3 : 4; 2400 2426 2401 2427 // Replace a cond jump around a call to a function that 2402 2428 // never returns with a cond jump to that function. 2403 2429 if (config.flags4 & CFG4optimized && 2404 2430 config.target_cpu >= TARGET_80386 && 2405 disp == (I 32 ? 5 : 3) &&2431 disp == (I16 ? 
3 : 5) && 2406 2432 cn && 2407 2433 cn->Iop == 0xE8 && 2408 2434 cn->IFL2 == FLfunc && 2409 2435 cn->IEVsym2->Sflags & SFLexit && 2410 2436 !(cn->Iflags & (CFtarg | CFtarg2)) 2411 2437 ) 2412 2438 { 2413 2439 cn->Iop = 0x0F; 2414 2440 cn->Iop2 = (c->Iop & 0x0F) ^ 0x81; 2415 2441 c->Iop = NOP; 2416 2442 c->IEV2.Vcode = NULL; 2417 2443 bytesaved++; 2418 2444 2419 2445 // If nobody else points to ct, we can remove the CFtarg 2420 2446 if (flag && ct) 2421 2447 { code *cx; 2422 2448 2423 2449 for (cx = bl->Bcode; 1; cx = code_next(cx)) 2424 2450 { 2425 2451 if (!cx) … … 2533 2559 #ifdef DEBUG 2534 2560 if (0) 2535 2561 { printf("assignaddrc()\n"); 2536 2562 c->print(); 2537 2563 } 2538 2564 if (code_next(c) && code_next(code_next(c)) == c) 2539 2565 assert(0); 2540 2566 #endif 2541 2567 if (c->Iop == 0x0F) 2542 2568 ins = inssize2[c->Iop2]; 2543 2569 else if (c->Iop == ESCAPE) 2544 2570 { 2545 2571 if (c->Iop2 == ESCadjesp) 2546 2572 { 2547 2573 //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,c->IEV2.Vint); 2548 2574 EBPtoESP += c->IEV2.Vint; 2549 2575 c->Iop = NOP; 2550 2576 } 2551 2577 if (c->Iop2 == ESCframeptr) 2552 2578 { // Convert to load of frame pointer 2579 // c->Irm is the register to use 2553 2580 if (hasframe) 2554 2581 { // MOV reg,EBP 2555 2582 c->Iop = 0x89; 2556 c->Irm = modregrm(3,BP,c->Irm); 2583 if (c->Irm & 8) 2584 c->Irex |= REX_B; 2585 c->Irm = modregrm(3,BP,c->Irm & 7); 2557 2586 } 2558 2587 else 2559 2588 { // LEA reg,EBPtoESP[ESP] 2560 2589 c->Iop = 0x8D; 2561 c->Irm = modregrm(2,c->Irm,4); 2590 if (c->Irm & 8) 2591 c->Irex |= REX_R; 2592 c->Irm = modregrm(2,c->Irm & 7,4); 2562 2593 c->Isib = modregrm(0,4,SP); 2563 2594 c->Iflags = CFoff; 2564 2595 c->IFL1 = FLconst; 2565 2596 c->IEV1.Vuns = EBPtoESP; 2566 2597 } 2567 2598 } 2599 if (I64) 2600 c->Irex |= REX_W; 2568 2601 continue; 2569 2602 } 2570 2603 else 2571 2604 ins = inssize[c->Iop]; 2572 2605 if (!(ins & M) || 2573 2606 ((rm = c->Irm) & 0xC0) == 0xC0) 2574 2607 goto do2; /* if no 
first operand */ 2575 2608 if (is32bitaddr(I32,c->Iflags)) 2576 2609 { 2577 2610 2578 2611 if ( 2579 2612 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5)) 2580 2613 ) 2581 2614 goto do2; /* if no first operand */ 2582 2615 } 2583 2616 else 2584 2617 { 2585 2618 if ( 2586 2619 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 2587 2620 ) … … 2637 2670 2638 2671 case FLreg: 2639 2672 case FLauto: 2640 2673 soff = Aoff; 2641 2674 L1: 2642 2675 if (s->Sflags & SFLunambig && !(s->Sflags & SFLread) && // if never loaded 2643 2676 !anyiasm && 2644 2677 // if not optimized, leave it in for debuggability 2645 2678 (config.flags4 & CFG4optimized || !config.fulltypes)) 2646 2679 { c->Iop = NOP; // remove references to it 2647 2680 continue; 2648 2681 } 2649 2682 if (s->Sfl == FLreg && c->IEVpointer1 < 2) 2650 2683 { int reg = s->Sreglsw; 2651 2684 2652 2685 assert(!(s->Sregm & ~mask[reg])); 2653 2686 if (c->IEVpointer1 == 1) 2654 2687 { assert(reg < 4); /* must be a BYTEREGS */ 2655 2688 reg |= 4; /* convert to high byte reg */ 2656 2689 } 2690 if (reg & 8) 2691 { assert(I64); 2692 c->Irex |= REX_B; 2693 reg &= 7; 2694 } 2657 2695 c->Irm = (c->Irm & modregrm(0,7,0)) 2658 2696 | modregrm(3,0,reg); 2659 2697 assert(c->Iop != LES && c->Iop != LEA); 2660 2698 goto do2; 2661 2699 } 2662 2700 else 2663 2701 { c->IEVpointer1 += s->Soffset + soff + BPoff; 2664 2702 if (s->Sflags & SFLunambig) 2665 2703 c->Iflags |= CFunambig; 2666 2704 L2: 2667 2705 if (!hasframe) 2668 2706 { /* Convert to ESP relative address instead of EBP */ 2669 2707 unsigned char rm; 2670 2708 2671 assert( I32);2709 assert(!I16); 2672 2710 c->IEVpointer1 += EBPtoESP; 2673 2711 rm = c->Irm; 2674 2712 if ((rm & 7) == 4) // if SIB byte 2675 2713 { 2676 2714 assert((c->Isib & 7) == BP); 2677 2715 assert((rm & 0xC0) != 0); 2678 2716 c->Isib = (c->Isib & ~7) | modregrm(0,0,SP); 2679 2717 } 2680 2718 else 2681 2719 { 2682 2720 assert((rm & 7) == 5); 2683 2721 c->Irm = (rm & modregrm(0,7,0)) 2684 2722 
| modregrm(2,0,4); 2685 2723 c->Isib = modregrm(0,4,SP); 2686 2724 } 2687 2725 } 2688 2726 } 2689 2727 break; 2690 2728 case FLpara: 2691 2729 soff = Poff - BPoff; // cancel out add of BPoff … … 2877 2915 void pinholeopt(code *c,block *b) 2878 2916 { targ_size_t a; 2879 2917 unsigned op,mod,rm,reg,ereg; 2880 2918 unsigned char ins; 2881 2919 int usespace; 2882 2920 int useopsize; 2883 2921 int space; 2884 2922 block *bn; 2885 2923 2886 2924 #if 0 2887 2925 code *cstart = c; 2888 2926 if (debugc) 2889 2927 { 2890 2928 printf("+pinholeopt(%p)\n",c); 2891 2929 } 2892 2930 #endif 2893 2931 2894 2932 if (b) 2895 2933 { bn = b->Bnext; 2896 2934 usespace = (config.flags4 & CFG4space && b->BC != BCasm); 2897 useopsize = ( !I32|| (config.flags4 & CFG4space && b->BC != BCasm));2935 useopsize = (I16 || (config.flags4 & CFG4space && b->BC != BCasm)); 2898 2936 } 2899 2937 else 2900 2938 { bn = NULL; 2901 2939 usespace = (config.flags4 & CFG4space); 2902 useopsize = ( !I32|| config.flags4 & CFG4space);2940 useopsize = (I16 || config.flags4 & CFG4space); 2903 2941 } 2904 2942 for (; c; c = code_next(c)) 2905 2943 { 2906 2944 L1: 2907 2945 op = c->Iop; 2908 2946 if (op == 0x0F) 2909 2947 ins = inssize2[c->Iop2]; 2910 2948 else 2911 2949 ins = inssize[c->Iop]; 2912 if ( ins & M) /* if modregrm byte */2913 { int longop = (c->Iflags & CFopsize) ? !I32: I32;2950 if (!I64 && ins & M) // if modregrm byte 2951 { int longop = (c->Iflags & CFopsize) ? 
I16 : I32; 2914 2952 int local_BPRM = BPRM; 2915 2953 2916 2954 if (c->Iflags & CFaddrsize) 2917 2955 local_BPRM ^= 5 ^ 6; // toggle between 5 and 6 2918 2956 2919 2957 rm = c->Irm; 2920 2958 reg = rm & (7<<3); // isolate reg field 2921 2959 ereg = rm & 7; 2922 2960 2923 2961 /* If immediate second operand */ 2924 2962 if ((ins & T || op == 0xF6 || op == 0xF7) && 2925 2963 c->IFL2 == FLconst) 2926 2964 { int flags; 2927 2965 targ_long u; 2928 2966 2929 2967 flags = c->Iflags & CFpsw; /* if want result in flags */ 2930 2968 u = c->IEV2.Vuns; 2931 2969 if (ins & E) 2932 2970 u = (signed char) u; 2933 2971 else if (!longop) 2934 2972 u = (short) u; 2935 2973 2936 2974 // Replace CMP reg,0 with TEST reg,reg 2937 #if 02938 // BUG: is this the right one?2939 if ((op & 0xFC) == 0x80 &&2940 #else2941 2975 if ((op & 0xFE) == 0x80 && 2942 #endif2943 2976 rm >= modregrm(3,7,AX) && 2944 2977 u == 0) 2945 2978 { c->Iop = (op & 1) | 0x84; 2946 2979 c->Irm = modregrm(3,ereg,ereg); 2947 2980 goto L1; 2948 2981 } 2949 2982 2950 2983 /* Optimize ANDs with an immediate constant */ 2951 2984 if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0)) 2952 2985 { 2953 2986 if (rm >= modregrm(3,4,AX)) 2954 2987 { 2955 2988 if (u == 0) 2956 2989 { /* Replace with XOR reg,reg */ 2957 2990 c->Iop = 0x30 | (op & 1); 2958 2991 NEWREG(c->Irm,rm & 7); 2959 2992 goto L1; 2960 2993 } 2961 2994 if (u == 0xFFFFFFFF && !flags) 2962 2995 { c->Iop = NOP; … … 4023 4056 return offset; /* ending address */ 4024 4057 } 4025 4058 4026 4059 4027 4060 STATIC void do64bit(enum FL fl,union evc *uev,int flags) 4028 4061 { char *p; 4029 4062 symbol *s; 4030 4063 targ_size_t ad; 4031 4064 long tmp; 4032 4065 4033 4066 assert(I64); 4034 4067 switch (fl) 4035 4068 { 4036 4069 case FLconst: 4037 4070 ad = * (targ_size_t *) uev; 4038 4071 L1: 4039 4072 GENP(8,&ad); 4040 4073 return; 4041 4074 case FLdatseg: 4042 4075 FLUSH(); 4043 reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg, flags);4076 
reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg,CFoffset64 | flags); 4044 4077 break; 4045 4078 case FLframehandler: 4046 4079 framehandleroffset = OFFSET(); 4047 4080 ad = 0; 4048 4081 goto L1; 4049 4082 case FLswitch: 4050 4083 FLUSH(); 4051 4084 ad = uev->Vswitch->Btableoffset; 4052 4085 if (config.flags & CFGromable) 4053 4086 reftocodseg(cseg,offset,ad); 4054 4087 else 4055 4088 reftodatseg(cseg,offset,ad,JMPSEG,CFoff); 4056 4089 break; 4057 4090 case FLcsdata: 4058 4091 case FLfardata: 4059 4092 #if DEBUG 4060 4093 symbol_print(uev->sp.Vsym); 4061 4094 #endif 4062 4095 assert(!TARGET_FLAT); 4063 4096 // NOTE: In ELFOBJ all symbol refs have been tagged FLextern 4064 4097 // strings and statics are treated like offsets from a 4065 4098 // un-named external with is the start of .rodata or .data 4066 4099 case FLextern: /* external data symbol */ 4067 4100 case FLtlsdata: 4068 4101 #if TARGET_LINUX || TARGET_FREEBSD || TARGET_SOLARIS 4069 4102 case FLgot: 4070 4103 case FLgotoff: 4071 4104 #endif 4072 4105 FLUSH(); 4073 4106 s = uev->sp.Vsym; /* symbol pointer */ 4074 reftoident(cseg,offset,s,uev->sp.Voffset, flags);4107 reftoident(cseg,offset,s,uev->sp.Voffset,CFoffset64 | flags); 4075 4108 break; 4076 4109 4077 4110 #if TARGET_OSX 4078 4111 case FLgot: 4079 4112 funcsym_p->Slocalgotoffset = OFFSET(); 4080 4113 ad = 0; 4081 4114 goto L1; 4082 4115 #endif 4083 4116 4084 4117 case FLfunc: /* function call */ 4085 4118 s = uev->sp.Vsym; /* symbol pointer */ 4086 4119 assert(!(TARGET_FLAT && tyfarfunc(s->ty()))); 4087 4120 FLUSH(); 4088 reftoident(cseg,offset,s,0, flags);4121 reftoident(cseg,offset,s,0,CFoffset64 | flags); 4089 4122 break; 4090 4123 4091 4124 case FLblock: /* displacement to another block */ 4092 4125 ad = uev->Vblock->Boffset - OFFSET() - 4; 4093 4126 //printf("FLblock: funcoffset = %x, OFFSET = %x, Boffset = %x, ad = %x\n", funcoffset, OFFSET(), uev->Vblock->Boffset, ad); 4094 4127 goto L1; 4095 4128 4096 4129 case FLblockoff: 4097 4130 
FLUSH(); 4098 4131 assert(uev->Vblock); 4099 4132 //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", offset, uev->Vblock->Boffset, funcoffset); 4100 4133 reftocodseg(cseg,offset,uev->Vblock->Boffset); 4101 4134 break; 4102 4135 4103 4136 default: 4104 4137 #ifdef DEBUG 4105 4138 WRFL(fl); 4106 4139 #endif 4107 4140 assert(0); 4108 4141 } branches/dmd-1.x/src/backend/cod4.c
r569 r577 33 33 * Return number of times symbol s appears in tree e. 34 34 */ 35 35 36 36 STATIC int intree(symbol *s,elem *e) 37 37 { 38 38 if (EOP(e)) 39 39 return intree(s,e->E1) + (EBIN(e) ? intree(s,e->E2) : 0); 40 40 return e->Eoper == OPvar && e->EV.sp.Vsym == s; 41 41 } 42 42 43 43 /*********************************** 44 44 * Determine if expression e can be evaluated directly into register 45 45 * variable s. 46 46 * Have to be careful about things like x=x+x+x, and x=a+x. 47 47 * Returns: 48 48 * !=0 can 49 49 * 0 can't 50 50 */ 51 51 52 52 STATIC int doinreg(symbol *s, elem *e) 53 { int in ;53 { int in = 0; 54 54 int op; 55 55 56 56 L1: 57 57 op = e->Eoper; 58 58 if (op == OPind || 59 59 OTcall(op) || 60 60 OTleaf(op) || 61 61 (in = intree(s,e)) == 0 || 62 62 (OTunary(op) && !EOP(e->E1)) 63 63 ) 64 64 return 1; 65 65 if (in == 1) 66 66 { 67 67 switch (op) 68 68 { 69 69 case OPadd: 70 70 case OPmin: 71 71 case OPand: 72 72 case OPor: 73 73 case OPxor: 74 74 case OPshl: 75 75 case OPmul: 76 76 if (!intree(s,e->E2)) 77 77 { 78 78 e = e->E1; 79 79 goto L1; 80 80 } 81 81 } 82 82 } 83 83 return 0; 84 84 } 85 85 86 86 /**************************** 87 87 * Return code for saving common subexpressions if EA 88 88 * turns out to be a register. 89 89 * This is called just before modifying an EA. 90 90 */ 91 91 92 code *modEA( unsigned Irm)92 code *modEA(code *c) 93 93 { 94 return ((Irm & 0xC0) == 0xC0) ? getregs(mask[Irm & 7]) : CNIL; 94 if ((c->Irm & 0xC0) == 0xC0) // addressing mode refers to a register 95 { 96 unsigned reg = c->Irm & 7; 97 if (c->Irex & REX_B) 98 { reg |= 8; 99 assert(I64); 100 } 101 return getregs(mask[reg]); 102 } 103 return CNIL; 95 104 } 96 105 97 106 #if TARGET_WINDOS 98 107 // This code is for CPUs that do not support the 8087 99 108 100 109 /**************************** 101 110 * Gen code for op= for doubles. 
102 111 */ 103 112 104 113 STATIC code * opassdbl(elem *e,regm_t *pretregs,unsigned op) 105 114 { code *c1,*c2,*c3,*c4,*c5,*c6,cs; 106 115 unsigned clib; 107 116 regm_t retregs2,retregs,idxregs; 108 117 tym_t tym; 109 118 elem *e1; 110 119 111 120 static unsigned clibtab[OPdivass - OPpostinc + 1] = 112 121 /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass */ 113 122 { CLIBdadd, CLIBdsub, (unsigned)-1, CLIBdadd,CLIBdsub,CLIBdmul,CLIBddiv }; 114 123 … … 121 130 122 131 if (tym == TYfloat) 123 132 { 124 133 clib += CLIBfadd - CLIBdadd; /* convert to float operation */ 125 134 126 135 /* Load EA into FLOATREGS */ 127 136 c1 = cat(c1,getregs(FLOATREGS)); 128 137 cs.Iop = 0x8B; 129 138 cs.Irm |= modregrm(0,AX,0); 130 139 c1 = gen(c1,&cs); 131 140 132 141 if (!I32) 133 142 { 134 143 cs.Irm |= modregrm(0,DX,0); 135 144 getlvalue_msw(&cs); 136 145 c1 = gen(c1,&cs); 137 146 getlvalue_lsw(&cs); 138 147 139 148 } 140 149 retregs2 = FLOATREGS2; 141 idxregs = FLOATREGS | idxregm( cs.Irm,cs.Isib);150 idxregs = FLOATREGS | idxregm(&cs); 142 151 retregs = FLOATREGS; 143 152 } 144 153 else 145 154 { 146 155 if (I32) 147 156 { 148 157 /* Load EA into DOUBLEREGS */ 149 158 c1 = cat(c1,getregs(DOUBLEREGS_32)); 150 159 cs.Iop = 0x8B; 151 160 cs.Irm |= modregrm(0,AX,0); 152 161 c1 = gen(c1,&cs); 153 162 cs.Irm |= modregrm(0,DX,0); 154 163 getlvalue_msw(&cs); 155 164 c1 = gen(c1,&cs); 156 165 getlvalue_lsw(&cs); 157 166 158 167 retregs2 = DOUBLEREGS2_32; 159 idxregs = DOUBLEREGS_32 | idxregm( cs.Irm,cs.Isib);168 idxregs = DOUBLEREGS_32 | idxregm(&cs); 160 169 } 161 170 else 162 171 { 163 172 /* Push EA onto stack */ 164 173 cs.Iop = 0xFF; 165 174 cs.Irm |= modregrm(0,6,0); 166 175 cs.IEVoffset1 += DOUBLESIZE - REGSIZE; 167 176 c1 = gen(c1,&cs); 168 177 getlvalue_lsw(&cs); 169 178 gen(c1,&cs); 170 179 getlvalue_lsw(&cs); 171 180 gen(c1,&cs); 172 181 getlvalue_lsw(&cs); 173 182 gen(c1,&cs); 174 183 stackpush += DOUBLESIZE; 175 184 176 185 retregs2 = DOUBLEREGS_16; 177 idxregs 
= idxregm( cs.Irm,cs.Isib);186 idxregs = idxregm(&cs); 178 187 } 179 188 retregs = DOUBLEREGS; 180 189 } 181 190 182 191 if ((cs.Iflags & CFSEG) == CFes) 183 192 idxregs |= mES; 184 193 cgstate.stackclean++; 185 194 c3 = scodelem(e->E2,&retregs2,idxregs,FALSE); 186 195 cgstate.stackclean--; 187 196 c4 = callclib(e,clib,&retregs,0); 188 197 if (e1->Ecount) 189 198 cssave(e1,retregs,EOP(e1)); /* if lvalue is a CSE */ 190 199 freenode(e1); 191 200 cs.Iop = 0x89; /* MOV EA,DOUBLEREGS */ 192 201 c5 = fltregs(&cs,tym); 193 202 c6 = fixresult(e,retregs,pretregs); 194 203 return cat6(c1,CNIL,c3,c4,c5,c6); 195 204 } 196 205 197 206 /**************************** 198 207 * Gen code for OPnegass for doubles. 199 208 */ 200 209 201 210 STATIC code * opnegassdbl(elem *e,regm_t *pretregs) 202 211 { code *c1,*c2,*c3,*c,*cl,*cr,cs; 203 212 unsigned clib; 204 213 regm_t retregs2,retregs,idxregs; 205 214 tym_t tym; 206 215 elem *e1; 207 216 int sz; 208 217 209 218 if (config.inline8087) 210 219 return cdnegass87(e,pretregs); 211 220 e1 = e->E1; 212 221 tym = tybasic(e1->Ety); 213 222 sz = tysize[tym]; 214 223 215 224 cl = getlvalue(&cs,e1,*pretregs ? 
DOUBLEREGS | mBX | mCX : 0); 216 cr = modEA( cs.Irm);225 cr = modEA(&cs); 217 226 cs.Irm |= modregrm(0,6,0); 218 227 cs.Iop = 0x80; 219 228 cs.IEVoffset1 += sz - 1; 220 229 cs.IFL2 = FLconst; 221 230 cs.IEV2.Vuns = 0x80; 222 231 c = gen(NULL,&cs); // XOR 7[EA],0x80 223 232 if (tycomplex(tym)) 224 233 { 225 234 cs.IEVoffset1 -= sz / 2; 226 235 gen(c,&cs); // XOR 7[EA],0x80 227 236 } 228 237 c = cat3(cl,cr,c); 229 238 230 239 if (*pretregs || e1->Ecount) 231 240 { 232 241 cs.IEVoffset1 -= sz - 1; 233 242 234 243 if (tym == TYfloat) 235 244 { 236 245 // Load EA into FLOATREGS … … 353 362 sz = tysize[tyml]; 354 363 assert((int)sz > 0); 355 364 356 365 if (retregs == 0) /* if no return value */ 357 366 { int fl; 358 367 359 368 if ((e2oper == OPconst || /* if rvalue is a constant */ 360 369 e2oper == OPrelconst && 361 370 ((fl = el_fl(e2)) == FLdata || 362 371 fl==FLudata || fl == FLextern) && 363 372 !(e2->EV.sp.Vsym->ty() & mTYcs) 364 373 ) && 365 374 !evalinregister(e2) && 366 375 !e1->Ecount) /* and no CSE headaches */ 367 376 { 368 377 // Look for special case of (*p++ = ...), where p is a register variable 369 378 if (e1->Eoper == OPind && 370 379 ((e11 = e1->E1)->Eoper == OPpostinc || e11->Eoper == OPpostdec) && 371 380 e11->E1->Eoper == OPvar && 372 381 e11->E1->EV.sp.Vsym->Sfl == FLreg && 373 ( I32|| e11->E1->EV.sp.Vsym->Sregm & IDXREGS)382 (!I16 || e11->E1->EV.sp.Vsym->Sregm & IDXREGS) 374 383 ) 375 384 { 376 385 postinc = e11->E2->EV.Vint; 377 386 if (e11->Eoper == OPpostdec) 378 387 postinc = -postinc; 379 388 cl = getlvalue(&cs,e11,RMstore); 380 389 freenode(e11->E2); 381 390 } 382 391 else 383 392 { postinc = 0; 384 393 cl = getlvalue(&cs,e1,RMstore); 385 394 386 395 if (e2oper == OPconst && 387 396 config.flags4 & CFG4speed && 388 397 (config.target_cpu == TARGET_Pentium || 389 398 config.target_cpu == TARGET_PentiumMMX) && 390 399 (cs.Irm & 0xC0) == 0x80 391 400 ) 392 401 { 393 if (sz == REGSIZE && e2->EV.Vint) 394 { regm_t rregm; 395 unsigned rreg; 396 
402 if (I64 && sz == 8 && e2->EV.Vpointer) 403 { 404 // MOV reg,imm64 405 // MOV EA,reg 406 regm_t rregm = allregs & ~idxregm(&cs); 407 unsigned reg; 408 cl = regwithvalue(cl,rregm,e2->EV.Vpointer,&reg,CFoffset64); 409 cs.Iop = 0x89; 410 cs.Irm |= modregrm(0,reg & 7,0); 411 if (reg & 8) 412 cs.Irex |= REX_R; 413 c = gen(cl,&cs); 414 freenode(e2); 415 goto Lp; 416 } 417 if ((sz == REGSIZE || (I64 && sz == 4)) && e2->EV.Vint) 418 { 397 419 // MOV reg,imm 398 420 // MOV EA,reg 399 rregm = allregs & ~idxregm(cs.Irm,cs.Isib); 421 regm_t rregm = allregs & ~idxregm(&cs); 422 unsigned reg; 400 423 cl = regwithvalue(cl,rregm,e2->EV.Vint,&reg,0); 401 424 cs.Iop = 0x89; 402 cs.Irm |= modregrm(0,reg,0); 425 cs.Irm |= modregrm(0,reg & 7,0); 426 if (reg & 8) 427 cs.Irex |= REX_R; 403 428 c = gen(cl,&cs); 404 429 freenode(e2); 405 430 goto Lp; 406 431 } 407 432 if (sz == 2 * REGSIZE && e2->EV.Vllong == 0) 408 433 { regm_t rregm; 409 unsigned r reg;434 unsigned reg; 410 435 411 436 // MOV reg,imm 412 437 // MOV EA,reg 413 438 // MOV EA+2,reg 414 rregm = getscratch() & ~idxregm( cs.Irm,cs.Isib);439 rregm = getscratch() & ~idxregm(&cs); 415 440 if (rregm) 416 441 { cl = regwithvalue(cl,rregm,e2->EV.Vint,&reg,0); 417 442 cs.Iop = 0x89; 418 443 cs.Irm |= modregrm(0,reg,0); 419 444 c = gen(cl,&cs); 420 445 getlvalue_msw(&cs); 421 446 c = gen(c,&cs); 422 447 freenode(e2); 423 448 goto Lp; 424 449 } 425 450 } 426 451 } 427 452 } 428 453 429 454 /* If loading result into a register */ 430 455 if ((cs.Irm & 0xC0) == 0xC0) 431 { cl = cat(cl, getregs(mask[cs.Irm & 7]));456 { cl = cat(cl,modEA(&cs)); 432 457 if (sz == 2 * REGSIZE && cs.IFL1 == FLreg) 433 458 cl = cat(cl,getregs(cs.IEVsym1->Sregm)); 434 459 } 435 460 cs.Iop = (sz == 1) ? 
0xC6 : 0xC7; 436 461 437 462 if (e2oper == OPrelconst) 438 463 { 439 464 cs.IEVoffset2 = e2->EV.sp.Voffset; 440 465 cs.IFL2 = fl; 441 466 cs.IEVsym2 = e2->EV.sp.Vsym; 442 467 cs.Iflags |= CFoff; 443 468 cl = gen(cl,&cs); /* MOV EA,&variable */ 469 if (I64 && sz == 8) 470 code_orrex(cl, REX_W); 444 471 if (sz > REGSIZE) 445 472 { 446 473 cs.Iop = 0x8C; 447 474 getlvalue_msw(&cs); 448 475 cs.Irm |= modregrm(0,3,0); 449 476 cl = gen(cl,&cs); /* MOV EA+2,DS */ 450 477 } 451 478 } 452 479 else 453 { targ_int *p;454 480 { 481 assert(e2oper == OPconst); 455 482 cs.IFL2 = FLconst; 456 p = (targ_int *) &(e2->EV);457 cs.IEV2.V int = *p;483 targ_size_t *p = (targ_size_t *) &(e2->EV); 484 cs.IEV2.Vsize_t = *p; 458 485 // Look for loading a register variable 459 486 if ((cs.Irm & 0xC0) == 0xC0) 460 { 461 cl = movregconst(cl,cs.Irm & 7,*p,1 ^ (cs.Iop & 1)); 462 if (sz == 2 * REGSIZE) 463 { getlvalue_msw(&cs); 464 cl = movregconst(cl,cs.Irm & 7,p[1],0); 465 } 487 { unsigned reg = cs.Irm & 7; 488 489 if (cs.Irex & REX_B) 490 reg |= 8; 491 if (I64 && sz == 8) 492 cl = movregconst(cl,reg,*p,CFoffset64); 493 else 494 cl = movregconst(cl,reg,*p,1 ^ (cs.Iop & 1)); 495 if (sz == 2 * REGSIZE) 496 { getlvalue_msw(&cs); 497 cl = movregconst(cl,cs.Irm & 7,p[1],0); 498 } 466 499 } 467 500 else 468 501 { int regsize; 469 502 470 i = sz; 471 do 472 { regsize = REGSIZE; 473 retregs = (sz == 1) ? BYTEREGS : allregs; 474 if (i >= 4 && !I32 && I386) 503 i = sz; 504 do 505 { regsize = REGSIZE; 506 retregs = (sz == 1) ? 
BYTEREGS : allregs; 507 if (i >= 4 && I16 && I386) 508 { 509 regsize = 4; 510 cs.Iflags |= CFopsize; // use opsize to do 32 bit operation 511 } 512 else 513 { 514 if (reghasvalue(retregs,*p,&reg)) 475 515 { 476 regsize = 4; 516 cs.Iop = (cs.Iop & 1) | 0x88; 517 cs.Irm |= modregrm(0,reg & 7,0); // MOV EA,reg 518 if (reg & 8) 519 cs.Irex |= REX_R; 520 } 521 if (!I16 && i == 2) // if 16 bit operand 477 522 cs.Iflags |= CFopsize; 478 } 479 else 480 { 481 if (reghasvalue(retregs,*p,&reg)) 482 { 483 cs.Iop = (cs.Iop & 1) | 0x88; 484 cs.Irm |= reg << 3; /* MOV EA,reg */ 485 } 486 if (I32 && i == 2) // if 16 bit operand 487 cs.Iflags |= CFopsize; 488 } 489 cl = gen(cl,&cs); /* MOV EA,const */ 490 491 p = (targ_int *)((char *) p + regsize); 492 cs.Iop = (cs.Iop & 1) | 0xC6; 493 cs.Irm &= ~(7 << 3); 494 cs.IEVoffset1 += regsize; 495 cs.IEV2.Vint = *p; 496 i -= regsize; 497 } while (i > 0); 523 if (I64 && sz == 8) 524 assert(cs.Irex & REX_W); 525 } 526 cl = gen(cl,&cs); /* MOV EA,const */ 527 528 p = (targ_size_t *)((char *) p + regsize); 529 cs.Iop = (cs.Iop & 1) | 0xC6; 530 cs.Irm &= ~modregrm(0,7,0); 531 cs.Irex &= ~REX_R; 532 cs.IEVoffset1 += regsize; 533 cs.IEV2.Vint = *p; 534 i -= regsize; 535 } while (i > 0); 498 536 } 499 537 } 500 538 freenode(e2); 501 539 c = cl; 502 540 goto Lp; 503 541 } 504 542 retregs = allregs; /* pick a reg, any reg */ 505 543 } 506 544 if (retregs == mPSW) 507 545 retregs = allregs; 508 546 cs.Iop = 0x89; 509 547 if (sz == 1) // must have byte regs 510 548 { cs.Iop = 0x88; 511 549 retregs &= BYTEREGS; 512 550 if (!retregs) 513 551 retregs = BYTEREGS; 514 552 } 515 553 else if (retregs & mES && 516 554 ((e1->Eoper == OPind && 517 555 ((tymll = tybasic(e1->E1->Ety)) == TYfptr || tymll == TYhptr)) … … 542 580 if (e1->EV.sp.Voffset) 543 581 retregs &= mMSW; 544 582 else 545 583 retregs &= mLSW; 546 584 reg = findreg(retregs); 547 585 } 548 586 } 549 587 } 550 588 if (*pretregs & mPSW && !EOP(e1)) /* if evaluating e1 couldn't change flags */ 551 589 { 
/* Be careful that this lines up with jmpopcode() */ 552 590 retregs |= mPSW; 553 591 *pretregs &= ~mPSW; 554 592 } 555 593 cr = scodelem(e2,&retregs,0,TRUE); /* get rvalue */ 556 594 557 595 // Look for special case of (*p++ = ...), where p is a register variable 558 596 if (e1->Eoper == OPind && 559 597 ((e11 = e1->E1)->Eoper == OPpostinc || e11->Eoper == OPpostdec) && 560 598 e11->E1->Eoper == OPvar && 561 599 e11->E1->EV.sp.Vsym->Sfl == FLreg && 562 ( I32|| e11->E1->EV.sp.Vsym->Sregm & IDXREGS)600 (!I16 || e11->E1->EV.sp.Vsym->Sregm & IDXREGS) 563 601 ) 564 602 { 565 603 postinc = e11->E2->EV.Vint; 566 604 if (e11->Eoper == OPpostdec) 567 605 postinc = -postinc; 568 606 cl = getlvalue(&cs,e11,RMstore | retregs); 569 607 freenode(e11->E2); 570 608 } 571 609 else 572 610 { postinc = 0; 573 611 cl = getlvalue(&cs,e1,RMstore | retregs); // get lvalue (cl == CNIL if regvar) 574 612 } 575 613 576 614 c = getregs_imm(varregm); 577 615 578 616 assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes)); 579 617 if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES) 580 618 { 581 619 reg = findreglsw(retregs); 582 620 cs.Irm |= modregrm(0,reg,0); 583 621 c = gen(c,&cs); /* MOV EA,reg */ 584 622 getlvalue_msw(&cs); // point to where segment goes 585 623 cs.Iop = 0x8C; 586 624 NEWREG(cs.Irm,0); 587 625 gen(c,&cs); /* MOV EA+2,ES */ 588 626 } 589 627 else 590 628 { 591 if ( I32)629 if (!I16) 592 630 { 593 631 reg = findreg(retregs & 594 632 ((sz > REGSIZE) ? 
mBP | mLSW : mBP | ALLREGS)); 595 cs.Irm |= modregrm(0,reg,0); 633 cs.Irm |= modregrm(0,reg & 7,0); 634 if (reg & 8) 635 cs.Irex |= REX_R; 596 636 for (; TRUE; sz -= REGSIZE) 597 637 { 598 638 // Do not generate mov from register onto itself 599 639 if (regvar && reg == (cs.Irm & 7)) 600 640 break; 601 641 if (sz == 2) // if 16 bit operand 602 642 cs.Iflags |= CFopsize; 603 643 c = gen(c,&cs); // MOV EA+offset,reg 604 644 if (sz <= REGSIZE) 605 645 break; 606 646 getlvalue_msw(&cs); 607 647 reg = findregmsw(retregs); 608 NEWREG(cs.Irm,reg);648 code_newreg(&cs, reg); 609 649 } 610 650 } 611 651 else 612 652 { 613 653 if (sz > REGSIZE) 614 654 cs.IEVoffset1 += sz - REGSIZE; /* 0,2,6 */ 615 655 reg = findreg(retregs & 616 656 (sz > REGSIZE ? mMSW : ALLREGS)); 617 657 if (tyml == TYdouble || tyml == TYdouble_alias) 618 658 reg = AX; 619 659 cs.Irm |= modregrm(0,reg,0); 620 660 /* Do not generate mov from register onto itself */ 621 661 if (!regvar || reg != (cs.Irm & 7)) 622 662 for (; TRUE; sz -= REGSIZE) /* 1,2,4 */ 623 663 { 624 664 c = gen(c,&cs); /* MOV EA+offset,reg */ 625 665 if (sz <= REGSIZE) 626 666 break; 627 667 cs.IEVoffset1 -= REGSIZE; 628 668 if (tyml == TYdouble || tyml == TYdouble_alias) 629 669 reg = dblreg[reg]; 630 670 else 631 671 reg = findreglsw(retregs); 632 672 NEWREG(cs.Irm,reg); 633 673 } 634 674 } 635 675 } 636 676 if (e1->Ecount || /* if lvalue is a CSE or */ 637 677 regvar) /* rvalue can't be a CSE */ 638 678 { 639 679 c = cat(c,getregs_imm(retregs)); // necessary if both lvalue and 640 680 // rvalue are CSEs (since a reg 641 681 // can hold only one e at a time) 642 682 cssave(e1,retregs,EOP(e1)); /* if lvalue is a CSE */ 643 683 } 644 684 645 685 c = cat4(cr,cl,c,fixresult(e,retregs,pretregs)); 646 686 Lp: 647 687 if (postinc) 648 { int reg; 649 650 reg = findreg(idxregm(cs.Irm,cs.Isib)); 688 { 689 int reg = findreg(idxregm(&cs)); 651 690 if (*pretregs & mPSW) 652 691 { // Use LEA to avoid touching the flags 653 c = 
genc1(c,0x8D,modregrm(2,reg,cs.Irm & 7),FLconst,postinc); 692 unsigned rm = cs.Irm & 7; 693 if (cs.Irex & REX_B) 694 rm |= 8; 695 c = genc1(c,0x8D,modregxrmx(2,reg,rm),FLconst,postinc); 696 if (sz == 8) 697 code_orrex(c, REX_W); 698 } 699 else if (I64) 700 { 701 c = genc2(c,0x81,modregrmx(3,0,reg),postinc); 702 if (sz == 8) 703 code_orrex(c, REX_W); 654 704 } 655 705 else 656 706 { 657 707 if (postinc == 1) 658 708 c = gen1(c,0x40 + reg); // INC reg 659 709 else if (postinc == -(targ_int)1) 660 710 c = gen1(c,0x48 + reg); // DEC reg 661 711 else 662 712 { 663 713 c = genc2(c,0x81,modregrm(3,0,reg),postinc); 664 714 } 665 715 } 666 716 } 667 717 freenode(e1); 668 718 return c; 669 719 } 670 720 671 721 672 722 /************************ 673 723 * Generate code for += -= &= |= ^= negass … … 695 745 reverse = 0; 696 746 e1 = e->E1; 697 747 tyml = tybasic(e1->Ety); // type of lvalue 698 748 sz = tysize[tyml]; 699 749 byte = (sz == 1); // 1 for byte operation, else 0 700 750 if (tyfloating(tyml)) 701 751 { 702 752 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 703 753 if (op == OPnegass) 704 754 c = cdnegass87(e,pretregs); 705 755 else 706 756 c = opass87(e,pretregs); 707 757 #else 708 758 if (op == OPnegass) 709 759 c = opnegassdbl(e,pretregs); 710 760 else 711 761 c = opassdbl(e,pretregs,op); 712 762 #endif 713 763 return c; 714 764 } 715 opsize = ( !I32&& tylong(tyml) && config.target_cpu >= TARGET_80386)765 opsize = (I16 && tylong(tyml) && config.target_cpu >= TARGET_80386) 716 766 ? 
CFopsize : 0; 717 767 cflags = 0; 718 768 forccs = *pretregs & mPSW; // return result in flags 719 forregs = *pretregs & (mBP | ALLREGS | mES);// return result in regs769 forregs = *pretregs & ~mPSW; // return result in regs 720 770 /* TRUE if we want the result in a register */ 721 771 wantres = forregs || (e1->Ecount && EOP(e1)); 722 772 723 773 switch (op) /* select instruction opcodes */ 724 774 { case OPpostinc: op = OPaddass; /* i++ => += */ 725 775 case OPaddass: op1 = 0x01; op2 = 0x11; 726 776 cflags = CFpsw; 727 777 mode = 0; break; /* ADD, ADC */ 728 778 case OPpostdec: op = OPminass; /* i-- => -= */ 729 779 case OPminass: op1 = 0x29; op2 = 0x19; 730 780 cflags = CFpsw; 731 781 mode = 5; break; /* SUB, SBC */ 732 782 case OPandass: op1 = op2 = 0x21; 733 783 mode = 4; break; /* AND, AND */ 734 784 case OPorass: op1 = op2 = 0x09; 735 785 mode = 1; break; /* OR , OR */ 736 786 case OPxorass: op1 = op2 = 0x31; 737 787 mode = 6; break; /* XOR, XOR */ 738 788 case OPnegass: op1 = 0xF7; // NEG 739 789 break; 740 790 default: 741 791 assert(0); 742 792 } 743 793 op1 ^= byte; /* bit 0 is 0 for byte operation */ 744 794 745 795 if (op == OPnegass) 746 796 { 747 797 cl = getlvalue(&cs,e1,0); 748 cr = modEA( cs.Irm);798 cr = modEA(&cs); 749 799 cs.Irm |= modregrm(0,3,0); 750 800 cs.Iop = op1; 751 801 switch (tysize[tyml]) 752 802 { case CHARSIZE: 753 803 c = gen(CNIL,&cs); 754 804 break; 755 805 case SHORTSIZE: 756 806 c = gen(CNIL,&cs); 757 if ( I32&& *pretregs & mPSW)808 if (!I16 && *pretregs & mPSW) 758 809 c->Iflags |= CFopsize | CFpsw; 759 810 break; 760 811 case LONGSIZE: 761 if ( I32|| opsize)813 if (!I16 || opsize) 762 814 { c = gen(CNIL,&cs); 763 815 c->Iflags |= opsize; 764 816 break; 765 817 } 766 818 neg_2reg: 767 819 getlvalue_msw(&cs); 768 820 c = gen(CNIL,&cs); // NEG EA+2 769 821 getlvalue_lsw(&cs); 770 822 gen(c,&cs); // NEG EA 771 823 code_orflag(c,CFpsw); 772 824 cs.Iop = 0x81; 773 825 getlvalue_msw(&cs); 774 826 cs.IFL2 = FLconst; 775 827 
cs.IEV2.Vuns = 0; 776 828 gen(c,&cs); // SBB EA+2,0 777 829 break; 778 830 case LLONGSIZE: 779 if (I 32)780 goto neg_2reg;781 assert(0); // not implemented yet782 break; 832 if (I16) 833 assert(0); // not implemented yet 834 goto neg_2reg; 835 783 836 default: 784 837 assert(0); 785 838 } 786 839 c = cat3(cl,cr,c); 787 840 forccs = 0; // flags already set by NEG 788 841 *pretregs &= ~mPSW; 789 842 } 790 843 else if ((e2 = e->E2)->Eoper == OPconst && // if rvalue is a const 791 844 // Don't evaluate e2 in register if we can use an INC or DEC 792 845 (((sz <= REGSIZE || tyfv(tyml)) && 793 846 (op == OPaddass || op == OPminass) && 794 847 ((val = el_tolong(e2)) == 1 || val == -1) 795 848 ) || 796 849 (!evalinregister(e2) && tyml != TYhptr) 797 850 ) 798 851 ) 799 852 { 800 853 cl = getlvalue(&cs,e1,0); 801 cl = cat(cl,modEA( cs.Irm));854 cl = cat(cl,modEA(&cs)); 802 855 cs.IFL2 = FLconst; 803 856 cs.IEV2.Vint = e2->EV.Vint; 804 857 if (sz <= REGSIZE || tyfv(tyml) || opsize) 805 { targ_int i; 806 807 i = cs.IEV2.Vint; 858 { 859 targ_size_t i = cs.IEV2.Vsize_t; 808 860 809 861 /* Handle shortcuts. Watch out for if result has */ 810 862 /* to be in flags. 
*/ 811 863 812 864 if (reghasvalue(ALLREGS,i,®) && i != 1 && i != -1 && 813 865 !opsize) 814 866 { 815 867 cs.Iop = op1; 816 868 cs.Irm |= modregrm(0,reg,0); 817 869 } 818 870 else 819 871 { 820 872 cs.Iop = 0x81; 821 873 cs.Irm |= modregrm(0,mode,0); 822 874 switch (op) 823 875 { case OPminass: /* convert to += */ 824 876 cs.Irm ^= modregrm(0,5,0); 825 877 i = -i; 826 cs.IEV2.V int = i;878 cs.IEV2.Vsize_t = i; 827 879 /* FALL-THROUGH */ 828 880 case OPaddass: 829 881 if (i == 1) /* INC EA */ 830 882 goto L1; 831 883 else if (i == -1) /* DEC EA */ 832 884 { cs.Irm |= modregrm(0,1,0); 833 885 L1: cs.Iop = 0xFF; 834 886 } 835 887 break; 836 888 } 837 889 } 838 890 cs.Iop ^= byte; /* for byte operations */ 839 891 cs.Iflags |= opsize; 840 892 if (forccs) 841 893 cs.Iflags |= CFpsw; 842 else if ( I32&& cs.Iflags & CFopsize)894 else if (!I16 && cs.Iflags & CFopsize) 843 895 { 844 896 switch (op) 845 897 { case OPorass: 846 898 case OPxorass: 847 cs.IEV2.V int &= 0xFFFF;899 cs.IEV2.Vsize_t &= 0xFFFF; 848 900 cs.Iflags &= ~CFopsize; // don't worry about MSW 849 901 break; 850 902 case OPandass: 851 cs.IEV2.V int |= ~0xFFFFL;903 cs.IEV2.Vsize_t |= ~0xFFFFLL; 852 904 cs.Iflags &= ~CFopsize; // don't worry about MSW 853 905 break; 854 906 case OPminass: 855 907 case OPaddass: 856 908 #if 1 857 909 if ((cs.Irm & 0xC0) == 0xC0) // EA is register 858 910 cs.Iflags &= ~CFopsize; 859 911 #else 860 912 if ((cs.Irm & 0xC0) == 0xC0 && // EA is register and 861 913 e1->Eoper == OPind) // not a register var 862 914 cs.Iflags &= ~CFopsize; 863 915 #endif 864 916 break; 865 917 default: 866 918 assert(0); 867 919 break; 868 920 } 869 921 } 870 922 871 923 // For scheduling purposes, we wish to replace: 872 924 // OP EA 873 925 // with: 874 926 // MOV reg,EA 875 927 // OP reg 876 928 // MOV EA,reg 877 929 if (forregs && sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 && 878 930 (config.target_cpu == TARGET_Pentium || 879 931 config.target_cpu == TARGET_PentiumMMX) && 880 932 config.flags4 & 
CFG4speed) 881 933 { regm_t sregm; 882 934 code cs2; 883 935 884 936 // Determine which registers to use 885 sregm = allregs & ~idxregm( cs.Irm,cs.Isib);937 sregm = allregs & ~idxregm(&cs); 886 938 if (byte) 887 939 sregm &= BYTEREGS; 888 940 if (sregm & forregs) 889 941 sregm &= forregs; 890 942 891 943 cr = allocreg(&sregm,&reg,tyml); // allocate register 892 944 893 945 cs2 = cs; 894 946 cs2.Iflags &= ~CFpsw; 895 947 cs2.Iop = 0x8B ^ byte; 896 c s2.Irm = (cs2.Irm & modregrm(3,0,7)) | modregrm(0,reg,0);948 code_newreg(&cs2, reg); 897 949 cr = gen(cr,&cs2); // MOV reg,EA 898 950 899 cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg); 951 cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7); 952 if (reg & 8) 953 cs.Irex |= REX_B; 900 954 gen(cr,&cs); // OP reg 901 955 902 956 cs2.Iop ^= 2; 903 957 gen(cr,&cs2); // MOV EA,reg 904 958 905 959 c = cat(cl,cr); 906 960 retregs = sregm; 907 961 wantres = 0; 908 962 if (e1->Ecount) 909 963 cssave(e1,retregs,EOP(e1)); 910 964 } 911 965 else 912 966 { 913 967 c = gen(cl,&cs); 914 968 cs.Iflags &= ~opsize; 915 969 cs.Iflags &= ~CFpsw; 916 if ( !I32&& opsize) // if DWORD operand970 if (I16 && opsize) // if DWORD operand 917 971 cs.IEVoffset1 += 2; // compensate for wantres code 918 972 } 919 973 } 920 974 else if (sz == 2 * REGSIZE) 921 975 { targ_uns msw; 922 976 923 977 cs.Iop = 0x81; 924 978 cs.Irm |= modregrm(0,mode,0); 925 979 c = cl; 926 980 cs.Iflags |= cflags; 927 981 c = gen(c,&cs); 928 982 cs.Iflags &= ~CFpsw; 929 983 930 984 getlvalue_msw(&cs); // point to msw 931 985 msw = MSREG(e->E2->EV.Vllong); 932 986 cs.IEV2.Vuns = msw; /* msw of constant */ 933 987 switch (op) 934 988 { case OPminass: 935 989 cs.Irm ^= modregrm(0,6,0); /* SUB => SBB */ 936 990 break; 937 991 case OPaddass: 938 992 cs.Irm |= modregrm(0,2,0); /* ADD => ADC */ 939 993 break; 940 994 } 941 995 c = gen(c,&cs); 942 996 } 943 997 freenode(e->E2); /* don't need it anymore */ 944 998 } 945 999 else if (isregvar(e1,&varregm,&varreg) && 946 
1000 (e2->Eoper == OPvar || e2->Eoper == OPind) && 947 1001 !evalinregister(e2) && 948 1002 sz <= REGSIZE) // deal with later 949 1003 { 950 1004 cr = getlvalue(&cs,e2,0); 951 1005 freenode(e2); 952 1006 cl = getregs(varregm); 953 c s.Irm |= modregrm(0,varreg,0);1007 code_newreg(&cs, varreg); 954 1008 cs.Iop = op1 ^ 2; // toggle direction bit 955 1009 if (forccs) 956 1010 cs.Iflags |= CFpsw; 957 1011 reverse = 2; // remember we toggled it 958 1012 cl = gen(cl,&cs); 959 1013 c = cat(cr,cl); 960 1014 retregs = 0; /* to trigger a bug if we attempt to use it */ 961 1015 } 962 1016 else // evaluate e2 into register 963 1017 { 964 1018 retregs = (byte) ? BYTEREGS : ALLREGS; // pick working reg 965 1019 if (tyml == TYhptr) 966 1020 retregs &= ~mCX; // need CX for shift count 967 1021 cr = scodelem(e->E2,&retregs,0,TRUE); // get rvalue 968 1022 cl = getlvalue(&cs,e1,retregs); // get lvalue 969 cl = cat(cl,modEA( cs.Irm));1023 cl = cat(cl,modEA(&cs)); 970 1024 cs.Iop = op1; 971 1025 if (sz <= REGSIZE || tyfv(tyml)) 972 1026 { reg = findreg(retregs); 973 c s.Irm |= modregrm(0,reg,0);// OP1 EA,reg1027 code_newreg(&cs, reg); // OP1 EA,reg 974 1028 } 975 1029 else if (tyml == TYhptr) 976 1030 { unsigned mreg,lreg; 977 1031 978 1032 mreg = findregmsw(retregs); 979 1033 lreg = findreglsw(retregs); 980 1034 cl = cat(cl,getregs(retregs | mCX)); 981 1035 982 1036 // If h -= l, convert to h += -l 983 1037 if (e->Eoper == OPminass) 984 1038 { 985 1039 cl = gen2(cl,0xF7,modregrm(3,3,mreg)); // NEG mreg 986 1040 gen2(cl,0xF7,modregrm(3,3,lreg)); // NEG lreg 987 1041 code_orflag(cl,CFpsw); 988 1042 genc2(cl,0x81,modregrm(3,3,mreg),0); // SBB mreg,0 989 1043 } 990 1044 cs.Iop = 0x01; 991 1045 cs.Irm |= modregrm(0,lreg,0); 992 1046 cl = gen(cl,&cs); // ADD EA,lreg 993 1047 code_orflag(cl,CFpsw); … … 1015 1069 1016 1070 /* See if we need to reload result into a register. 
*/ 1017 1071 /* Need result in registers in case we have a 32 bit */ 1018 1072 /* result and we want the flags as a result. */ 1019 1073 if (wantres || (sz > REGSIZE && forccs)) 1020 1074 { 1021 1075 if (sz <= REGSIZE) 1022 1076 { regm_t possregs; 1023 1077 1024 1078 possregs = ALLREGS; 1025 1079 if (byte) 1026 1080 possregs = BYTEREGS; 1027 1081 retregs = forregs & possregs; 1028 1082 if (!retregs) 1029 1083 retregs = possregs; 1030 1084 1031 1085 // If reg field is destination 1032 1086 if (cs.Iop & 2 && cs.Iop < 0x40 && (cs.Iop & 7) <= 5) 1033 1087 { 1034 1088 reg = (cs.Irm >> 3) & 7; 1089 if (cs.Irex & REX_R) 1090 reg |= 8; 1035 1091 retregs = mask[reg]; 1036 1092 ce = allocreg(&retregs,&reg,tyml); 1037 1093 } 1038 1094 // If lvalue is a register, just use that register 1039 1095 else if ((cs.Irm & 0xC0) == 0xC0) 1040 1096 { 1041 1097 reg = cs.Irm & 7; 1098 if (cs.Irex & REX_B) 1099 reg |= 8; 1042 1100 retregs = mask[reg]; 1043 1101 ce = allocreg(&retregs,&reg,tyml); 1044 1102 } 1045 1103 else 1046 1104 { 1047 1105 ce = allocreg(&retregs,&reg,tyml); 1048 1106 cs.Iop = 0x8B ^ byte ^ reverse; 1049 NEWREG(cs.Irm,reg);1107 code_newreg(&cs, reg); 1050 1108 ce = gen(ce,&cs); // MOV reg,EA 1051 1109 } 1052 1110 } 1053 1111 else if (tyfv(tyml) || tyml == TYhptr) 1054 1112 { regm_t idxregs; 1055 1113 1056 1114 if (tyml == TYhptr) 1057 1115 getlvalue_lsw(&cs); 1058 idxregs = idxregm( cs.Irm,cs.Isib);1116 idxregs = idxregm(&cs); 1059 1117 retregs = forregs & ~idxregs; 1060 1118 if (!(retregs & IDXREGS)) 1061 1119 retregs |= IDXREGS & ~idxregs; 1062 1120 if (!(retregs & mMSW)) 1063 1121 retregs |= mMSW & ALLREGS; 1064 1122 ce = allocreg(&retregs,&reg,tyml); 1065 1123 NEWREG(cs.Irm,findreglsw(retregs)); 1066 1124 if (retregs & mES) /* if want ES loaded */ 1067 1125 { cs.Iop = 0xC4; 1068 1126 ce = gen(ce,&cs); /* LES lreg,EA */ 1069 1127 } 1070 1128 else 1071 1129 { cs.Iop = 0x8B; 1072 1130 ce = gen(ce,&cs); /* MOV lreg,EA */ 1073 1131 getlvalue_msw(&cs); 1074 1132 if (I32) 1075 1133 
cs.Iflags |= CFopsize; 1076 1134 NEWREG(cs.Irm,reg); 1077 1135 gen(ce,&cs); /* MOV mreg,EA+2 */ 1078 1136 } 1079 1137 } 1080 1138 else if (sz == 2 * REGSIZE) 1081 1139 { regm_t idx; 1082 1140 code *cm,*cl; 1083 1141 1084 idx = idxregm( cs.Irm,cs.Isib);1142 idx = idxregm(&cs); 1085 1143 retregs = forregs; 1086 1144 if (!retregs) 1087 1145 retregs = ALLREGS; 1088 1146 ce = allocreg(&retregs,&reg,tyml); 1089 1147 cs.Iop = 0x8B; 1090 1148 NEWREG(cs.Irm,reg); 1091 1149 cm = gen(NULL,&cs); // MOV reg,EA+2 1092 1150 NEWREG(cs.Irm,findreglsw(retregs)); 1093 1151 getlvalue_lsw(&cs); 1094 1152 cl = gen(NULL,&cs); // MOV reg+1,EA 1095 1153 if (mask[reg] & idx) 1096 1154 ce = cat3(ce,cl,cm); 1097 1155 else 1098 1156 ce = cat3(ce,cm,cl); 1099 1157 } 1100 1158 c = cat(c,ce); 1101 1159 if (e1->Ecount) /* if we gen a CSE */ 1102 1160 cssave(e1,retregs,EOP(e1)); 1103 1161 } 1104 1162 freenode(e1); 1105 1163 if (sz <= REGSIZE) 1106 *pretregs &= mES | ALLREGS | mBP;// flags are already set1164 *pretregs &= ~mPSW; // flags are already set 1107 1165 return cat(c,fixresult(e,retregs,pretregs)); 1108 1166 } 1109 1167 1110 1168 1111 1169 /******************************** 1112 1170 * Generate code for *= /= %= 1113 1171 */ 1114 1172 1115 1173 code *cdmulass(elem *e,regm_t *pretregs) 1116 1174 { elem *e1,*e2; 1117 1175 code *cr,*cl,*cg,*c,cs; 1118 1176 tym_t tym,tyml; 1119 1177 regm_t retregs; 1120 1178 char uns; 1121 1179 unsigned op,resreg,reg,opr,lib,byte; 1122 1180 unsigned sz; 1123 1181 1124 1182 e1 = e->E1; 1125 1183 e2 = e->E2; 1126 1184 op = e->Eoper; /* OPxxxx */ 1127 1185 1128 1186 tyml = tybasic(e1->Ety); /* type of lvalue */ 1129 1187 uns = tyuns(tyml) || tyuns(e2->Ety); 1130 1188 tym = tybasic(e->Ety); /* type of result */ 1131 1189 sz = tysize[tyml]; 1189 1190 unsigned rex = (I64 && sz == 8) ? 
REX_W : 0; 1191 unsigned grex = rex << 16; // 64 bit operands 1192 1132 1193 1133 1194 if (tyfloating(tyml)) 1134 1195 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1135 1196 return opass87(e,pretregs); 1136 1197 #else 1137 1198 return opassdbl(e,pretregs,op); 1138 1199 #endif 1139 1200 1140 1201 if (sz <= REGSIZE) /* if word or byte */ 1141 1202 { byte = (sz == 1); /* 1 for byte operation */ 1142 1203 resreg = AX; /* result register for * or / */ 1143 1204 if (uns) /* if unsigned operation */ 1144 1205 opr = 4; /* MUL */ 1145 1206 else /* else signed */ 1146 1207 opr = 5; /* IMUL */ 1147 1208 if (op != OPmulass) /* if /= or %= */ 1148 1209 { opr += 2; /* MUL => DIV, IMUL => IDIV */ 1149 1210 if (op == OPmodass) 1150 1211 resreg = DX; /* remainder is in DX */ 1151 1212 } 1152 1213 if (op == OPmulass) /* if multiply */ 1153 1214 { 1154 1215 if (config.target_cpu >= TARGET_80286 && 1155 1216 e2->Eoper == OPconst && !byte) 1156 { targ_int e2factor;1157 regm_t idxregs;1158 1159 e2factor = el_tolong(e2);1217 { 1218 targ_size_t e2factor = el_tolong(e2); 1219 if (I64 && sz == 8 && e2factor != (int)e2factor) 1220 goto L1; 1160 1221 freenode(e2); 1161 1222 cr = CNIL; 1162 1223 cl = getlvalue(&cs,e1,0); /* get EA */ 1163 idxregs = idxregm(cs.Irm,cs.Isib);1224 regm_t idxregs = idxregm(&cs); 1164 1225 retregs = *pretregs & (ALLREGS | mBP) & ~idxregs; 1165 1226 if (!retregs) 1166 1227 retregs = ALLREGS & ~idxregs; 1167 1228 cg = allocreg(&retregs,&resreg,tyml); 1168 1229 cs.Iop = 0x69; /* IMUL reg,EA,e2value */ 1169 1230 cs.IFL2 = FLconst; 1170 1231 cs.IEV2.Vint = e2factor; 1171 1232 opr = resreg; 1172 1233 } 1173 else if (I32 && !byte) 1174 { 1234 else if (!I16 && !byte) 1235 { 1236 L1: 1175 1237 retregs = *pretregs & (ALLREGS | mBP); 1176 1238 if (!retregs) 1177 1239 retregs = ALLREGS; 1178 1240 cr = codelem(e2,&retregs,FALSE); /* load rvalue in reg */ 1179 1241 cl = getlvalue(&cs,e1,retregs); /* get EA */ 1180 1242 cg = getregs(retregs); /* destroy these 
regs */ 1181 1243 cs.Iop = 0x0F; /* IMUL resreg,EA */ 1182 1244 cs.Iop2 = 0xAF; 1183 1245 resreg = findreg(retregs); 1184 1246 opr = resreg; 1185 1247 } 1186 1248 else 1187 1249 { 1188 1250 retregs = mAX; 1189 1251 cr = codelem(e2,&retregs,FALSE); // load rvalue in AX 1190 1252 cl = getlvalue(&cs,e1,mAX); // get EA 1191 1253 cg = getregs(byte ? mAX : mAX | mDX); // destroy these regs 1192 1254 cs.Iop = 0xF7 ^ byte; // [I]MUL EA 1193 1255 } 1194 1256 cs.Irm |= modregrm(0,opr,0); 1195 1257 c = gen(CNIL,&cs); 1196 1258 } 1197 1259 else // /= or %= 1198 { targ_ int e2factor;1260 { targ_size_t e2factor; 1199 1261 int pow2; 1200 1262 targ_ulong m; 1201 1263 1202 1264 assert(!byte); // should never happen 1203 assert( !I32|| sz != SHORTSIZE);1265 assert(I16 || sz != SHORTSIZE); 1204 1266 if (config.flags4 & CFG4speed && 1205 e2->Eoper == OPconst && sz == REGSIZE && !uns && 1267 e2->Eoper == OPconst && !uns && 1268 (sz == REGSIZE || (I64 && sz == 4)) && 1206 1269 (pow2 = ispow2(e2factor = el_tolong(e2))) != -1 && 1270 e2factor == (int)e2factor && 1207 1271 !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass) 1208 1272 ) 1209 1273 { 1210 1274 // Signed divide or modulo by power of 2 1211 1275 cr = NULL; 1212 1276 c = NULL; 1213 1277 cl = getlvalue(&cs,e1,mAX | mDX); 1214 1278 cs.Iop = 0x8B; 1215 NEWREG(cs.Irm,AX);1279 code_newreg(&cs, AX); 1216 1280 cl = gen(cl,&cs); // MOV AX,EA 1217 1281 freenode(e2); 1218 1282 cg = getregs(mAX | mDX); // trash these regs 1219 1283 cg = gen1(cg,0x99); // CWD 1284 code_orrex(cg, rex); 1220 1285 if (pow2 == 1) 1221 1286 { 1222 1287 if (op == OPdivass) 1223 { gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1224 gen2(cg,0xD1, modregrm(3,7,AX)); // SAR AX,11288 { gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1289 gen2(cg,0xD1,grex | modregrm(3,7,AX)); // SAR AX,1 1225 1290 resreg = AX; 1226 1291 } 1227 1292 else // OPmod 1228 { gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1229 genc2(cg,0x81, modregrm(3,4,AX),1); // AND 
AX,11230 gen2(cg,0x03, modregrm(3,DX,AX)); // ADD DX,AX1293 { gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1294 genc2(cg,0x81,grex | modregrm(3,4,AX),1); // AND AX,1 1295 gen2(cg,0x03,grex | modregrm(3,DX,AX)); // ADD DX,AX 1231 1296 resreg = DX; 1232 1297 } 1233 1298 } 1234 1299 else 1235 { targ_ulong m;1236 1237 m = (1 << pow2) - 1;1300 { 1301 assert(pow2 < 32); 1302 targ_ulong m = (1 << pow2) - 1; 1238 1303 if (op == OPdivass) 1239 { genc2(cg,0x81, modregrm(3,4,DX),m); // AND DX,m1240 gen2(cg,0x03, modregrm(3,AX,DX)); // ADD AX,DX1304 { genc2(cg,0x81,grex | modregrm(3,4,DX),m); // AND DX,m 1305 gen2(cg,0x03,grex | modregrm(3,AX,DX)); // ADD AX,DX 1241 1306 // Be careful not to generate this for 8088 1242 1307 assert(config.target_cpu >= TARGET_80286); 1243 genc2(cg,0xC1, modregrm(3,7,AX),pow2); // SAR AX,pow21308 genc2(cg,0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2 1244 1309 resreg = AX; 1245 1310 } 1246 1311 else // OPmodass 1247 { gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1248 gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1249 genc2(cg,0x81, modregrm(3,4,AX),m); // AND AX,m1250 gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1251 gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1312 { gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1313 gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1314 genc2(cg,0x81,grex | modregrm(3,4,AX),m); // AND AX,m 1315 gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1316 gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1252 1317 resreg = AX; 1253 1318 } 1254 1319 } 1255 1320 } 1256 1321 else 1257 1322 { 1258 1323 retregs = ALLREGS & ~(mAX|mDX); // DX gets sign extension 1259 1324 cr = codelem(e2,&retregs,FALSE); // load rvalue in retregs 1260 1325 reg = findreg(retregs); 1261 1326 cl = getlvalue(&cs,e1,mAX | mDX | retregs); // get EA 1262 1327 cg = getregs(mAX | mDX); // destroy these regs 1263 1328 cs.Irm |= modregrm(0,AX,0); 1264 1329 cs.Iop = 0x8B; 1265 1330 c = gen(CNIL,&cs); // MOV AX,EA 1266 
1331 if (uns) // if unsigned 1267 1332 movregconst(c,DX,0,0); // CLR DX 1268 1333 else // else signed 1269 gen1(c,0x99); // CWD 1334 { gen1(c,0x99); // CWD 1335 code_orrex(c,rex); 1336 } 1270 1337 c = cat(c,getregs(mDX | mAX)); // DX and AX will be destroyed 1271 1338 genregs(c,0xF7,opr,reg); // OPR reg 1339 code_orrex(c,rex); 1272 1340 } 1273 1341 } 1274 1342 cs.Iop = 0x89 ^ byte; 1275 1343 NEWREG(cs.Irm,resreg); 1276 1344 c = gen(c,&cs); // MOV EA,resreg 1277 1345 if (e1->Ecount) // if we gen a CSE 1278 1346 cssave(e1,mask[resreg],EOP(e1)); 1279 1347 freenode(e1); 1280 1348 c = cat(c,fixresult(e,mask[resreg],pretregs)); 1281 1349 return cat4(cr,cl,cg,c); 1282 1350 } 1283 1351 else if (sz == 2 * REGSIZE) 1284 1352 { 1285 1353 lib = CLIBlmul; 1286 1354 if (op == OPdivass || op == OPmodass) 1287 1355 { lib = (uns) ? CLIBuldiv : CLIBldiv; 1288 1356 if (op == OPmodass) 1289 1357 lib++; 1290 1358 } 1291 1359 retregs = mCX | mBX; … … 1299 1367 gen(cl,&cs); /* MOV DX,EA+2 */ 1300 1368 getlvalue_lsw(&cs); 1301 1369 retregs = 0; 1302 1370 if (config.target_cpu >= TARGET_PentiumPro && op == OPmulass) 1303 1371 { 1304 1372 /* IMUL ECX,EAX 1305 1373 IMUL EDX,EBX 1306 1374 ADD ECX,EDX 1307 1375 MUL EBX 1308 1376 ADD EDX,ECX 1309 1377 */ 1310 1378 c = getregs(mAX|mDX|mCX); 1311 1379 c = gen2(c,0x0FAF,modregrm(3,CX,AX)); 1312 1380 gen2(c,0x0FAF,modregrm(3,DX,BX)); 1313 1381 gen2(c,0x03,modregrm(3,CX,DX)); 1314 1382 gen2(c,0xF7,modregrm(3,4,BX)); 1315 1383 gen2(c,0x03,modregrm(3,DX,CX)); 1316 1384 retregs = mDX | mAX; 1317 1385 } 1318 1386 else 1319 c = callclib(e,lib,&retregs,idxregm( cs.Irm,cs.Isib));1387 c = callclib(e,lib,&retregs,idxregm(&cs)); 1320 1388 reg = (op == OPmodass) ? BX : AX; 1321 1389 retregs = mask[reg]; 1322 1390 cs.Iop = 0x89; 1323 1391 NEWREG(cs.Irm,reg); 1324 1392 gen(c,&cs); /* MOV EA,lsreg */ 1325 1393 reg = (op == OPmodass) ? 
CX : DX; 1326 1394 retregs |= mask[reg]; 1327 1395 NEWREG(cs.Irm,reg); 1328 1396 getlvalue_msw(&cs); 1329 1397 gen(c,&cs); /* MOV EA+2,msreg */ 1330 1398 if (e1->Ecount) /* if we gen a CSE */ 1331 1399 cssave(e1,retregs,EOP(e1)); 1332 1400 freenode(e1); 1333 1401 cg = fixresult(e,retregs,pretregs); 1334 1402 return cat4(cr,cl,c,cg); 1335 1403 } 1336 1404 else 1337 1405 { assert(0); 1338 1406 /* NOTREACHED */ 1339 1407 return 0; … … 1348 1416 code *cdshass(elem *e,regm_t *pretregs) 1349 1417 { elem *e1,*e2; 1350 1418 code *cr,*cl,*cg,*c,cs,*ce; 1351 1419 tym_t tym,tyml,uns; 1352 1420 regm_t retregs; 1353 1421 unsigned shiftcnt,op1,op2,reg,v,oper,byte,conste2; 1354 1422 unsigned loopcnt; 1355 1423 unsigned sz; 1356 1424 1357 1425 e1 = e->E1; 1358 1426 e2 = e->E2; 1359 1427 1360 1428 tyml = tybasic(e1->Ety); /* type of lvalue */ 1361 1429 sz = tysize[tyml]; 1362 1430 byte = tybyte(e->Ety) != 0; /* 1 for byte operations */ 1363 1431 uns = tyuns(tyml); 1364 1432 tym = tybasic(e->Ety); /* type of result */ 1365 1433 oper = e->Eoper; 1366 1434 assert(tysize(e2->Ety) <= REGSIZE); 1367 1435 1436 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 1437 unsigned grex = rex << 16; // 64 bit operands 1438 1368 1439 // if our lvalue is a cse, make sure we evaluate for result in register 1369 1440 if (e1->Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,&retregs,®)) 1370 1441 *pretregs |= ALLREGS; 1371 1442 1372 1443 #if SCPP 1373 1444 // Do this until the rest of the compiler does OPshr/OPashr correctly 1374 1445 if (oper == OPshrass) 1375 1446 oper = (uns) ? OPshrass : OPashrass; 1376 1447 #endif 1377 1448 1378 1449 // Select opcodes. op2 is used for msw for long shifts. 
1379 1450 1380 1451 switch (oper) 1381 1452 { case OPshlass: 1382 1453 op1 = 4; // SHL 1383 1454 op2 = 2; // RCL 1384 1455 break; 1385 1456 case OPshrass: 1386 1457 op1 = 5; // SHR 1387 1458 op2 = 3; // RCR … … 1402 1473 if (cnst(e2)) 1403 1474 { 1404 1475 conste2 = TRUE; /* e2 is a constant */ 1405 1476 shiftcnt = e2->EV.Vint; /* byte ordering of host */ 1406 1477 if (config.target_cpu >= TARGET_80286 && 1407 1478 sz <= REGSIZE && 1408 1479 shiftcnt != 1) 1409 1480 v = 0xC1; // SHIFT xx,shiftcnt 1410 1481 else if (shiftcnt <= 3) 1411 1482 { loopcnt = shiftcnt; 1412 1483 v = 0xD1; // SHIFT xx,1 1413 1484 } 1414 1485 } 1415 1486 if (v == 0xD3) /* if COUNT == CL */ 1416 1487 { retregs = mCX; 1417 1488 cr = codelem(e2,&retregs,FALSE); 1418 1489 } 1419 1490 else 1420 1491 freenode(e2); 1421 1492 cl = getlvalue(&cs,e1,mCX); /* get lvalue, preserve CX */ 1422 cl = cat(cl,modEA( cs.Irm)); /* check for modifying register */1493 cl = cat(cl,modEA(&cs)); // check for modifying register 1423 1494 1424 1495 if (*pretregs == 0 || /* if don't return result */ 1425 1496 (*pretregs == mPSW && conste2 && tysize[tym] <= REGSIZE) || 1426 1497 sz > REGSIZE 1427 1498 ) 1428 1499 { retregs = 0; // value not returned in a register 1429 1500 cs.Iop = v ^ byte; 1430 1501 c = CNIL; 1431 1502 while (loopcnt--) 1432 1503 { 1433 1504 NEWREG(cs.Irm,op1); /* make sure op1 is first */ 1434 1505 if (sz <= REGSIZE) 1435 1506 { cs.IFL2 = FLconst; 1436 1507 cs.IEV2.Vint = shiftcnt; 1437 1508 c = gen(c,&cs); /* SHIFT EA,[CL|1] */ 1438 1509 if (*pretregs & mPSW && !loopcnt && conste2) 1439 1510 code_orflag(c,CFpsw); 1440 1511 } 1441 1512 else /* TYlong */ 1442 1513 { cs.Iop = 0xD1; /* plain shift */ … … 1477 1548 1478 1549 if (sz == 2 * REGSIZE && *pretregs) 1479 1550 { retregs = *pretregs & (ALLREGS | mBP); 1480 1551 if (retregs) 1481 1552 { ce = allocreg(&retregs,®,tym); 1482 1553 cs.Iop = 0x8B; 1483 1554 1484 1555 /* be careful not to trash any index regs */ 1485 1556 /* do MSW first (which can't be 
an index reg) */ 1486 1557 getlvalue_msw(&cs); 1487 1558 NEWREG(cs.Irm,reg); 1488 1559 cg = gen(CNIL,&cs); 1489 1560 getlvalue_lsw(&cs); 1490 1561 reg = findreglsw(retregs); 1491 1562 NEWREG(cs.Irm,reg); 1492 1563 gen(cg,&cs); 1493 1564 if (*pretregs & mPSW) 1494 1565 cg = cat(cg,tstresult(retregs,tyml,TRUE)); 1495 1566 } 1496 1567 else /* flags only */ 1497 { retregs = ALLREGS & ~idxregm( cs.Irm,cs.Isib);1568 { retregs = ALLREGS & ~idxregm(&cs); 1498 1569 ce = allocreg(&retregs,®,TYint); 1499 1570 cs.Iop = 0x8B; 1500 1571 NEWREG(cs.Irm,reg); 1501 1572 cg = gen(CNIL,&cs); /* MOV reg,EA */ 1502 1573 cs.Iop = 0x0B; /* OR reg,EA+2 */ 1503 1574 cs.Iflags |= CFpsw; 1504 1575 getlvalue_msw(&cs); 1505 1576 gen(cg,&cs); 1506 1577 } 1507 1578 c = cat3(c,ce,cg); 1508 1579 } 1509 1580 cg = CNIL; 1510 1581 } 1511 1582 1512 1583 1513 1584 else /* else must evaluate in register */ 1514 1585 { 1515 1586 if (sz <= REGSIZE) 1516 { regm_t possregs; 1517 1518 possregs = ALLREGS & ~mCX & ~idxregm(cs.Irm,cs.Isib); 1585 { 1586 regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs); 1519 1587 if (byte) 1520 1588 possregs &= BYTEREGS; 1521 1589 retregs = *pretregs & possregs; 1522 1590 if (retregs == 0) 1523 1591 retregs = possregs; 1524 1592 cg = allocreg(&retregs,®,tym); 1525 1593 cs.Iop = 0x8B ^ byte; 1526 c s.Irm |= modregrm(0,reg,0);1594 code_newreg(&cs, reg); 1527 1595 c = ce = gen(CNIL,&cs); /* MOV reg,EA */ 1528 if ( I32)1596 if (!I16) 1529 1597 { 1530 1598 assert(!byte || (mask[reg] & BYTEREGS)); 1531 ce = genc2(CNIL,v ^ byte,modregrm(3,op1,reg),shiftcnt); 1599 ce = genc2(CNIL,v ^ byte,modregrmx(3,op1,reg),shiftcnt); 1600 code_orrex(ce, rex); 1532 1601 /* We can do a 32 bit shift on a 16 bit operand if */ 1533 1602 /* it's a left shift and we're not concerned about */ 1534 1603 /* the flags. Remember that flags are not set if */ 1535 1604 /* a shift of 0 occurs. 
*/ 1536 1605 if (tysize[tym] == SHORTSIZE && 1537 1606 (oper == OPshrass || oper == OPashrass || 1538 1607 (*pretregs & mPSW && conste2))) 1539 1608 ce->Iflags |= CFopsize; /* 16 bit operand */ 1540 1609 cat(c,ce); 1541 1610 } 1542 1611 else 1543 1612 { 1544 1613 while (loopcnt--) 1545 1614 { /* Generate shift instructions. */ 1546 1615 genc2(ce,v ^ byte,modregrm(3,op1,reg),shiftcnt); 1547 1616 } 1548 1617 } 1549 1618 if (*pretregs & mPSW && conste2) 1550 1619 { assert(shiftcnt); 1551 1620 *pretregs &= ~mPSW; // result is already in flags … … 1596 1665 e1 = e->E1; 1597 1666 e2 = e->E2; 1598 1667 if (*pretregs == 0) /* if don't want result */ 1599 1668 { cl = codelem(e1,pretregs,FALSE); 1600 1669 *pretregs = 0; /* in case e1 changed it */ 1601 1670 cr = codelem(e2,pretregs,FALSE); 1602 1671 return cat(cl,cr); 1603 1672 } 1604 1673 1605 1674 jop = jmpopcode(e); // must be computed before 1606 1675 // leaves are free'd 1607 1676 reverse = 0; 1608 1677 cl = cr = CNIL; 1609 1678 op = e->Eoper; 1610 1679 assert(OTrel(op)); 1611 1680 eqorne = (op == OPeqeq) || (op == OPne); 1612 1681 1613 1682 tym = tybasic(e1->Ety); 1614 1683 sz = tysize[tym]; 1615 1684 byte = sz == 1; 1685 1686 unsigned rex = (I64 && sz == 8) ? 
REX_W : 0; 1687 unsigned grex = rex << 16; // 64 bit operands 1688 1616 1689 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1617 1690 if (tyfloating(tym)) /* if floating operation */ 1618 1691 { 1619 1692 retregs = mPSW; 1620 1693 c = orth87(e,&retregs); 1621 1694 goto L3; 1622 1695 } 1623 1696 #else 1624 1697 if (tyfloating(tym)) /* if floating operation */ 1625 1698 { 1626 1699 if (config.inline8087) 1627 1700 { retregs = mPSW; 1628 1701 c = orth87(e,&retregs); 1629 1702 } 1630 1703 else 1631 1704 { int clib; 1632 1705 1633 1706 retregs = 0; /* skip result for now */ 1634 1707 if (iffalse(e2)) /* second operand is constant 0 */ 1635 1708 { assert(!eqorne); /* should be OPbool or OPnot */ … … 1646 1719 cl = codelem(e1,&retregs,FALSE); 1647 1720 retregs = 0; 1648 1721 c = callclib(e,clib,&retregs,0); 1649 1722 freenode(e2); 1650 1723 } 1651 1724 else 1652 1725 { clib = CLIBdcmp; 1653 1726 if (rel_exception(op)) 1654 1727 clib += CLIBdcmpexc - CLIBdcmp; 1655 1728 c = opdouble(e,&retregs,clib); 1656 1729 } 1657 1730 } 1658 1731 goto L3; 1659 1732 } 1660 1733 #endif 1661 1734 1662 1735 /* If it's a signed comparison of longs, we have to call a library */ 1663 1736 /* routine, because we don't know the target of the signed branch */ 1664 1737 /* (have to set up flags so that jmpopcode() will do it right) */ 1665 1738 if (!eqorne && 1666 ( !I32&& tym == TYlong && tybasic(e2->Ety) == TYlong ||1667 I32 && tym == TYllong && tybasic(e2->Ety) == TYllong)1739 (I16 && tym == TYlong && tybasic(e2->Ety) == TYlong || 1740 I32 && tym == TYllong && tybasic(e2->Ety) == TYllong) 1668 1741 ) 1669 1742 { retregs = mDX | mAX; 1670 1743 cl = codelem(e1,&retregs,FALSE); 1671 1744 retregs = mCX | mBX; 1672 1745 cr = scodelem(e2,&retregs,mDX | mAX,FALSE); 1673 1746 retregs = 0; 1674 1747 c = callclib(e,CLIBlcmp,&retregs,0); /* gross, but it works */ 1675 1748 goto L3; 1676 1749 } 1677 1750 1678 1751 /* See if we should swap operands */ 1679 1752 if (e1->Eoper == OPvar && 
e2->Eoper == OPvar && evalinregister(e2)) 1680 1753 { e1 = e->E2; 1681 1754 e2 = e->E1; 1682 1755 reverse = 2; 1683 1756 } 1684 1757 1685 1758 retregs = allregs; 1686 1759 if (byte) 1687 1760 retregs = BYTEREGS; 1688 1761 1689 1762 c = CNIL; 1690 1763 ce = CNIL; 1691 cs.Iflags = ( I32&& sz == SHORTSIZE) ? CFopsize : 0;1692 cs.Irex = 0;1764 cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0; 1765 cs.Irex = rex; 1693 1766 if (sz > REGSIZE) 1694 1767 ce = gennop(ce); 1695 1768 1696 1769 switch (e2->Eoper) 1697 1770 { 1698 1771 default: 1699 1772 L2: 1700 1773 cl = scodelem(e1,&retregs,0,TRUE); /* compute left leaf */ 1701 1774 L1: 1702 1775 rretregs = allregs & ~retregs; 1703 1776 if (byte) 1704 1777 rretregs &= BYTEREGS; 1705 1778 cr = scodelem(e2,&rretregs,retregs,TRUE); /* get right leaf */ 1706 1779 if (sz <= REGSIZE) /* CMP reg,rreg */ 1707 1780 { reg = findreg(retregs); /* get reg that e1 is in */ 1708 1781 rreg = findreg(rretregs); 1709 1782 c = genregs(CNIL,0x3B ^ byte ^ reverse,reg,rreg); 1710 if (I32 && sz == SHORTSIZE) 1783 code_orrex(c, rex); 1784 if (!I16 && sz == SHORTSIZE) 1711 1785 c->Iflags |= CFopsize; /* compare only 16 bits */ 1712 1786 } 1713 1787 else 1714 1788 { assert(sz <= 2 * REGSIZE); 1715 1789 1716 1790 /* Compare MSW, if they're equal then compare the LSW */ 1717 1791 reg = findregmsw(retregs); 1718 1792 rreg = findregmsw(rretregs); 1719 1793 c = genregs(CNIL,0x3B ^ reverse,reg,rreg); /* CMP reg,rreg */ 1720 if ( I32&& sz == 6)1794 if (!I16 && sz == 6) 1721 1795 c->Iflags |= CFopsize; /* seg is only 16 bits */ 1722 1796 genjmp(c,JNE,FLcode,(block *) ce); /* JNE nop */ 1723 1797 1724 1798 reg = findreglsw(retregs); 1725 1799 rreg = findreglsw(rretregs); 1726 1800 genregs(c,0x3B ^ reverse,reg,rreg); /* CMP reg,rreg */ 1727 1801 } 1728 1802 break; 1729 1803 case OPrelconst: 1730 1804 fl = el_fl(e2); 1731 1805 switch (fl) 1732 1806 { case FLfunc: 1733 1807 fl = FLextern; // so it won't be self-relative 1734 1808 break; 1735 1809 case FLdata: 
1736 1810 case FLudata: 1737 1811 case FLextern: 1738 1812 if (sz > REGSIZE) // compare against DS, not DGROUP 1739 1813 goto L2; 1740 1814 break; … … 1747 1821 cs.IEVsym2 = e2->EV.sp.Vsym; 1748 1822 offset2 = e2->EV.sp.Voffset; 1749 1823 if (sz > REGSIZE) 1750 1824 { cs.Iflags |= CFseg; 1751 1825 cs.IEVoffset2 = 0; 1752 1826 } 1753 1827 else 1754 1828 { cs.Iflags |= CFoff; 1755 1829 cs.IEVoffset2 = offset2; 1756 1830 } 1757 1831 goto L4; 1758 1832 1759 1833 case OPconst: 1760 1834 // If compare against 0 1761 1835 if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) && 1762 1836 isregvar(e1,&retregs,®) 1763 1837 ) 1764 1838 { // Just do a TEST instruction 1765 1839 c = genregs(NULL,0x85 ^ byte,reg,reg); // TEST reg,reg 1766 1840 c->Iflags |= (cs.Iflags & CFopsize) | CFpsw; 1841 code_orrex(c, rex); 1767 1842 retregs = mPSW; 1768 1843 break; 1769 1844 } 1770 1845 1771 1846 if (!tyuns(tym) && !tyuns(e2->Ety) && 1772 !boolres(e2) && !(*pretregs & mPSW) && sz == REGSIZE && 1773 (I32 || op == OPlt || op == OPge)) 1847 !boolres(e2) && !(*pretregs & mPSW) && 1848 (sz == REGSIZE || (I64 && sz == 4)) && 1849 (!I16 || op == OPlt || op == OPge)) 1774 1850 { unsigned regi; 1775 1851 1776 1852 assert(*pretregs & (allregs)); 1777 1853 cl = codelem(e1,pretregs,FALSE); 1778 1854 reg = findreg(*pretregs); 1779 1855 c = getregs(mask[reg]); 1780 1856 switch (op) 1781 1857 { case OPle: 1782 c = genc2(c,0x81, modregrm(3,0,reg),(unsigned)-1); /* ADD reg,-1 */1783 genc2(c,0x81, modregrm(3,2,reg),0); /* ADC reg,0 */1858 c = genc2(c,0x81,grex | modregrmx(3,0,reg & 7),(unsigned)-1); // ADD reg,-1 1859 genc2(c,0x81,grex | modregrmx(3,2,reg & 7),0); // ADC reg,0 1784 1860 goto oplt; 1785 1861 case OPgt: 1786 c = gen2(c,0xF7, modregrm(3,3,reg)); // NEG reg1862 c = gen2(c,0xF7,grex | modregrmx(3,3,reg & 7)); // NEG reg 1787 1863 #if TARGET_WINDOS 1788 1864 // What does the Windows platform do? 1789 1865 // lower INT_MIN by 1? 
See test exe9.c 1790 1866 // BUG: fix later 1791 genc2(c,0x81, modregrm(3,3,reg),0); // SBB reg,01867 genc2(c,0x81,grex | modregrmx(3,3,reg & 7),0); // SBB reg,0 1792 1868 #endif 1793 1869 goto oplt; 1794 1870 case OPlt: 1795 1871 oplt: 1796 if ( I32)1797 c = genc2(c,0xC1, modregrm(3,5,reg),31); /* SHR reg,31 */1872 if (!I16) 1873 c = genc2(c,0xC1,grex | modregrmx(3,5,reg & 7),sz * 8 - 1); // SHR reg,31 1798 1874 else 1799 1875 { /* 8088-286 do not have a barrel shifter, so use this 1800 1876 faster sequence 1801 1877 */ 1802 1878 c = genregs(c,0xD1,0,reg); /* ROL reg,1 */ 1803 1879 if (reghasvalue(allregs,1,®i)) 1804 1880 c = genregs(c,0x23,reg,regi); /* AND reg,regi */ 1805 1881 else 1806 1882 c = genc2(c,0x81,modregrm(3,4,reg),1); /* AND reg,1 */ 1807 1883 } 1808 1884 break; 1809 1885 case OPge: 1810 1886 c = genregs(c,0xD1,4,reg); /* SHL reg,1 */ 1887 code_orrex(c,rex); 1811 1888 genregs(c,0x19,reg,reg); /* SBB reg,reg */ 1812 gen1(c,0x40 + reg); /* INC reg */ 1889 code_orrex(c,rex); 1890 if (I64) 1891 { 1892 c = gen2(c,0xFF,modregrmx(3,0,reg)); // INC reg 1893 code_orrex(c, rex); 1894 } 1895 else 1896 c = gen1(c,0x40 + reg); // INC reg 1813 1897 break; 1814 1898 default: 1815 1899 assert(0); 1816 1900 } 1817 1901 freenode(e2); 1818 1902 goto ret; 1819 1903 } 1820 1904 1821 1905 if (sz > REGSIZE) 1822 1906 cs.IEV2.Vint = MSREG(e2->EV.Vllong); 1823 1907 else 1824 1908 cs.IEV2.Vint = e2->EV.Vint; 1825 1909 cs.IFL2 = FLconst; 1826 1910 L4: 1827 1911 cs.Iop = 0x81 ^ byte; 1828 1912 1829 1913 /* if ((e1 is data or a '*' reference) and it's not a 1830 1914 * common subexpression 1831 1915 */ 1832 1916 1833 1917 if ((e1->Eoper == OPvar && datafl[el_fl(e1)] || 1834 1918 e1->Eoper == OPind) && 1835 1919 !evalinregister(e1)) 1836 1920 { cl = getlvalue(&cs,e1,RMload); 1837 1921 freenode(e1); 1838 1922 if (evalinregister(e2)) 1839 1923 { 1840 retregs = idxregm( cs.Irm,cs.Isib);1925 retregs = idxregm(&cs); 1841 1926 if ((cs.Iflags & CFSEG) == CFes) 1842 1927 retregs |= mES; 
/* take no chances */ 1843 1928 rretregs = allregs & ~retregs; 1844 1929 if (byte) 1845 1930 rretregs &= BYTEREGS; 1846 1931 cr = scodelem(e2,&rretregs,retregs,TRUE); 1847 1932 cs.Iop = 0x39 ^ byte ^ reverse; 1848 1933 if (sz > REGSIZE) 1849 1934 { 1850 1935 rreg = findregmsw(rretregs); 1851 1936 cs.Irm |= modregrm(0,rreg,0); 1852 1937 getlvalue_msw(&cs); 1853 1938 c = gen(CNIL,&cs); /* CMP EA+2,rreg */ 1854 1939 if (I32 && sz == 6) 1855 1940 c->Iflags |= CFopsize; /* seg is only 16 bits */ 1856 1941 genjmp(c,JNE,FLcode,(block *) ce); /* JNE nop */ 1857 1942 rreg = findreglsw(rretregs); 1858 1943 NEWREG(cs.Irm,rreg); 1859 1944 getlvalue_lsw(&cs); 1860 1945 } 1861 1946 else 1862 1947 { 1863 1948 rreg = findreg(rretregs); 1864 c s.Irm |= modregrm(0,rreg,0);1949 code_newreg(&cs, rreg); 1865 1950 } 1866 1951 } 1867 1952 else 1868 1953 { 1869 1954 cs.Irm |= modregrm(0,7,0); 1870 1955 if (sz > REGSIZE) 1871 1956 { 1872 1957 #if TARGET_FLAT 1873 1958 if (sz == 6) 1874 1959 assert(0); 1875 1960 #endif 1876 1961 if (e2->Eoper == OPrelconst) 1877 1962 { cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg; 1878 1963 cs.IEVoffset2 = 0; 1879 1964 } 1880 1965 getlvalue_msw(&cs); 1881 1966 c = gen(CNIL,&cs); /* CMP EA+2,const */ 1882 if ( I32&& sz == 6)1967 if (!I16 && sz == 6) 1883 1968 c->Iflags |= CFopsize; /* seg is only 16 bits */ 1884 1969 genjmp(c,JNE,FLcode,(block *) ce); /* JNE nop */ 1885 1970 if (e2->Eoper == OPconst) 1886 1971 cs.IEV2.Vint = e2->EV.Vllong; 1887 1972 else 1888 1973 { /* Turn off CFseg, on CFoff */ 1889 1974 cs.Iflags ^= CFseg | CFoff; 1890 1975 cs.IEVoffset2 = offset2; 1891 1976 } 1892 1977 getlvalue_lsw(&cs); 1893 1978 } 1894 1979 freenode(e2); 1895 1980 } 1896 1981 c = gen(c,&cs); 1897 1982 break; 1898 1983 } 1899 1984 1900 1985 if (evalinregister(e2) && !OTassign(e1->Eoper) && 1901 1986 !isregvar(e1,NULL,NULL)) 1902 1987 { regm_t m; … … 1909 1994 } 1910 1995 if ((e1->Eoper == OPstrcmp || (OTassign(e1->Eoper) && sz <= REGSIZE)) && 1911 1996 
!boolres(e2) && !evalinregister(e1)) 1912 1997 { 1913 1998 retregs = mPSW; 1914 1999 cl = scodelem(e1,&retregs,0,FALSE); 1915 2000 freenode(e2); 1916 2001 break; 1917 2002 } 1918 2003 if (sz <= REGSIZE && !boolres(e2) && e1->Eoper == OPadd && *pretregs == mPSW) 1919 2004 { 1920 2005 retregs |= mPSW; 1921 2006 cl = scodelem(e1,&retregs,0,FALSE); 1922 2007 freenode(e2); 1923 2008 break; 1924 2009 } 1925 2010 cl = scodelem(e1,&retregs,0,TRUE); /* compute left leaf */ 1926 2011 if (sz == 1) 1927 2012 { 1928 2013 reg = findreg(retregs & allregs); // get reg that e1 is in 1929 cs.Irm = modregrm(3,7,reg); 2014 cs.Irm = modregrm(3,7,reg & 7); 2015 if (reg & 8) 2016 cs.Irex |= REX_B; 1930 2017 if (e1->Eoper == OPvar && e1->EV.sp.Voffset == 1 && e1->EV.sp.Vsym->Sfl == FLreg) 2018 { assert(reg < 4); 1931 2019 cs.Irm |= 4; // use upper register half 2020 } 1932 2021 } 1933 2022 else if (sz <= REGSIZE) 1934 2023 { /* CMP reg,const */ 1935 2024 reg = findreg(retregs & allregs); // get reg that e1 is in 1936 2025 rretregs = allregs & ~retregs; 1937 2026 if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,&rreg)) 1938 { code *cc;1939 1940 c c = genregs(CNIL,0x3B,reg,rreg);1941 if ( I32)2027 { 2028 code *cc = genregs(CNIL,0x3B,reg,rreg); 2029 code_orrex(cc, rex); 2030 if (!I16) 1942 2031 cc->Iflags |= cs.Iflags & CFopsize; 1943 2032 c = cat(c,cc); 1944 2033 freenode(e2); 1945 2034 break; 1946 2035 } 1947 cs.Irm = modregrm(3,7,reg); 2036 cs.Irm = modregrm(3,7,reg & 7); 2037 if (reg & 8) 2038 cs.Irex |= REX_B; 1948 2039 } 1949 2040 else if (sz <= 2 * REGSIZE) 1950 2041 { 1951 2042 reg = findregmsw(retregs); // get reg that e1 is in 1952 2043 cs.Irm = modregrm(3,7,reg); 1953 2044 c = gen(CNIL,&cs); /* CMP reg,MSW */ 1954 2045 if (I32 && sz == 6) 1955 2046 c->Iflags |= CFopsize; /* seg is only 16 bits */ 1956 2047 genjmp(c,JNE,FLcode,(block *) ce); /* JNE ce */ 1957 2048 1958 2049 reg = findreglsw(retregs); 1959 2050 cs.Irm = modregrm(3,7,reg); 1960 2051 if (e2->Eoper == 
OPconst) 1961 2052 cs.IEV2.Vint = e2->EV.Vlong; 1962 2053 else 1963 2054 { /* Turn off CFseg, on CFoff */ 1964 2055 cs.Iflags ^= CFseg | CFoff; 1965 2056 cs.IEVoffset2 = offset2; 1966 2057 } 1967 2058 } … … 1975 2066 if (e2->Ecount) 1976 2067 goto L2; 1977 2068 goto L5; 1978 2069 1979 2070 case OPvar: 1980 2071 if ((e1->Eoper == OPvar && 1981 2072 isregvar(e2,&rretregs,®) && 1982 2073 sz <= REGSIZE 1983 2074 ) || 1984 2075 (e1->Eoper == OPind && 1985 2076 isregvar(e2,&rretregs,®) && 1986 2077 !evalinregister(e1) && 1987 2078 sz <= REGSIZE 1988 2079 ) 1989 2080 ) 1990 2081 { 1991 2082 // CMP EA,e2 1992 2083 cl = getlvalue(&cs,e1,RMload); 1993 2084 freenode(e1); 1994 2085 cs.Iop = 0x39 ^ byte ^ reverse; 1995 c s.Irm |= modregrm(0,reg,0);2086 code_newreg(&cs,reg); 1996 2087 c = gen(c,&cs); 1997 2088 freenode(e2); 1998 2089 break; 1999 2090 } 2000 2091 L5: 2001 2092 cl = scodelem(e1,&retregs,0,TRUE); /* compute left leaf */ 2002 2093 if (sz <= REGSIZE) /* CMP reg,EA */ 2003 { unsigned opsize; 2004 2094 { 2005 2095 reg = findreg(retregs & allregs); // get reg that e1 is in 2006 opsize = cs.Iflags & CFopsize;2096 unsigned opsize = cs.Iflags & CFopsize; 2007 2097 c = cat(c,loadea(e2,&cs,0x3B ^ byte ^ reverse,reg,0,RMload | retregs,0)); 2008 2098 code_orflag(c,opsize); 2009 2099 } 2010 2100 else if (sz <= 2 * REGSIZE) 2011 2101 { 2012 2102 reg = findregmsw(retregs); /* get reg that e1 is in */ 2013 2103 // CMP reg,EA 2014 2104 c = loadea(e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0); 2015 2105 if (I32 && sz == 6) 2016 2106 c->Iflags |= CFopsize; /* seg is only 16 bits */ 2017 2107 genjmp(c,JNE,FLcode,(block *) ce); /* JNE ce */ 2018 2108 reg = findreglsw(retregs); 2019 2109 if (e2->Eoper == OPind) 2020 2110 { 2021 2111 NEWREG(cs.Irm,reg); 2022 2112 getlvalue_lsw(&cs); 2023 2113 c = gen(c,&cs); 2024 2114 } 2025 2115 else 2026 2116 c = cat(c,loadea(e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0)); 2027 2117 } 2028 2118 else 2029 2119 assert(0); 2030 2120 
freenode(e2); 2031 2121 break; 2032 2122 } 2033 2123 c = cat(c,ce); 2034 2124 2035 2125 L3: 2036 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) /* if return result in register*/ 2037 { code *nop; 2038 regm_t save; 2039 2040 nop = CNIL; 2041 save = regcon.immed.mval; 2126 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register 2127 { code *nop = CNIL; 2128 regm_t save = regcon.immed.mval; 2042 2129 cg = allocreg(&retregs,®,TYint); 2043 2130 regcon.immed.mval = save; 2044 2131 if ((*pretregs & mPSW) == 0 && 2045 2132 (jop == JC || jop == JNC)) 2046 2133 { 2047 2134 cg = cat(cg,getregs(retregs)); 2048 2135 cg = genregs(cg,0x19,reg,reg); /* SBB reg,reg */ 2136 if (rex) 2137 code_orrex(cg, rex); 2049 2138 if (flag) 2050 2139 ; // cdcond() will handle it 2051 2140 else if (jop == JNC) 2052 gen1(cg,0x40 + reg); /* INC reg */ 2141 { 2142 if (I64) 2143 { 2144 cg = gen2(cg,0xFF,modregrmx(3,0,reg)); // INC reg 2145 code_orrex(cg, rex); 2146 } 2147 else 2148 gen1(cg,0x40 + reg); // INC reg 2149 } 2053 2150 else 2054 gen2(cg,0xF7,modregrm(3,3,reg)); /* NEG reg */ 2151 { gen2(cg,0xF7,modregrmx(3,3,reg)); /* NEG reg */ 2152 code_orrex(cg, rex); 2153 } 2154 } 2155 else if (I64 && sz == 8) 2156 { 2157 assert(!flag); 2158 cg = movregconst(cg,reg,1,64|8); // MOV reg,1 2159 nop = gennop(nop); 2160 cg = genjmp(cg,jop,FLcode,(block *) nop); // Jtrue nop 2161 // MOV reg,0 2162 movregconst(cg,reg,0,(*pretregs & mPSW) ? 64|8 : 64); 2163 regcon.immed.mval &= ~mask[reg]; 2055 2164 } 2056 2165 else 2057 2166 { 2058 2167 assert(!flag); 2059 2168 cg = movregconst(cg,reg,1,8); // MOV reg,1 2060 2169 nop = gennop(nop); 2061 2170 cg = genjmp(cg,jop,FLcode,(block *) nop); // Jtrue nop 2062 2171 // MOV reg,0 2063 2172 movregconst(cg,reg,0,(*pretregs & mPSW) ? 
8 : 0); 2064 2173 regcon.immed.mval &= ~mask[reg]; 2065 2174 } 2066 2175 *pretregs = retregs; 2067 2176 c = cat3(c,cg,nop); 2068 2177 } 2069 2178 ret: 2070 2179 return cat3(cl,cr,c); 2071 2180 } 2072 2181 2073 2182 2074 2183 /********************************** … … 2120 2229 reg = findreglsw(retregs); 2121 2230 rreg = findreglsw(rretregs); 2122 2231 clsw = genregs(CNIL,0x3B,reg,rreg); /* CMP reg,rreg */ 2123 2232 break; 2124 2233 case OPconst: 2125 2234 cs.IEV2.Vint = MSREG(e2->EV.Vllong); // MSW first 2126 2235 cs.IFL2 = FLconst; 2127 2236 cs.Iop = 0x81; 2128 2237 2129 2238 /* if ((e1 is data or a '*' reference) and it's not a 2130 2239 * common subexpression 2131 2240 */ 2132 2241 2133 2242 if ((e1->Eoper == OPvar && datafl[el_fl(e1)] || 2134 2243 e1->Eoper == OPind) && 2135 2244 !evalinregister(e1)) 2136 2245 { cl = getlvalue(&cs,e1,0); 2137 2246 freenode(e1); 2138 2247 if (evalinregister(e2)) 2139 2248 { 2140 retregs = idxregm( cs.Irm,cs.Isib);2249 retregs = idxregm(&cs); 2141 2250 if ((cs.Iflags & CFSEG) == CFes) 2142 2251 retregs |= mES; /* take no chances */ 2143 2252 rretregs = ALLREGS & ~retregs; 2144 2253 cr = scodelem(e2,&rretregs,retregs,TRUE); 2145 2254 rreg = findregmsw(rretregs); 2146 2255 cs.Iop = 0x39; 2147 2256 cs.Irm |= modregrm(0,rreg,0); 2148 2257 getlvalue_msw(&cs); 2149 2258 cmsw = gen(CNIL,&cs); /* CMP EA+2,rreg */ 2150 2259 rreg = findreglsw(rretregs); 2151 2260 NEWREG(cs.Irm,rreg); 2152 2261 } 2153 2262 else 2154 2263 { cs.Irm |= modregrm(0,7,0); 2155 2264 getlvalue_msw(&cs); 2156 2265 cmsw = gen(CNIL,&cs); /* CMP EA+2,const */ 2157 2266 cs.IEV2.Vint = e2->EV.Vlong; 2158 2267 freenode(e2); 2159 2268 } 2160 2269 getlvalue_lsw(&cs); … … 2280 2389 retregs = regmask(e->E1->Ety, e->E1->E1->Ety); 2281 2390 if (retregs & (mST01 | mST0)) // if return in ST0 2282 2391 { 2283 2392 c1 = codelem(e->E1,pretregs,FALSE); 2284 2393 if (*pretregs & mST0) 2285 2394 note87(e, 0, 0); 2286 2395 return c1; 2287 2396 } 2288 2397 else 2289 2398 break; 2290 2399 } 
2291 2400 if (tycomplex(e->E1->Ety)) 2292 2401 goto Lcomplex; 2293 2402 /* FALL-THROUGH */ 2294 2403 case OPs64_d: 2295 2404 case OPs32_d: 2296 2405 case OPs16_d: 2297 2406 case OPu16_d: 2298 2407 return load87(e,0,pretregs,NULL,-1); 2299 2408 case OPu32_d: 2300 if (I32) 2301 { unsigned reg; 2302 2303 retregs = ALLREGS; 2409 if (!I16) 2410 { 2411 unsigned retregs = ALLREGS; 2304 2412 c1 = codelem(e->E1, &retregs, FALSE); 2305 reg = findreg(retregs);2413 unsigned reg = findreg(retregs); 2306 2414 c1 = genfltreg(c1, 0x89, reg, 0); 2307 2415 regwithvalue(c1,ALLREGS,0,®,0); 2308 2416 genfltreg(c1, 0x89, reg, REGSIZE); 2309 2417 2310 2418 cat(c1, push87()); 2311 2419 genfltreg(c1,0xDF,5,0); // FILD m64int 2312 2420 2313 2421 retregs = mST0 | (*pretregs & mPSW); 2314 2422 c2 = fixresult87(e, retregs, pretregs); 2315 2423 return cat(c1, c2); 2316 2424 } 2317 2425 break; 2318 2426 case OPd_s16: 2319 2427 case OPd_s32: 2320 2428 case OPd_u16: 2321 2429 case OPd_s64: 2322 2430 return cnvt87(e,pretregs); 2323 2431 case OPd_u32: // use subroutine, not 8087 2324 2432 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 2325 2433 retregs = mST0; … … 2346 2454 c2 = callclib(e,CLIBld_u64,pretregs,0); 2347 2455 return cat(c1,c2); 2348 2456 } 2349 2457 } 2350 2458 retregs = regmask(e->E1->Ety, TYnfunc); 2351 2459 L1: 2352 2460 c1 = codelem(e->E1,&retregs,FALSE); 2353 2461 for (i = 0; 1; i++) 2354 2462 { assert(i < arraysize(clib)); 2355 2463 if (clib[i][0] == e->Eoper) 2356 2464 { c2 = callclib(e,clib[i][1],pretregs,0); 2357 2465 break; 2358 2466 } 2359 2467 } 2360 2468 return cat(c1,c2); 2361 2469 } 2362 2470 2363 2471 2364 2472 /*************************** 2365 2473 * Convert short to long. 
2366 * For OPs htlng, OPu16_32, OPptrlptr, OPulngllng, OPlngllng2474 * For OPs16_32, OPu16_32, OPptrlptr, OPu32_64, OPs32_64 2367 2475 */ 2368 2476 2369 2477 code *cdshtlng(elem *e,regm_t *pretregs) 2370 2478 { code *c,*ce,*c1,*c2,*c3,*c4; 2371 2479 unsigned reg; 2372 2480 unsigned char op; 2373 2481 regm_t retregs; 2374 2482 int e1comsub; 2375 2483 2376 2484 e1comsub = e->E1->Ecount; 2377 2485 if ((*pretregs & (ALLREGS | mBP)) == 0) // if don't need result in regs 2378 2486 c = codelem(e->E1,pretregs,FALSE); /* then conversion isn't necessary */ 2379 2487 2380 2488 else if ((op = e->Eoper) == OPptrlptr || 2381 ( !I32&& op == OPu16_32) ||2382 ( I32 && op == OPulngllng)2489 (I16 && op == OPu16_32) || 2490 (I32 && op == OPu32_64) 2383 2491 ) 2384 2492 { 2385 2493 regm_t regm; 2386 2494 tym_t tym1; 2387 2495 2388 2496 retregs = *pretregs & mLSW; 2389 2497 assert(retregs); 2390 2498 tym1 = tybasic(e->E1->Ety); 2391 2499 c = codelem(e->E1,&retregs,FALSE); 2392 2500 2393 2501 regm = *pretregs & (mMSW & ALLREGS); 2394 2502 if (regm == 0) /* *pretregs could be mES */ 2395 2503 regm = mMSW & ALLREGS; 2396 2504 ce = allocreg(&regm,&reg,TYint); 2397 2505 if (e1comsub) 2398 2506 ce = cat(ce,getregs(retregs)); 2399 2507 if (op == OPptrlptr) 2400 2508 { int segreg; 2401 2509 2402 2510 /* BUG: what about pointers to functions? 
*/ 2403 2511 switch (tym1) 2404 2512 { 2405 2513 case TYnptr: segreg = SEG_DS; break; 2406 2514 case TYcptr: segreg = SEG_CS; break; 2407 2515 case TYsptr: segreg = SEG_SS; break; 2408 2516 default: assert(0); 2409 2517 } 2410 2518 ce = gen2(ce,0x8C,modregrm(3,segreg,reg)); /* MOV reg,segreg */ 2411 2519 } 2412 2520 else 2413 2521 ce = movregconst(ce,reg,0,0); /* 0 extend */ 2414 2522 2415 2523 c = cat3(c,ce,fixresult(e,retregs | regm,pretregs)); 2416 2524 } 2417 else if ( I32 && (op == OPshtlng|| op == OPu16_32))2525 else if (!I16 && (op == OPs16_32 || op == OPu16_32)) 2418 2526 { 2419 /* OPshtlng || OPu16_32 */2420 elem *e1;2421 2527 elem *e11; 2422 2528 2423 e 1 = e->E1;2529 elem *e1 = e->E1; 2424 2530 2425 2531 if (e1->Eoper == OPu8_16 && !e1->Ecount && 2426 2532 ((e11 = e1->E1)->Eoper == OPvar || (e11->Eoper == OPind && !e11->Ecount)) 2427 2533 ) 2428 2534 { code cs; 2429 2535 2430 2536 retregs = *pretregs & BYTEREGS; 2431 2537 if (!retregs) 2432 2538 retregs = BYTEREGS; 2433 2539 c1 = allocreg(&retregs,&reg,TYint); 2434 2540 c2 = movregconst(NULL,reg,0,0); // XOR reg,reg 2435 2541 c3 = loadea(e11,&cs,0x8A,reg,0,retregs,retregs); // MOV regL,EA 2436 2542 freenode(e11); 2437 2543 freenode(e1); 2438 2544 } 2439 2545 else if (e1->Eoper == OPvar || 2440 2546 (e1->Eoper == OPind && !e1->Ecount)) 2441 2547 { code cs; 2442 2548 unsigned opcode; 2443 2549 … … 2453 2559 else 2454 2560 { 2455 2561 L2: 2456 2562 retregs = *pretregs; 2457 2563 *pretregs &= ~mPSW; /* flags are already set */ 2458 2564 c1 = codelem(e1,&retregs,FALSE); 2459 2565 c2 = getregs(retregs); 2460 2566 if (op == OPu16_32 && c1) 2461 2567 { code *cx; 2462 2568 2463 2569 cx = code_last(c1); 2464 2570 if (cx->Iop == 0x81 && (cx->Irm & modregrm(3,7,0)) == modregrm(3,4,0)) 2465 2571 { 2466 2572 // Convert AND of a word to AND of a dword, zeroing upper word 2467 2573 retregs = mask[cx->Irm & 7]; 2468 2574 cx->Iflags &= ~CFopsize; 2469 2575 cx->IEV2.Vint &= 0xFFFF; 2470 2576 goto L1; 2471 2577 } 2472 2578 } 
2473 if (op == OPs htlng&& retregs == mAX)2579 if (op == OPs16_32 && retregs == mAX) 2474 2580 c2 = gen1(c2,0x98); /* CWDE */ 2475 2581 else 2476 2582 { 2477 2583 reg = findreg(retregs); 2478 2584 if (config.flags4 & CFG4speed && op == OPu16_32) 2479 2585 { // AND reg,0xFFFF 2480 2586 c3 = genc2(NULL,0x81,modregrm(3,4,reg),0xFFFFu); 2481 2587 } 2482 2588 else 2483 2589 { 2484 2590 c3 = genregs(CNIL,0x0F,reg,reg); 2485 2591 c3->Iop2 = (op == OPu16_32) ? 0xB7 : 0xBF; /* MOVZX/MOVSX reg,reg */ 2486 2592 } 2487 2593 c2 = cat(c2,c3); 2488 2594 } 2489 2595 L1: 2490 2596 c3 = e1comsub ? getregs(retregs) : CNIL; 2491 2597 } 2492 2598 c4 = fixresult(e,retregs,pretregs); 2493 2599 c = cat4(c1,c2,c3,c4); 2494 2600 } 2495 2601 else if (*pretregs & mPSW || config.target_cpu < TARGET_80286) 2496 2602 { 2497 // OPs htlng, OPlngllng2603 // OPs16_32, OPs32_64 2498 2604 // CWD doesn't affect flags, so we can depend on the integer 2499 2605 // math to provide the flags. 2500 2606 retregs = mAX | mPSW; // want integer result in AX 2501 2607 *pretregs &= ~mPSW; // flags are already set 2502 2608 c1 = codelem(e->E1,&retregs,FALSE); 2503 2609 c2 = getregs(mDX); // sign extend into DX 2504 2610 c2 = gen1(c2,0x99); // CWD/CDQ 2505 2611 c3 = e1comsub ? 
getregs(retregs) : CNIL; 2506 2612 c4 = fixresult(e,mDX | retregs,pretregs); 2507 2613 c = cat4(c1,c2,c3,c4); 2508 2614 } 2509 2615 else 2510 2616 { 2511 // OPs htlng, OPlngllng2617 // OPs16_32, OPs32_64 2512 2618 unsigned msreg,lsreg; 2513 2619 2514 2620 retregs = *pretregs & mLSW; 2515 2621 assert(retregs); 2516 2622 c1 = codelem(e->E1,&retregs,FALSE); 2517 2623 retregs |= *pretregs & mMSW; 2518 2624 c2 = allocreg(&retregs,&reg,e->Ety); 2519 2625 msreg = findregmsw(retregs); 2520 2626 lsreg = findreglsw(retregs); 2521 2627 c3 = genmovreg(NULL,msreg,lsreg); // MOV msreg,lsreg 2522 2628 assert(config.target_cpu >= TARGET_80286); // 8088 can't handle SAR reg,imm8 2523 2629 c3 = genc2(c3,0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1); // SAR msreg,31 2524 2630 c4 = fixresult(e,retregs,pretregs); 2525 2631 c = cat4(c1,c2,c3,c4); 2526 2632 } 2527 2633 return c; 2528 2634 } 2529 2635 2530 2636 2531 2637 /*************************** … … 2533 2639 * For OPu8int and OPs8int. 2534 2640 */ 2535 2641 2536 2642 code *cdbyteint(elem *e,regm_t *pretregs) 2537 2643 { code *c,*ce,*c0,*c1,*c2,*c3,*c4; 2538 2644 regm_t retregs; 2539 2645 unsigned reg; 2540 2646 char op; 2541 2647 char size; 2542 2648 elem *e1; 2543 2649 2544 2650 2545 2651 if ((*pretregs & (ALLREGS | mBP)) == 0) // if don't need result in regs 2546 2652 return codelem(e->E1,pretregs,FALSE); /* then conversion isn't necessary */ 2547 2653 2548 2654 op = e->Eoper; 2549 2655 e1 = e->E1; 2550 2656 c0 = NULL; 2551 2657 if (e1->Eoper == OPcomma) 2552 2658 c0 = docommas(&e1); 2553 if ( I32)2659 if (!I16) 2554 2660 { 2555 2661 if (e1->Eoper == OPvar || (e1->Eoper == OPind && !e1->Ecount)) 2556 2662 { code cs; 2557 2663 unsigned opcode; 2558 2664 2559 2665 retregs = *pretregs; 2560 2666 c1 = allocreg(&retregs,&reg,TYint); 2561 2667 if (config.flags4 & CFG4speed && 2562 2668 op == OPu8int && mask[reg] & BYTEREGS && 2563 2669 config.target_cpu < TARGET_PentiumPro) 2564 2670 { 2565 2671 c2 = movregconst(NULL,reg,0,0); // XOR reg,reg 2566 
2672 c3 = loadea(e1,&cs,0x8A,reg,0,retregs,retregs); // MOV regL,EA 2567 2673 } 2568 2674 else 2569 2675 { 2570 2676 opcode = (op == OPu8int) ? 0x0FB6 : 0x0FBE; // MOVZX/MOVSX reg,EA 2571 2677 c2 = loadea(e1,&cs,opcode,reg,0,0,retregs); 2572 2678 c3 = CNIL; 2573 2679 } … … 2590 2696 retregs = BYTEREGS; 2591 2697 } 2592 2698 else 2593 2699 { 2594 2700 /* CBW doesn't affect flags, so we can depend on the integer */ 2595 2701 /* math to provide the flags. */ 2596 2702 retregs = mAX | (*pretregs & mPSW); /* want integer result in AX */ 2597 2703 } 2598 2704 } 2599 2705 2600 2706 c3 = CNIL; 2601 2707 c1 = codelem(e1,&retregs,FALSE); 2602 2708 reg = findreg(retregs); 2603 2709 if (!c1) 2604 2710 goto L1; 2605 2711 2606 2712 for (c = c1; c->next; c = c->next) 2607 2713 ; /* find previous instruction */ 2608 2714 2609 2715 /* If previous instruction is an AND bytereg,value */ 2610 if (c->Iop == 0x80 && c->Irm == modregrm(3,4,reg ) &&2716 if (c->Iop == 0x80 && c->Irm == modregrm(3,4,reg & 7) && 2611 2717 (op == OPu8int || (c->IEV2.Vuns & 0x80) == 0)) 2612 2718 { 2613 2719 if (*pretregs & mPSW) 2614 2720 c->Iflags |= CFpsw; 2615 2721 c->Iop |= 1; /* convert to word operation */ 2616 2722 c->IEV2.Vuns &= 0xFF; /* dump any high order bits */ 2617 2723 *pretregs &= ~mPSW; /* flags already set */ 2618 2724 } 2619 2725 else 2620 2726 { 2621 2727 L1: 2622 if ( I32)2728 if (!I16) 2623 2729 { 2624 2730 if (op == OPs8int && reg == AX && size == 2) 2625 2731 { c3 = gen1(c3,0x98); /* CBW */ 2626 2732 c3->Iflags |= CFopsize; /* don't do a CWDE */ 2627 2733 } 2628 2734 else 2629 2735 { 2630 2736 /* We could do better by not forcing the src and dst */ 2631 2737 /* registers to be the same. */ 2632 2738 2633 2739 if (config.flags4 & CFG4speed && op == OPu8_16) 2634 2740 { // AND reg,0xFF 2635 2741 c3 = genc2(c3,0x81,modregrm(3,4,reg),0xFF); 2636 2742 } 2637 2743 else 2638 2744 { c3 = genregs(c3,0x0F,reg,reg); 2639 2745 c3->Iop2 = (op == OPu8int) ? 
0xB6 : 0xBE; /* MOVZX/MOVSX reg,reg */ 2640 2746 } 2641 2747 } 2642 2748 } 2643 2749 else 2644 2750 { 2645 2751 if (op == OPu8int) 2646 2752 c3 = genregs(c3,0x30,reg+4,reg+4); // XOR regH,regH 2647 2753 else 2648 2754 { 2649 2755 c3 = gen1(c3,0x98); /* CBW */ 2650 2756 *pretregs &= ~mPSW; /* flags already set */ 2651 2757 } 2652 2758 } 2653 2759 } 2654 2760 c2 = getregs(retregs); 2655 2761 L2: 2656 2762 c4 = fixresult(e,retregs,pretregs); 2657 2763 return cat6(c0,c1,c2,c3,c4,NULL); 2658 2764 } 2659 2765 2660 2766 2661 2767 /*************************** 2662 * Convert long to short (OP lngsht).2768 * Convert long to short (OP32_16). 2663 2769 * Get offset of far pointer (OPoffset). 2664 * Convert int to byte (OP int8).2770 * Convert int to byte (OP16_8). 2665 2771 * Convert long long to long (OP64_32). 2666 2772 */ 2667 2773 2668 2774 code *cdlngsht(elem *e,regm_t *pretregs) 2669 2775 { regm_t retregs; 2670 2776 code *c; 2671 2777 2672 2778 #ifdef DEBUG 2673 2779 switch (e->Eoper) 2674 2780 { 2675 case OP lngsht:2781 case OP32_16: 2676 2782 case OPoffset: 2677 case OP int8:2783 case OP16_8: 2678 2784 case OP64_32: 2679 2785 break; 2680 2786 2681 2787 default: 2682 2788 assert(0); 2683 2789 } 2684 2790 #endif 2685 2791 2686 if (e->Eoper == OP int8)2792 if (e->Eoper == OP16_8) 2687 2793 { retregs = *pretregs ? BYTEREGS : 0; 2688 2794 c = codelem(e->E1,&retregs,FALSE); 2689 2795 } 2690 2796 else 2691 2797 { if (e->E1->Eoper == OPrelconst) 2692 2798 c = offsetinreg(e->E1,&retregs); 2693 2799 else 2694 2800 { retregs = *pretregs ? ALLREGS : 0; 2695 2801 c = codelem(e->E1,&retregs,FALSE); 2696 if ( !I32 || e->Eoper == OPoffset || e->Eoper == OP64_32)2802 if (I16 || I32 && (e->Eoper == OPoffset || e->Eoper == OP64_32)) 2697 2803 retregs &= mLSW; /* want LSW only */ 2698 2804 } 2699 2805 } 2700 2806 2701 2807 /* We "destroy" a reg by assigning it the result of a new e, even */ 2702 2808 /* though the values are the same. 
Weakness of our CSE strategy that */ 2703 2809 /* a register can only hold the contents of one elem at a time. */ 2704 2810 if (e->Ecount) 2705 2811 c = cat(c,getregs(retregs)); 2706 2812 else 2707 2813 useregs(retregs); 2708 2814 2709 2815 #ifdef DEBUG 2710 2816 if (!(!*pretregs || retregs)) 2711 2817 WROP(e->Eoper), 2712 2818 printf(" *pretregs = x%x, retregs = x%x, e = %p\n",*pretregs,retregs,e); 2713 2819 #endif 2714 2820 assert(!*pretregs || retregs); 2715 2821 return cat(c,fixresult(e,retregs,pretregs)); /* lsw only */ 2716 2822 } … … 2799 2905 c3->Iflags |= CFopsize; 2800 2906 retregs = mAX; 2801 2907 return cat4(c1,c2,c3,fixresult(e,retregs,pretregs)); 2802 2908 } 2803 2909 2804 2910 /************************ 2805 2911 * Generate code for an asm elem. 2806 2912 */ 2807 2913 2808 2914 code *cdasm(elem *e,regm_t *pretregs) 2809 2915 { code *c; 2810 2916 2811 2917 #if 1 2812 2918 /* Assume only regs normally destroyed by a function are destroyed */ 2813 2919 c = getregs((ALLREGS | mES) & ~fregsaved); 2814 2920 #else 2815 2921 /* Assume all regs are destroyed */ 2816 2922 c = getregs(ALLREGS | mES); 2817 2923 #endif 2818 2924 c = genasm(c,e->EV.ss.Vstring,e->EV.ss.Vstrlen); 2819 return cat(c,fixresult(e,(I 32 ? mAX : mDX |mAX),pretregs));2925 return cat(c,fixresult(e,(I16 ? mDX | mAX : mAX),pretregs)); 2820 2926 } 2821 2927 2822 2928 /************************ 2823 2929 * Generate code for OPtofar16 and OPfromfar16. 
2824 2930 */ 2825 2931 2826 2932 code *cdfar16( elem *e, regm_t *pretregs) 2827 2933 { code *c; 2828 2934 code *c1; 2829 2935 code *c3; 2830 2936 code *cnop; 2831 2937 code cs; 2832 2938 unsigned reg; 2833 2939 2834 2940 assert(I32); 2835 2941 c = codelem(e->E1,pretregs,FALSE); 2836 2942 reg = findreg(*pretregs); 2837 2943 c = cat(c,getregs(*pretregs)); /* we will destroy the regs */ 2838 2944 2839 2945 cs.Iop = 0xC1; … … 2921 3027 2922 3028 switch (e->Eoper) 2923 3029 { 2924 3030 case OPbt: op = 0xA3; mode = 4; break; 2925 3031 case OPbtc: op = 0xBB; mode = 7; break; 2926 3032 case OPbtr: op = 0xB3; mode = 6; break; 2927 3033 case OPbts: op = 0xAB; mode = 5; break; 2928 3034 2929 3035 default: 2930 3036 assert(0); 2931 3037 } 2932 3038 2933 3039 e1 = e->E1; 2934 3040 e2 = e->E2; 2935 3041 cs.Iflags = 0; 2936 3042 c = getlvalue(&cs, e, RMload); // get addressing mode 2937 3043 if (e->Eoper == OPbt && *pretregs == 0) 2938 3044 return cat(c, codelem(e2,pretregs,FALSE)); 2939 3045 2940 3046 ty1 = tybasic(e1->Ety); 2941 word = ( I32&& tysize[ty1] == SHORTSIZE) ? CFopsize : 0;2942 idxregs = idxregm( cs.Irm, cs.Isib); // mask if index regs used3047 word = (!I16 && tysize[ty1] == SHORTSIZE) ? CFopsize : 0; 3048 idxregs = idxregm(&cs); // mask if index regs used 2943 3049 2944 3050 // if (e2->Eoper == OPconst && e2->EV.Vuns < 0x100) // should do this instead? 
2945 3051 if (e2->Eoper == OPconst) 2946 3052 { 2947 3053 cs.Iop = 0x0F; 2948 3054 cs.Iop2 = 0xBA; // BT rm,imm8 2949 3055 cs.Irm |= modregrm(0,mode,0); 2950 3056 cs.Iflags |= CFpsw | word; 2951 3057 cs.IFL2 = FLconst; 2952 3058 if (tysize[ty1] == SHORTSIZE) 2953 3059 { 2954 3060 cs.IEVoffset1 += (e2->EV.Vuns & ~15) >> 3; 2955 3061 cs.IEV2.Vint = e2->EV.Vint & 15; 2956 3062 } 2957 else 3063 else if (tysize[ty1] == 4) 2958 3064 { 2959 3065 cs.IEVoffset1 += (e2->EV.Vuns & ~31) >> 3; 2960 3066 cs.IEV2.Vint = e2->EV.Vint & 31; 3067 } 3068 else 3069 { 3070 cs.IEVoffset1 += (e2->EV.Vuns & ~63) >> 3; 3071 cs.IEV2.Vint = e2->EV.Vint & 63; 2961 3072 } 2962 3073 c2 = gen(CNIL,&cs); 2963 3074 } 2964 3075 else 2965 3076 { 2966 3077 retregs = ALLREGS & ~idxregs; 2967 3078 c2 = scodelem(e2,&retregs,idxregs,TRUE); 2968 3079 reg = findreg(retregs); 2969 3080 2970 3081 cs.Iop = 0x0F; 2971 3082 cs.Iop2 = op; // BT rm,reg 2972 c s.Irm |= modregrm(0,reg,0);3083 code_newreg(&cs,reg); 2973 3084 cs.Iflags |= CFpsw | word; 2974 3085 c2 = gen(c2,&cs); 2975 3086 } 2976 3087 2977 3088 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register 2978 { code *nop; 2979 regm_t save; 2980 code *cg; 2981 2982 nop = CNIL; 2983 save = regcon.immed.mval; 2984 cg = allocreg(&retregs,&reg,TYint); 3089 { 3090 code *nop = CNIL; 3091 regm_t save = regcon.immed.mval; 3092 code *cg = allocreg(&retregs,&reg,TYint); 2985 3093 regcon.immed.mval = save; 2986 3094 if ((*pretregs & mPSW) == 0) 2987 3095 { 2988 3096 cg = cat(cg,getregs(retregs)); 2989 3097 cg = genregs(cg,0x19,reg,reg); // SBB reg,reg 2990 3098 } 2991 3099 else 2992 3100 { 2993 3101 cg = movregconst(cg,reg,1,8); // MOV reg,1 2994 3102 nop = gennop(nop); 2995 3103 cg = genjmp(cg,JC,FLcode,(block *) nop); // Jtrue nop 2996 3104 // MOV reg,0 2997 3105 movregconst(cg,reg,0,8); 2998 3106 regcon.immed.mval &= ~mask[reg]; 2999 3107 } 3000 3108 *pretregs = retregs; 3001 3109 c2 = cat3(c2,cg,nop); 3002 3110 } 3003 3111 3004 3112 return 
cat(c,c2); … … 3017 3125 code *cl,*cg; 3018 3126 code cs; 3019 3127 3020 3128 //printf("cdbscan()\n"); 3021 3129 //elem_print(e); 3022 3130 if (*pretregs == 0) 3023 3131 return codelem(e->E1,pretregs,FALSE); 3024 3132 tyml = tybasic(e->E1->Ety); 3025 3133 sz = tysize[tyml]; 3026 3134 assert(sz == 2 || sz == 4); 3027 3135 3028 3136 if ((e->E1->Eoper == OPind && !e->E1->Ecount) || e->E1->Eoper == OPvar) 3029 3137 { 3030 3138 cl = getlvalue(&cs, e->E1, RMload); // get addressing mode 3031 3139 } 3032 3140 else 3033 3141 { 3034 3142 retregs = allregs; 3035 3143 cl = codelem(e->E1, &retregs, FALSE); 3036 3144 reg = findreg(retregs); 3037 cs.Irm = modregrm(3,0,reg );3145 cs.Irm = modregrm(3,0,reg & 7); 3038 3146 cs.Iflags = 0; 3039 3147 cs.Irex = 0; 3148 if (reg & 8) 3149 cs.Irex |= REX_B; 3040 3150 } 3041 3151 3042 3152 retregs = *pretregs & allregs; 3043 3153 if (!retregs) 3044 3154 retregs = allregs; 3045 3155 cg = allocreg(&retregs, &reg, e->Ety); 3046 3156 3047 3157 cs.Iop = 0x0F; 3048 3158 cs.Iop2 = (e->Eoper == OPbsf) ? 0xBC : 0xBD; // BSF/BSR reg,EA 3049 c s.Irm |= modregrm(0,reg,0);3050 if ( I32&& sz == SHORTSIZE)3159 code_newreg(&cs, reg); 3160 if (!I16 && sz == SHORTSIZE) 3051 3161 cs.Iflags |= CFopsize; 3052 3162 cg = gen(cg,&cs); 3053 3163 3054 3164 return cat3(cl,cg,fixresult(e,retregs,pretregs)); 3055 3165 } 3056 3166 3057 3167 /******************************************* 3058 3168 * Generate code for OPpair, OPrpair. 3059 3169 */ 3060 3170 3061 3171 code *cdpair(elem *e, regm_t *pretregs) 3062 3172 { 3063 3173 regm_t retregs; 3064 3174 regm_t regs1; 3065 3175 regm_t regs2; 3066 3176 unsigned reg; 3067 3177 code *cg; 3068 3178 code *c1; 3069 3179 code *c2; 3070 3180 branches/dmd-1.x/src/backend/code.c
r428 r577 12 12 13 13 #if !SPP 14 14 15 15 #include <stdio.h> 16 16 #include <time.h> 17 17 #include "cc.h" 18 18 #include "el.h" 19 19 #include "code.h" 20 20 #include "global.h" 21 21 22 22 static code *code_list; 23 23 24 24 /***************** 25 25 * Allocate code 26 26 */ 27 27 28 28 #if SCPP && __SC__ && __INTSIZE == 4 && TX86 && !_DEBUG_TRACE && !MEM_DEBUG 29 29 30 30 __declspec(naked) code *code_calloc() 31 31 { 32 if (sizeof(code) != 0x2 0)32 if (sizeof(code) != 0x24) 33 33 util_assert("code",__LINE__); 34 34 __asm 35 35 { 36 36 mov EAX,code_list 37 37 test EAX,EAX 38 38 je L20 39 39 mov ECX,[EAX] 40 40 mov code_list,ECX 41 41 jmp L29 42 42 43 43 L20: push sizeof(code) 44 44 call mem_fmalloc 45 45 ;add ESP,4 46 46 L29: 47 47 xor ECX,ECX 48 48 mov DWORD PTR [EAX],0 49 49 50 50 mov 4[EAX],ECX ;these pair 51 51 mov 8[EAX],ECX 52 52 53 53 mov 12[EAX],ECX 54 54 mov 16[EAX],ECX 55 55 56 56 mov 20[EAX],ECX 57 57 mov 24[EAX],ECX 58 58 59 59 mov 28[EAX],ECX 60 mov 32[EAX],ECX 60 61 ret 61 62 } 62 63 } 63 64 64 65 #else 65 66 66 67 code *code_calloc() 67 68 { code *c; 68 69 static code czero; 69 70 71 //printf("code %x\n", sizeof(code)); 70 72 c = code_list; 71 73 if (c) 72 74 code_list = code_next(c); 73 75 else 74 76 c = (code *)mem_fmalloc(sizeof(*c)); 75 77 *c = czero; // zero it out 76 78 //dbg_printf("code_calloc: %p\n",c); 77 79 return c; 78 80 } 79 81 80 82 #endif 81 83 82 84 /***************** 83 85 * Free code 84 86 */ 85 87 86 88 void code_free(code *cstart) 87 89 { code **pc; 88 90 code *c; 89 91 branches/dmd-1.x/src/backend/code.h
r572 r577 89 89 90 90 #define mXMM0 (1 << XMM0) 91 91 #define mXMM1 (1 << XMM1) 92 92 #define mXMM2 (1 << XMM2) 93 93 #define mXMM3 (1 << XMM3) 94 94 #define mXMM4 (1 << XMM4) 95 95 #define mXMM5 (1 << XMM5) 96 96 #define mXMM6 (1 << XMM6) 97 97 #define mXMM7 (1 << XMM7) 98 98 #define XMMREGS (mXMM0 |mXMM1 |mXMM2 |mXMM3 |mXMM4 |mXMM5 |mXMM6 |mXMM7) 99 99 100 100 #define mES (1 << ES) // 0x10000 101 101 #define mPSW (1 << PSW) // 0x20000 102 102 103 103 #define mSTACK (1 << STACK) // 0x40000 104 104 105 105 #define mST0 (1 << ST0) // 0x200000 106 106 #define mST01 (1 << ST01) // 0x400000 107 107 108 108 // Flags for getlvalue (must fit in regm_t) 109 #define RMload 0x4000110 #define RMstore 0x8000109 #define RMload (1 << 30) 110 #define RMstore (1 << 31) 111 111 112 112 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 113 113 // To support positional independent code, 114 114 // must be able to remove BX from available registers 115 115 extern regm_t ALLREGS; 116 116 #define ALLREGS_INIT (mAX|mBX|mCX|mDX|mSI|mDI) 117 117 #define ALLREGS_INIT_PIC (mAX|mCX|mDX|mSI|mDI) 118 118 extern regm_t BYTEREGS; 119 119 #define BYTEREGS_INIT (mAX|mBX|mCX|mDX) 120 120 #define BYTEREGS_INIT_PIC (mAX|mCX|mDX) 121 121 #else 122 122 #define ALLREGS (mAX|mBX|mCX|mDX|mSI|mDI) 123 123 #define ALLREGS_INIT ALLREGS 124 124 #undef BYTEREGS 125 125 #define BYTEREGS (mAX|mBX|mCX|mDX) 126 126 #endif 127 127 128 128 /* We use the same IDXREGS for the 386 as the 8088, because if 129 129 we used ALLREGS, it would interfere with mMSW 130 130 */ … … 241 241 #define ESCrelease2 9 // release eh stack 242 242 #define ESCframeptr 10 // replace with load of frame pointer 243 243 244 244 #define ASM 0x36 // string of asm bytes, actually an SS: opcode 245 245 246 246 /********************************* 247 247 * Macros to ease generating code 248 248 * modregrm: generate mod reg r/m field 249 249 * modregxrm: reg could be R8..R15 250 250 * modregrmx: rm could be R8..R15 251 251 * 
modregxrmx: reg or rm could be R8..R15 252 252 * NEWREG: change reg field of x to r 253 253 * genorreg: OR t,f 254 254 */ 255 255 256 256 #define modregrm(m,r,rm) (((m)<<6)|((r)<<3)|(rm)) 257 257 #define modregxrm(m,r,rm) ((((r)&8)<<15)|modregrm((m),(r)&7,rm)) 258 258 #define modregrmx(m,r,rm) ((((rm)&8)<<13)|modregrm((m),r,(rm)&7)) 259 259 #define modregxrmx(m,r,rm) ((((r)&8)<<15)|(((rm)&8)<<13)|modregrm((m),(r)&7,(rm)&7)) 260 260 261 #define NEWREXR(x,r) ((x)=((x)&~REX_R)|(((r)&8)>>1)) 261 262 #define NEWREG(x,r) ((x)=((x)&~(7<<3))|((r)<<3)) 263 #define code_newreg(c,r) (NEWREG((c)->Irm,(r)&7),NEWREXR((c)->Irex,(r))) 264 262 265 #define genorreg(c,t,f) genregs((c),0x09,(f),(t)) 263 266 264 267 #define REX 0x40 // REX prefix byte, OR'd with the following bits: 265 268 #define REX_W 8 // 0 = default operand size, 1 = 64 bit operand size 266 269 #define REX_R 4 // high bit of reg field of modregrm 267 270 #define REX_X 2 // high bit of sib index reg 268 271 #define REX_B 1 // high bit of rm field, sib base reg, or opcode reg 269 272 270 273 /********************** 271 274 * C library routines. 272 275 * See callclib(). 273 276 */ 274 277 275 278 enum CLIB 276 279 { 277 280 CLIBlcmp, 278 281 CLIBlmul, 279 282 CLIBldiv, 280 283 CLIBlmod, 281 284 CLIBuldiv, … … 374 377 // CFes at the same time, though!) 
375 378 #define CFpsw 0x40 // we need the flags result after this instruction 376 379 #define CFopsize 0x80 // prefix with operand size 377 380 #define CFaddrsize 0x100 // prefix with address size 378 381 #define CFds 0x200 // need DS override (not with es, ss, or cs ) 379 382 #define CFcs 0x400 // need CS override 380 383 #define CFfs 0x800 // need FS override 381 384 #define CFgs (CFcs | CFfs) // need GS override 382 385 #define CFwait 0x1000 // If I32 it indicates when to output a WAIT 383 386 #define CFselfrel 0x2000 // if self-relative 384 387 #define CFunambig 0x4000 // indicates cannot be accessed by other addressing 385 388 // modes 386 389 #define CFtarg2 0x8000 // like CFtarg, but we can't optimize this away 387 390 #define CFvolatile 0x10000 // volatile reference, do not schedule 388 391 #define CFclassinit 0x20000 // class init code 389 392 #define CFoffset64 0x40000 // offset is 64 bits 390 393 391 394 #define CFPREFIX (CFSEG | CFopsize | CFaddrsize) 392 395 #define CFSEG (CFes | CFss | CFds | CFcs | CFfs | CFgs) 393 396 394 unsigned char Irex; // REX prefix395 397 396 398 unsigned char Iop; 397 unsigned char Irm; // reg/mode398 399 399 unsigned char Iop2; // second opcode byte 400 unsigned char Isib; // SIB byte401 402 400 unsigned char Iop3; // third opcode byte 401 402 union 403 { unsigned _Iea; 404 struct 405 { 406 unsigned char _Irm; // reg/mode 407 unsigned char _Isib; // SIB byte 408 unsigned char _Irex; // REX prefix 409 } _ea; 410 } _EA; 411 412 #define Iea _EA._Iea 413 #define Irm _EA._ea._Irm 414 #define Isib _EA._ea._Isib 415 #define Irex _EA._ea._Irex 403 416 404 417 unsigned char IFL1,IFL2; // FLavors of 1st, 2nd operands 405 418 union evc IEV1; // 1st operand, if any 406 419 #define IEVpointer1 IEV1._EP.Vpointer 407 420 #define IEVseg1 IEV1._EP.Vseg 408 421 #define IEVsym1 IEV1.sp.Vsym 409 422 #define IEVdsym1 IEV1.dsp.Vsym 410 423 #define IEVoffset1 IEV1.sp.Voffset 411 424 #define IEVlsym1 IEV1.lab.Vsym 412 425 #define IEVint1 IEV1.Vint 
413 426 union evc IEV2; // 2nd operand, if any 414 427 #define IEVpointer2 IEV2._EP.Vpointer 415 428 #define IEVseg2 IEV2._EP.Vseg 416 429 #define IEVsym2 IEV2.sp.Vsym 417 430 #define IEVdsym2 IEV2.dsp.Vsym 418 431 #define IEVoffset2 IEV2.sp.Voffset 419 432 #define IEVlsym2 IEV2.lab.Vsym 420 433 #define IEVint2 IEV2.Vint 421 434 void print(); // pretty-printer 422 435 … … 528 541 /***********************************************************/ 529 542 530 543 extern regm_t msavereg,mfuncreg,allregs; 531 544 532 545 /*long cxmalloc,cxcalloc,cx1;*/ 533 546 534 547 typedef code *cd_t (elem *e , regm_t *pretregs ); 535 548 536 549 extern int BPRM; 537 550 extern regm_t FLOATREGS; 538 551 extern regm_t FLOATREGS2; 539 552 extern regm_t DOUBLEREGS; 540 553 extern const char datafl[],stackfl[],segfl[],flinsymtab[]; 541 554 extern char needframe,usedalloca,gotref; 542 555 extern targ_size_t localsize,Toff,Poff,Aoff, 543 556 Poffset,funcoffset, 544 557 framehandleroffset, 545 558 Aoffset,Toffset,EEoffset; 546 559 extern int Aalign; 547 560 extern int cseg; 561 extern int STACKALIGN; 548 562 #if TARGET_OSX 549 563 extern targ_size_t localgotoffset; 550 564 #endif 551 565 552 566 /* cgcod.c */ 553 567 extern int pass; 554 568 #define PASSinit 0 // initial pass through code generator 555 569 #define PASSreg 1 // register assignment pass 556 570 #define PASSfinal 2 // final pass 557 571 558 572 extern int dfoidx; 559 573 extern struct CSE *csextab; 560 574 extern unsigned cstop; 561 575 #if TX86 562 576 extern bool floatreg; 563 577 #endif 564 578 extern targ_size_t retoffset; 565 579 extern unsigned stackpush; 566 580 extern int stackchanged; 567 581 extern int refparam; … … 581 595 #endif 582 596 #define findregmsw(regm) findreg((regm) & mMSW) 583 597 #define findreglsw(regm) findreg((regm) & (mLSW | mBP)) 584 598 void freenode (elem *e ); 585 599 int isregvar (elem *e , regm_t *pregm , unsigned *preg ); 586 600 #ifdef DEBUG 587 601 code *allocreg (regm_t *pretregs , unsigned 
*preg , tym_t tym , int line , const char *file ); 588 602 #define allocreg(a,b,c) allocreg((a),(b),(c),__LINE__,__FILE__) 589 603 #else 590 604 code *allocreg (regm_t *pretregs , unsigned *preg , tym_t tym ); 591 605 #endif 592 606 void useregs (regm_t regm ); 593 607 code *getregs (regm_t r ); 594 608 code *getregs_imm (regm_t r ); 595 609 code *cse_flush(int); 596 610 void cssave (elem *e , regm_t regm , unsigned opsflag ); 597 611 bool evalinregister (elem *e ); 598 612 regm_t getscratch(); 599 613 code *codelem (elem *e , regm_t *pretregs , bool constflag ); 600 614 const char *regm_str(regm_t rm); 615 int numbitsset(regm_t); 601 616 602 617 /* cod1.c */ 603 618 extern int clib_inited; 604 619 605 620 int isscaledindex(elem *); 606 621 int ssindex(int op,targ_uns product); 607 622 void buildEA(code *c,int base,int index,int scale,targ_size_t disp); 608 623 void andregcon (con_t *pregconsave); 609 624 code *docommas (elem **pe ); 610 625 code *gencodelem(code *c,elem *e,regm_t *pretregs,bool constflag); 611 626 void gensaverestore(regm_t, code **, code **); 612 627 code *genstackclean(code *c,unsigned numpara,regm_t keepmsk); 613 628 code *logexp (elem *e , int jcond , unsigned fltarg , code *targ ); 614 code *loadea (elem *e , code __ss*cs , unsigned op , unsigned reg , targ_size_t offset , regm_t keepmsk , regm_t desmsk );629 code *loadea (elem *e , code *cs , unsigned op , unsigned reg , targ_size_t offset , regm_t keepmsk , regm_t desmsk ); 615 630 unsigned getaddrmode (regm_t idxregs ); 631 void setaddrmode(code *c, regm_t idxregs); 616 632 void getlvalue_msw(code *); 617 633 void getlvalue_lsw(code *); 618 code *getlvalue (code __ss*pcs , elem *e , regm_t keepmsk );634 code *getlvalue (code *pcs , elem *e , regm_t keepmsk ); 619 635 code *scodelem (elem *e , regm_t *pretregs , regm_t keepmsk , bool constflag ); 620 code *fltregs (code __ss*pcs , tym_t tym );636 code *fltregs (code *pcs , tym_t tym ); 621 637 code *tstresult (regm_t regm , tym_t tym , 
unsigned saveflag ); 622 638 code *fixresult (elem *e , regm_t retregs , regm_t *pretregs ); 623 639 code *callclib (elem *e , unsigned clib , regm_t *pretregs , regm_t keepmask ); 624 640 cd_t cdfunc; 625 641 cd_t cdstrthis; 626 642 code *params(elem *, unsigned); 627 643 code *offsetinreg (elem *e , regm_t *pretregs ); 628 644 code *loaddata (elem *e , regm_t *pretregs ); 629 645 630 646 /* cod2.c */ 631 regm_t idxregm (unsigned rm,unsigned sib);647 regm_t idxregm(code *c); 632 648 #if TARGET_WINDOS 633 649 code *opdouble (elem *e , regm_t *pretregs , unsigned clib ); 634 650 #endif 635 651 cd_t cdorth; 636 652 cd_t cdmul; 637 653 cd_t cdnot; 638 654 cd_t cdcom; 639 655 cd_t cdbswap; 640 656 cd_t cdcond; 641 657 void WRcodlst (code *c ); 642 658 cd_t cdcomma; 643 659 cd_t cdloglog; 644 660 cd_t cdshift; 645 661 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 646 662 cd_t cdindpic; 647 663 #endif 648 664 cd_t cdind; 649 665 cd_t cdstrlen; 650 666 cd_t cdstrcmp; 651 667 cd_t cdstrcpy; … … 662 678 cd_t cderr; 663 679 cd_t cdinfo; 664 680 cd_t cdctor; 665 681 cd_t cddtor; 666 682 cd_t cdmark; 667 683 cd_t cdnullcheck; 668 684 cd_t cdclassinit; 669 685 670 686 /* cod3.c */ 671 687 extern int BPoff; 672 688 673 689 int cod3_EA(code *c); 674 690 regm_t cod3_useBP(); 675 691 void cod3_set386 (void ); 676 692 void cod3_set64 (void ); 677 693 void cod3_align (void ); 678 694 void doswitch (block *b ); 679 695 void outjmptab (block *b ); 680 696 void outswitab (block *b ); 681 697 int jmpopcode (elem *e ); 682 void cod3_ptrchk(code * __ss *pc,code __ss*pcs,regm_t keepmsk);698 void cod3_ptrchk(code **pc,code *pcs,regm_t keepmsk); 683 699 code *prolog (void ); 684 700 void epilog (block *b); 685 701 cd_t cdframeptr; 686 702 cd_t cdgot; 687 703 targ_size_t cod3_spoff(); 688 704 code *cod3_load_got(); 689 705 void makeitextern (symbol *s ); 690 706 void fltused(void); 691 707 int branch(block *bl, int flag); 692 708 void cod3_adjSymOffsets(); 693 709 void 
assignaddr (block *bl ); 694 710 void assignaddrc (code *c ); 695 711 targ_size_t cod3_bpoffset(symbol *s); 696 712 void pinholeopt (code *c , block *bn ); 697 713 void jmpaddr (code *c ); 698 714 int code_match(code *c1,code *c2); 699 715 unsigned calcblksize (code *c); 700 716 unsigned calccodsize(code *c); 701 717 unsigned codout (code *c ); 702 718 void addtofixlist (symbol *s , targ_size_t soffset , int seg , targ_size_t val , int flags ); 703 719 void searchfixlist (symbol *s ); 704 720 void outfixlist (void ); 705 721 void code_hydrate(code **pc); 706 722 void code_dehydrate(code **pc); 707 723 708 724 /* cod4.c */ 709 725 extern const unsigned dblreg[]; 710 726 extern int cdcmp_flag; 711 727 712 code *modEA (unsigned Irm);728 code *modEA(code *c); 713 729 cd_t cdeq; 714 730 cd_t cdaddass; 715 731 cd_t cdmulass; 716 732 cd_t cdshass; 717 733 cd_t cdcmp; 718 734 cd_t cdcnvt; 719 735 cd_t cdshtlng; 720 736 cd_t cdbyteint; 721 737 cd_t cdlngsht; 722 738 cd_t cdmsw; 723 739 cd_t cdport; 724 740 cd_t cdasm; 725 741 cd_t cdsetjmp; 726 742 cd_t cdvoid; 727 743 cd_t cdhalt; 728 744 cd_t cdfar16; 729 745 cd_t cdbt; 730 746 cd_t cdbscan; 731 747 cd_t cdpair; 732 748 code *longcmp (elem *,bool,unsigned,code *); … … 786 802 code *nteh_filter(block *b); 787 803 void nteh_framehandler(symbol *); 788 804 code *nteh_gensindex(int); 789 805 #define GENSINDEXSIZE 7 790 806 code *nteh_monitor_prolog(Symbol *shandle); 791 807 code *nteh_monitor_epilog(regm_t retregs); 792 808 793 809 // cgen.c 794 810 code *code_last(code *c); 795 811 void code_orflag(code *c,unsigned flag); 796 812 void code_orrex(code *c,unsigned rex); 797 813 code * __pascal cat (code *c1 , code *c2 ); 798 814 code * cat3 (code *c1 , code *c2 , code *c3 ); 799 815 code * cat4 (code *c1 , code *c2 , code *c3 , code *c4 ); 800 816 code * cat6 (code *c1 , code *c2 , code *c3 , code *c4 , code *c5 , code *c6 ); 801 817 code *gen (code *c , code *cs ); 802 818 code *gen1 (code *c , unsigned op ); 803 819 code 
*gen2 (code *c , unsigned op , unsigned rm ); 804 820 code *gen2sib(code *c,unsigned op,unsigned rm,unsigned sib); 805 821 code *genregs (code *c , unsigned op , unsigned dstreg , unsigned srcreg ); 822 code *genpush (code *c , unsigned reg ); 823 code *genpop (code *c , unsigned reg ); 806 824 code *gentstreg (code *c , unsigned reg ); 807 825 code *genasm (code *c , char *s , unsigned slen ); 808 826 code *genmovreg (code *c , unsigned to , unsigned from ); 809 827 code *genjmp (code *c , unsigned op , unsigned fltarg , block *targ ); 810 828 code *gencsi (code *c , unsigned op , unsigned rm , unsigned FL2 , SYMIDX si ); 811 829 code *gencs (code *c , unsigned op , unsigned rm , unsigned FL2 , symbol *s ); 812 830 code *genc2 (code *c , unsigned op , unsigned rm , targ_size_t EV2 ); 813 831 code *genc1 (code *c , unsigned op , unsigned rm , unsigned FL1 , targ_size_t EV1 ); 814 832 code *genc (code *c , unsigned op , unsigned rm , unsigned FL1 , targ_size_t EV1 , unsigned FL2 , targ_size_t EV2 ); 815 833 code *genmulimm(code *c,unsigned r1,unsigned r2,targ_int imm); 816 834 code *genlinnum(code *,Srcpos); 817 835 void cgen_linnum(code **pc,Srcpos srcpos); 818 836 void cgen_prelinnum(code **pc,Srcpos srcpos); 819 837 code *genadjesp(code *c, int offset); 820 838 code *gennop(code *); 821 839 code *genshift(code *); 822 840 code *movregconst (code *c , unsigned reg , targ_size_t value , regm_t flags ); 823 841 bool reghasvalue (regm_t regm , targ_size_t value , unsigned *preg ); 824 842 code *regwithvalue (code *c , regm_t regm , targ_size_t value , unsigned *preg , regm_t flags ); 825 843 branches/dmd-1.x/src/backend/iasm.h
r572 r577 71 71 // Operand flags - usOp1, usOp2, usOp3 72 72 // 73 73 74 74 typedef unsigned opflag_t; 75 75 76 76 // Operand flags for normal opcodes 77 77 78 78 #define _r8 CONSTRUCT_FLAGS( _8, _reg, _normal, 0 ) 79 79 #define _r16 CONSTRUCT_FLAGS(_16, _reg, _normal, 0 ) 80 80 #define _r32 CONSTRUCT_FLAGS(_32, _reg, _normal, 0 ) 81 81 #define _r64 CONSTRUCT_FLAGS(_64, _reg, _normal, 0 ) 82 82 #define _m8 CONSTRUCT_FLAGS(_8, _m, _normal, 0 ) 83 83 #define _m16 CONSTRUCT_FLAGS(_16, _m, _normal, 0 ) 84 84 #define _m32 CONSTRUCT_FLAGS(_32, _m, _normal, 0 ) 85 85 #define _m48 CONSTRUCT_FLAGS( _48, _m, _normal, 0 ) 86 86 #define _m64 CONSTRUCT_FLAGS( _64, _m, _normal, 0 ) 87 87 #define _m128 CONSTRUCT_FLAGS( _anysize, _m, _normal, 0 ) 88 88 #define _rm8 CONSTRUCT_FLAGS(_8, _rm, _normal, 0 ) 89 89 #define _rm16 CONSTRUCT_FLAGS(_16, _rm, _normal, 0 ) 90 90 #define _rm32 CONSTRUCT_FLAGS(_32, _rm, _normal, 0) 91 #define _rm64 CONSTRUCT_FLAGS(_64, _rm, _normal, 0) 91 92 #define _r32m16 CONSTRUCT_FLAGS(_32|_16, _rm, _normal, 0) 92 93 #define _imm8 CONSTRUCT_FLAGS(_8, _imm, _normal, 0 ) 93 94 #define _imm16 CONSTRUCT_FLAGS(_16, _imm, _normal, 0) 94 95 #define _imm32 CONSTRUCT_FLAGS(_32, _imm, _normal, 0) 95 96 #define _rel8 CONSTRUCT_FLAGS(_8, _rel, _normal, 0) 96 97 #define _rel16 CONSTRUCT_FLAGS(_16, _rel, _normal, 0) 97 98 #define _rel32 CONSTRUCT_FLAGS(_32, _rel, _normal, 0) 98 99 #define _p1616 CONSTRUCT_FLAGS(_32, _p, _normal, 0) 99 100 #define _m1616 CONSTRUCT_FLAGS(_32, _mnoi, _normal, 0) 100 101 #define _p1632 CONSTRUCT_FLAGS(_48, _p, _normal, 0 ) 101 102 #define _m1632 CONSTRUCT_FLAGS(_48, _mnoi, _normal, 0) 102 103 #define _special CONSTRUCT_FLAGS( 0, 0, _rspecial, 0 ) 103 104 #define _seg CONSTRUCT_FLAGS( 0, 0, _rseg, 0 ) 104 105 #define _a16 CONSTRUCT_FLAGS( 0, 0, _addr16, 0 ) 105 106 #define _a32 CONSTRUCT_FLAGS( 0, 0, _addr32, 0 ) 106 107 #define _f16 CONSTRUCT_FLAGS( 0, 0, _fn16, 0) 107 108 // Near function pointer 108 109 #define _f32 CONSTRUCT_FLAGS( 0, 0, 
_fn32, 0) 109 110 // Far function pointer 110 111 #define _lbl CONSTRUCT_FLAGS( 0, 0, _flbl, 0 ) branches/dmd-1.x/src/backend/ptrntab.c
r572 r577 500 500 PTRNTAB1 aptb1LOOPZ[] = /* LOOPZ */ { 501 501 { 0xe1, _cb | _modcx,_rel8 }, 502 502 { ASM_END, 0, 0 } 503 503 }; 504 504 PTRNTAB1 aptb1LOOPNE[] = /* LOOPNE */ { 505 505 { 0xe0, _cb | _modcx,_rel8 }, 506 506 { ASM_END, 0, 0 } 507 507 }; 508 508 PTRNTAB1 aptb1LOOPNZ[] = /* LOOPNZ */ { 509 509 { 0xe0, _cb | _modcx,_rel8 }, 510 510 { ASM_END, 0, 0 } 511 511 }; 512 512 PTRNTAB1 aptb1LTR[] = /* LTR */ { 513 513 { 0x0f00, _3|_modnot1, _rm16 }, 514 514 { ASM_END, 0, 0 } 515 515 }; 516 516 PTRNTAB1 aptb1NEG[] = /* NEG */ { 517 517 { 0xf6, _3, _rm8 }, 518 518 { 0xf7, _3 | _16_bit, _rm16 }, 519 519 { 0xf7, _3 | _32_bit, _rm32 }, 520 { 0xf7, _3 | _64_bit, _rm64 }, 520 521 { ASM_END, 0, 0 } 521 522 }; 522 523 PTRNTAB1 aptb1NOT[] = /* NOT */ { 523 524 { 0xf6, _2, _rm8 }, 524 525 { 0xf7, _2 | _16_bit, _rm16 }, 525 526 { 0xf7, _2 | _32_bit, _rm32 }, 526 527 { ASM_END, 0, 0 } 527 528 }; 528 529 PTRNTAB1 aptb1POP[] = /* POP */ { 529 530 { 0x8f, _0 | _16_bit, _m16 }, 530 531 { 0x8f, _0 | _32_bit, _m32}, 531 532 { 0x58, _rw | _16_bit, _r16 | _plus_r }, 532 533 { 0x58, _rd | _32_bit, _r32 | _plus_r }, 534 { 0x58, _r | _64_bit, _r64 | _plus_r }, 533 535 { 0x1f, 0, _ds | _seg }, 534 536 { 0x07, _modes, _es | _seg}, 535 537 { 0x17, 0, _ss | _seg}, 536 538 { 0x0fa1, 0, _fs | _seg}, 537 539 { 0x0fa9, 0, _gs | _seg}, 538 540 { ASM_END, 0, 0 } 539 541 }; 540 542 PTRNTAB1 aptb1PUSH[] = /* PUSH */ { 541 543 { 0xff, _6 | _16_bit, _m16 }, 542 544 { 0xff, _6 | _32_bit, _m32 }, 545 { 0xff, _6 | _64_bit, _m64 }, 543 546 { 0x50, _r | _16_bit, _r16 | _plus_r }, 544 547 { 0x50, _r | _32_bit, _r32 | _plus_r }, 548 { 0x50, _r | _64_bit, _r64 | _plus_r }, 545 549 { 0x6a, 0,_imm8 }, 546 550 { 0x68, _16_bit,_imm16 }, 547 551 { 0x68, _16_bit,_rel16 }, 548 552 { 0x68, _32_bit,_imm32 }, 549 553 { 0x68, _32_bit,_rel32 }, 550 554 { 0x0e, 0,_cs | _seg}, 551 555 { 0x16, 0,_ss | _seg}, 552 556 { 0x1e, 0,_ds | _seg}, 553 557 { 0x06, 0,_es | _seg}, 554 558 { 0x0fa0, 0,_fs | _seg}, 555 559 { 0x0fa8, 
0,_gs | _seg}, 556 560 { ASM_END, 0, 0 } 557 561 }; 558 562 PTRNTAB1 aptb1RET[] = /* RET */ { 559 563 { 0xc3, 0, 0 }, 560 564 { 0xc2, _iw, _imm16 }, 561 565 { ASM_END, 0, 0 } 562 566 }; 563 567 PTRNTAB1 aptb1RETF[] = /* RETF */ { 564 568 { 0xcb, 0, 0 }, branches/dmd-1.x/src/freebsd.mak
r532 r577 7 7 8 8 #OPT=-g -g3 9 9 #OPT=-O2 10 10 11 11 #COV=-fprofile-arcs -ftest-coverage 12 12 13 13 WARNINGS=-Wno-deprecated -Wstrict-aliasing 14 14 15 15 #GFLAGS = $(WARNINGS) -D__near= -D__pascal= -fno-exceptions -g -DDEBUG=1 $(COV) 16 16 GFLAGS = $(WARNINGS) -D__near= -D__pascal= -fno-exceptions -O2 17 17 18 18 CFLAGS = $(GFLAGS) -I$(ROOT) -D__I86__=1 -DMARS=1 -DTARGET_FREEBSD=1 -D_DH 19 19 MFLAGS = $(GFLAGS) -I$C -I$(TK) -D__I86__=1 -DMARS=1 -DTARGET_FREEBSD=1 -D_DH 20 20 21 21 CH= $C/cc.h $C/global.h $C/parser.h $C/oper.h $C/code.h $C/type.h \ 22 22 $C/dt.h $C/cgcv.h $C/el.h $C/iasm.h 23 23 TOTALH= 24 24 25 25 DMD_OBJS = \ 26 26 access.o array.o attrib.o bcomplex.o bit.o blockopt.o \ 27 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cg cv.o cgelem.o cgen.o \27 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cgelem.o cgen.o \ 28 28 cgreg.o cgsched.o class.o cod1.o cod2.o cod3.o cod4.o cod5.o \ 29 29 constfold.o irstate.o dchar.o cond.o debug.o \ 30 30 declaration.o dsymbol.o dt.o dump.o e2ir.o ee.o eh.o el.o \ 31 31 dwarf.o enum.o evalu8.o expression.o func.o gdag.o gflow.o \ 32 32 glocal.o gloop.o glue.o gnuc.o go.o gother.o html.o iasm.o id.o \ 33 33 identifier.o impcnvtab.o import.o inifile.o init.o inline.o \ 34 34 lexer.o link.o lstring.o mangle.o mars.o rmem.o module.o msc.o mtype.o \ 35 35 nteh.o cppmangle.o opover.o optimize.o os.o out.o outbuf.o \ 36 36 parse.o ph.o ptrntab.o root.o rtlsym.o s2ir.o scope.o statement.o \ 37 37 stringtable.o struct.o csymbol.o template.o tk.o tocsym.o todt.o \ 38 38 type.o typinf.o util.o var.o version.o strtold.o utf.o staticassert.o \ 39 39 unialpha.o toobj.o toctype.o toelfdebug.o entity.o doc.o macro.o \ 40 40 hdrgen.o delegatize.o aa.o ti_achar.o toir.o interpret.o traits.o \ 41 41 builtin.o clone.o aliasthis.o \ 42 42 man.o arrayop.o port.o response.o async.o json.o speller.o aav.o unittests.o \ 43 43 imphint.o \ 44 44 libelf.o elfobj.o 45 45 46 46 SRC = win32.mak linux.mak osx.mak freebsd.mak solaris.mak \ 47 47 mars.c 
enum.c struct.c dsymbol.c import.c idgen.c impcnvgen.c \ branches/dmd-1.x/src/iasm.c
r572 r577 93 93 EM_num, 94 94 EM_float, 95 95 EM_char, 96 96 EM_label_expected, 97 97 EM_uplevel, 98 98 EM_type_as_operand, 99 99 }; 100 100 101 101 const char *asmerrmsgs[] = 102 102 { 103 103 "unknown operand for floating point instruction", 104 104 "bad addr mode", 105 105 "align %d must be a power of 2", 106 106 "opcode expected, not %s", 107 107 "prefix", 108 108 "end of instruction", 109 109 "bad operand", 110 110 "bad integral operand", 111 111 "identifier expected", 112 112 "not struct", 113 " nops expected",113 "%u operands found for %s instead of the expected %u", 114 114 "bad type/size of operands '%s'", 115 115 "constant initializer expected", 116 116 "undefined identifier '%s'", 117 117 "pointer", 118 118 "colon", 119 119 "] expected instead of '%s'", 120 120 ") expected instead of '%s'", 121 121 "ptr expected", 122 122 "integer expected", 123 123 "floating point expected", 124 124 "character is truncated", 125 125 "label expected", 126 126 "uplevel nested reference to variable %s", 127 127 "cannot use type %s as an operand" 128 128 }; 129 129 130 130 // Additional tokens for the inline assembler 131 131 typedef enum 132 132 { 133 133 ASMTKlocalsize = TOKMAX + 1, … … 575 575 { 576 576 popnd2->usFlags = usFlags2 = asm_determine_operand_flags(popnd2); 577 577 if (!popnd3) 578 578 usNumops = 2; 579 579 else 580 580 { 581 581 popnd3->usFlags = usFlags3 = asm_determine_operand_flags(popnd3); 582 582 usNumops = 3; 583 583 } 584 584 } 585 585 } 586 586 587 587 // Now check to insure that the number of operands is correct 588 588 usActual = (pop->usNumops & ITSIZE); 589 589 if (usActual != usNumops && asmstate.ucItype != ITopt && 590 590 asmstate.ucItype != ITfloat) 591 591 { 592 592 PARAM_ERROR: 593 593 asmerr(EM_nops_expected, usActual, asm_opstr(pop), usNumops); 594 594 } 595 *pusNumops = asmstate.ucItype == ITfloat ? 
usActual : usNumops;596 595 if (usActual < usNumops) 597 596 *pusNumops = usActual; 598 597 else 599 598 *pusNumops = usNumops; 600 599 // 601 600 // The number of arguments matches, now check to find the opcode 602 601 // in the associated opcode table 603 602 // 604 603 RETRY: 605 604 //printf("usActual = %d\n", usActual); 606 605 switch (usActual) 607 606 { 608 607 case 0: 609 608 ptbRet = pop->ptb ; 610 609 goto RETURN_IT; 611 610 612 611 case 1: 613 612 //printf("usFlags1 = "); asm_output_flags(usFlags1); printf("\n"); 614 613 for (pptb1 = pop->ptb.pptb1; pptb1->usOpcode != ASM_END; 615 614 pptb1++) … … 1477 1476 if (usNumops == 1) 1478 1477 { pc->IFL2 = FLblock; 1479 1478 pc->IEVlsym2 = label; 1480 1479 } 1481 1480 else 1482 1481 { pc->IFL1 = FLblock; 1483 1482 pc->IEVlsym1 = label; 1484 1483 } 1485 1484 } 1486 1485 } 1487 1486 } 1488 1487 1489 1488 switch (usNumops) 1490 1489 { 1491 1490 case 0: 1492 1491 break; 1493 1492 case 1: 1494 1493 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1495 1494 amodTable1 == _normal && (uRegmaskTable1 & _rplus_r))) 1496 1495 { 1497 if (asmstate.ucItype == ITfloat) 1498 pc->Irm += popnd1->base->val; 1499 else if (pc->Iop == 0x0f) 1500 pc->Iop2 += popnd1->base->val; 1501 else 1502 pc->Iop += popnd1->base->val; 1496 unsigned reg = popnd1->base->val; 1497 if (reg & 8) 1498 { reg &= 7; 1499 pc->Irex |= REX_B; 1500 assert(I64); 1501 } 1502 if (asmstate.ucItype == ITfloat) 1503 pc->Irm += reg; 1504 else if (pc->Iop == 0x0f) 1505 pc->Iop2 += reg; 1506 else 1507 pc->Iop += reg; 1503 1508 #ifdef DEBUG 1504 auchOpcode[usIdx-1] += popnd1->base->val;1509 auchOpcode[usIdx-1] += reg; 1505 1510 #endif 1506 1511 } 1507 1512 else 1508 1513 { asm_make_modrm_byte( 1509 1514 #ifdef DEBUG 1510 1515 auchOpcode, &usIdx, 1511 1516 #endif 1512 1517 pc, 1513 1518 ptb.pptb1->usFlags, 1514 1519 popnd1, NULL); 1515 1520 } 1516 1521 popndTmp = popnd1; 1517 1522 aoptyTmp = aoptyTable1; 1518 1523 uSizemaskTmp = uSizemaskTable1; 1519 1524 L1: 1520 
1525 if (aoptyTmp == _imm) 1521 1526 { 1522 1527 Declaration *d = popndTmp->s ? popndTmp->s->isDeclaration() 1523 1528 : NULL; 1524 1529 if (popndTmp->bSeg) … … 1608 1613 else 1609 1614 { 1610 1615 asm_make_modrm_byte( 1611 1616 #ifdef DEBUG 1612 1617 auchOpcode, &usIdx, 1613 1618 #endif 1614 1619 pc, 1615 1620 ptb.pptb1->usFlags, 1616 1621 popnd2, popnd1); 1617 1622 } 1618 1623 popndTmp = popnd1; 1619 1624 aoptyTmp = aoptyTable1; 1620 1625 uSizemaskTmp = uSizemaskTable1; 1621 1626 } 1622 1627 else 1623 1628 { 1624 1629 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1625 1630 amodTable1 == _normal && 1626 1631 (uRegmaskTable1 & _rplus_r))) 1627 1632 { 1628 if (asmstate.ucItype == ITfloat) 1629 pc->Irm += popnd1->base->val; 1630 else 1631 if (pc->Iop == 0x0f) 1632 pc->Iop2 += popnd1->base->val; 1633 else 1634 pc->Iop += popnd1->base->val; 1633 unsigned reg = popnd1->base->val; 1634 if (reg & 8) 1635 { reg &= 7; 1636 pc->Irex |= REX_B; 1637 assert(I64); 1638 } 1639 if (asmstate.ucItype == ITfloat) 1640 pc->Irm += reg; 1641 else if (pc->Iop == 0x0f) 1642 pc->Iop2 += reg; 1643 else 1644 pc->Iop += reg; 1635 1645 #ifdef DEBUG 1636 auchOpcode[usIdx-1] += popnd1->base->val;1646 auchOpcode[usIdx-1] += reg; 1637 1647 #endif 1638 1648 } 1639 1649 else 1640 1650 if (((aoptyTable2 == _reg || aoptyTable2 == _float) && 1641 1651 amodTable2 == _normal && 1642 1652 (uRegmaskTable2 & _rplus_r))) 1643 1653 { 1644 if (asmstate.ucItype == ITfloat) 1645 pc->Irm += popnd2->base->val; 1646 else 1647 if (pc->Iop == 0x0f) 1648 pc->Iop2 += popnd2->base->val; 1649 else 1650 pc->Iop += popnd2->base->val; 1654 unsigned reg = popnd2->base->val; 1655 if (reg & 8) 1656 { reg &= 7; 1657 pc->Irex |= REX_B; 1658 assert(I64); 1659 } 1660 if (asmstate.ucItype == ITfloat) 1661 pc->Irm += reg; 1662 else if (pc->Iop == 0x0f) 1663 pc->Iop2 += reg; 1664 else 1665 pc->Iop += reg; 1651 1666 #ifdef DEBUG 1652 auchOpcode[usIdx-1] += popnd2->base->val;1667 auchOpcode[usIdx-1] += reg; 1653 1668 #endif 
1654 1669 } 1655 1670 else if (ptb.pptb0->usOpcode == 0xF30FD6 || 1656 1671 ptb.pptb0->usOpcode == 0x0F12 || 1657 1672 ptb.pptb0->usOpcode == 0x0F16 || 1658 1673 ptb.pptb0->usOpcode == 0x660F50 || 1659 1674 ptb.pptb0->usOpcode == 0x0F50 || 1660 1675 ptb.pptb0->usOpcode == 0x660FD7 || 1661 1676 ptb.pptb0->usOpcode == 0x0FD7) 1662 1677 { 1663 1678 asm_make_modrm_byte( 1664 1679 #ifdef DEBUG 1665 1680 auchOpcode, &usIdx, 1666 1681 #endif 1667 1682 pc, 1668 1683 ptb.pptb1->usFlags, 1669 1684 popnd2, popnd1); 1670 1685 } 1671 1686 else 1672 1687 { … … 1698 1713 if (aoptyTable2 == _m || aoptyTable2 == _rm || 1699 1714 usOpcode == 0x0FC5) // PEXTRW 1700 1715 { 1701 1716 asm_make_modrm_byte( 1702 1717 #ifdef DEBUG 1703 1718 auchOpcode, &usIdx, 1704 1719 #endif 1705 1720 pc, 1706 1721 ptb.pptb1->usFlags, 1707 1722 popnd2, popnd1); 1708 1723 popndTmp = popnd3; 1709 1724 aoptyTmp = aoptyTable3; 1710 1725 uSizemaskTmp = uSizemaskTable3; 1711 1726 } 1712 1727 else { 1713 1728 1714 1729 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1715 1730 amodTable1 == _normal && 1716 1731 (uRegmaskTable1 &_rplus_r))) 1717 1732 { 1718 if (asmstate.ucItype == ITfloat) 1719 pc->Irm += popnd1->base->val; 1720 else 1721 if (pc->Iop == 0x0f) 1722 pc->Iop2 += popnd1->base->val; 1723 else 1724 pc->Iop += popnd1->base->val; 1733 unsigned reg = popnd1->base->val; 1734 if (reg & 8) 1735 { reg &= 7; 1736 pc->Irex |= REX_B; 1737 assert(I64); 1738 } 1739 if (asmstate.ucItype == ITfloat) 1740 pc->Irm += reg; 1741 else if (pc->Iop == 0x0f) 1742 pc->Iop2 += reg; 1743 else 1744 pc->Iop += reg; 1725 1745 #ifdef DEBUG 1726 auchOpcode[usIdx-1] += popnd1->base->val;1746 auchOpcode[usIdx-1] += reg; 1727 1747 #endif 1728 1748 } 1729 1749 else 1730 1750 if (((aoptyTable2 == _reg || aoptyTable2 == _float) && 1731 1751 amodTable2 == _normal && 1732 1752 (uRegmaskTable2 &_rplus_r))) 1733 1753 { 1734 if (asmstate.ucItype == ITfloat) 1735 pc->Irm += popnd1->base->val; 1736 else 1737 if (pc->Iop == 0x0f) 1738 
pc->Iop2 += popnd1->base->val; 1739 else 1740 pc->Iop += popnd2->base->val; 1754 unsigned reg = popnd1->base->val; 1755 if (reg & 8) 1756 { reg &= 7; 1757 pc->Irex |= REX_B; 1758 assert(I64); 1759 } 1760 if (asmstate.ucItype == ITfloat) 1761 pc->Irm += reg; 1762 else if (pc->Iop == 0x0f) 1763 pc->Iop2 += reg; 1764 else 1765 pc->Iop += reg; 1741 1766 #ifdef DEBUG 1742 auchOpcode[usIdx-1] += popnd2->base->val;1767 auchOpcode[usIdx-1] += reg; 1743 1768 #endif 1744 1769 } 1745 1770 else 1746 1771 asm_make_modrm_byte( 1747 1772 #ifdef DEBUG 1748 1773 auchOpcode, &usIdx, 1749 1774 #endif 1750 1775 pc, 1751 1776 ptb.pptb1->usFlags, 1752 1777 popnd1, popnd2); 1753 1778 1754 1779 popndTmp = popnd3; 1755 1780 aoptyTmp = aoptyTable3; 1756 1781 uSizemaskTmp = uSizemaskTable3; 1757 1782 1758 1783 } 1759 1784 goto L1; 1760 1785 } 1761 1786 L2: 1762 1787 … … 2655 2680 case _modsidi: 2656 2681 usRet |= (mSI | mDI); 2657 2682 break; 2658 2683 case _modcx: 2659 2684 usRet |= mCX; 2660 2685 break; 2661 2686 case _modes: 2662 2687 /*usRet |= mES;*/ 2663 2688 break; 2664 2689 case _modall: 2665 2690 asmstate.bReturnax = TRUE; 2666 2691 return /*mES |*/ ALLREGS; 2667 2692 case _modsiax: 2668 2693 usRet |= (mSI | mAX); 2669 2694 break; 2670 2695 case _modsinot1: 2671 2696 usRet |= mSI; 2672 2697 popnd1 = NULL; 2673 2698 break; 2674 2699 } 2675 if (popnd1 && ASM_GET_aopty(popnd1->usFlags) == _reg) { 2676 switch (ASM_GET_amod(popnd1->usFlags)) { 2700 if (popnd1 && ASM_GET_aopty(popnd1->usFlags) == _reg) 2701 { 2702 switch (ASM_GET_amod(popnd1->usFlags)) 2703 { 2677 2704 default: 2678 if (ASM_GET_uSizemask(popnd1->usFlags) == _8) { 2679 switch(popnd1->base->val) { 2680 case _AL: 2681 case _AH: 2682 usRet |= mAX; 2683 break; 2684 case _BL: 2685 case _BH: 2686 usRet |= mBX; 2687 break; 2688 case _CL: 2689 case _CH: 2690 usRet |= mCX; 2691 break; 2692 case _DL: 2693 case _DH: 2694 usRet |= mDX; 2695 break; 2696 default: 2697 assert(0); 2698 } 2699 } 2700 else { 2701 switch (popnd1->base->val) 
{ 2702 case _AX: 2703 usRet |= mAX; 2704 break; 2705 case _BX: 2706 usRet |= mBX; 2707 break; 2708 case _CX: 2709 usRet |= mCX; 2710 break; 2711 case _DX: 2712 usRet |= mDX; 2713 break; 2714 case _SI: 2715 usRet |= mSI; 2716 break; 2717 case _DI: 2718 usRet |= mDI; 2719 break; 2720 } 2721 } 2705 usRet |= 1 << popnd1->base->val; 2706 usRet &= ~(mBP | mSP); // ignore changing these 2722 2707 break; 2723 2708 case _rseg: 2724 2709 //if (popnd1->base->val == _ES) 2725 2710 //usRet |= mES; 2726 2711 break; 2727 2712 2728 2713 case _rspecial: 2729 2714 break; 2730 2715 } 2731 2716 } 2732 2717 if (usRet & mAX) 2733 2718 asmstate.bReturnax = TRUE; 2734 2719 2735 2720 return usRet; 2736 2721 } 2737 2722 2738 2723 /******************************* 2739 2724 * Match flags in operand against flags in opcode table. 2740 2725 * Returns: 2741 2726 * !=0 if match branches/dmd-1.x/src/linux.mak
r569 r577 9 9 10 10 #OPT=-g -g3 11 11 #OPT=-O2 12 12 13 13 #COV=-fprofile-arcs -ftest-coverage 14 14 15 15 WARNINGS=-Wno-deprecated -Wstrict-aliasing 16 16 17 17 #GFLAGS = $(WARNINGS) -D__near= -D__pascal= -fno-exceptions -g -DDEBUG=1 $(COV) 18 18 GFLAGS = $(WARNINGS) -D__near= -D__pascal= -fno-exceptions -O2 19 19 20 20 CFLAGS = $(GFLAGS) -I$(ROOT) -D__I86__=1 -DMARS=1 -DTARGET_LINUX=1 -D_DH 21 21 MFLAGS = $(GFLAGS) -I$C -I$(TK) -D__I86__=1 -DMARS=1 -DTARGET_LINUX=1 -D_DH 22 22 23 23 CH= $C/cc.h $C/global.h $C/parser.h $C/oper.h $C/code.h $C/type.h \ 24 24 $C/dt.h $C/cgcv.h $C/el.h $C/iasm.h 25 25 TOTALH= 26 26 27 27 DMD_OBJS = \ 28 28 access.o array.o attrib.o bcomplex.o bit.o blockopt.o \ 29 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cg cv.o cgelem.o cgen.o \29 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cgelem.o cgen.o \ 30 30 cgreg.o cgsched.o class.o cod1.o cod2.o cod3.o cod4.o cod5.o \ 31 31 constfold.o irstate.o dchar.o cond.o debug.o \ 32 32 declaration.o dsymbol.o dt.o dump.o e2ir.o ee.o eh.o el.o \ 33 33 dwarf.o enum.o evalu8.o expression.o func.o gdag.o gflow.o \ 34 34 glocal.o gloop.o glue.o gnuc.o go.o gother.o html.o iasm.o id.o \ 35 35 identifier.o impcnvtab.o import.o inifile.o init.o inline.o \ 36 36 lexer.o link.o lstring.o mangle.o mars.o rmem.o module.o msc.o mtype.o \ 37 37 nteh.o cppmangle.o opover.o optimize.o os.o out.o outbuf.o \ 38 38 parse.o ph.o ptrntab.o root.o rtlsym.o s2ir.o scope.o statement.o \ 39 39 stringtable.o struct.o csymbol.o template.o tk.o tocsym.o todt.o \ 40 40 type.o typinf.o util.o var.o version.o strtold.o utf.o staticassert.o \ 41 41 unialpha.o toobj.o toctype.o toelfdebug.o entity.o doc.o macro.o \ 42 42 hdrgen.o delegatize.o aa.o ti_achar.o toir.o interpret.o traits.o \ 43 43 builtin.o clone.o aliasthis.o \ 44 44 man.o arrayop.o port.o response.o async.o json.o speller.o aav.o unittests.o \ 45 45 imphint.o \ 46 46 libelf.o elfobj.o 47 47 48 48 SRC = win32.mak linux.mak osx.mak freebsd.mak solaris.mak \ 49 49 mars.c 
enum.c struct.c dsymbol.c import.c idgen.c impcnvgen.c \ branches/dmd-1.x/src/osx.mak
r532 r577 15 15 16 16 #OPT=-g -g3 17 17 #OPT=-O2 18 18 19 19 #COV=-fprofile-arcs -ftest-coverage 20 20 21 21 WARNINGS=-Wno-deprecated -Wstrict-aliasing 22 22 23 23 #GFLAGS = $(WARNINGS) -D__near= -D__pascal= -fno-exceptions -g -DDEBUG=1 -DUNITTEST $(COV) 24 24 GFLAGS = $(WARNINGS) -D__near= -D__pascal= -fno-exceptions -O2 25 25 26 26 CFLAGS = $(GFLAGS) -I$(ROOT) -D__I86__=1 -DMARS=1 -DTARGET_OSX=1 -D_DH 27 27 MFLAGS = $(GFLAGS) -I$C -I$(TK) -D__I86__=1 -DMARS=1 -DTARGET_OSX=1 -D_DH 28 28 29 29 CH= $C/cc.h $C/global.h $C/parser.h $C/oper.h $C/code.h $C/type.h \ 30 30 $C/dt.h $C/cgcv.h $C/el.h $C/iasm.h 31 31 TOTALH= 32 32 33 33 DMD_OBJS = \ 34 34 access.o array.o attrib.o bcomplex.o bit.o blockopt.o \ 35 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cg cv.o cgelem.o cgen.o \35 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cgelem.o cgen.o \ 36 36 cgreg.o cgsched.o class.o cod1.o cod2.o cod3.o cod4.o cod5.o \ 37 37 constfold.o irstate.o dchar.o cond.o debug.o \ 38 38 declaration.o dsymbol.o dt.o dump.o e2ir.o ee.o eh.o el.o \ 39 39 dwarf.o enum.o evalu8.o expression.o func.o gdag.o gflow.o \ 40 40 glocal.o gloop.o glue.o gnuc.o go.o gother.o html.o iasm.o id.o \ 41 41 identifier.o impcnvtab.o import.o inifile.o init.o inline.o \ 42 42 lexer.o link.o lstring.o mangle.o mars.o rmem.o module.o msc.o mtype.o \ 43 43 nteh.o cppmangle.o opover.o optimize.o os.o out.o outbuf.o \ 44 44 parse.o ph.o ptrntab.o root.o rtlsym.o s2ir.o scope.o statement.o \ 45 45 stringtable.o struct.o csymbol.o template.o tk.o tocsym.o todt.o \ 46 46 type.o typinf.o util.o var.o version.o strtold.o utf.o staticassert.o \ 47 47 unialpha.o toobj.o toctype.o toelfdebug.o entity.o doc.o macro.o \ 48 48 hdrgen.o delegatize.o aa.o ti_achar.o toir.o interpret.o traits.o \ 49 49 builtin.o clone.o aliasthis.o \ 50 50 man.o arrayop.o port.o response.o async.o json.o speller.o aav.o unittests.o \ 51 51 imphint.o \ 52 52 libmach.o machobj.o 53 53 54 54 SRC = win32.mak linux.mak osx.mak freebsd.mak solaris.mak \ 55 55 
mars.c enum.c struct.c dsymbol.c import.c idgen.c impcnvgen.c \ branches/dmd-1.x/src/solaris.mak
r473 r577 5 5 6 6 CC=g++ -m32 7 7 8 8 #OPT=-g -g3 9 9 #OPT=-O2 10 10 11 11 #COV=-fprofile-arcs -ftest-coverage 12 12 13 13 #GFLAGS = -Wno-deprecated -D__near= -D__pascal= -fno-exceptions -g -DDEBUG=1 $(COV) 14 14 GFLAGS = -Wno-deprecated -D__near= -D__pascal= -fno-exceptions -O2 15 15 16 16 CFLAGS = $(GFLAGS) -I$(ROOT) -D__I86__=1 -DMARS=1 -DTARGET_SOLARIS=1 -D_DH 17 17 MFLAGS = $(GFLAGS) -I$C -I$(TK) -D__I86__=1 -DMARS=1 -DTARGET_SOLARIS=1 -D_DH 18 18 19 19 CH= $C/cc.h $C/global.h $C/parser.h $C/oper.h $C/code.h $C/type.h \ 20 20 $C/dt.h $C/cgcv.h $C/el.h $C/iasm.h 21 21 TOTALH= 22 22 23 23 DMD_OBJS = \ 24 24 access.o array.o attrib.o bcomplex.o bit.o blockopt.o \ 25 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cg cv.o cgelem.o cgen.o \25 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cgelem.o cgen.o \ 26 26 cgreg.o cgsched.o class.o cod1.o cod2.o cod3.o cod4.o cod5.o \ 27 27 constfold.o irstate.o dchar.o cond.o debug.o \ 28 28 declaration.o dsymbol.o dt.o dump.o e2ir.o ee.o eh.o el.o \ 29 29 dwarf.o enum.o evalu8.o expression.o func.o gdag.o gflow.o \ 30 30 glocal.o gloop.o glue.o gnuc.o go.o gother.o html.o iasm.o id.o \ 31 31 identifier.o impcnvtab.o import.o inifile.o init.o inline.o \ 32 32 lexer.o link.o lstring.o mangle.o mars.o rmem.o module.o msc.o mtype.o \ 33 33 nteh.o cppmangle.o opover.o optimize.o os.o out.o outbuf.o \ 34 34 parse.o ph.o ptrntab.o root.o rtlsym.o s2ir.o scope.o statement.o \ 35 35 stringtable.o struct.o csymbol.o template.o tk.o tocsym.o todt.o \ 36 36 type.o typinf.o util.o var.o version.o strtold.o utf.o staticassert.o \ 37 37 unialpha.o toobj.o toctype.o toelfdebug.o entity.o doc.o macro.o \ 38 38 hdrgen.o delegatize.o aa.o ti_achar.o toir.o interpret.o traits.o \ 39 39 builtin.o clone.o aliasthis.o \ 40 40 man.o arrayop.o port.o response.o async.o json.o speller.o aav.o unittests.o \ 41 41 imphint.o \ 42 42 libelf.o elfobj.o 43 43 44 44 SRC = win32.mak linux.mak osx.mak freebsd.mak solaris.mak \ 45 45 mars.c enum.c struct.c dsymbol.c 
import.c idgen.c impcnvgen.c \ trunk/src/backend/cdef.h
r569 r577 542 542 #define LARGECODE (config.memmodel & 5) 543 543 544 544 #define Smodel 0 /* 64k code, 64k data */ 545 545 #define Mmodel 1 /* large code, 64k data */ 546 546 #define Cmodel 2 /* 64k code, large data */ 547 547 #define Lmodel 3 /* large code, large data */ 548 548 #define Vmodel 4 /* large code, large data, vcm */ 549 549 #define MEMMODELS 5 /* number of memory models */ 550 550 #endif 551 551 552 552 /* Segments */ 553 553 #define CODE 1 /* code segment */ 554 554 #define DATA 2 /* initialized data */ 555 555 #define CDATA 3 /* constant data */ 556 556 #define UDATA 4 /* uninitialized data */ 557 557 #define UNKNOWN 0x7FFF // unknown segment 558 558 #define DGROUPIDX 1 /* group index of DGROUP */ 559 559 560 560 #define KEEPBITFIELDS 0 /* 0 means code generator cannot handle bit fields, */ 561 561 /* so replace them with shifts and masks */ 562 563 #if TARGET_OSX564 #define STACKALIGN 16565 #else566 #define STACKALIGN 0567 #endif568 562 569 563 #define REGMAX 29 // registers are numbered 0..10 570 564 571 565 typedef unsigned tym_t; // data type big enough for type masks 572 566 typedef int SYMIDX; // symbol table index 573 567 574 568 #if 0 575 569 #if defined(__DMC__) && __DMC__ < 0x81e 576 570 typedef int bool; 577 571 #endif 578 572 #define bool int 579 573 #endif 580 574 581 575 #define _chkstack() (void)0 582 576 583 577 /* For 32 bit compilations, we don't need far keyword */ 584 578 #if 1 585 579 #define far 586 580 #define _far 587 581 #define __far trunk/src/backend/cg87.c
r569 r577 1 1 // Copyright (C) 1987-1995 by Symantec 2 // Copyright (C) 2000-20 09by Digital Mars2 // Copyright (C) 2000-2010 by Digital Mars 3 3 // All Rights Reserved 4 4 // http://www.digitalmars.com 5 5 // Written by Walter Bright 6 6 /* 7 7 * This source file is made available for personal use 8 8 * only. The license is in /dmd/src/dmd/backendlicense.txt 9 9 * or /dm/src/dmd/backendlicense.txt 10 10 * For any other uses, please contact Digital Mars. 11 11 */ 12 12 13 13 #if !SPP 14 14 15 15 #include <stdio.h> 16 16 #include <string.h> 17 17 #include <time.h> 18 18 #include <math.h> 19 19 #include "cc.h" 20 20 #include "el.h" 21 21 #include "oper.h" 22 22 #include "code.h" … … 80 80 /********************************** 81 81 * When we need to temporarilly save 8087 registers, we record information 82 82 * about the save into an array of NDP structs: 83 83 */ 84 84 85 85 NDP *NDP::save = NULL; 86 86 int NDP::savemax = 0; /* # of entries in NDP::save[] */ 87 87 int NDP::savetop = 0; /* # of entries used in NDP::save[] */ 88 88 89 89 #ifdef DEBUG 90 90 #define NDPSAVEINC 2 /* flush reallocation bugs */ 91 91 #else 92 92 #define NDPSAVEINC 8 /* allocation chunk sizes */ 93 93 #endif 94 94 95 95 /**************************************** 96 96 * Store/load to ndp save location i 97 97 */ 98 98 99 99 code *ndp_fstp(code *c, int i, tym_t ty) 100 { 100 { unsigned grex = I64 ? 
(REX_W << 16) : 0; 101 101 switch (tybasic(ty)) 102 102 { 103 103 case TYfloat: 104 104 case TYifloat: 105 105 case TYcfloat: 106 c = genc1(c,0xD9, modregrm(2,3,BPRM),FLndp,i); // FSTP m32real i[BP]106 c = genc1(c,0xD9,grex | modregrm(2,3,BPRM),FLndp,i); // FSTP m32real i[BP] 107 107 break; 108 108 109 109 case TYdouble: 110 110 case TYdouble_alias: 111 111 case TYidouble: 112 112 case TYcdouble: 113 c = genc1(c,0xDD, modregrm(2,3,BPRM),FLndp,i); // FSTP m64real i[BP]113 c = genc1(c,0xDD,grex | modregrm(2,3,BPRM),FLndp,i); // FSTP m64real i[BP] 114 114 break; 115 115 116 116 case TYldouble: 117 117 case TYildouble: 118 118 case TYcldouble: 119 c = genc1(c,0xDB, modregrm(2,7,BPRM),FLndp,i); // FSTP m80real i[BP]119 c = genc1(c,0xDB,grex | modregrm(2,7,BPRM),FLndp,i); // FSTP m80real i[BP] 120 120 break; 121 121 122 122 default: 123 123 assert(0); 124 124 } 125 125 return c; 126 126 } 127 127 128 128 code *ndp_fld(code *c, int i, tym_t ty) 129 { 129 { unsigned grex = I64 ? (REX_W << 16) : 0; 130 130 switch (tybasic(ty)) 131 131 { 132 132 case TYfloat: 133 133 case TYifloat: 134 134 case TYcfloat: 135 c = genc1(c,0xD9, modregrm(2,0,BPRM),FLndp,i);135 c = genc1(c,0xD9,grex | modregrm(2,0,BPRM),FLndp,i); 136 136 break; 137 137 138 138 case TYdouble: 139 139 case TYdouble_alias: 140 140 case TYidouble: 141 141 case TYcdouble: 142 c = genc1(c,0xDD, modregrm(2,0,BPRM),FLndp,i);142 c = genc1(c,0xDD,grex | modregrm(2,0,BPRM),FLndp,i); 143 143 break; 144 144 145 145 case TYldouble: 146 146 case TYildouble: 147 147 case TYcldouble: 148 c = genc1(c,0xDB, modregrm(2,5,BPRM),FLndp,i); // FLD m80real i[BP]148 c = genc1(c,0xDB,grex | modregrm(2,5,BPRM),FLndp,i); // FLD m80real i[BP] 149 149 break; 150 150 151 151 default: 152 152 assert(0); 153 153 } 154 154 return c; 155 155 } 156 156 157 157 /************************** 158 158 * Return index of empty slot in NDP::save[]. 
159 159 */ 160 160 161 161 STATIC int getemptyslot() 162 162 { int i; 163 163 164 164 for (i = 0; i < NDP::savemax; i++) 165 165 if (NDP::save[i].e == NULL) 166 166 goto L1; 167 167 /* Out of room, reallocate NDP::save[] */ 168 168 NDP::save = (NDP *)mem_realloc(NDP::save, … … 428 428 break; 429 429 } 430 430 if (_8087elems[i].e == e && 431 431 _8087elems[i].offset == offset) 432 432 { //printf("cse found %d\n",i); 433 433 //elem_print(e); 434 434 break; 435 435 } 436 436 } 437 437 return i; 438 438 } 439 439 440 440 /************************************* 441 441 * Reload common subexpression. 442 442 */ 443 443 444 444 code *comsub87(elem *e,regm_t *pretregs) 445 445 { code *c; 446 446 447 447 // Look on 8087 stack 448 int i; 449 450 i = cse_get(e, 0); 448 int i = cse_get(e, 0); 451 449 452 450 if (tycomplex(e->Ety)) 453 { unsigned sz; 454 int j; 455 456 sz = tysize(e->Ety); 457 j = cse_get(e, sz / 2); 451 { 452 unsigned sz = tysize(e->Ety); 453 int j = cse_get(e, sz / 2); 458 454 if (i >= 0 && j >= 0) 459 455 { 460 456 c = push87(); 461 457 c = cat(c, push87()); 462 458 c = genf2(c,0xD9,0xC0 + i); // FLD ST(i) 463 459 c = genf2(c,0xD9,0xC0 + j + 1); // FLD ST(j + 1) 464 460 c = cat(c,fixresult_complex87(e,mST01,pretregs)); 465 461 } 466 462 else 467 463 // Reload 468 464 c = loaddata(e,pretregs); 469 465 } 470 466 else 471 467 { 472 468 if (i >= 0) 473 469 { 474 470 c = push87(); 475 471 c = genf2(c,0xD9,0xC0 + i); // FLD ST(i) 476 472 c = cat(c,fixresult(e,mST0,pretregs)); 477 473 } 478 474 else 479 475 // Reload 480 476 c = loaddata(e,pretregs); 481 477 } 482 478 483 479 freenode(e); 484 480 return c; 485 481 } 486 482 487 483 488 484 /************************** 489 485 * Generate code to deal with floatreg. 
490 486 */ 491 487 492 488 code * genfltreg(code *c,unsigned opcode,unsigned reg,targ_size_t offset) 493 489 { 494 490 floatreg = TRUE; 495 491 reflocal = TRUE; 496 492 if ((opcode & 0xF8) == 0xD8) 497 493 c = genfwait(c); 498 return genc1(c,opcode,modregrm(2,reg,BPRM),FLfltreg,offset); 494 unsigned grex = I64 ? (REX_W << 16) : 0; 495 return genc1(c,opcode,grex | modregxrm(2,reg,BPRM),FLfltreg,offset); 499 496 } 500 497 501 498 /******************************* 502 499 * Decide if we need to gen an FWAIT. 503 500 */ 504 501 505 502 code *genfwait(code *c) 506 503 { 507 504 if (ADDFWAIT()) 508 505 c = gen1(c,FWAIT); 509 506 return c; 510 507 } 511 508 512 509 /*************************************** 513 510 * Generate floating point instruction. 514 511 */ 515 512 516 513 STATIC code * genf2(code *c,unsigned op,unsigned rm) 517 514 { 518 515 return gen2(genfwait(c),op,rm); … … 743 740 tym_t tym; 744 741 code *c1,*c2; 745 742 unsigned sz; 746 743 747 744 //printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs); 748 745 //printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs)); 749 746 assert(!*pretregs || retregs); 750 747 c1 = CNIL; 751 748 c2 = CNIL; 752 749 tym = tybasic(e->Ety); 753 750 sz = tysize[tym]; 754 751 //printf("tym = x%x, sz = %d\n", tym, sz); 755 752 756 753 if (*pretregs & mST01) 757 754 return fixresult_complex87(e, retregs, pretregs); 758 755 759 756 /* if retregs needs to be transferred into the 8087 */ 760 757 if (*pretregs & mST0 && retregs & (mBP | ALLREGS)) 761 758 { 762 759 assert(sz <= DOUBLESIZE); 763 if ( I32)760 if (!I16) 764 761 { 765 762 766 763 if (*pretregs & mPSW) 767 764 { // Set flags 768 regm_t r; 769 770 r = retregs | mPSW; 765 regm_t r = retregs | mPSW; 771 766 c1 = fixresult(e,retregs,&r); 772 767 } 773 768 c2 = push87(); 774 if (sz == REGSIZE) 775 { unsigned reg; 776 777 reg = findreg(retregs); 769 if (sz == REGSIZE || (I64 && sz == 4)) 770 { 771 
unsigned reg = findreg(retregs); 778 772 c2 = genfltreg(c2,0x89,reg,0); // MOV fltreg,reg 779 773 genfltreg(c2,0xD9,0,0); // FLD float ptr fltreg 780 774 } 781 775 else 782 776 { unsigned msreg,lsreg; 783 777 784 778 msreg = findregmsw(retregs); 785 779 lsreg = findreglsw(retregs); 786 780 c2 = genfltreg(c2,0x89,lsreg,0); // MOV fltreg,lsreg 787 781 genfltreg(c2,0x89,msreg,4); // MOV fltreg+4,msreg 788 782 genfltreg(c2,0xDD,0,0); // FLD double ptr fltreg 789 783 } 790 784 } 791 785 else 792 786 { 793 787 regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS; 794 788 regm |= *pretregs & mPSW; 795 789 c1 = fixresult(e,retregs,&regm); 796 790 regm = 0; // don't worry about result from CLIBxxx 797 791 c2 = callclib(e, 798 792 ((sz == FLOATSIZE) ? CLIBfltto87 : CLIBdblto87), 799 793 &regm,0); 800 794 } 801 795 } 802 796 else if (*pretregs & (mBP | ALLREGS) && retregs & mST0) 803 797 { unsigned mf; 804 798 unsigned reg; 805 799 806 800 assert(sz <= DOUBLESIZE); 807 801 mf = (sz == FLOATSIZE) ? MFfloat : MFdouble; 808 802 if (*pretregs & mPSW && !(retregs & mPSW)) 809 803 c1 = genftst(c1,e,0); 810 804 /* FSTP floatreg */ 811 805 pop87(); 812 806 c1 = genfltreg(c1,ESC(mf,1),3,0); 813 807 genfwait(c1); 814 808 c2 = allocreg(pretregs,&reg,(sz == FLOATSIZE) ?
TYfloat : TYdouble); 815 809 if (sz == FLOATSIZE) 816 810 { 817 if ( I32)811 if (!I16) 818 812 c2 = genfltreg(c2,0x8B,reg,0); 819 813 else 820 814 { c2 = genfltreg(c2,0x8B,reg,REGSIZE); 821 815 genfltreg(c2,0x8B,findreglsw(*pretregs),0); 822 816 } 823 817 } 824 818 else 825 819 { 826 if ( I32)820 if (!I16) 827 821 { c2 = genfltreg(c2,0x8B,reg,REGSIZE); 828 822 genfltreg(c2,0x8B,findreglsw(*pretregs),0); 829 823 } 830 824 else 831 825 { c2 = genfltreg(c2,0x8B,AX,6); 832 826 genfltreg(c2,0x8B,BX,4); 833 827 genfltreg(c2,0x8B,CX,2); 834 828 genfltreg(c2,0x8B,DX,0); 835 829 } 836 830 } 837 831 } 838 832 else if (*pretregs == 0 && retregs == mST0) 839 833 { 840 834 c1 = genf2(c1,0xDD,modregrm(3,3,0)); // FPOP 841 835 pop87(); 842 836 } 843 837 else 844 838 { if (*pretregs & mPSW) 845 839 { if (!(retregs & mPSW)) 846 840 { assert(retregs & mST0); … … 1382 1376 (e2->E1->Eoper == OPs32_d || e2->E1->Eoper == OPs64_d || e2->E1->Eoper == OPs16_d) && 1383 1377 e2->E1->E1->Eoper == OPvar 1384 1378 ) || 1385 1379 ((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) && 1386 1380 e2->E1->Eoper == OPvar 1387 1381 ) 1388 1382 ) 1389 1383 ) 1390 1384 ) 1391 1385 { // Reverse order of evaluation 1392 1386 e1 = e->E2; 1393 1387 e2 = e->E1; 1394 1388 op = oprev[op + 1]; 1395 1389 reverse ^= 1; 1396 1390 } 1397 1391 1398 1392 c1 = codelem(e1,&retregs,FALSE); 1399 1393 note87(e1,0,0); 1400 1394 1401 1395 if (config.flags4 & CFG4fdivcall && e->Eoper == OPdiv) 1402 { regm_t retregs; 1403 1404 retregs = mST0; 1396 { 1397 regm_t retregs = mST0; 1405 1398 c2 = load87(e2,0,&retregs,e1,-1); 1406 1399 c2 = cat(c2,makesure87(e1,0,1,0)); 1407 1400 if (op == 7) // if reverse divide 1408 1401 c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 1409 1402 c2 = cat(c2,callclib(e,CLIBfdiv87,&retregs,0)); 1410 1403 pop87(); 1411 1404 resregm = mST0; 1412 1405 freenode(e2); 1413 1406 c4 = fixresult87(e,resregm,pretregs); 1414 1407 } 1415 1408 else if (e->Eoper == OPmod) 1416 1409 { 1417 1410 /* 1418 1411 
* fld tbyte ptr y 1419 1412 * fld tbyte ptr x // ST = x, ST1 = y 1420 1413 * FM1: // We don't use fprem1 because for some inexplicable 1421 1414 * // reason we get -5 when we do _modulo(15, 10) 1422 1415 * fprem // ST = ST % ST1 1423 1416 * fstsw word ptr sw 1424 1417 * fwait 1425 1418 * mov AH,byte ptr sw+1 // get msb of status word in AH 1426 1419 * sahf // transfer to flags 1427 1420 * jp FM1 // continue till ST < ST1 1428 1421 * fstp ST(1) // leave remainder on stack 1429 1422 */ 1430 regm_t retregs; 1431 1432 retregs = mST0; 1423 regm_t retregs = mST0; 1433 1424 c2 = load87(e2,0,&retregs,e1,-1); 1434 1425 c2 = cat(c2,makesure87(e1,0,1,0)); // now have x,y on stack; need y,x 1435 1426 if (!reverse) // if not reverse modulo 1436 1427 c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 1437 1428 1438 1429 c3 = gen2(NULL, 0xD9, 0xF8); // FM1: FPREM 1439 1430 c3 = cg87_87topsw(c3); 1440 1431 c3 = genjmp(c3, JP, FLcode, (block *)c3); // JP FM1 1441 1432 c3 = genf2(c3,0xDD,0xD8 + 1); // FSTP ST(1) 1442 1433 1443 1434 pop87(); 1444 1435 resregm = mST0; 1445 1436 freenode(e2); 1446 1437 c4 = fixresult87(e,resregm,pretregs); 1447 1438 } 1448 1439 else 1449 1440 { c2 = load87(e2,0,pretregs,e1,op); 1450 1441 freenode(e2); 1451 1442 } 1452 1443 if (*pretregs & mST0) … … 1593 1584 break; 1594 1585 } 1595 1586 note87(e,0,0); 1596 1587 } 1597 1588 break; 1598 1589 case OPd_f: 1599 1590 case OPf_d: 1600 1591 case OPd_ld: 1601 1592 mf1 = (tybasic(e->E1->Ety) == TYfloat || tybasic(e->E1->Ety) == TYifloat) 1602 1593 ? 
MFfloat : MFdouble; 1603 1594 if (op != -1 && stackused) 1604 1595 note87(eleft,eoffset,0); // don't trash this value 1605 1596 if (e->E1->Eoper == OPvar || e->E1->Eoper == OPind) 1606 1597 { 1607 1598 #if 1 1608 1599 L4: 1609 1600 c = getlvalue(&cs,e->E1,0); 1610 1601 cs.Iop = ESC(mf1,0); 1611 1602 if (ADDFWAIT()) 1612 1603 cs.Iflags |= CFwait; 1613 if ( I32)1604 if (!I16) 1614 1605 cs.Iflags &= ~CFopsize; 1615 1606 if (op != -1) 1616 1607 { cs.Irm |= modregrm(0,op,0); 1617 1608 c = cat(c,makesure87(eleft,eoffset,0,0)); 1618 1609 } 1619 1610 else 1620 1611 { cs.Iop |= 1; 1621 1612 c = cat(c,push87()); 1622 1613 } 1623 1614 c = gen(c,&cs); /* FLD / Fop */ 1624 1615 #else 1625 1616 c = loadea(e->E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e->E1 */ 1626 1617 #endif 1627 1618 /* Variable cannot be put into a register anymore */ 1628 1619 if (e->E1->Eoper == OPvar) 1629 1620 notreg(e->E1); 1630 1621 freenode(e->E1); 1631 1622 } 1632 1623 else 1633 1624 { 1634 1625 retregs = mST0; 1635 1626 c = codelem(e->E1,&retregs,FALSE); 1636 1627 if (op != -1) 1637 1628 { c = cat(c,makesure87(eleft,eoffset,1,0)); 1638 1629 c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP 1639 1630 pop87(); 1640 1631 } 1641 1632 } 1642 1633 break; 1643 1634 1644 1635 case OPs64_d: 1645 1636 if (e->E1->Eoper == OPvar || 1646 1637 (e->E1->Eoper == OPind && e->E1->Ecount == 0)) 1647 1638 { 1648 1639 c = getlvalue(&cs,e->E1,0); 1649 1640 cs.Iop = 0xDF; 1650 1641 if (ADDFWAIT()) 1651 1642 cs.Iflags |= CFwait; 1652 if ( I32)1643 if (!I16) 1653 1644 cs.Iflags &= ~CFopsize; 1654 1645 c = cat(c,push87()); 1655 1646 cs.Irm |= modregrm(0,5,0); 1656 1647 c = gen(c,&cs); // FILD m64 1657 1648 // Variable cannot be put into a register anymore 1658 1649 if (e->E1->Eoper == OPvar) 1659 1650 notreg(e->E1); 1660 1651 freenode(e->E1); 1661 1652 } 1662 1653 else 1663 1654 { 1664 1655 retregs = ALLREGS; 1665 1656 c = codelem(e->E1,&retregs,FALSE); 1666 1657 reg = findreglsw(retregs); 1667 1658 c = genfltreg(c,0x89,reg,0); // MOV 
floatreg,reglsw 1668 1659 reg = findregmsw(retregs); 1669 1660 c = genfltreg(c,0x89,reg,4); // MOV floatreg+4,regmsw 1670 1661 c = cat(c,push87()); 1671 1662 c = genfltreg(c,0xDF,5,0); // FILD long long ptr floatreg 1672 1663 } … … 1908 1899 if (I32) 1909 1900 cs.Iflags &= ~CFopsize; 1910 1901 else if (ADDFWAIT()) 1911 1902 cs.Iflags |= CFwait; 1912 1903 c2 = gen(c2, &cs); 1913 1904 #if LNGDBLSIZE == 12 1914 1905 /* This deals with the fact that 10 byte reals really 1915 1906 * occupy 12 bytes by zeroing the extra 2 bytes. 1916 1907 */ 1917 1908 if (op1 == 0xDB) 1918 1909 { 1919 1910 cs.Iop = 0xC7; // MOV EA+10,0 1920 1911 NEWREG(cs.Irm, 0); 1921 1912 cs.IEV1.sp.Voffset += 10; 1922 1913 cs.IFL2 = FLconst; 1923 1914 cs.IEV2.Vint = 0; 1924 1915 cs.Iflags |= CFopsize; 1925 1916 c2 = gen(c2, &cs); 1926 1917 } 1927 1918 #endif 1928 #if LNGDBLSIZE == 16 1919 if (tysize[TYldouble] == 16) 1920 { 1929 1921 /* This deals with the fact that 10 byte reals really 1930 1922 * occupy 16 bytes by zeroing the extra 6 bytes. 1931 1923 */ 1932 1924 if (op1 == 0xDB) 1933 1925 { 1934 1926 cs.Iop = 0xC7; // MOV EA+10,0 1935 1927 NEWREG(cs.Irm, 0); 1936 1928 cs.IEV1.sp.Voffset += 10; 1937 1929 cs.IFL2 = FLconst; 1938 1930 cs.IEV2.Vint = 0; 1939 1931 cs.Iflags |= CFopsize; 1940 1932 c2 = gen(c2, &cs); 1941 1933 1942 1934 cs.IEV1.sp.Voffset += 2; 1943 1935 cs.Iflags &= ~CFopsize; 1944 1936 c2 = gen(c2, &cs); 1945 1937 } 1946 #endif 1938 } 1947 1939 #endif 1948 1940 c2 = genfwait(c2); 1949 1941 freenode(e->E1); 1950 1942 c1 = cat3(c1,c2,fixresult87(e,mST0 | mPSW,pretregs)); 1951 1943 return c1; 1952 1944 } 1953 1945 1954 1946 /******************************* 1955 1947 * Perform an assignment to a long double/double/float. 
1956 1948 */ 1957 1949 1958 1950 code *complex_eq87(elem *e,regm_t *pretregs) 1959 1951 { 1960 1952 regm_t retregs; 1961 1953 code *c1,*c2; 1962 1954 code cs; 1963 1955 unsigned op1; 1964 1956 unsigned op2; 1965 1957 unsigned sz; 1966 1958 tym_t ty1; … … 2023 2015 if (fxch) 2024 2016 c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 2025 2017 cs.IEVoffset1 -= sz; 2026 2018 gen(c2, &cs); 2027 2019 if (fxch) 2028 2020 genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 2029 2021 #if LNGDBLSIZE == 12 2030 2022 if (op1 == 0xDB) 2031 2023 { 2032 2024 cs.Iop = 0xC7; // MOV EA+10,0 2033 2025 NEWREG(cs.Irm, 0); 2034 2026 cs.IEV1.sp.Voffset += 10; 2035 2027 cs.IFL2 = FLconst; 2036 2028 cs.IEV2.Vint = 0; 2037 2029 cs.Iflags |= CFopsize; 2038 2030 c2 = gen(c2, &cs); 2039 2031 cs.IEVoffset1 += 12; 2040 2032 c2 = gen(c2, &cs); // MOV EA+22,0 2041 2033 } 2042 2034 #endif 2043 #if LNGDBLSIZE == 16 2035 if (tysize[TYldouble] == 16) 2036 { 2044 2037 if (op1 == 0xDB) 2045 2038 { 2046 2039 cs.Iop = 0xC7; // MOV EA+10,0 2047 2040 NEWREG(cs.Irm, 0); 2048 2041 cs.IEV1.sp.Voffset += 10; 2049 2042 cs.IFL2 = FLconst; 2050 2043 cs.IEV2.Vint = 0; 2051 2044 cs.Iflags |= CFopsize; 2052 2045 c2 = gen(c2, &cs); 2053 2046 2054 2047 cs.IEV1.sp.Voffset += 2; 2055 2048 cs.Iflags &= ~CFopsize; 2056 2049 c2 = gen(c2, &cs); 2057 2050 2058 2051 cs.IEVoffset1 += 14; 2059 2052 cs.Iflags |= CFopsize; 2060 2053 c2 = gen(c2, &cs); 2061 2054 2062 2055 cs.IEV1.sp.Voffset += 2; 2063 2056 cs.Iflags &= ~CFopsize; 2064 2057 c2 = gen(c2, &cs); 2065 2058 } 2066 #endif 2059 } 2067 2060 c2 = genfwait(c2); 2068 2061 freenode(e->E1); 2069 2062 return cat3(c1,c2,fixresult_complex87(e,mST01 | mPSW,pretregs)); 2070 2063 } 2071 2064 2072 2065 /******************************* 2073 2066 * Perform an assignment while converting to integral type, 2074 2067 * i.e. 
handle (e1 = (int) e2) 2075 2068 */ 2076 2069 2077 2070 code *cnvteq87(elem *e,regm_t *pretregs) 2078 2071 { 2079 2072 regm_t retregs; 2080 2073 code *c1,*c2; 2081 2074 code cs; 2082 2075 unsigned op1; 2083 2076 unsigned op2; 2084 2077 tym_t ty1; 2085 2078 2086 2079 assert(e->Eoper == OPeq); … … 2281 2274 jp FM1 2282 2275 fxch ST(1) 2283 2276 fld E1.im 2284 2277 FM2: fprem 2285 2278 fstsw word ptr sw 2286 2279 fwait 2287 2280 mov AH, byte ptr sw+1 2288 2281 jp FM2 2289 2282 fstp ST(1) 2290 2283 */ 2291 2284 2292 2285 ty1 = tybasic(e->E1->Ety); 2293 2286 sz2 = tysize[ty1] / 2; 2294 2287 2295 2288 retregs = mST0; 2296 2289 cr = codelem(e->E2,&retregs,FALSE); // FLD E2 2297 2290 note87(e->E2,0,0); 2298 2291 cl = getlvalue(&cs,e->E1,0); 2299 2292 cl = cat(cl,makesure87(e->E2,0,0,0)); 2300 2293 cs.Iflags |= ADDFWAIT() ? CFwait : 0; 2301 if ( I32)2294 if (!I16) 2302 2295 cs.Iflags &= ~CFopsize; 2303 2296 2304 2297 c = push87(); 2305 2298 switch (ty1) 2306 2299 { 2307 2300 case TYcdouble: cs.Iop = ESC(MFdouble,1); break; 2308 2301 case TYcfloat: cs.Iop = ESC(MFfloat,1); break; 2309 2302 case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break; 2310 2303 default: 2311 2304 assert(0); 2312 2305 } 2313 2306 c = gen(c,&cs); // FLD E1.re 2314 2307 2315 2308 code *c1; 2316 2309 2317 2310 c1 = gen2(NULL, 0xD9, 0xF8); // FPREM 2318 2311 c1 = cg87_87topsw(c1); 2319 2312 c1 = genjmp(c1, JP, FLcode, (block *)c1); // JP FM1 2320 2313 c1 = genf2(c1, 0xD9, 0xC8 + 1); // FXCH ST(1) 2321 2314 c = cat(c,c1); … … 2393 2386 if (!tycomplex(e->E2->Ety) && 2394 2387 (e->Eoper == OPmulass || e->Eoper == OPdivass)) 2395 2388 { 2396 2389 retregs = mST0; 2397 2390 cr = codelem(e->E2, &retregs, FALSE); 2398 2391 note87(e->E2, 0, 0); 2399 2392 cl = getlvalue(&cs, e->E1, 0); 2400 2393 cl = cat(cl,makesure87(e->E2,0,0,0)); 2401 2394 cl = cat(cl,push87()); 2402 2395 cl = genf2(cl,0xD9,0xC0); // FLD ST(0) 2403 2396 goto L1; 2404 2397 } 2405 2398 else 2406 2399 { 2407 2400 cr = 
loadComplex(e->E2); 2408 2401 cl = getlvalue(&cs,e->E1,0); 2409 2402 cl = cat(cl,makesure87(e->E2,sz2,0,0)); 2410 2403 cl = cat(cl,makesure87(e->E2,0,1,0)); 2411 2404 } 2412 2405 cs.Iflags |= ADDFWAIT() ? CFwait : 0; 2413 if ( I32)2406 if (!I16) 2414 2407 cs.Iflags &= ~CFopsize; 2415 2408 2416 2409 switch (e->Eoper) 2417 2410 { 2418 2411 case OPpostinc: 2419 2412 case OPaddass: 2420 2413 case OPpostdec: 2421 2414 case OPminass: 2422 2415 L1: 2423 2416 if (ty1 == TYcldouble) 2424 2417 { 2425 2418 c = push87(); 2426 2419 c = cat(c, push87()); 2427 2420 cs.Iop = 0xDB; 2428 2421 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2429 2422 c = gen(c,&cs); // FLD e->E1.re 2430 2423 cs.IEVoffset1 += sz2; 2431 2424 gen(c,&cs); // FLD e->E1.im 2432 2425 genf2(c, 0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST 2433 2426 genf2(c, 0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST … … 2581 2574 cs.IEVoffset1 -= sz2; 2582 2575 gen(c, &cs); // FST mreal.re 2583 2576 retregs = mST01; 2584 2577 } 2585 2578 else 2586 2579 { 2587 2580 cs.Irm |= modregrm(0, 3, 0); 2588 2581 gen(c, &cs); // FSTP mreal.im 2589 2582 cs.IEVoffset1 -= sz2; 2590 2583 gen(c, &cs); // FSTP mreal.re 2591 2584 pop87(); 2592 2585 pop87(); 2593 2586 retregs = 0; 2594 2587 } 2595 2588 goto L3; 2596 2589 } 2597 2590 2598 2591 case OPdivass: 2599 2592 c = push87(); 2600 2593 c = cat(c, push87()); 2601 idxregs = idxregm( cs.Irm,cs.Isib);// mask of index regs used2594 idxregs = idxregm(&cs); // mask of index regs used 2602 2595 if (ty1 == TYcldouble) 2603 2596 { 2604 2597 cs.Iop = 0xDB; 2605 2598 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2606 2599 c = gen(c,&cs); // FLD e->E1.re 2607 2600 genf2(c,0xD9,0xC8 + 2); // FXCH ST(2) 2608 2601 cs.IEVoffset1 += sz2; 2609 2602 gen(c,&cs); // FLD e->E1.im 2610 2603 genf2(c,0xD9,0xC8 + 2); // FXCH ST(2) 2611 2604 retregs = mST01; 2612 2605 c = cat(c,callclib(e, CLIBcdiv, &retregs, idxregs)); 2613 2606 goto L2; 2614 2607 } 2615 2608 else 2616 2609 { 2617 2610 cs.Iop = (ty1 == TYcfloat) ? 
0xD9 : 0xDD; 2618 2611 cs.Irm |= modregrm(0, 0, 0); // FLD tbyte ptr ... 2619 2612 c = gen(c,&cs); // FLD e->E1.re 2620 2613 genf2(c,0xD9,0xC8 + 2); // FXCH ST(2) 2621 2614 cs.IEVoffset1 += sz2; … … 2652 2645 2653 2646 /************************** 2654 2647 * OPnegass 2655 2648 */ 2656 2649 2657 2650 code *cdnegass87(elem *e,regm_t *pretregs) 2658 2651 { regm_t retregs; 2659 2652 tym_t tyml; 2660 2653 unsigned op; 2661 2654 targ_long val; 2662 2655 code *cl,*cr,*c,cs; 2663 2656 elem *e1; 2664 2657 int sz; 2665 2658 2666 2659 //printf("cdnegass87(e = %p, *pretregs = x%x)\n", e, *pretregs); 2667 2660 e1 = e->E1; 2668 2661 tyml = tybasic(e1->Ety); // type of lvalue 2669 2662 sz = tysize[tyml]; 2670 2663 2671 2664 cl = getlvalue(&cs,e1,0); 2672 cr = modEA( cs.Irm);2665 cr = modEA(&cs); 2673 2666 cs.Irm |= modregrm(0,6,0); 2674 2667 cs.Iop = 0x80; 2668 cs.Irex = 0; 2675 2669 #if LNGDBLSIZE > 10 2676 2670 if (tyml == TYldouble || tyml == TYildouble) 2677 2671 cs.IEVoffset1 += 10 - 1; 2678 2672 else if (tyml == TYcldouble) 2679 2673 cs.IEVoffset1 += LNGDBLSIZE + 10 - 1; 2680 2674 else 2681 2675 #endif 2682 2676 cs.IEVoffset1 += sz - 1; 2683 2677 cs.IFL2 = FLconst; 2684 2678 cs.IEV2.Vuns = 0x80; 2685 2679 c = gen(NULL,&cs); // XOR 7[EA],0x80 2686 2680 if (tycomplex(tyml)) 2687 2681 { 2688 2682 cs.IEVoffset1 -= sz / 2; 2689 2683 gen(c,&cs); // XOR 7[EA],0x80 2690 2684 } 2691 2685 c = cat3(cl,cr,c); 2692 2686 2693 2687 if (*pretregs) 2694 2688 { … … 2718 2712 } 2719 2713 2720 2714 /************************ 2721 2715 * Take care of OPpostinc and OPpostdec. 2722 2716 */ 2723 2717 2724 2718 code *post87(elem *e,regm_t *pretregs) 2725 2719 { 2726 2720 regm_t retregs; 2727 2721 code *cl,*cr,*c; 2728 2722 code cs; 2729 2723 unsigned op; 2730 2724 unsigned op1; 2731 2725 unsigned reg; 2732 2726 tym_t ty1; 2733 2727 2734 2728 //printf("post87()\n"); 2735 2729 assert(*pretregs); 2736 2730 cl = getlvalue(&cs,e->E1,0); 2737 2731 cs.Iflags |= ADDFWAIT() ? 
CFwait : 0; 2738 if ( I32)2732 if (!I16) 2739 2733 cs.Iflags &= ~CFopsize; 2740 2734 ty1 = tybasic(e->E1->Ety); 2741 2735 switch (ty1) 2742 2736 { case TYdouble_alias: 2743 2737 case TYidouble: 2744 2738 case TYdouble: 2745 2739 case TYcdouble: op1 = ESC(MFdouble,1); reg = 0; break; 2746 2740 case TYifloat: 2747 2741 case TYfloat: 2748 2742 case TYcfloat: op1 = ESC(MFfloat,1); reg = 0; break; 2749 2743 case TYildouble: 2750 2744 case TYldouble: 2751 2745 case TYcldouble: op1 = 0xDB; reg = 5; break; 2752 2746 default: 2753 2747 assert(0); 2754 2748 } 2755 2749 NEWREG(cs.Irm, reg); 2756 2750 if (reg == 5) 2757 2751 reg = 7; 2758 2752 else … … 2805 2799 * OPd_s32 2806 2800 * OPd_u16 2807 2801 * OPd_s64 2808 2802 */ 2809 2803 2810 2804 code *cnvt87(elem *e,regm_t *pretregs) 2811 2805 { 2812 2806 regm_t retregs; 2813 2807 code *c1,*c2; 2814 2808 unsigned mf,rf,reg; 2815 2809 tym_t tym; 2816 2810 int clib; 2817 2811 int sz; 2818 2812 int szoff; 2819 2813 2820 2814 //printf("cnvt87(e = %p, *pretregs = x%x)\n", e, *pretregs); 2821 2815 assert(*pretregs); 2822 2816 tym = e->Ety; 2823 2817 sz = tysize(tym); 2824 2818 szoff = sz; 2819 unsigned grex = I64 ? REX_W << 16 : 0; 2825 2820 2826 2821 switch (e->Eoper) 2827 2822 { case OPd_s16: 2828 2823 clib = CLIBdblint87; 2829 2824 mf = ESC(MFword,1); 2830 2825 rf = 3; 2831 2826 break; 2832 2827 2833 2828 case OPd_u16: 2834 2829 szoff = 4; 2835 2830 case OPd_s32: 2836 2831 clib = CLIBdbllng87; 2837 2832 mf = ESC(MFlong,1); 2838 2833 rf = 3; 2839 2834 break; 2840 2835 2841 2836 case OPd_s64: 2842 2837 clib = CLIBdblllng; 2843 2838 mf = 0xDF; 2844 2839 rf = 7; 2845 2840 break; 2846 2841 2847 2842 default: 2848 2843 assert(0); 2849 2844 } 2850 2845 2851 if ( !I32) // C may change the default control word2846 if (I16) // C may change the default control word 2852 2847 { 2853 2848 if (clib == CLIBdblllng) 2854 2849 { retregs = I32 ? 
DOUBLEREGS_32 : DOUBLEREGS_16; 2855 2850 c1 = codelem(e->E1,&retregs,FALSE); 2856 2851 c2 = callclib(e,clib,pretregs,0); 2857 2852 } 2858 2853 else 2859 2854 { retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16; 2860 2855 c1 = codelem(e->E1,&retregs,FALSE); 2861 2856 c2 = callclib(e,clib,pretregs,0); 2862 2857 pop87(); 2863 2858 } 2864 2859 } 2865 2860 else if (1) 2866 2861 { // Generate: 2867 2862 // sub ESP,12 2868 2863 // fstcw 8[ESP] 2869 2864 // fldcw roundto0 2870 2865 // fistp long64 ptr [ESP] 2871 2866 // fldcw 8[ESP] 2872 2867 // pop lsw 2873 2868 // pop msw 2874 2869 // add ESP,4 2875 2870 2876 2871 unsigned szpush = szoff + 2; 2877 2872 if (config.flags3 & CFG3pic) 2878 2873 szpush += 2; 2879 2874 szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1); 2880 2875 2881 2876 retregs = mST0; 2882 2877 c1 = codelem(e->E1,&retregs,FALSE); 2883 2878 2884 2879 if (szpush == REGSIZE) 2885 2880 c1 = gen1(c1,0x50 + AX); // PUSH EAX 2886 2881 else 2887 c1 = genc2(c1,0x81, modregrm(3,5,SP), szpush); // SUB ESP,122882 c1 = genc2(c1,0x81,grex | modregrm(3,5,SP), szpush); // SUB ESP,12 2888 2883 c1 = genfwait(c1); 2889 genc1(c1,0xD9, modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP]2884 genc1(c1,0xD9,grex | modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP] 2890 2885 2891 2886 c1 = genfwait(c1); 2892 2887 2893 2888 if (config.flags3 & CFG3pic) 2894 2889 { 2895 genc(c1,0xC7, modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto02890 genc(c1,0xC7,grex | modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto0 2896 2891 code_orflag(c1, CFopsize); 2897 genc1(c1,0xD9, modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP]2892 genc1(c1,0xD9,grex | modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP] 2898 2893 } 2899 2894 else 2900 2895 c1 = genrnd(c1, CW_roundto0); // FLDCW 
roundto0 2901 2896 2902 2897 pop87(); 2903 2898 2904 2899 c1 = genfwait(c1); 2905 gen2sib(c1,mf, modregrm(0,rf,4),modregrm(0,4,SP)); // FISTP [ESP]2900 gen2sib(c1,mf,grex | modregrm(0,rf,4),modregrm(0,4,SP)); // FISTP [ESP] 2906 2901 2907 2902 retregs = *pretregs & (ALLREGS | mBP); 2908 2903 if (!retregs) 2909 2904 retregs = ALLREGS; 2910 2905 c2 = allocreg(&retregs,&reg,tym); 2911 2906 2912 2907 c2 = genfwait(c2); // FWAIT 2913 c2 = genc1(c2,0xD9, modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP]2908 c2 = genc1(c2,0xD9,grex | modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP] 2914 2909 2915 2910 if (szoff > REGSIZE) 2916 2911 { szpush -= REGSIZE; 2917 c2 = gen 1(c2,0x58 +findreglsw(retregs)); // POP lsw2912 c2 = genpop(c2,findreglsw(retregs)); // POP lsw 2918 2913 } 2919 2914 szpush -= REGSIZE; 2920 c2 = gen 1(c2,0x58 +reg); // POP reg2915 c2 = genpop(c2,reg); // POP reg 2921 2916 2922 2917 if (szpush) 2923 genc2(c2,0x81, modregrm(3,0,SP), szpush); // ADD ESP,42918 genc2(c2,0x81,grex | modregrm(3,0,SP), szpush); // ADD ESP,4 2924 2919 c2 = cat(c2,fixresult(e,retregs,pretregs)); 2925 2920 } 2926 2921 else 2927 2922 { 2928 2923 // This is incorrect. For -inf and nan, the 8087 returns the largest 2929 2924 // negative int (0x80000....). For -inf, 0x7FFFF... should be returned, 2930 2925 // and for nan, 0 should be returned. 2931 2926 retregs = mST0; 2932 2927 c1 = codelem(e->E1,&retregs,FALSE); 2933 2928 2934 2929 c1 = genfwait(c1); 2935 2930 c1 = genrnd(c1, CW_roundto0); // FLDCW roundto0 2936 2931 2937 2932 pop87(); 2938 2933 c1 = genfltreg(c1,mf,rf,0); // FISTP floatreg 2939 2934 retregs = *pretregs & (ALLREGS | mBP); 2940 2935 if (!retregs) 2941 2936 retregs = ALLREGS; 2942 2937 c2 = allocreg(&retregs,&reg,tym); 2943 2938 trunk/src/backend/cg.c
r558 r577 36 36 // of the public name index of a COMDAT) 37 37 38 38 /* Stack offsets */ 39 39 targ_size_t localsize, /* amt subtracted from SP for local vars */ 40 40 Toff, /* base for temporaries */ 41 41 Poff,Aoff; // comsubexps, params, regs, autos 42 42 43 43 /* The following are initialized for the 8088. cod3_set386() will 44 44 * change them if generating code for the 386, cod3_set64() for 64 bit code. 45 45 */ 46 46 int BPRM = 6; /* R/M value for [BP] or [EBP] */ 47 47 regm_t fregsaved = mBP | mSI | mDI; // mask of registers saved across 48 48 // function calls 49 49 // (add in mBX for I32) 50 50 regm_t FLOATREGS = FLOATREGS_16; 51 51 regm_t FLOATREGS2 = FLOATREGS2_16; 52 52 regm_t DOUBLEREGS = DOUBLEREGS_16; 53 53 54 54 symbol *localgot; // reference to GOT for this function 55 55 symbol *tls_get_addr_sym; // function __tls_get_addr 56 57 #if TARGET_OSX 58 int STACKALIGN = 16; 59 #else 60 int STACKALIGN = 0; 61 #endif trunk/src/backend/cgcod.c
r558 r577 19 19 20 20 #if __sun&&__SVR4 21 21 #include <alloca.h> 22 22 #endif 23 23 24 24 #include "cc.h" 25 25 #include "el.h" 26 26 #include "oper.h" 27 27 #include "code.h" 28 28 #include "global.h" 29 29 #include "type.h" 30 30 #include "exh.h" 31 31 32 32 static char __file__[] = __FILE__; /* for tassert.h */ 33 33 #include "tassert.h" 34 34 35 35 STATIC void resetEcomsub(elem *e); 36 36 STATIC code * loadcse(elem *,unsigned,regm_t); 37 37 STATIC void blcodgen(block *); 38 38 STATIC void cgcod_eh(); 39 STATIC int numbitsset(regm_t);40 39 STATIC code * cse_save(regm_t ms); 41 40 STATIC int cse_simple(elem *e,int i); 42 41 STATIC code * comsub(elem *,regm_t *); 43 42 44 43 bool floatreg; // !=0 if floating register is required 45 44 46 45 targ_size_t Aoffset; // offset of automatics and registers 47 46 targ_size_t Toffset; // offset of temporaries 48 47 targ_size_t EEoffset; // offset of SCstack variables from ESP 49 48 int Aalign; // alignment for Aoffset 50 49 51 50 CGstate cgstate; // state of code generator 52 51 53 52 /************************************ 54 53 * # of bytes that SP is beyond BP. 55 54 */ 56 55 57 56 unsigned stackpush; 58 57 59 58 int stackchanged; /* set to !=0 if any use of the stack … … 1561 1560 case TYcdouble: 1562 1561 if (I64) 1563 1562 return mXMM0 | mXMM1; 1564 1563 case TYcldouble: 1565 1564 return mST01; 1566 1565 1567 1566 default: 1568 1567 #if DEBUG 1569 1568 WRTYxx(tym); 1570 1569 #endif 1571 1570 assert(0); 1572 1571 return 0; 1573 1572 } 1574 1573 } 1575 1574 1576 1575 1577 1576 /****************************** 1578 1577 * Count the number of bits set in a register mask. 
1579 1578 */ 1580 1579 1581 STATICint numbitsset(regm_t regm)1580 int numbitsset(regm_t regm) 1582 1581 { int n; 1583 1582 1584 1583 n = 0; 1585 1584 if (regm) 1586 1585 do 1587 1586 n++; 1588 1587 while ((regm &= regm - 1) != 0); 1589 1588 return n; 1590 1589 } 1591 1590 1592 1591 /****************************** 1593 1592 * Given a register mask, find and return the number 1594 1593 * of the first register that fits. 1595 1594 */ 1596 1595 1597 1596 #undef findreg 1598 1597 1599 1598 unsigned findreg(regm_t regm 1600 1599 #ifdef DEBUG 1601 1600 ,int line,const char *file trunk/src/backend/cgen.c
r569 r577 147 147 unsigned reg; 148 148 149 149 #ifdef DEBUG /* this is a high usage routine */ 150 150 assert(cs); 151 151 #endif 152 152 assert(I64 || cs->Irex == 0); 153 153 ce = code_calloc(); 154 154 *ce = *cs; 155 155 if (config.flags4 & CFG4optimized && 156 156 ce->IFL2 == FLconst && 157 157 (ce->Iop == 0x81 || ce->Iop == 0x80) && 158 158 reghasvalue((ce->Iop == 0x80) ? BYTEREGS : ALLREGS,ce->IEV2.Vlong,&reg) && 159 159 !(ce->Iflags & CFopsize && I16) 160 160 ) 161 161 { // See if we can replace immediate instruction with register instruction 162 162 static unsigned char regop[8] = 163 163 { 0x00,0x08,0x10,0x18,0x20,0x28,0x30,0x38 }; 164 164 165 165 //printf("replacing 0x%02x, val = x%lx\n",ce->Iop,ce->IEV2.Vlong); 166 166 ce->Iop = regop[(ce->Irm & modregrm(0,7,0)) >> 3] | (ce->Iop & 1); 167 c e->Irm = (ce->Irm & modregrm(3,0,7)) | modregrm(0,reg,0);167 code_newreg(ce, reg); 168 168 } 169 169 code_next(ce) = CNIL; 170 170 if (c) 171 171 { cstart = c; 172 172 while (code_next(c)) c = code_next(c); /* find end of list */ 173 173 code_next(c) = ce; /* link into list */ 174 174 return cstart; 175 175 } 176 176 return ce; 177 177 } 178 178 179 179 code *gen1(code *c,unsigned op) 180 180 { code *ce,*cstart; 181 181 182 182 ce = code_calloc(); 183 183 ce->Iop = op; 184 184 if (c) 185 185 { cstart = c; 186 186 while (code_next(c)) c = code_next(c); /* find end of list */ 187 187 code_next(c) = ce; /* link into list */ 188 188 return cstart; 189 189 } 190 190 return ce; 191 191 } 192 192 193 193 code *gen2(code *c,unsigned op,unsigned rm) 194 194 { code *ce,*cstart; 195 195 196 196 cstart = ce = code_calloc(); 197 197 /*cxcalloc++;*/ 198 198 if (op > 0xFF) 199 199 { ce->Iop = op >> 8; 200 200 ce->Iop2 = op & 0xFF; 201 201 } 202 202 else 203 203 ce->Iop = op; 204 ce->Irm = rm; 205 ce->Irex = rm >> 16; 204 ce->Iea = rm; 206 205 if (c) 207 206 { cstart = c; 208 207 while (code_next(c)) c = code_next(c); /* find end of list */ 209 208 code_next(c) = ce; /* link into list
*/ 210 209 } 211 210 return cstart; 212 211 } 213 212 214 213 code *gen2sib(code *c,unsigned op,unsigned rm,unsigned sib) 215 214 { code *ce,*cstart; 216 215 217 216 cstart = ce = code_calloc(); 218 217 /*cxcalloc++;*/ 219 218 ce->Iop = op; 220 219 ce->Irm = rm; 221 220 ce->Isib = sib; 222 ce->Irex = rm >> 16; 221 ce->Irex = (rm | (sib & (REX_B << 16))) >> 16; 222 if (sib & (REX_R << 16)) 223 ce->Irex |= REX_X; 223 224 if (c) 224 225 { cstart = c; 225 226 while (code_next(c)) c = code_next(c); /* find end of list */ 226 227 code_next(c) = ce; /* link into list */ 227 228 } 228 229 return cstart; 229 230 } 230 231 231 232 code *genregs(code *c,unsigned op,unsigned dstreg,unsigned srcreg) 232 233 { return gen2(c,op,modregxrmx(3,dstreg,srcreg)); } 233 234 234 235 code *gentstreg(code *c,unsigned t) 235 236 { 236 237 c = gen2(c,0x85,modregxrmx(3,t,t)); // TEST t,t 237 238 code_orflag(c,CFpsw); 239 return c; 240 } 241 242 code *genpush(code *c, unsigned reg) 243 { 244 c = gen1(c, 0x50 + (reg & 7)); 245 if (reg & 8) 246 code_orrex(c, REX_B); 247 return c; 248 } 249 250 code *genpop(code *c, unsigned reg) 251 { 252 c = gen1(c, 0x58 + (reg & 7)); 253 if (reg & 8) 254 code_orrex(c, REX_B); 238 255 return c; 239 256 } 240 257 241 258 /******************************** 242 259 * Generate an ASM sequence. 
243 260 */ 244 261 245 262 code *genasm(code *c,char *s,unsigned slen) 246 263 { code *ce; 247 264 248 265 ce = code_calloc(); 249 266 ce->Iop = ASM; 250 267 ce->IFL1 = FLasm; 251 268 ce->IEV1.as.len = slen; 252 269 ce->IEV1.as.bytes = (char *) mem_malloc(slen); 253 270 memcpy(ce->IEV1.as.bytes,s,slen); 254 271 return cat(c,ce); 255 272 } 256 273 257 274 /************************** … … 310 327 gen(cj,&cs); 311 328 break; 312 329 case JNP << 8: 313 330 /* Do a JP around the jump instruction */ 314 331 cnop = gennop(CNIL); 315 332 c = genjmp(c,JP,FLcode,(block *) cnop); 316 333 cat(cj,cnop); 317 334 break; 318 335 case 1 << 8: /* toggled no jump */ 319 336 case 0 << 8: 320 337 break; 321 338 default: 322 339 #ifdef DEBUG 323 340 printf("jop = x%x\n",op); 324 341 #endif 325 342 assert(0); 326 343 } 327 344 return cat(c,cj); 328 345 } 329 346 330 code *gencs(code *c,unsigned op,unsigned rm,unsigned FL2,symbol *s)347 code *gencs(code *c,unsigned op,unsigned ea,unsigned FL2,symbol *s) 331 348 { code cs; 332 349 333 350 cs.Iop = op; 334 cs.I rm = rm;351 cs.Iea = ea; 335 352 cs.Iflags = 0; 336 cs.Irex = rm >> 16;337 353 cs.IFL2 = FL2; 338 354 cs.IEVsym2 = s; 339 355 cs.IEVoffset2 = 0; 340 356 341 357 return gen(c,&cs); 342 358 } 343 359 344 code *genc2(code *c,unsigned op,unsigned rm,targ_size_t EV2)360 code *genc2(code *c,unsigned op,unsigned ea,targ_size_t EV2) 345 361 { code cs; 346 362 347 363 if (op > 0xFF) 348 364 { cs.Iop = op >> 8; 349 365 cs.Iop2 = op & 0xFF; 350 366 } 351 367 else 352 368 cs.Iop = op; 353 cs.I rm = rm;369 cs.Iea = ea; 354 370 cs.Iflags = CFoff; 355 cs.Irex = rm >> 16;356 371 cs.IFL2 = FLconst; 357 372 cs.IEV2.Vsize_t = EV2; 358 373 return gen(c,&cs); 359 374 } 360 375 361 376 /***************** 362 377 * Generate code. 
363 378 */ 364 379 365 code *genc1(code *c,unsigned op,unsigned rm,unsigned FL1,targ_size_t EV1)380 code *genc1(code *c,unsigned op,unsigned ea,unsigned FL1,targ_size_t EV1) 366 381 { code cs; 367 382 368 383 assert(FL1 < FLMAX); 369 384 assert(op < 256); 370 385 cs.Iop = op; 371 cs.Irm = rm; 372 cs.Isib = rm >> 8; 373 cs.Irex = rm >> 16; 386 cs.Iea = ea; 374 387 cs.Iflags = CFoff; 375 388 cs.IFL1 = FL1; 376 389 cs.IEV1.Vsize_t = EV1; 377 390 return gen(c,&cs); 378 391 } 379 392 380 393 /***************** 381 394 * Generate code. 382 395 */ 383 396 384 code *genc(code *c,unsigned op,unsigned rm,unsigned FL1,targ_size_t EV1,unsigned FL2,targ_size_t EV2)397 code *genc(code *c,unsigned op,unsigned ea,unsigned FL1,targ_size_t EV1,unsigned FL2,targ_size_t EV2) 385 398 { code cs; 386 399 387 400 assert(FL1 < FLMAX); 388 401 assert(op < 256); 389 402 cs.Iop = op; 390 cs.Irm = rm; 391 cs.Isib = rm >> 8; 392 cs.Irex = rm >> 16; 403 cs.Iea = ea; 393 404 cs.Iflags = CFoff; 394 405 cs.IFL1 = FL1; 395 406 cs.IEV1.Vsize_t = EV1; 396 407 assert(FL2 < FLMAX); 397 408 cs.IFL2 = FL2; 398 409 cs.IEV2.Vsize_t = EV2; 399 410 return gen(c,&cs); 400 411 } 401 412 402 413 /*************************************** 403 414 * Generate immediate multiply instruction for r1=r2*imm. 404 415 * Optimize it into LEA's if we can. 405 416 */ 406 417 407 418 code *genmulimm(code *c,unsigned r1,unsigned r2,targ_int imm) 408 419 { code cs; 409 420 410 421 // These optimizations should probably be put into pinholeopt() 411 422 switch (imm) 412 423 { case 1: trunk/src/backend/cod1.c
r552 r577 26 26 #include "tassert.h" 27 27 28 28 targ_size_t paramsize(elem *e,unsigned stackalign); 29 29 STATIC code * funccall (elem *,unsigned,unsigned,regm_t *,regm_t); 30 30 31 31 /* array to convert from index register to r/m field */ 32 32 /* AX CX DX BX SP BP SI DI */ 33 33 static const signed char regtorm32[8] = { 0, 1, 2, 3,-1, 5, 6, 7 }; 34 34 signed char regtorm [8] = { -1,-1,-1, 7,-1, 6, 4, 5 }; 35 35 36 36 /************************** 37 37 * Determine if e is a 32 bit scaled index addressing mode. 38 38 * Returns: 39 39 * 0 not a scaled index addressing mode 40 40 * !=0 the value for ss in the SIB byte 41 41 */ 42 42 43 43 int isscaledindex(elem *e) 44 44 { targ_uns ss; 45 45 46 assert( I32);46 assert(!I16); 47 47 while (e->Eoper == OPcomma) 48 48 e = e->E2; 49 49 if (!(e->Eoper == OPshl && !e->Ecount && 50 50 e->E2->Eoper == OPconst && 51 51 (ss = e->E2->EV.Vuns) <= 3 52 52 ) 53 53 ) 54 54 ss = 0; 55 55 return ss; 56 56 } 57 57 58 58 /********************************************* 59 59 * Generate code for which isscaledindex(e) returned a non-zero result. 60 60 */ 61 61 62 62 code *cdisscaledindex(elem *e,regm_t *pidxregs,regm_t keepmsk) 63 63 { code *c; 64 64 regm_t r; 65 65 66 66 // Load index register with result of e->E1 … … 132 132 { 133 133 if (ssindex_array[i].product == product) 134 134 return i; 135 135 } 136 136 return 0; 137 137 } 138 138 139 139 /*************************************** 140 140 * Build an EA of the form disp[base][index*scale]. 
141 141 * Input: 142 142 * c struct to fill in 143 143 * base base register (-1 if none) 144 144 * index index register (-1 if none) 145 145 * scale scale factor - 1,2,4,8 146 146 * disp displacement 147 147 */ 148 148 149 149 void buildEA(code *c,int base,int index,int scale,targ_size_t disp) 150 150 { unsigned char rm; 151 151 unsigned char sib; 152 unsigned char rex = 0; 152 153 153 154 sib = 0; 154 if ( I32)155 if (!I16) 155 156 { unsigned ss; 156 157 157 158 assert(index != SP); 158 159 159 160 switch (scale) 160 161 { case 1: ss = 0; break; 161 162 case 2: ss = 1; break; 162 163 case 4: ss = 2; break; 163 164 case 8: ss = 3; break; 164 165 default: assert(0); 165 166 } 166 167 167 168 if (base == -1) 168 169 { 169 170 if (index == -1) 170 171 rm = modregrm(0,0,5); 171 172 else 172 173 { 173 174 rm = modregrm(0,0,4); 174 sib = modregrm(ss,index,5); 175 sib = modregrm(ss,index & 7,5); 176 if (index & 8) 177 rex |= REX_X; 175 178 } 176 179 } 177 180 else if (index == -1) 178 181 { 179 182 if (base == SP) 180 183 { 181 184 rm = modregrm(2,0,4); 182 185 sib = modregrm(0,4,SP); 183 186 } 184 187 else 185 rm = modregrm(2,0,base); 188 { rm = modregrm(2,0,base & 7); 189 if (base & 8) 190 rex |= REX_B; 191 } 186 192 } 187 193 else 188 194 { 189 195 rm = modregrm(2,0,4); 190 sib = modregrm(ss,index,base); 196 sib = modregrm(ss,index & 7,base & 7); 197 if (index & 8) 198 rex |= REX_X; 199 if (base & 8) 200 rex |= REX_B; 191 201 } 192 202 } 193 203 else 194 204 { 195 205 // -1 AX CX DX BX SP BP SI DI 196 206 static unsigned char EA16rm[9][9] = 197 207 { 198 208 { 0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85, }, // -1 199 209 { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // AX 200 210 { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // CX 201 211 { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // DX 202 212 { 0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81, }, // BX 203 213 { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // SP 204 214 { 
0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83, }, // BP 205 215 { 0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09, }, // SI 206 216 { 0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09, } // DI 207 217 }; 208 218 209 219 assert(scale == 1); 210 220 rm = EA16rm[base + 1][index + 1]; 211 221 assert(rm != 9); 212 222 } 213 223 c->Irm = rm; 214 224 c->Isib = sib; 225 c->Irex = rex; 215 226 c->IFL1 = FLconst; 216 227 c->IEV1.Vuns = disp; 217 228 } 218 229 219 230 /************************** 220 231 * For elems in regcon that don't match regconsave, 221 232 * clear the corresponding bit in regcon.cse.mval. 222 233 * Do same for regcon.immed. 223 234 */ 224 235 225 236 void andregcon(con_t *pregconsave) 226 { int i; 227 regm_t m; 228 229 m = ~1; 230 for (i = 0; i < REGMAX; i++) 237 { 238 regm_t m = ~1; 239 for (int i = 0; i < REGMAX; i++) 231 240 { if (pregconsave->cse.value[i] != regcon.cse.value[i]) 232 241 regcon.cse.mval &= m; 233 242 if (pregconsave->immed.value[i] != regcon.immed.value[i]) 234 243 regcon.immed.mval &= m; 235 244 m <<= 1; 236 245 m |= 1; 237 246 } 238 247 //printf("regcon.cse.mval = x%x, regconsave->mval = x%x ",regcon.cse.mval,pregconsave->cse.mval); 239 248 regcon.used |= pregconsave->used; 240 249 regcon.cse.mval &= pregconsave->cse.mval; 241 250 regcon.immed.mval &= pregconsave->immed.mval; 242 251 regcon.params &= pregconsave->params; 243 252 //printf("regcon.cse.mval&regcon.cse.mops = x%x, regcon.cse.mops = x%x\n",regcon.cse.mval & regcon.cse.mops,regcon.cse.mops); 244 253 regcon.cse.mops &= regcon.cse.mval; 245 254 } 246 255 247 256 /********************************* 248 257 * Scan down comma-expressions. 
249 258 * Output: 250 259 * *pe = first elem down right side that is not an OPcomma … … 295 304 if (e) 296 305 { 297 306 unsigned stackpushsave; 298 307 int stackcleansave; 299 308 300 309 stackpushsave = stackpush; 301 310 stackcleansave = cgstate.stackclean; 302 311 cgstate.stackclean = 0; // defer cleaning of stack 303 312 c = cat(c,codelem(e,pretregs,constflag)); 304 313 assert(cgstate.stackclean == 0); 305 314 cgstate.stackclean = stackcleansave; 306 315 c = genstackclean(c,stackpush - stackpushsave,*pretregs); // do defered cleaning 307 316 } 308 317 return c; 309 318 } 310 319 311 320 /******************************************** 312 321 * Gen a save/restore sequence for mask of registers. 313 322 */ 314 323 315 void gensaverestore(regm_t regm,code **csave,code **crestore) 316 { code *cs1; 317 code *cs2; 318 int i; 319 320 cs1 = NULL; 321 cs2 = NULL; 324 void gensaverestore2(regm_t regm,code **csave,code **crestore) 325 { 326 code *cs1 = *csave; 327 code *cs2 = *crestore; 328 322 329 regm &= mBP | mES | ALLREGS; 323 for (i = 0; regm; i++)330 for (int i = 0; regm; i++) 324 331 { 325 332 if (regm & 1) 326 333 { 327 assert(i != ES); // fix later 328 cs1 = gen1(cs1,0x50 + i); 329 cs2 = cat(gen1(NULL,0x58 + i),cs2); 334 if (i == ES) 335 { 336 cs1 = gen1(cs1, 0x06); // PUSH ES 337 cs2 = cat(gen1(CNIL, 0x07),cs2); // POP ES 338 } 339 else 340 { 341 cs1 = gen1(cs1,0x50 + (i & 7)); // PUSH i 342 code *c = gen1(NULL, 0x58 + (i & 7)); // POP i 343 if (i & 8) 344 { code_orrex(cs1, REX_B); 345 code_orrex(c, REX_B); 346 } 347 cs2 = cat(c,cs2); 348 } 330 349 } 331 350 regm >>= 1; 332 351 } 333 352 *csave = cs1; 334 353 *crestore = cs2; 354 } 355 356 void gensaverestore(regm_t regm,code **csave,code **crestore) 357 { 358 *csave = NULL; 359 *crestore = NULL; 360 gensaverestore2(regm, csave, crestore); 335 361 } 336 362 337 363 /**************************************** 338 364 * Clean parameters off stack. 
339 365 * Input: 340 366 * numpara amount to adjust stack pointer 341 367 * keepmsk mask of registers to not destroy 342 368 */ 343 369 344 370 code *genstackclean(code *c,unsigned numpara,regm_t keepmsk) 345 371 { 346 372 //dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean); 347 373 if (numpara && (cgstate.stackclean || STACKALIGN == 16)) 348 374 { 349 375 #if 0 // won't work if operand of scodelem 350 376 if (numpara == stackpush && // if this is all those pushed 351 377 needframe && // and there will be a BP 352 378 !config.windows && 353 379 !(regcon.mvar & fregsaved) // and no registers will be pushed 354 380 ) 355 381 c = genregs(c,0x89,BP,SP); // MOV SP,BP 356 382 else 357 383 #endif 358 384 { regm_t scratchm = 0; 359 unsigned r;360 385 361 386 if (numpara == REGSIZE && config.flags4 & CFG4space) 362 387 { 363 388 scratchm = ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar; 364 389 } 365 390 366 391 if (scratchm) 367 { c = cat(c,allocreg(&scratchm,&r,TYint)); 392 { unsigned r; 393 c = cat(c,allocreg(&scratchm,&r,TYint)); 368 394 c = gen1(c,0x58 + r); // POP r 369 395 } 370 396 else 371 c = genc2(c,0x81,modregrm(3,0,SP),numpara); // ADD SP,numpara 397 { c = genc2(c,0x81,modregrm(3,0,SP),numpara); // ADD SP,numpara 398 if (I64) 399 code_orrex(c, REX_W); 400 } 372 401 } 373 402 stackpush -= numpara; 374 403 c = genadjesp(c,-numpara); 375 404 } 376 405 return c; 377 406 } 378 407 379 408 380 409 /********************************* 381 410 * Generate code for a logical expression. 
382 411 * Input: 383 412 * e elem 384 413 * jcond 385 414 * bit 1 if TRUE then goto jump address if e 386 415 * if FALSE then goto jump address if !e 387 416 * 2 don't call save87() 388 417 * fltarg FLcode or FLblock, flavor of target if e evaluates to jcond 389 418 * targ either code or block pointer to destination 390 419 */ 391 420 392 421 code *logexp(elem *e,int jcond,unsigned fltarg,code *targ) 393 { code *c c,*c,*ce,*cnop;422 { code *c,*ce,*cnop; 394 423 regm_t retregs; 395 424 unsigned op; 396 int no87;397 425 398 426 //printf("logexp(e = %p, jcond = %d)\n", e, jcond); 399 no87 = (jcond & 2) == 0;427 int no87 = (jcond & 2) == 0; 400 428 _chkstack(); 401 c c = docommas(&e); /* scan down commas */429 code *cc = docommas(&e); // scan down commas 402 430 cgstate.stackclean++; 403 431 404 432 if (EOP(e) && !e->Ecount) /* if operator and not common sub */ 405 433 { con_t regconsave; 406 434 407 435 switch (e->Eoper) 408 436 { case OPoror: 409 437 if (jcond & 1) 410 438 { c = logexp(e->E1,jcond,fltarg,targ); 411 439 regconsave = regcon; 412 440 ce = logexp(e->E2,jcond,fltarg,targ); 413 441 } 414 442 else 415 443 { cnop = gennop(CNIL); 416 444 c = logexp(e->E1,jcond | 1,FLcode,cnop); 417 445 regconsave = regcon; 418 446 ce = logexp(e->E2,jcond,fltarg,targ); 419 447 ce = cat(ce,cnop); 420 448 } 421 449 cnop = CNIL; … … 438 466 freenode(e); 439 467 c = cat4(cc,c,ce,cnop); 440 468 goto Lret; 441 469 442 470 case OPnot: 443 471 jcond ^= 1; 444 472 case OPbool: 445 473 case OPs8int: 446 474 case OPu8int: 447 475 case OPs16_32: 448 476 case OPu16_32: 449 477 case OPs32_64: 450 478 case OPu32_64: 451 479 case OPu32_d: 452 480 case OPd_ld: 453 481 c = logexp(e->E1,jcond,fltarg,targ); 454 482 freenode(e); 455 483 goto Lretc; 456 484 457 485 case OPcond: 458 { code *cnop2; 459 con_t regconold; 460 461 cnop2 = gennop(CNIL); /* addresses of start of leaves */ 486 { 487 code *cnop2 = gennop(CNIL); // addresses of start of leaves 462 488 cnop = gennop(CNIL); 463 489 c = 
logexp(e->E1,FALSE,FLcode,cnop2); /* eval condition */ 464 regconold = regcon;490 con_t regconold = regcon; 465 491 ce = logexp(e->E2->E1,jcond,fltarg,targ); 466 492 ce = genjmp(ce,JMP,FLcode,(block *) cnop); /* skip second leaf */ 467 493 468 494 regconsave = regcon; 469 495 regcon = regconold; 470 496 471 497 code_next(cnop2) = logexp(e->E2->E2,jcond,fltarg,targ); 472 498 andregcon(&regconold); 473 499 andregcon(&regconsave); 474 500 freenode(e->E2); 475 501 freenode(e); 476 502 c = cat6(cc,c,NULL,ce,cnop2,cnop); 477 503 goto Lret; 478 504 } 479 505 } 480 506 } 481 507 482 /* Special code for signed long compare */ 508 /* Special code for signed long compare. 509 * Not necessary for I64 until we do cents. 510 */ 483 511 if (OTrel2(e->Eoper) && /* if < <= >= > */ 484 512 !e->Ecount && 485 ( ( !I32&& tybasic(e->E1->Ety) == TYlong && tybasic(e->E2->Ety) == TYlong) ||486 ( I32 && tybasic(e->E1->Ety) == TYllong && tybasic(e->E2->Ety) == TYllong))513 ( (I16 && tybasic(e->E1->Ety) == TYlong && tybasic(e->E2->Ety) == TYlong) || 514 (I32 && tybasic(e->E1->Ety) == TYllong && tybasic(e->E2->Ety) == TYllong)) 487 515 ) 488 516 { 489 517 c = longcmp(e,jcond,fltarg,targ); 490 518 goto Lretc; 491 519 } 492 520 493 521 retregs = mPSW; /* return result in flags */ 494 522 op = jmpopcode(e); /* get jump opcode */ 495 523 if (!(jcond & 1)) 496 524 op ^= 0x101; // toggle jump condition(s) 497 525 c = codelem(e,&retregs,TRUE); /* evaluate elem */ 498 526 if (no87) 499 527 c = cat(c,cse_flush(no87)); // flush CSE's to memory 500 528 genjmp(c,op,fltarg,(block *) targ); /* generate jmp instruction */ 501 529 Lretc: 502 530 c = cat(cc,c); 503 531 Lret: 504 532 cgstate.stackclean--; 505 533 return c; 506 534 } 507 535 508 536 509 537 /****************************** 510 538 * Routine to aid in setting things up for gen(). 511 539 * Look for common subexpression. 512 540 * Can handle indirection operators, but not if they're common subs. 
513 541 * Input: 514 542 * e -> elem where we get some of the data from 515 543 * cs -> partially filled code to add 516 544 * op = opcode 517 545 * reg = reg field of (mod reg r/m) 518 546 * offset = data to be added to Voffset field 519 547 * keepmsk = mask of registers we must not destroy 520 548 * desmsk = mask of registers destroyed by executing the instruction 521 549 * Returns: 522 550 * pointer to code generated 523 551 */ 524 552 525 code *loadea(elem *e,code __ss*cs,unsigned op,unsigned reg,targ_size_t offset,553 code *loadea(elem *e,code *cs,unsigned op,unsigned reg,targ_size_t offset, 526 554 regm_t keepmsk,regm_t desmsk) 527 { unsigned i; 528 regm_t rm; 529 tym_t tym; 555 { 530 556 code *c,*cg,*cd; 531 557 532 558 #ifdef DEBUG 533 559 if (debugw) 534 560 printf("loadea: e=%p cs=%p op=x%x reg=%d offset=%ld keepmsk=x%x desmsk=x%x\n", 535 561 e,cs,op,reg,offset,keepmsk,desmsk); 536 562 #endif 537 563 538 564 assert(e); 539 565 cs->Iflags = 0; 540 566 cs->Irex = 0; 541 567 cs->Iop = op; 542 if ( I32 && op >= 0x100) /* if 2 byte opcode */568 if (!I16 && op >= 0x100) // if 2 byte opcode 543 569 { cs->Iop = op >> 8; 544 570 cs->Iop2 = op; 545 571 } 546 tym = e->Ety; 572 tym_t tym = e->Ety; 573 int sz = tysize(tym); 547 574 548 575 /* Determine if location we want to get is in a register. If so, */ 549 576 /* substitute the register for the EA. */ 550 577 /* Note that operators don't go through this. CSE'd operators are */ 551 578 /* picked up by comsub(). 
*/ 552 579 if (e->Ecount && /* if cse */ 553 580 e->Ecount != e->Ecomsub && /* and cse was generated */ 554 581 op != 0x8D && op != 0xC4 && /* and not an LEA or LES */ 555 582 (op != 0xFF || reg != 3) && /* and not CALLF MEM16 */ 556 583 (op & 0xFFF8) != 0xD8) // and not 8087 opcode 557 { int sz; 558 584 { 559 585 assert(!EOP(e)); /* can't handle this */ 560 rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; /* possible regs */ 561 sz = tysize(tym); 562 if (sz > REGSIZE) 586 regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs 587 if (sz > REGSIZE) // value is in 2 or 4 registers 563 588 { 564 if ( !I32 && sz == 8)589 if (I16 && sz == 8) // value is in 4 registers 565 590 { static regm_t rmask[4] = { mDX,mCX,mBX,mAX }; 566 591 rm &= rmask[offset >> 1]; 567 592 } 568 593 569 594 else if (offset) 570 595 rm &= mMSW; /* only high words */ 571 596 else 572 597 rm &= mLSW; /* only low words */ 573 598 } 574 for ( i = 0; rm; i++)599 for (unsigned i = 0; rm; i++) 575 600 { if (mask[i] & rm) 576 601 { if (regcon.cse.value[i] == e && // if register has elem 577 602 /* watch out for a CWD destroying DX */ 578 603 !(i == DX && op == 0xF7 && desmsk & mDX)) 579 604 { 580 605 /* if ES, then it can only be a load */ 581 606 if (i == ES) 582 607 { if (op != 0x8B) 583 608 goto L1; /* not a load */ 584 609 cs->Iop = 0x8C; /* MOV reg,ES */ 585 cs->Irm = modregrm(3,0,reg); 610 cs->Irm = modregrm(3,0,reg & 7); 611 if (reg & 8) 612 code_orrex(cs, REX_B); 586 613 } 587 else /* XXX reg,i */ 588 cs->Irm = modregrm(3,reg,i); 614 else // XXX reg,i 615 { 616 cs->Irm = modregrm(3,reg & 7,i & 7); 617 if (reg & 8) 618 code_orrex(cs, REX_R); 619 if (i & 8) 620 code_orrex(cs, REX_B); 621 } 589 622 c = CNIL; 590 623 goto L2; 591 624 } 592 625 rm &= ~mask[i]; 593 626 } 594 627 } 595 628 } 596 629 597 630 L1: 598 631 c = getlvalue(cs,e,keepmsk); 599 632 if (offset == REGSIZE) 600 633 getlvalue_msw(cs); 601 634 else 602 635 cs->IEVoffset1 += offset; 603 c s->Irm |= 
modregrm(0,reg,0); /* OR in reg field */604 if ( I32)636 code_newreg(cs, reg); // OR in reg field 637 if (!I16) 605 638 { 606 639 if (reg == 6 && op == 0xFF || /* don't PUSH a word */ 607 640 op == 0x0FB7 || op == 0x0FBF || /* MOVZX/MOVSX */ 608 641 (op & 0xFFF8) == 0xD8 || /* 8087 instructions */ 609 642 op == 0x8D) /* LEA */ 610 643 cs->Iflags &= ~CFopsize; 611 644 } 612 645 else if ((op & 0xFFF8) == 0xD8 && ADDFWAIT()) 613 646 cs->Iflags |= CFwait; 614 647 L2: 615 648 cg = getregs(desmsk); /* save any regs we destroy */ 616 649 617 650 /* KLUDGE! fix up DX for divide instructions */ 618 651 cd = CNIL; 619 652 if (op == 0xF7 && desmsk == (mAX|mDX)) /* if we need to fix DX */ 620 653 { if (reg == 7) /* if IDIV */ 621 cd = gen1(cd,0x99); /* CWD */ 622 else if (reg == 6) /* if DIV */ 623 cd = genregs(cd,0x33,DX,DX); /* CLR DX */ 654 { cd = gen1(cd,0x99); // CWD 655 if (I64 && sz == 8) 656 code_orrex(cd, REX_W); 657 } 658 else if (reg == 6) // if DIV 659 { cd = genregs(cd,0x33,DX,DX); // XOR DX,DX 660 if (I64 && sz == 8) 661 code_orrex(cd, REX_W); 662 } 624 663 } 625 664 626 665 // Eliminate MOV reg,reg 627 666 if ((cs->Iop & 0xFC) == 0x88 && 628 (cs->Irm & 0xC7) == modregrm(3,0,reg)) 629 cs->Iop = NOP; 667 (cs->Irm & 0xC7) == modregrm(3,0,reg & 7)) 668 { 669 unsigned r = cs->Irm & 7; 670 if (cs->Irex & REX_B) 671 r |= 8; 672 if (r == reg) 673 cs->Iop = NOP; 674 } 630 675 631 676 return cat4(c,cg,cd,gen(CNIL,cs)); 632 677 } 633 678 634 679 635 680 /************************** 636 681 * Get addressing mode. 637 682 */ 638 683 639 684 unsigned getaddrmode(regm_t idxregs) 640 685 { 641 unsigned reg;642 685 unsigned mode; 643 686 644 if (I32) 645 { reg = findreg(idxregs & (ALLREGS | mBP)); 646 mode = modregrm(2,0,reg); 647 } 648 else 687 if (I16) 649 688 { 650 689 mode = (idxregs & mBX) ? modregrm(2,0,7) : /* [BX] */ 651 690 (idxregs & mDI) ? modregrm(2,0,5): /* [DI] */ 652 691 (idxregs & mSI) ? 
modregrm(2,0,4): /* [SI] */ 653 692 (assert(0),1); 654 693 } 694 else 695 { unsigned reg = findreg(idxregs & (ALLREGS | mBP)); 696 mode = modregrmx(2,0,reg); 697 } 655 698 return mode; 699 } 700 701 void setaddrmode(code *c, regm_t idxregs) 702 { 703 unsigned mode = getaddrmode(idxregs); 704 c->Irm = mode & 0xFF; 705 c->Irex &= ~REX_B; 706 c->Irex |= mode >> 16; 656 707 } 657 708 658 709 /********************************************** 659 710 */ 660 711 661 712 void getlvalue_msw(code *c) 662 713 { 663 714 if (c->IFL1 == FLreg) 664 { unsigned regmsw; 665 666 regmsw = c->IEVsym1->Sregmsw; 667 c->Irm = (c->Irm & ~7) | regmsw; 715 { 716 unsigned regmsw = c->IEVsym1->Sregmsw; 717 c->Irm = (c->Irm & ~7) | (regmsw & 7); 718 if (regmsw & 8) 719 c->Irex |= REX_B; 720 else 721 c->Irex &= ~REX_B; 668 722 } 669 723 else 670 724 c->IEVoffset1 += REGSIZE; 671 725 } 672 726 673 727 /********************************************** 674 728 */ 675 729 676 730 void getlvalue_lsw(code *c) 677 731 { 678 732 if (c->IFL1 == FLreg) 679 { unsigned reglsw; 680 681 reglsw = c->IEVsym1->Sreglsw; 682 c->Irm = (c->Irm & ~7) | reglsw; 733 { 734 unsigned reglsw = c->IEVsym1->Sreglsw; 735 c->Irm = (c->Irm & ~7) | (reglsw & 7); 736 if (reglsw & 8) 737 c->Irex |= REX_B; 738 else 739 c->Irex &= ~REX_B; 683 740 } 684 741 else 685 742 c->IEVoffset1 -= REGSIZE; 686 743 } 687 744 688 745 /****************** 689 746 * Compute addressing mode. 690 747 * Generate & return sequence of code (if any). 691 748 * Return in cs the info on it. 
692 749 * Input: 693 750 * pcs -> where to store data about addressing mode 694 751 * e -> the lvalue elem 695 752 * keepmsk mask of registers we must not destroy or use 696 753 * if (keepmsk & RMstore), this will be only a store operation 697 754 * into the lvalue 698 755 * if (keepmsk & RMload), this will be a read operation only 699 756 */ 700 757 701 code *getlvalue(code __ss*pcs,elem *e,regm_t keepmsk)758 code *getlvalue(code *pcs,elem *e,regm_t keepmsk) 702 759 { regm_t idxregs; 703 760 unsigned fl,f,opsave; 704 761 code *c; 705 762 elem *e1; 706 763 elem *e11; 707 764 elem *e12; 708 765 bool e1isadd,e1free; 709 766 unsigned reg; 710 tym_t ty;711 767 tym_t e1ty; 712 768 symbol *s; 713 unsigned sz;714 769 715 770 //printf("getlvalue(e = %p)\n",e); 716 771 //elem_print(e); 717 772 assert(e); 718 773 elem_debug(e); 719 774 if (e->Eoper == OPvar || e->Eoper == OPrelconst) 720 775 { s = e->EV.sp.Vsym; 721 776 fl = s->Sfl; 722 777 if (tyfloating(s->ty())) 723 778 obj_fltused(); 724 779 } 725 780 else 726 781 fl = FLoper; 727 782 pcs->IFL1 = fl; 728 783 pcs->Iflags = CFoff; /* only want offsets */ 729 784 pcs->Irex = 0; 730 785 pcs->IEVoffset1 = 0; 731 ty = e->Ety; 786 787 tym_t ty = e->Ety; 788 unsigned sz = tysize(ty); 732 789 if (tyfloating(ty)) 733 790 obj_fltused(); 734 sz = tysize(ty); 735 if (I32 && sz == SHORTSIZE) 791 else if (I64 && sz == 8) 792 pcs->Irex |= REX_W; 793 if (!I16 && sz == SHORTSIZE) 736 794 pcs->Iflags |= CFopsize; 737 795 if (ty & mTYvolatile) 738 796 pcs->Iflags |= CFvolatile; 739 797 c = CNIL; 740 798 switch (fl) 741 799 { 742 800 #if 0 && TARGET_LINUX 743 801 case FLgot: 744 802 case FLgotoff: 745 803 gotref = 1; 746 804 pcs->IEVsym1 = s; 747 805 pcs->IEVoffset1 = e->EV.sp.Voffset; 748 806 if (e->Eoper == OPvar && fl == FLgot) 749 807 { 750 808 code *c1; 751 809 int saveop = pcs->Iop; 752 810 idxregs = allregs & ~keepmsk; // get a scratch register 753 811 c = allocreg(&idxregs,&reg,TYptr); 754 812 pcs->Irm = modregrm(2,reg,BX); // BX has 
GOT 755 813 pcs->Isib = 0; … … 801 859 e11 = e1->E1; 802 860 } 803 861 804 862 /* First see if we can replace *(e+&v) with 805 863 * MOV idxreg,e 806 864 * EA = [ES:] &v+idxreg 807 865 */ 808 866 809 867 if (e1isadd && 810 868 e12->Eoper == OPrelconst && 811 869 (f = el_fl(e12)) != FLfardata && 812 870 e1->Ecount == e1->Ecomsub && 813 871 (!e1->Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) && 814 872 tysize(e11->Ety) == REGSIZE 815 873 ) 816 874 { unsigned char t; /* component of r/m field */ 817 875 int ss; 818 876 int ssi; 819 877 820 878 /*assert(datafl[f]);*/ /* what if addr of func? */ 821 if ( I32)879 if (!I16) 822 880 { /* Any register can be an index register */ 823 idxregs = allregs & ~keepmsk;881 regm_t idxregs = allregs & ~keepmsk; 824 882 assert(idxregs); 825 883 826 884 /* See if e1->E1 can be a scaled index */ 827 885 ss = isscaledindex(e11); 828 886 if (ss) 829 887 { 830 888 /* Load index register with result of e11->E1 */ 831 889 c = cdisscaledindex(e11,&idxregs,keepmsk); 832 890 reg = findreg(idxregs); 833 891 #if 0 && TARGET_LINUX 834 892 if (f == FLgot || f == FLgotoff) // config.flags3 & CFG3pic 835 893 { 836 894 gotref = 1; 837 895 pcs->Irm = modregrm(2,0,4); 838 896 pcs->Isib = modregrm(ss,reg,BX); 839 897 } 840 898 else 841 899 #endif 842 900 { 843 901 t = stackfl[f] ? 
2 : 0; 844 902 pcs->Irm = modregrm(t,0,4); 845 pcs->Isib = modregrm(ss,reg,5); 903 pcs->Isib = modregrm(ss,reg & 7,5); 904 if (reg & 8) 905 pcs->Irex |= REX_X; 846 906 } 847 907 } 848 908 else if ((e11->Eoper == OPmul || e11->Eoper == OPshl) && 849 909 !e11->Ecount && 850 910 e11->E2->Eoper == OPconst && 851 911 (ssi = ssindex(e11->Eoper,e11->E2->EV.Vuns)) != 0 852 912 ) 853 913 { 854 914 regm_t scratchm; 855 unsigned r;856 int ss1;857 915 int ss2; 858 char ssflags;859 916 860 917 #if 0 && TARGET_LINUX 861 918 assert(f != FLgot && f != FLgotoff); 862 919 #endif 863 ssflags = ssindex_array[ssi].ssflags;920 char ssflags = ssindex_array[ssi].ssflags; 864 921 if (ssflags & SSFLnobp && stackfl[f]) 865 922 goto L6; 866 923 867 924 // Load index register with result of e11->E1 868 925 c = scodelem(e11->E1,&idxregs,keepmsk,TRUE); 869 926 reg = findreg(idxregs); 870 927 871 ss1 = ssindex_array[ssi].ss1;928 int ss1 = ssindex_array[ssi].ss1; 872 929 if (ssflags & SSFLlea) 873 930 { 874 931 assert(!stackfl[f]); 875 932 pcs->Irm = modregrm(2,0,4); 876 pcs->Isib = modregrm(ss1,reg,reg); 933 pcs->Isib = modregrm(ss1,reg & 7,reg & 7); 934 if (reg & 8) 935 pcs->Irex |= REX_X | REX_B; 877 936 } 878 937 else 879 938 { int rbase; 939 unsigned r; 880 940 881 941 scratchm = ALLREGS & ~keepmsk; 882 942 c = cat(c,allocreg(&scratchm,&r,TYint)); 883 943 884 944 if (ssflags & SSFLnobase1) 885 945 { t = 0; 886 946 rbase = 5; 887 947 } 888 948 else 889 949 { t = 0; 890 950 rbase = reg; 891 951 if (rbase == BP) 892 952 { static unsigned imm32[4] = {1+1,2+1,4+1,8+1}; 893 953 894 954 // IMUL r,BP,imm32 895 c = genc2(c,0x69,modreg rm(3,r,BP),imm32[ss1]);955 c = genc2(c,0x69,modregxrm(3,r,BP),imm32[ss1]); 896 956 goto L7; 897 957 } 898 958 } 899 959 900 c = gen2sib(c,0x8D,modregrm(t,r,4),modregrm(ss1,reg,rbase)); 960 c = gen2sib(c,0x8D,modregxrm(t,r,4),modregrm(ss1,reg & 7,rbase & 7)); 961 if (reg & 8) 962 code_orrex(c, REX_X); 963 if (rbase & 8) 964 code_orrex(c, REX_B); 965 901 966 if (ssflags & 
SSFLnobase1) 902 967 { code_last(c)->IFL1 = FLconst; 903 968 code_last(c)->IEV1.Vuns = 0; 904 969 } 905 970 L7: 906 971 if (ssflags & SSFLnobase) 907 972 { t = stackfl[f] ? 2 : 0; 908 973 rbase = 5; 909 974 } 910 975 else 911 976 { t = 2; 912 977 rbase = r; 913 978 assert(rbase != BP); 914 979 } 915 980 pcs->Irm = modregrm(t,0,4); 916 pcs->Isib = modregrm(ssindex_array[ssi].ss2,r,rbase); 981 pcs->Isib = modregrm(ssindex_array[ssi].ss2,r & 7,rbase & 7); 982 if (r & 8) 983 code_orrex(pcs, REX_X); 984 if (rbase & 8) 985 code_orrex(pcs, REX_B); 917 986 } 918 987 freenode(e11->E2); 919 988 freenode(e11); 920 989 } 921 990 else 922 991 { 923 992 L6: 924 993 /* Load index register with result of e11 */ 925 994 c = scodelem(e11,&idxregs,keepmsk,TRUE); 926 pcs->Irm = getaddrmode(idxregs);995 setaddrmode(pcs, idxregs); 927 996 #if 0 && TARGET_LINUX 928 997 if (e12->EV.sp.Vsym->Sfl == FLgot || e12->EV.sp.Vsym->Sfl == FLgotoff) 929 998 { 930 999 gotref = 1; 931 1000 #if 1 932 1001 reg = findreg(idxregs & (ALLREGS | mBP)); 933 1002 pcs->Irm = modregrm(2,0,4); 934 1003 pcs->Isib = modregrm(0,reg,BX); 935 1004 #else 936 1005 pcs->Isib = modregrm(0,pcs->Irm,BX); 937 1006 pcs->Irm = modregrm(2,0,4); 938 1007 #endif 939 1008 } 940 1009 else 941 1010 #endif 942 1011 if (stackfl[f]) /* if we need [EBP] too */ 943 { 944 pcs->Isib = modregrm(0,pcs->Irm,BP); 1012 { unsigned idx = pcs->Irm & 7; 1013 if (pcs->Irex & REX_B) 1014 pcs->Irex = (pcs->Irex & ~REX_B) | REX_X; 1015 pcs->Isib = modregrm(0,idx,BP); 945 1016 pcs->Irm = modregrm(2,0,4); 946 1017 } 947 1018 } 948 1019 } 949 1020 else 950 1021 { 951 1022 idxregs = IDXREGS & ~keepmsk; /* only these can be index regs */ 952 1023 assert(idxregs); 953 1024 #if 0 && TARGET_LINUX 954 1025 assert(f != FLgot && f != FLgotoff); 955 1026 #endif 956 1027 if (stackfl[f]) /* if stack data type */ 957 1028 { idxregs &= mSI | mDI; /* BX can't index off stack */ 958 1029 if (!idxregs) goto L1; /* index regs aren't avail */ 959 1030 t = 6; /* 
[BP+SI+disp] */ 960 1031 } 961 1032 else 962 1033 t = 0; /* [SI + disp] */ 963 1034 c = scodelem(e11,&idxregs,keepmsk,TRUE); /* load idx reg */ 964 1035 pcs->Irm = getaddrmode(idxregs) ^ t; … … 981 1052 { unsigned flagsave; 982 1053 983 1054 idxregs = IDXREGS & ~keepmsk; 984 1055 c = cat(c,allocreg(&idxregs,&reg,TYoffset)); 985 1056 986 1057 /* If desired result is a far pointer, we'll have */ 987 1058 /* to load another register with the segment of v */ 988 1059 if (e1ty == TYfptr) 989 1060 { 990 1061 unsigned msreg; 991 1062 992 1063 idxregs |= mMSW & ALLREGS & ~keepmsk; 993 1064 c = cat(c,allocreg(&idxregs,&msreg,TYfptr)); 994 1065 msreg = findregmsw(idxregs); 995 1066 /* MOV msreg,segreg */ 996 1067 c = genregs(c,0x8C,segfl[f],msreg); 997 1068 } 998 1069 opsave = pcs->Iop; 999 1070 flagsave = pcs->Iflags; 1000 1071 pcs->Iop = 0x8D; 1001 pcs->Irm |= modregrm(0,reg,0);1002 if ( I32)1072 code_newreg(pcs, reg); 1073 if (!I16) 1003 1074 pcs->Iflags &= ~CFopsize; 1004 1075 c = gen(c,pcs); /* LEA idxreg,EA */ 1005 1076 cssave(e1,idxregs,TRUE); 1006 if ( I32)1077 if (!I16) 1007 1078 pcs->Iflags = flagsave; 1008 1079 if (stackfl[f] && (config.wflags & WFssneds)) // if pointer into stack 1009 1080 pcs->Iflags |= CFss; // add SS: override 1010 1081 pcs->Iop = opsave; 1011 1082 pcs->IFL1 = FLoffset; 1012 1083 pcs->IEV1.Vuns = 0; 1013 pcs->Irm = getaddrmode(idxregs);1084 setaddrmode(pcs, idxregs); 1014 1085 } 1015 1086 freenode(e12); 1016 1087 if (e1free) 1017 1088 freenode(e1); 1018 1089 goto Lptr; 1019 1090 } 1020 1091 1021 1092 L1: 1022 1093 1023 1094 /* The rest of the cases could be a far pointer */ 1024 1095 1025 idxregs = (I 32 ? allregs : IDXREGS) & ~keepmsk; /* only these can be index regs */1096 idxregs = (I16 ? 
IDXREGS : allregs) & ~keepmsk; // only these can be index regs 1026 1097 assert(idxregs); 1027 if (I32 && sz == REGSIZE && keepmsk & RMstore) 1098 if (!I16 && 1099 (sz == REGSIZE || (I64 && sz == 4)) && 1100 keepmsk & RMstore) 1028 1101 idxregs |= regcon.mvar; 1029 1102 1030 1103 #if !TARGET_FLAT 1031 1104 switch (e1ty) 1032 1105 { case TYfptr: /* if far pointer */ 1033 1106 case TYhptr: 1034 1107 idxregs = (mES | IDXREGS) & ~keepmsk; // need segment too 1035 1108 assert(idxregs & mES); 1036 1109 pcs->Iflags |= CFes; /* ES segment override */ 1037 1110 break; 1038 1111 case TYsptr: /* if pointer to stack */ 1039 1112 if (config.wflags & WFssneds) // if SS != DS 1040 1113 pcs->Iflags |= CFss; /* then need SS: override */ 1041 1114 break; 1042 1115 case TYcptr: /* if pointer to code */ 1043 1116 pcs->Iflags |= CFcs; /* then need CS: override */ 1044 1117 break; 1045 1118 } 1046 1119 #endif 1047 1120 pcs->IFL1 = FLoffset; … … 1054 1127 */ 1055 1128 1056 1129 if (e1isadd && e12->Eoper == OPconst && 1057 1130 tysize(e12->Ety) == REGSIZE && 1058 1131 (!e1->Ecount || !e1free) 1059 1132 ) 1060 1133 { int ss; 1061 1134 1062 1135 pcs->IEV1.Vuns = e12->EV.Vuns; 1063 1136 freenode(e12); 1064 1137 if (e1free) freenode(e1); 1065 1138 if (I32 && e11->Eoper == OPadd && !e11->Ecount && 1066 1139 tysize(e11->Ety) == REGSIZE) 1067 1140 { 1068 1141 e12 = e11->E2; 1069 1142 e11 = e11->E1; 1070 1143 e1 = e1->E1; 1071 1144 e1free = TRUE; 1072 1145 goto L4; 1073 1146 } 1074 if ( I32&& (ss = isscaledindex(e11)) != 0)1147 if (!I16 && (ss = isscaledindex(e11)) != 0) 1075 1148 { // (v * scale) + const 1076 1149 c = cdisscaledindex(e11,&idxregs,keepmsk); 1077 1150 reg = findreg(idxregs); 1078 1151 pcs->Irm = modregrm(0,0,4); 1079 pcs->Isib = modregrm(ss,reg,5); 1152 pcs->Isib = modregrm(ss,reg & 7,5); 1153 if (reg & 8) 1154 pcs->Irex |= REX_X; 1080 1155 } 1081 1156 else 1082 1157 { 1083 1158 c = scodelem(e11,&idxregs,keepmsk,TRUE); // load index reg 1084 pcs->Irm = getaddrmode(idxregs);1159 
setaddrmode(pcs, idxregs); 1085 1160 } 1086 1161 goto Lptr; 1087 1162 } 1088 1163 1089 1164 /* Look for *(v1 + v2) 1090 1165 * EA = [v1][v2] 1091 1166 */ 1092 1167 1093 if ( I32&& e1isadd && (!e1->Ecount || !e1free) &&1168 if (!I16 && e1isadd && (!e1->Ecount || !e1free) && 1094 1169 tysize[e1ty] == REGSIZE) 1095 1170 { code *c2; 1096 1171 regm_t idxregs2; 1097 1172 unsigned base,index; 1098 1173 int ss; 1099 1174 1100 1175 L4: 1101 1176 // Look for *(v1 + v2 << scale) 1102 1177 ss = isscaledindex(e12); 1103 1178 if (ss) 1104 1179 { 1105 1180 c = scodelem(e11,&idxregs,keepmsk,TRUE); 1106 1181 idxregs2 = allregs & ~(idxregs | keepmsk); 1107 1182 c2 = cdisscaledindex(e12,&idxregs2,keepmsk | idxregs); 1108 1183 } 1109 1184 1110 1185 // Look for *(v1 << scale + v2) 1111 1186 else if ((ss = isscaledindex(e11)) != 0) 1112 1187 { 1113 1188 idxregs2 = idxregs; … … 1122 1197 ) 1123 1198 { 1124 1199 pcs->IEV1.Vuns = e11->E2->EV.Vuns; 1125 1200 idxregs2 = idxregs; 1126 1201 c = cdisscaledindex(e11->E1,&idxregs2,keepmsk); 1127 1202 idxregs = allregs & ~(idxregs2 | keepmsk); 1128 1203 c2 = scodelem(e12,&idxregs,keepmsk | idxregs2,TRUE); 1129 1204 freenode(e11->E2); 1130 1205 freenode(e11); 1131 1206 } 1132 1207 else 1133 1208 { 1134 1209 c = scodelem(e11,&idxregs,keepmsk,TRUE); 1135 1210 idxregs2 = allregs & ~(idxregs | keepmsk); 1136 1211 c2 = scodelem(e12,&idxregs2,keepmsk | idxregs,TRUE); 1137 1212 } 1138 1213 c = cat(c,c2); 1139 1214 base = findreg(idxregs); 1140 1215 index = findreg(idxregs2); 1141 1216 pcs->Irm = modregrm(2,0,4); 1142 pcs->Isib = modregrm(ss,index,base); 1217 pcs->Isib = modregrm(ss,index & 7,base & 7); 1218 if (index & 8) 1219 pcs->Irex |= REX_X; 1220 if (base & 8) 1221 pcs->Irex |= REX_B; 1143 1222 if (e1free) freenode(e1); 1144 1223 goto Lptr; 1145 1224 } 1146 1225 1147 1226 /* give up and replace *e1 with 1148 1227 * MOV idxreg,e 1149 1228 * EA = 0[idxreg] 1150 1229 * pinholeopt() will usually correct the 0, we need it in case 1151 1230 * we have a 
pointer to a long and need an offset to the second 1152 1231 * word. 1153 1232 */ 1154 1233 1155 1234 assert(e1free); 1156 1235 c = scodelem(e1,&idxregs,keepmsk,TRUE); /* load index register */ 1157 pcs->Irm = getaddrmode(idxregs);1236 setaddrmode(pcs, idxregs); 1158 1237 Lptr: 1159 1238 if (config.flags3 & CFG3ptrchk) 1160 1239 cod3_ptrchk(&c,pcs,keepmsk); // validate pointer code 1161 1240 break; 1162 1241 case FLdatseg: 1163 1242 assert(0); 1164 1243 #if 0 1165 1244 pcs->Irm = modregrm(0,0,BPRM); 1166 1245 pcs->IEVpointer1 = e->EVpointer; 1167 1246 break; 1168 1247 #endif 1169 1248 case FLfltreg: 1170 1249 reflocal = TRUE; 1171 1250 pcs->Irm = modregrm(2,0,BPRM); 1172 1251 pcs->IEV1.Vint = 0; 1173 1252 break; 1174 1253 case FLreg: 1175 1254 goto L2; 1176 1255 case FLpara: 1177 1256 refparam = TRUE; 1178 1257 pcs->Irm = modregrm(2,0,BPRM); 1179 1258 goto L2; 1180 1259 1181 1260 case FLauto: 1182 1261 if (s->Sclass == SCfastpar && regcon.params & mask[s->Spreg]) 1183 1262 { 1184 1263 if (keepmsk & RMload) 1185 1264 { 1186 1265 if (sz == REGSIZE) // could this be (sz <= REGSIZE) ? 
1187 1266 { 1188 pcs->Irm = modregrm(3,0,s->Spreg); 1267 pcs->Irm = modregrm(3,0,s->Spreg & 7); 1268 if (s->Spreg & 8) 1269 pcs->Irex |= REX_B; 1189 1270 regcon.used |= mask[s->Spreg]; 1190 1271 break; 1191 1272 } 1192 1273 } 1193 1274 else 1194 1275 regcon.params &= ~mask[s->Spreg]; 1195 1276 } 1196 1277 case FLtmp: 1197 1278 case FLbprel: 1198 1279 reflocal = TRUE; 1199 1280 pcs->Irm = modregrm(2,0,BPRM); 1200 1281 goto L2; 1201 1282 case FLextern: 1202 1283 if (s->Sident[0] == '_' && memcmp(s->Sident + 1,"tls_array",10) == 0) 1203 1284 { 1204 1285 #if TARGET_LINUX || TARGET_FREEBSD || TARGET_SOLARIS 1205 1286 // Rewrite as GS:[0000] 1206 1287 pcs->Irm = modregrm(0, 0, BPRM); 1207 1288 pcs->IFL1 = FLconst; 1208 1289 pcs->IEV1.Vuns = 0; … … 1216 1297 goto L3; 1217 1298 case FLdata: 1218 1299 case FLudata: 1219 1300 case FLcsdata: 1220 1301 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1221 1302 case FLgot: 1222 1303 case FLgotoff: 1223 1304 case FLtlsdata: 1224 1305 #endif 1225 1306 L3: 1226 1307 pcs->Irm = modregrm(0,0,BPRM); 1227 1308 L2: 1228 1309 if (fl == FLreg) 1229 1310 { assert(s->Sregm & regcon.mvar); 1230 1311 if ( 1231 1312 s->Sclass == SCregpar || 1232 1313 s->Sclass == SCparameter) 1233 1314 { refparam = TRUE; 1234 1315 reflocal = TRUE; // kludge to set up prolog 1235 1316 } 1236 pcs->Irm = modregrm(3,0,s->Sreglsw); 1317 pcs->Irm = modregrm(3,0,s->Sreglsw & 7); 1318 if (s->Sreglsw & 8) 1319 pcs->Irex |= REX_B; 1237 1320 if (e->EV.sp.Voffset == 1 && sz == 1) 1238 1321 { assert(s->Sregm & BYTEREGS); 1322 assert(s->Sreglsw < 4); 1239 1323 pcs->Irm |= 4; // use 2nd byte of register 1240 1324 } 1241 1325 else 1242 1326 assert(!e->EV.sp.Voffset); 1243 1327 } 1244 1328 else if (s->ty() & mTYcs && !(fl == FLextern && LARGECODE)) 1245 1329 { 1246 1330 pcs->Iflags |= CFcs | CFoff; 1247 1331 } 1248 1332 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1249 1333 // if (fl == FLtlsdata || s->ty() & mTYthread) 1250 1334 // 
pcs->Iflags |= CFgs; 1251 1335 #endif 1252 1336 pcs->IEVsym1 = s; 1253 1337 pcs->IEVoffset1 = e->EV.sp.Voffset; 1254 1338 if (sz == 1) 1255 1339 { /* Don't use SI or DI for this variable */ 1256 1340 s->Sflags |= GTbyte; 1257 1341 if (e->EV.sp.Voffset > 1) 1258 1342 s->Sflags &= ~GTregcand; 1259 1343 } 1260 1344 else if (e->EV.sp.Voffset) 1261 1345 s->Sflags &= ~GTregcand; 1262 1346 if (!(keepmsk & RMstore)) // if not store only 1263 1347 { s->Sflags |= SFLread; // assume we are doing a read 1264 1348 } 1265 1349 break; 1266 1350 case FLpseudo: 1267 1351 #if MARS 1268 1352 assert(0); 1269 1353 #else 1270 { unsigned u; 1271 1272 u = s->Sreglsw; 1354 { 1355 unsigned u = s->Sreglsw; 1273 1356 c = getregs(pseudomask[u]); 1274 1357 pcs->Irm = modregrm(3,0,pseudoreg[u] & 7); 1275 1358 break; 1276 1359 } 1277 1360 #endif 1278 1361 case FLfardata: 1279 1362 assert(!TARGET_FLAT); 1280 1363 case FLfunc: /* reading from code seg */ 1281 1364 if (config.exe & EX_flat) 1282 1365 goto L3; 1283 1366 Lfardata: 1284 { regm_t regm; 1285 code *c1; 1286 1287 regm = ALLREGS & ~keepmsk; /* need scratch register */ 1288 c1 = allocreg(&regm,&reg,TYint); 1367 { 1368 regm_t regm = ALLREGS & ~keepmsk; // need scratch register 1369 code *c1 = allocreg(&regm,&reg,TYint); 1289 1370 /* MOV mreg,seg of symbol */ 1290 1371 c = gencs(CNIL,0xB8 + reg,0,FLextern,s); 1291 1372 c->Iflags = CFseg; 1292 c = gen2(c,0x8E,modregrm (3,0,reg)); /* MOV ES,reg */1373 c = gen2(c,0x8E,modregrmx(3,0,reg)); /* MOV ES,reg */ 1293 1374 c = cat3(c1,getregs(mES),c); 1294 1375 pcs->Iflags |= CFes | CFoff; /* ES segment override */ 1295 1376 goto L3; 1296 1377 } 1297 1378 1298 1379 case FLstack: 1299 assert( I32);1380 assert(!I16); 1300 1381 pcs->Irm = modregrm(2,0,4); 1301 1382 pcs->Isib = modregrm(0,4,SP); 1302 1383 pcs->IEVsym1 = s; 1303 1384 pcs->IEVoffset1 = e->EV.sp.Voffset; 1304 1385 break; 1305 1386 1306 1387 default: 1307 1388 #ifdef DEBUG 1308 1389 WRFL((enum FL)fl); 1309 1390 symbol_print(s); 1310 1391 #endif 1311 1392 
assert(0); 1312 1393 } 1313 1394 return c; 1314 1395 } 1315 1396 1316 1397 1317 1398 /******************************* 1318 1399 * Same as codelem(), but do not destroy the registers in keepmsk. 1319 1400 * Use scratch registers as much as possible, then use stack. … … 1432 1512 1433 1513 assert(i < REGMAX); 1434 1514 if (mi & tosave) /* i = register to save */ 1435 1515 { 1436 1516 if (touse) /* if any scratch registers */ 1437 1517 { for (j = 0; j < 8; j++) 1438 1518 { regm_t mj = mask[j]; 1439 1519 1440 1520 if (touse & mj) 1441 1521 { cs1 = genmovreg(cs1,j,i); 1442 1522 cs2 = cat(genmovreg(CNIL,i,j),cs2); 1443 1523 touse &= ~mj; 1444 1524 mfuncreg &= ~mj; 1445 1525 regcon.used |= mj; 1446 1526 break; 1447 1527 } 1448 1528 } 1449 1529 assert(j < 8); 1450 1530 } 1451 1531 else /* else use stack */ 1532 #if 0 1452 1533 { int push,pop; 1453 1534 1454 1535 stackchanged = 1; 1455 1536 adjesp += REGSIZE; 1456 1537 if (i == ES) 1457 1538 { push = 0x06; 1458 1539 pop = 0x07; 1459 1540 } 1460 1541 else 1461 1542 { push = 0x50 + i; 1462 1543 pop = push | 8; 1463 1544 } 1464 1545 cs1 = gen1(cs1,push); /* PUSH i */ 1465 1546 cs2 = cat(gen1(CNIL,pop),cs2); /* POP i */ 1466 1547 } 1548 #else 1549 { 1550 stackchanged = 1; 1551 adjesp += REGSIZE; 1552 gensaverestore2(mask[i], &cs1, &cs2); 1553 } 1554 #endif 1467 1555 cs3 = cat(getregs(mi),cs3); 1468 1556 tosave &= ~mi; 1469 1557 } 1470 1558 } 1471 1559 if (adjesp) 1472 1560 { 1473 1561 // If this is done an odd number of times, it 1474 1562 // will throw off the 8 byte stack alignment. 1475 1563 // We should *only* worry about this if a function 1476 1564 // was called in the code generation by codelem(). 
1477 1565 int sz; 1478 1566 if (STACKALIGN == 16) 1479 1567 sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1); 1480 1568 else 1481 1569 sz = -(adjesp & 7) & 7; 1482 if (calledafunc && I32&& sz && (STACKALIGN == 16 || config.flags4 & CFG4stackalign))1483 { code *cx;1484 1570 if (calledafunc && !I16 && sz && (STACKALIGN == 16 || config.flags4 & CFG4stackalign)) 1571 { 1572 unsigned grex = I64 ? REX_W << 16 : 0; 1485 1573 regm_t mval_save = regcon.immed.mval; 1486 1574 regcon.immed.mval = 0; // prevent reghasvalue() optimizations 1487 1575 // because c hasn't been executed yet 1488 cs1 = genc2(cs1,0x81, modregrm(3,5,SP),sz); // SUB ESP,sz1576 cs1 = genc2(cs1,0x81,grex | modregrm(3,5,SP),sz); // SUB ESP,sz 1489 1577 regcon.immed.mval = mval_save; 1490 1578 cs1 = genadjesp(cs1, sz); 1491 1579 1492 c x = genc2(CNIL,0x81,modregrm(3,0,SP),sz); // ADD ESP,sz1580 code *cx = genc2(CNIL,0x81,grex | modregrm(3,0,SP),sz); // ADD ESP,sz 1493 1581 cx = genadjesp(cx, -sz); 1494 1582 cs2 = cat(cx, cs2); 1495 1583 } 1496 1584 1497 1585 cs1 = genadjesp(cs1,adjesp); 1498 1586 cs2 = genadjesp(cs2,-adjesp); 1499 1587 } 1500 1588 1501 1589 calledafunc |= calledafuncsave; 1502 1590 msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save */ 1503 1591 mfuncreg &= oldmfuncreg; /* update original */ 1504 1592 #ifdef DEBUG 1505 1593 if (debugw) 1506 1594 printf("-scodelem(e=%p *pretregs=x%x keepmsk=x%x constflag=%d\n", 1507 1595 e,*pretregs,keepmsk,constflag); 1508 1596 #endif 1509 1597 return cat4(cs1,c,cs3,cs2); 1510 1598 } 1511 1599 1512 1600 1513 1601 /***************************** 1514 1602 * Given an opcode and EA in cs, generate code 1515 1603 * for each floating register in turn. 
1516 1604 * Input: 1517 1605 * tym either TYdouble or TYfloat 1518 1606 */ 1519 1607 1520 code *fltregs(code __ss*pcs,tym_t tym)1608 code *fltregs(code *pcs,tym_t tym) 1521 1609 { code *c; 1522 1610 1611 assert(!I64); 1523 1612 tym = tybasic(tym); 1524 1613 if (I32) 1525 1614 { 1526 1615 c = getregs((tym == TYfloat) ? mAX : mAX | mDX); 1527 1616 if (tym != TYfloat) 1528 1617 { 1529 1618 pcs->IEVoffset1 += REGSIZE; 1530 1619 NEWREG(pcs->Irm,DX); 1531 1620 c = gen(c,pcs); 1532 1621 pcs->IEVoffset1 -= REGSIZE; 1533 1622 } 1534 1623 NEWREG(pcs->Irm,AX); 1535 1624 c = gen(c,pcs); 1536 1625 } 1537 1626 else 1538 1627 { 1539 1628 c = getregs((tym == TYfloat) ? FLOATREGS_16 : DOUBLEREGS_16); 1540 1629 pcs->IEVoffset1 += (tym == TYfloat) ? 2 : 6; 1541 1630 if (tym == TYfloat) 1542 1631 NEWREG(pcs->Irm,DX); … … 1553 1642 { pcs->IEVoffset1 -= 2; 1554 1643 NEWREG(pcs->Irm,CX); 1555 1644 gen(c,pcs); 1556 1645 pcs->IEVoffset1 -= 2; /* note that exit is with Voffset unaltered */ 1557 1646 NEWREG(pcs->Irm,DX); 1558 1647 gen(c,pcs); 1559 1648 } 1560 1649 } 1561 1650 return c; 1562 1651 } 1563 1652 1564 1653 1565 1654 /***************************** 1566 1655 * Given a result in registers, test it for TRUE or FALSE. 1567 1656 * Will fail if TYfptr and the reg is ES! 1568 1657 * If saveflag is TRUE, preserve the contents of the 1569 1658 * registers. 
1570 1659 */ 1571 1660 1572 1661 code *tstresult(regm_t regm,tym_t tym,unsigned saveflag) 1573 { code *ce; 1574 unsigned reg; 1662 { 1575 1663 unsigned scrreg; /* scratch register */ 1576 unsigned sz;1577 1664 regm_t scrregm; 1578 1665 1579 1666 #ifdef DEBUG 1580 1667 if (!(regm & (mBP | ALLREGS))) 1581 1668 printf("tstresult(regm = x%x, tym = x%lx, saveflag = %d)\n", 1582 1669 regm,tym,saveflag); 1583 1670 #endif 1584 1671 assert(regm & (mBP | ALLREGS)); 1585 1672 tym = tybasic(tym); 1586 c e = CNIL;1587 reg = findreg(regm);1588 sz = tysize[tym];1673 code *ce = CNIL; 1674 unsigned reg = findreg(regm); 1675 unsigned sz = tysize[tym]; 1589 1676 if (sz == 1) 1590 1677 { assert(regm & BYTEREGS); 1591 1678 return genregs(ce,0x84,reg,reg); // TEST regL,regL 1592 1679 } 1593 1680 if (sz <= REGSIZE) 1594 1681 { 1595 if ( I32)1682 if (!I16) 1596 1683 { 1597 1684 if (tym == TYfloat) 1598 1685 { if (saveflag) 1599 1686 { 1600 1687 scrregm = allregs & ~regm; /* possible scratch regs */ 1601 1688 ce = allocreg(&scrregm,&scrreg,TYoffset); /* allocate scratch reg */ 1602 1689 ce = genmovreg(ce,scrreg,reg); /* MOV scrreg,msreg */ 1603 1690 reg = scrreg; 1604 1691 } 1605 1692 ce = cat(ce,getregs(mask[reg])); 1606 return gen2(ce,0xD1,modregrm (3,4,reg)); /* SHL reg,1 */1693 return gen2(ce,0xD1,modregrmx(3,4,reg)); // SHL reg,1 1607 1694 } 1608 1695 ce = gentstreg(ce,reg); // TEST reg,reg 1609 if ( tysize[tym]== SHORTSIZE)1696 if (sz == SHORTSIZE) 1610 1697 ce->Iflags |= CFopsize; /* 16 bit operands */ 1698 else if (sz == 8) 1699 code_orrex(ce, REX_W); 1611 1700 } 1612 1701 else 1613 1702 ce = gentstreg(ce,reg); // TEST reg,reg 1614 1703 return ce; 1615 1704 } 1616 1705 if (saveflag || tyfv(tym)) 1617 1706 { 1618 1707 scrregm = ALLREGS & ~regm; /* possible scratch regs */ 1619 1708 ce = allocreg(&scrregm,&scrreg,TYoffset); /* allocate scratch reg */ 1620 1709 if (I32 || sz == REGSIZE * 2) 1621 1710 { code *c; 1622 1711 1623 1712 assert(regm & mMSW && regm & mLSW); 1624 1713 1625 
1714 reg = findregmsw(regm); 1626 1715 if (I32) 1627 1716 { 1628 1717 if (tyfv(tym)) 1629 1718 { c = genregs(CNIL,0x0F,scrreg,reg); 1630 1719 c->Iop2 = 0xB7; /* MOVZX scrreg,msreg */ … … 1701 1790 1702 1791 // printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n", 1703 1792 // e,regm_str(retregs),regm_str(*pretregs)); 1704 1793 if (*pretregs == 0) return CNIL; /* if don't want result */ 1705 1794 assert(e && retregs); /* need something to work with */ 1706 1795 forccs = *pretregs & mPSW; 1707 1796 forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK); 1708 1797 tym = tybasic(e->Ety); 1709 1798 #if 0 1710 1799 if (tym == TYstruct) 1711 1800 // Hack to support cdstreq() 1712 1801 tym = TYfptr; 1713 1802 #else 1714 1803 if (tym == TYstruct) 1715 1804 // Hack to support cdstreq() 1716 1805 tym = (forregs & mMSW) ? TYfptr : TYnptr; 1717 1806 #endif 1718 1807 c = CNIL; 1719 1808 sz = tysize[tym]; 1720 1809 if (sz == 1) 1721 { unsigned reg; 1722 1810 { 1723 1811 assert(retregs & BYTEREGS); 1724 reg = findreg(retregs);1812 unsigned reg = findreg(retregs); 1725 1813 if (e->Eoper == OPvar && 1726 1814 e->EV.sp.Voffset == 1 && 1727 1815 e->EV.sp.Vsym->Sfl == FLreg) 1728 1816 { 1817 assert(reg < 4); 1729 1818 if (forccs) 1730 1819 c = gen2(c,0x84,modregrm(3,reg | 4,reg | 4)); // TEST regH,regH 1731 1820 forccs = 0; 1732 1821 } 1733 1822 } 1734 1823 if ((retregs & forregs) == retregs) /* if already in right registers */ 1735 1824 *pretregs = retregs; 1736 1825 else if (forregs) /* if return the result in registers */ 1737 { unsigned opsflag; 1738 1826 { 1739 1827 if (forregs & (mST01 | mST0)) 1740 1828 return fixresult87(e,retregs,pretregs); 1741 1829 ce = CNIL; 1742 opsflag = FALSE;1743 if ( !I32&& sz == 8)1830 unsigned opsflag = FALSE; 1831 if (I16 && sz == 8) 1744 1832 { if (forregs & mSTACK) 1745 1833 { assert(retregs == DOUBLEREGS_16); 1746 1834 /* Push floating regs */ 1747 1835 c = CNIL; 1748 1836 ce = gen1(ce,0x50 + AX); 1749 1837 gen1(ce,0x50 + BX); 
1750 1838 gen1(ce,0x50 + CX); 1751 1839 gen1(ce,0x50 + DX); 1752 1840 stackpush += DOUBLESIZE; 1753 1841 } 1754 1842 else if (retregs & mSTACK) 1755 1843 { assert(forregs == DOUBLEREGS_16); 1756 1844 /* Pop floating regs */ 1757 1845 c = getregs(forregs); 1758 1846 ce = gen1(ce,0x58 + DX); 1759 1847 gen1(ce,0x58 + CX); 1760 1848 gen1(ce,0x58 + BX); 1761 1849 gen1(ce,0x58 + AX); 1762 1850 stackpush -= DOUBLESIZE; 1763 1851 retregs = DOUBLEREGS_16; /* for tstresult() below */ … … 1796 1884 /* cuz we have real result in rreg */ 1797 1885 retregs = *pretregs & ~mPSW; 1798 1886 } 1799 1887 if (forccs) /* if return result in flags */ 1800 1888 c = cat(c,tstresult(retregs,tym,forregs)); 1801 1889 return c; 1802 1890 } 1803 1891 1804 1892 1805 1893 /******************************** 1806 1894 * Generate code sequence to call C runtime library support routine. 1807 1895 * clib = CLIBxxxx 1808 1896 * keepmask = mask of registers not to destroy. Currently can 1809 1897 * handle only 1. Should use a temporary rather than 1810 1898 * push/pop for speed. 
1811 1899 */ 1812 1900 1813 1901 int clib_inited = 0; // != 0 if initialized 1814 1902 1815 1903 code *callclib(elem *e,unsigned clib,regm_t *pretregs,regm_t keepmask) 1816 { code *c,*cpop; 1817 regm_t retregs; 1818 symbol *s; 1819 int i; 1820 1904 { 1821 1905 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1822 1906 static symbol lib[] = 1823 1907 { 1824 1908 /* Convert destroyed regs into saved regs */ 1825 1909 #define Z(desregs) (~(desregs) & (mBP| mES | ALLREGS)) 1826 1910 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1827 1911 #define N(name) "_" name 1828 1912 #else 1829 1913 #define N(name) name 1830 1914 #endif 1831 1915 1832 1916 /* Shorthand to map onto SYMBOLY() */ 1833 1917 #define Y(desregs,name) SYMBOLY(FLfunc,Z(desregs),N(name),0) 1834 1918 1835 1919 Y(0,"_LCMP__"), // CLIBlcmp 1836 1920 Y(mAX|mCX|mDX,"_LMUL__"), // CLIBlmul 1837 1921 #if 1 1838 1922 Y(mAX|mBX|mCX|mDX,"_LDIV__"), // CLIBldiv 1839 1923 Y(mAX|mBX|mCX|mDX,"_LDIV__"), // CLIBlmod 1840 1924 Y(mAX|mBX|mCX|mDX,"_ULDIV__"), // CLIBuldiv … … 2072 2156 {mAX,mAX,0,2}, // _DBLINT87@ dblint87 2073 2157 {mDX|mAX,mAX,0,2}, // _DBLLNG87@ dbllng87 2074 2158 {0,0,0,2}, // _FTST@ 2075 2159 {mPSW,mPSW,0,INFfloat,0,2}, // _FCOMPP@ 2076 2160 {mPSW,mPSW,0,2}, // _FTEST@ 2077 2161 {mPSW,mPSW,0,2}, // _FTEST0@ 2078 2162 {mST0,mST0,0,INFfloat,1,1}, // _FDIV@ 2079 2163 2080 2164 {mST01,mST01,0,INF32|INFfloat,3,5}, // _Cmul 2081 2165 {mST01,mST01,0,INF32|INFfloat,0,2}, // _Cdiv 2082 2166 {mPSW, mPSW, 0,INF32|INFfloat,0,4}, // _Ccmp 2083 2167 2084 2168 {mST0,mST0,0,INF32|INFfloat,2,1}, // _U64_LDBL 2085 2169 {0,mDX|mAX,0,INF32|INFfloat,1,2}, // __LDBLULLNG 2086 2170 }; 2087 2171 2088 2172 if (!clib_inited) /* if not initialized */ 2089 2173 { 2090 2174 assert(sizeof(lib) / sizeof(lib[0]) == CLIBMAX); 2091 2175 assert(sizeof(info) / sizeof(info[0]) == CLIBMAX); 2092 for (i = 0; i < CLIBMAX; i++)2176 for (int i = 0; i < CLIBMAX; i++) 2093 2177 { lib[i].Stype = tsclib; 2094 
2178 #if MARS 2095 2179 lib[i].Sxtrnnum = 0; 2096 2180 lib[i].Stypidx = 0; 2097 2181 #endif 2098 2182 } 2099 2183 2100 if ( I32)2184 if (!I16) 2101 2185 { /* Adjust table for 386 */ 2102 2186 lib[CLIBdbllng].Sregsaved = Z(DOUBLEREGS_32); 2103 2187 lib[CLIBlngdbl].Sregsaved = Z(DOUBLEREGS_32); 2104 2188 lib[CLIBdblint].Sregsaved = Z(DOUBLEREGS_32); 2105 2189 lib[CLIBintdbl].Sregsaved = Z(DOUBLEREGS_32); 2106 2190 #if TARGET_WINDOS 2107 2191 lib[CLIBfneg].Sregsaved = Z(FLOATREGS_32); 2108 2192 lib[CLIBdneg].Sregsaved = Z(DOUBLEREGS_32); 2109 2193 lib[CLIBdbluns].Sregsaved = Z(DOUBLEREGS_32); 2110 2194 lib[CLIBunsdbl].Sregsaved = Z(DOUBLEREGS_32); 2111 2195 lib[CLIBdblulng].Sregsaved = Z(DOUBLEREGS_32); 2112 2196 lib[CLIBulngdbl].Sregsaved = Z(DOUBLEREGS_32); 2113 2197 #endif 2114 2198 lib[CLIBdblflt].Sregsaved = Z(DOUBLEREGS_32); 2115 2199 lib[CLIBfltdbl].Sregsaved = Z(DOUBLEREGS_32); 2116 2200 2117 2201 lib[CLIBdblllng].Sregsaved = Z(DOUBLEREGS_32); 2118 2202 lib[CLIBllngdbl].Sregsaved = Z(DOUBLEREGS_32); 2119 2203 lib[CLIBdblullng].Sregsaved = Z(DOUBLEREGS_32); 2120 2204 lib[CLIBullngdbl].Sregsaved = Z(DOUBLEREGS_32); 2121 2205 } 2122 2206 clib_inited++; 2123 2207 } 2124 2208 #undef Z 2125 2209 2126 2210 assert(clib < CLIBMAX); 2127 s = &lib[clib];2211 symbol *s = &lib[clib]; 2128 2212 assert(I32 || !(info[clib].flags & INF32)); 2129 c pop = CNIL;2130 c = getregs((~s->Sregsaved & (mES | mBP | ALLREGS)) & ~keepmask); // mask of regs destroyed2213 code *cpop = CNIL; 2214 code *c = getregs((~s->Sregsaved & (mES | mBP | ALLREGS)) & ~keepmask); // mask of regs destroyed 2131 2215 keepmask &= ~s->Sregsaved; 2132 int npushed = 0; 2216 int npushed = numbitsset(keepmask); 2217 gensaverestore2(keepmask, &c, &cpop); 2218 #if 0 2133 2219 while (keepmask) 2134 2220 { unsigned keepreg; 2135 2221 2136 2222 if (keepmask & (mBP|ALLREGS)) 2137 2223 { keepreg = findreg(keepmask & (mBP|ALLREGS)); 2138 2224 c = gen1(c,0x50 + keepreg); /* PUSH keepreg */ 2139 2225 cpop = 
cat(gen1(CNIL,0x58 + keepreg),cpop); // POP keepreg 2140 2226 keepmask &= ~mask[keepreg]; 2141 2227 npushed++; 2142 2228 } 2143 2229 if (keepmask & mES) 2144 2230 { c = gen1(c,0x06); /* PUSH ES */ 2145 2231 cpop = cat(gen1(CNIL,0x07),cpop); /* POP ES */ 2146 2232 keepmask &= ~mES; 2147 2233 npushed++; 2148 2234 } 2149 2235 } 2236 #endif 2150 2237 2151 2238 c = cat(c, save87regs(info[clib].push87)); 2152 for (i = 0; i < info[clib].push87; i++)2239 for (int i = 0; i < info[clib].push87; i++) 2153 2240 c = cat(c, push87()); 2154 2241 2155 for (i = 0; i < info[clib].pop87; i++)2242 for (int i = 0; i < info[clib].pop87; i++) 2156 2243 pop87(); 2157 2244 2158 2245 if (config.target_cpu >= TARGET_80386 && clib == CLIBlmul && !I32) 2159 2246 { static char lmul[] = { 2160 2247 0x66,0xc1,0xe1,0x10, // shl ECX,16 2161 2248 0x8b,0xcb, // mov CX,BX ;ECX = CX,BX 2162 2249 0x66,0xc1,0xe0,0x10, // shl EAX,16 2163 2250 0x66,0x0f,0xac,0xd0,0x10, // shrd EAX,EDX,16 ;EAX = DX,AX 2164 2251 0x66,0xf7,0xe1, // mul ECX 2165 2252 0x66,0x0f,0xa4,0xc2,0x10, // shld EDX,EAX,16 ;DX,AX = EAX 2166 2253 }; 2167 2254 2168 2255 c = genasm(c,lmul,sizeof(lmul)); 2169 2256 } 2170 2257 else 2171 2258 { makeitextern(s); 2172 2259 int nalign = 0; 2173 2260 if (STACKALIGN == 16) 2174 2261 { // Align the stack (assume no args on stack) 2175 2262 int npush = npushed * REGSIZE + stackpush; 2176 2263 if (npush & (STACKALIGN - 1)) 2177 2264 { nalign = STACKALIGN - (npush & (STACKALIGN - 1)); 2178 2265 c = genc2(c,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign 2266 if (I64) 2267 code_orrex(c, REX_W); 2179 2268 } 2180 2269 } 2181 2270 c = gencs(c,(LARGECODE) ? 
0x9A : 0xE8,0,FLfunc,s); // CALL s 2182 2271 if (nalign) 2183 c = genc2(c,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign 2272 { c = genc2(c,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign 2273 if (I64) 2274 code_orrex(c, REX_W); 2275 } 2184 2276 calledafunc = 1; 2185 2277 2186 if ( !I32 && // bug in Optlink2278 if (I16 && // bug in Optlink for weak references 2187 2279 config.flags3 & CFG3wkfloat && 2188 2280 (info[clib].flags & (INFfloat | INFwkdone)) == INFfloat) 2189 2281 { info[clib].flags |= INFwkdone; 2190 2282 makeitextern(rtlsym[RTLSYM_INTONLY]); 2191 2283 obj_wkext(s,rtlsym[RTLSYM_INTONLY]); 2192 2284 } 2193 }2194 if (!I32)2285 } 2286 if (I16) 2195 2287 stackpush -= info[clib].pop; 2196 retregs = I32 ? info[clib].retregs32 : info[clib].retregs16;2197 return cat(cat(c,cpop),fixresult(e,retregs,pretregs));2288 regm_t retregs = I16 ? info[clib].retregs16 : info[clib].retregs32; 2289 return cat(cat(c,cpop),fixresult(e,retregs,pretregs)); 2198 2290 } 2199 2291 2200 2292 2201 2293 /******************************* 2202 2294 * Generate code sequence for function call. 
2203 2295 */ 2204 2296 2205 2297 code *cdfunc(elem *e,regm_t *pretregs) 2206 2298 { unsigned numpara = 0; 2207 2299 unsigned stackpushsave; 2208 2300 unsigned preg; 2209 2301 regm_t keepmsk; 2210 2302 unsigned numalign = 0; 2211 2303 code *c; 2212 2304 2213 2305 //printf("cdfunc()\n"); elem_print(e); 2214 2306 assert(e); 2215 2307 stackpushsave = stackpush; /* so we can compute # of parameters */ 2216 2308 cgstate.stackclean++; 2217 2309 c = CNIL; 2218 2310 keepmsk = 0; 2219 2311 if (OTbinary(e->Eoper)) // if parameters 2220 2312 { unsigned stackalign = REGSIZE; 2221 elem *ep;2222 2313 elem *en; 2223 2314 regm_t retregs; 2224 tym_t tyf; 2225 2226 if (I32) 2315 2316 if (!I16) 2227 2317 { 2228 ty f = tybasic(e->E1->Ety);2318 tym_t tyf = tybasic(e->E1->Ety); 2229 2319 2230 2320 // First compute numpara, the total pushed on the stack 2231 2321 switch (tyf) 2232 2322 { case TYf16func: 2233 2323 stackalign = 2; 2234 2324 goto Ldefault; 2235 2325 case TYmfunc: 2236 2326 case TYjfunc: 2237 2327 // last parameter goes into register 2328 elem *ep; 2238 2329 for (ep = e->E2; ep->Eoper == OPparam; ep = ep->E2) 2239 2330 { 2240 2331 numpara += paramsize(ep->E1,stackalign); 2241 2332 } 2242 2333 if (tyf == TYjfunc && 2243 2334 // This must match type_jparam() 2244 2335 !(tyjparam(ep->Ety) || 2245 2336 ((tybasic(ep->Ety) == TYstruct || tybasic(ep->Ety) == TYarray) && ep->Enumbytes <= intsize && ep->Enumbytes != 3 && ep->Enumbytes) 2246 2337 ) 2247 2338 ) 2248 2339 { 2249 2340 numpara += paramsize(ep,stackalign); 2250 2341 } 2251 2342 break; 2252 2343 default: 2253 2344 Ldefault: 2254 2345 numpara += paramsize(e->E2,stackalign); 2255 2346 break; 2256 2347 } 2257 2348 assert((numpara & (REGSIZE - 1)) == 0); 2258 2349 assert((stackpush & (REGSIZE - 1)) == 0); 2259 2350 2260 2351 /* Adjust start of the stack so after all args are pushed, 2261 2352 * the stack will be aligned. 
2262 2353 */ 2263 2354 if (STACKALIGN == 16 && (numpara + stackpush) & (STACKALIGN - 1)) 2264 2355 { 2265 2356 numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1)); 2266 2357 c = genc2(NULL,0x81,modregrm(3,5,SP),numalign); // SUB ESP,numalign 2358 if (I64) 2359 code_orrex(c, REX_W); 2267 2360 c = genadjesp(c, numalign); 2268 2361 stackpush += numalign; 2269 2362 stackpushsave += numalign; 2270 2363 } 2271 2364 2272 2365 switch (tyf) 2273 2366 { case TYf16func: 2274 2367 stackalign = 2; 2275 2368 break; 2276 2369 case TYmfunc: // last parameter goes into ECX 2277 2370 preg = CX; 2278 2371 goto L1; 2279 2372 case TYjfunc: // last parameter goes into EAX 2280 2373 preg = AX; 2281 2374 goto L1; 2282 2375 2283 2376 L1: 2377 elem *ep; 2284 2378 for (ep = e->E2; ep->Eoper == OPparam; ep = en) 2285 2379 { 2286 2380 c = cat(c,params(ep->E1,stackalign)); 2287 2381 en = ep->E2; 2288 2382 freenode(ep); 2289 2383 ep = en; 2290 2384 } 2291 2385 if (tyf == TYjfunc && 2292 2386 // This must match type_jparam() 2293 2387 !(tyjparam(ep->Ety) || 2294 2388 ((tybasic(ep->Ety) == TYstruct || tybasic(ep->Ety) == TYarray) && ep->Enumbytes <= intsize && ep->Enumbytes != 3 && ep->Enumbytes) 2295 2389 ) 2296 2390 ) 2297 2391 { 2298 2392 c = cat(c,params(ep,stackalign)); 2299 2393 goto Lret; 2300 2394 } 2301 keepmsk = mask[preg]; 2395 // preg is the register to put the parameter ep in 2396 keepmsk = mask[preg]; // don't change preg when evaluating func address 2302 2397 retregs = keepmsk; 2303 2398 if (ep->Eoper == OPstrthis) 2304 { code *c1; 2305 code *c2; 2306 unsigned np; 2307 2308 c1 = getregs(retregs); 2399 { code *c2; 2400 2401 code *c1 = getregs(retregs); 2309 2402 // LEA preg,np[ESP] 2310 np = stackpush - ep->EV.Vuns; // stack delta to parameter 2311 c2 = genc1(CNIL,0x8D,modregrm(2,preg,4),FLconst,np); 2312 c2->Isib = modregrm(0,4,SP); 2403 unsigned np = stackpush - ep->EV.Vuns; // stack delta to parameter 2404 c2 = genc1(CNIL,0x8D,(modregrm(0,4,SP) << 8) | 
modregrm(2,preg,4),FLconst,np); 2405 if (I64) 2406 code_orrex(c2, REX_W); 2313 2407 c = cat3(c,c1,c2); 2314 2408 } 2315 2409 else 2316 2410 { code *cp = codelem(ep,&retregs,FALSE); 2317 2411 c = cat(c,cp); 2318 2412 } 2319 2413 goto Lret; 2320 2414 } 2321 2415 } 2322 2416 c = cat(c, params(e->E2,stackalign)); // push parameters 2323 2417 } 2324 2418 else 2325 2419 { 2326 2420 /* Adjust start of the stack so 2327 2421 * the stack will be aligned. 2328 2422 */ 2329 2423 if (STACKALIGN == 16 && (stackpush) & (STACKALIGN - 1)) 2330 2424 { 2331 2425 numalign = STACKALIGN - ((stackpush) & (STACKALIGN - 1)); 2332 2426 c = genc2(NULL,0x81,modregrm(3,5,SP),numalign); // SUB ESP,numalign 2427 if (I64) 2428 code_orrex(c, REX_W); 2333 2429 c = genadjesp(c, numalign); 2334 2430 stackpush += numalign; 2335 2431 stackpushsave += numalign; 2336 2432 } 2337 2433 2338 2434 } 2339 2435 Lret: 2340 2436 cgstate.stackclean--; 2341 if ( I32)2437 if (!I16) 2342 2438 { 2343 2439 if (numpara != stackpush - stackpushsave) 2344 2440 printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave); 2345 2441 assert(numpara == stackpush - stackpushsave); 2346 2442 } 2347 2443 else 2348 2444 numpara = stackpush - stackpushsave; 2349 2445 return cat(c,funccall(e,numpara,numalign,pretregs,keepmsk)); 2350 2446 } 2351 2447 2352 2448 /*********************************** 2353 2449 */ 2354 2450 2355 2451 code *cdstrthis(elem *e,regm_t *pretregs) 2356 2452 { 2357 2453 code *c1; 2358 2454 code *c2; 2359 unsigned np;2360 unsigned reg;2361 2455 2362 2456 assert(tysize(e->Ety) == REGSIZE); 2363 reg = findreg(*pretregs & allregs);2457 unsigned reg = findreg(*pretregs & allregs); 2364 2458 c1 = getregs(mask[reg]); 2365 2459 // LEA reg,np[ESP] 2366 np = stackpush - e->EV.Vuns; // stack delta to parameter 2367 c2 = genc1(CNIL,0x8D,modregrm(2,reg,4),FLconst,np); 2368 c2->Isib = modregrm(0,4,SP); 2460 unsigned np = stackpush - e->EV.Vuns; // stack delta to parameter 2461 c2 = 
genc1(CNIL,0x8D,(modregrm(0,4,SP) << 8) | modregxrm(2,reg,4),FLconst,np); 2462 if (I64) 2463 code_orrex(c2, REX_W); 2369 2464 return cat3(c1,c2,fixresult(e,mask[reg],pretregs)); 2370 2465 } 2371 2466 2372 2467 /****************************** 2373 2468 * Call function. All parameters are pushed onto the stack, numpara gives 2374 2469 * the size of them all. 2375 2470 */ 2376 2471 2377 2472 STATIC code * funccall(elem *e,unsigned numpara,unsigned numalign,regm_t *pretregs,regm_t keepmsk) 2378 2473 { 2379 2474 elem *e1; 2380 2475 code *c,*ce,cs; 2381 2476 tym_t tym1; 2382 2477 char farfunc; 2383 2478 regm_t retregs; 2384 2479 symbol *s; 2385 2480 2386 2481 //printf("funccall(e = %p, *pretregs = x%x, numpara = %d, numalign = %d)\n",e,*pretregs,numpara,numalign); 2387 2482 calledafunc = 1; 2388 2483 /* Determine if we need frame for function prolog/epilog */ … … 2408 2503 if (s->Sflags & SFLexit) 2409 2504 c = NULL; 2410 2505 else 2411 2506 c = save87(); // assume 8087 regs are all trashed 2412 2507 if (s->Sflags & SFLexit) 2413 2508 // Function doesn't return, so don't worry about registers 2414 2509 // it may use 2415 2510 c1 = NULL; 2416 2511 else if (!tyfunc(s->ty()) || !(config.flags4 & CFG4optimized)) 2417 2512 // so we can replace func at runtime 2418 2513 c1 = getregs(~fregsaved & (mBP | ALLREGS | mES)); 2419 2514 else 2420 2515 c1 = getregs(~s->Sregsaved & (mBP | ALLREGS | mES)); 2421 2516 if (strcmp(s->Sident,"alloca") == 0) 2422 2517 { 2423 2518 #if 1 2424 2519 s = rtlsym[RTLSYM_ALLOCA]; 2425 2520 makeitextern(s); 2426 2521 c1 = cat(c1,getregs(mCX)); 2427 2522 c1 = genc(c1,0x8D,modregrm(2,CX,BPRM),FLallocatmp,0,0,0); // LEA CX,&localsize[BP] 2523 if (I64) 2524 code_orrex(c1, REX_W); 2428 2525 usedalloca = 2; // new way 2429 2526 #else 2430 2527 usedalloca = 1; // old way 2431 2528 #endif 2432 2529 } 2433 2530 if (sytab[s->Sclass] & SCSS) // if function is on stack (!) 
2434 2531 { 2435 2532 retregs = allregs & ~keepmsk; 2436 2533 s->Sflags &= ~GTregcand; 2437 2534 s->Sflags |= SFLread; 2438 2535 ce = cat(c1,cdrelconst(e1,&retregs)); 2439 2536 if (farfunc) 2440 2537 goto LF1; 2441 2538 else 2442 2539 goto LF2; 2443 2540 } 2444 2541 else 2445 2542 { int fl; 2446 2543 2447 2544 fl = FLfunc; … … 2462 2559 ce = gen1(ce, 0x90); // NOP 2463 2560 code_orflag(ce, CFvolatile); // don't schedule it 2464 2561 } 2465 2562 #endif 2466 2563 } 2467 2564 ce = cat(c1,ce); 2468 2565 } 2469 2566 else 2470 2567 { /* Call function via pointer */ 2471 2568 elem *e11; 2472 2569 tym_t e11ty; 2473 2570 2474 2571 #ifdef DEBUG 2475 2572 if (e1->Eoper != OPind 2476 2573 ) { WRFL((enum FL)el_fl(e1)); WROP(e1->Eoper); } 2477 2574 #endif 2478 2575 c = save87(); // assume 8087 regs are all trashed 2479 2576 assert(e1->Eoper == OPind); 2480 2577 e11 = e1->E1; 2481 2578 e11ty = tybasic(e11->Ety); 2482 assert( I32|| (e11ty == (farfunc ? TYfptr : TYnptr)));2579 assert(!I16 || (e11ty == (farfunc ? 
TYfptr : TYnptr))); 2483 2580 2484 2581 /* if we can't use loadea() */ 2485 2582 if ((EOP(e11) || e11->Eoper == OPconst) && 2486 2583 (e11->Eoper != OPind || e11->Ecount)) 2487 2584 { 2488 2585 unsigned reg; 2489 2586 2490 2587 retregs = allregs & ~keepmsk; 2491 2588 cgstate.stackclean++; 2492 2589 ce = scodelem(e11,&retregs,keepmsk,TRUE); 2493 2590 cgstate.stackclean--; 2494 2591 /* Kill registers destroyed by an arbitrary function call */ 2495 2592 ce = cat(ce,getregs((mBP | ALLREGS | mES) & ~fregsaved)); 2496 2593 if (e11ty == TYfptr) 2497 2594 { unsigned lsreg; 2498 2595 LF1: 2499 2596 reg = findregmsw(retregs); 2500 2597 lsreg = findreglsw(retregs); 2501 2598 floatreg = TRUE; /* use float register */ 2502 2599 reflocal = TRUE; 2503 2600 ce = genc1(ce,0x89, /* MOV floatreg+2,reg */ 2504 2601 modregrm(2,reg,BPRM),FLfltreg,REGSIZE); 2505 2602 genc1(ce,0x89, /* MOV floatreg,lsreg */ 2506 2603 modregrm(2,lsreg,BPRM),FLfltreg,0); 2507 2604 if (tym1 == TYifunc) 2508 2605 gen1(ce,0x9C); // PUSHF 2509 2606 genc1(ce,0xFF, /* CALL [floatreg] */ 2510 2607 modregrm(2,3,BPRM),FLfltreg,0); 2511 2608 } 2512 2609 else 2513 2610 { 2514 2611 LF2: 2515 2612 reg = findreg(retregs); 2516 ce = gen2(ce,0xFF,modregrm (3,2,reg)); /* CALL reg */2613 ce = gen2(ce,0xFF,modregrmx(3,2,reg)); /* CALL reg */ 2517 2614 } 2518 2615 } 2519 2616 else 2520 2617 { 2521 2618 if (tym1 == TYifunc) 2522 2619 c = gen1(c,0x9C); // PUSHF 2523 2620 // CALL [function] 2524 2621 cs.Iflags = 0; 2525 2622 cgstate.stackclean++; 2526 2623 ce = loadea(e11,&cs,0xFF,farfunc ? 3 : 2,0,keepmsk,(ALLREGS|mES|mBP) & ~fregsaved); 2527 2624 cgstate.stackclean--; 2528 2625 freenode(e11); 2529 2626 } 2530 2627 s = NULL; 2531 2628 } 2532 2629 c = cat(c,ce); 2533 2630 freenode(e1); 2534 2631 2535 2632 /* See if we will need the frame pointer. 2536 2633 Calculate it here so we can possibly use BP to fix the stack. 
… … 2678 2775 symbol *s; 2679 2776 int fl; 2680 2777 2681 2778 //printf("params(e = %p, stackalign = %d)\n", e, stackalign); 2682 2779 cp = NULL; 2683 2780 stackchanged = 1; 2684 2781 assert(e); 2685 2782 while (e->Eoper == OPparam) /* if more params */ 2686 2783 { 2687 2784 e2 = e->E2; 2688 2785 cp = cat(cp,params(e->E1,stackalign)); // push them backwards 2689 2786 freenode(e); 2690 2787 e = e2; 2691 2788 } 2692 2789 //printf("params()\n"); elem_print(e); 2693 2790 2694 2791 tym = tybasic(e->Ety); 2695 2792 if (tyfloating(tym)) 2696 2793 obj_fltused(); 2697 2794 2795 int grex = I64 ? REX_W << 16 : 0; 2796 2698 2797 /* sz = number of bytes pushed */ 2699 2798 if (tyscalar(tym)) 2700 2799 szb = size(tym); 2701 2800 else if (tym == TYstruct) 2702 2801 szb = e->Enumbytes; 2703 2802 else 2704 2803 { 2705 2804 #ifdef DEBUG 2706 2805 WRTYxx(tym); 2707 2806 #endif 2708 2807 assert(0); 2709 2808 } 2710 2809 sz = align(stackalign,szb); /* align on word stack boundary */ 2711 2810 assert((sz & (stackalign - 1)) == 0); /* ensure that alignment worked */ 2712 2811 assert((sz & (REGSIZE - 1)) == 0); 2713 2812 2714 2813 c = CNIL; 2715 2814 cs.Iflags = 0; 2716 2815 cs.Irex = 0; 2717 2816 switch (e->Eoper) 2718 2817 { 2719 2818 #if SCPP 2720 2819 case OPstrctor: 2721 2820 { 2722 2821 e1 = e->E1; 2723 2822 c = docommas(&e1); /* skip over any comma expressions */ 2724 2823 2725 c = genc2(c,0x81, modregrm(3,5,SP),sz); /* SUB SP,sizeof(struct) */2824 c = genc2(c,0x81,grex | modregrm(3,5,SP),sz); // SUB SP,sizeof(struct) 2726 2825 stackpush += sz; 2727 2826 genadjesp(c,sz); 2728 2827 2729 2828 // Find OPstrthis and set it to stackpush 2730 2829 exp2_setstrthis(e1,NULL,stackpush,NULL); 2731 2830 2732 2831 retregs = 0; 2733 2832 ce = codelem(e1,&retregs,TRUE); 2734 2833 goto L2; 2735 2834 } 2736 2835 case OPstrthis: 2737 2836 // This is the parameter for the 'this' pointer corresponding to 2738 2837 // OPstrctor. 
We push a pointer to an object that was already 2739 2838 // allocated on the stack by OPstrctor. 2740 2839 { unsigned np; 2741 2840 2742 2841 retregs = allregs; 2743 2842 c = allocreg(&retregs,&reg,TYoffset); 2744 2843 c = genregs(c,0x89,SP,reg); // MOV reg,SP 2844 if (I64) 2845 code_orrex(c, REX_W); 2745 2846 np = stackpush - e->EV.Vuns; // stack delta to parameter 2746 c = genc2(c,0x81, modregrm(3,0,reg),np); // ADD reg,np2847 c = genc2(c,0x81,grex | modregrmx(3,0,reg),np); // ADD reg,np 2747 2848 if (sz > REGSIZE) 2748 2849 { c = gen1(c,0x16); // PUSH SS 2749 2850 stackpush += REGSIZE; 2750 2851 } 2751 c = gen1(c,0x50 + reg); // PUSH reg 2852 c = gen1(c,0x50 + (reg & 7)); // PUSH reg 2853 if (reg & 8) 2854 code_orrex(c, REX_B); 2752 2855 stackpush += REGSIZE; 2753 2856 genadjesp(c,sz); 2754 2857 ce = CNIL; 2755 2858 goto L2; 2756 2859 } 2757 2860 #endif 2758 2861 case OPstrpar: 2759 2862 { code *cc,*c1,*c2,*c3; 2760 2863 unsigned rm; 2761 2864 unsigned seg; // segment override prefix flags 2762 2865 bool doneoff; 2763 2866 unsigned pushsize = REGSIZE; 2764 2867 unsigned op16 = 0; 2765 2868 unsigned npushes; 2766 2869 2767 2870 e1 = e->E1; 2768 2871 if (sz == 0) 2769 2872 { 2770 2873 ce = docommas(&e1); /* skip over any commas */ 2771 2874 goto L2; 2772 2875 } 2773 2876 if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1->Eoper == OPvar) 2774 2877 { freenode(e); 2775 2878 e = e1; 2776 2879 goto L1; 2777 2880 } 2778 2881 cc = docommas(&e1); /* skip over any commas */ 2779 2882 seg = 0; /* assume no seg override */ 2780 2883 retregs = sz ? 
IDXREGS : 0; 2781 2884 doneoff = FALSE; 2782 if ( I32 && sz & 2) /* if odd number of words to push */2885 if (!I16 && sz & 2) // if odd number of words to push 2783 2886 { pushsize = 2; 2784 2887 op16 = 1; 2785 2888 } 2786 else if ( !I32&& config.target_cpu >= TARGET_80386 && (sz & 3) == 0)2889 else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0) 2787 2890 { pushsize = 4; // push DWORDs at a time 2788 2891 op16 = 1; 2789 2892 } 2790 2893 npushes = sz / pushsize; 2791 2894 switch (e1->Eoper) 2792 2895 { case OPind: 2793 2896 if (sz) 2794 2897 { switch (tybasic(e1->E1->Ety)) 2795 2898 { 2796 2899 case TYfptr: 2797 2900 case TYhptr: 2798 2901 seg = CFes; 2799 2902 retregs |= mES; 2800 2903 break; 2801 2904 case TYsptr: 2802 2905 if (config.wflags & WFssneds) 2803 2906 seg = CFss; 2804 2907 break; 2805 2908 case TYcptr: 2806 2909 seg = CFcs; … … 2850 2953 /* Reverse the effect of the previous add */ 2851 2954 if (doneoff) 2852 2955 e1->EV.sp.Voffset -= sz - pushsize; 2853 2956 freenode(e1); 2854 2957 break; 2855 2958 case OPstreq: 2856 2959 //case OPcond: 2857 2960 if (!(config.exe & EX_flat)) 2858 2961 { seg = CFes; 2859 2962 retregs |= mES; 2860 2963 } 2861 2964 c1 = codelem(e1,&retregs,FALSE); 2862 2965 break; 2863 2966 default: 2864 2967 #ifdef DEBUG 2865 2968 elem_print(e1); 2866 2969 #endif 2867 2970 assert(0); 2868 2971 } 2869 2972 reg = findreglsw(retregs); 2870 rm = I 32 ? regtorm32[reg] : regtorm[reg];2973 rm = I16 ? 
regtorm[reg] : regtorm32[reg]; 2871 2974 if (op16) 2872 2975 seg |= CFopsize; // operand size 2873 2976 if (npushes <= 4) 2874 2977 { 2875 2978 assert(!doneoff); 2876 2979 for (c2 = CNIL; npushes > 1; npushes--) 2877 { c2 = genc1(c2,0xFF,modregrm (2,6,rm),FLconst,pushsize * (npushes - 1)); // PUSH [reg]2980 { c2 = genc1(c2,0xFF,modregrmx(2,6,rm),FLconst,pushsize * (npushes - 1)); // PUSH [reg] 2878 2981 code_orflag(c2,seg); 2879 2982 genadjesp(c2,pushsize); 2880 2983 } 2881 c3 = gen2(CNIL,0xFF,modregrm (0,6,rm));// PUSH [reg]2984 c3 = gen2(CNIL,0xFF,modregrmx(0,6,rm)); // PUSH [reg] 2882 2985 c3->Iflags |= seg; 2883 2986 genadjesp(c3,pushsize); 2884 2987 ce = cat4(cc,c1,c2,c3); 2885 2988 } 2886 2989 else if (sz) 2887 2990 { int size; 2888 2991 2889 2992 c2 = getregs_imm(mCX | retregs); 2890 2993 /* MOV CX,sz/2 */ 2891 2994 c2 = movregconst(c2,CX,npushes,0); 2892 2995 if (!doneoff) 2893 2996 { /* This disgusting thing should be done when */ 2894 2997 /* reg is loaded. Too lazy to fix it now. */ 2895 2998 /* ADD reg,sz-2 */ 2896 c2 = genc2(c2,0x81, modregrm(3,0,reg),sz-pushsize);2999 c2 = genc2(c2,0x81,grex | modregrmx(3,0,reg),sz-pushsize); 2897 3000 } 2898 c3 = gen2(CNIL,0xFF,modregrm (0,6,rm)); // PUSH [reg]3001 c3 = gen2(CNIL,0xFF,modregrmx(0,6,rm)); // PUSH [reg] 2899 3002 c3->Iflags |= seg | CFtarg2; 2900 genc2(c3,0x81, modregrm(3,5,reg),pushsize); // SUB reg,23003 genc2(c3,0x81,grex | modregrmx(3,5,reg),pushsize); // SUB reg,2 2901 3004 size = ((seg & CFSEG) ? 
-8 : -7) - op16; 2902 3005 if (code_next(c3)->Iop != 0x81) 2903 3006 size++; 2904 3007 //genc2(c3,0xE2,0,size); // LOOP .-7 or .-8 2905 3008 genjmp(c3,0xE2,FLcode,(block *)c3); // LOOP c3 2906 3009 regimmed_set(CX,0); 2907 3010 genadjesp(c3,sz); 2908 3011 ce = cat4(cc,c1,c2,c3); 2909 3012 } 2910 3013 else 2911 3014 ce = cat(cc,c1); 2912 3015 stackpush += sz; 2913 3016 goto L2; 2914 3017 } 2915 3018 case OPind: 2916 3019 if (!e->Ecount) /* if *e1 */ 2917 3020 { if (sz <= REGSIZE) 2918 3021 { // Watch out for single byte quantities being up 2919 3022 // against the end of a segment or in memory-mapped I/O 2920 3023 if (!(config.exe & EX_flat) && szb == 1) … … 2991 3094 if (tysize[tym] == tysize[TYfptr] && 2992 3095 (fl = s->Sfl) != FLfardata && 2993 3096 /* not a function that CS might not be the segment of */ 2994 3097 (!((fl == FLfunc || s->ty() & mTYcs) && 2995 3098 (s->Sclass == SCcomdat || s->Sclass == SCextern || s->Sclass == SCinline || config.wflags & WFthunk)) || 2996 3099 (fl == FLfunc && config.exe == EX_DOSX) 2997 3100 ) 2998 3101 ) 2999 3102 { 3000 3103 stackpush += sz; 3001 3104 c = gen1(c,0x06 + /* PUSH SEGREG */ 3002 3105 (((fl == FLfunc || s->ty() & mTYcs) ? 
1 : segfl[fl]) << 3)); 3003 3106 c = genadjesp(c,REGSIZE); 3004 3107 3005 3108 if (config.target_cpu >= TARGET_80286 && !e->Ecount) 3006 3109 { ce = getoffset(e,STACK); 3007 3110 goto L2; 3008 3111 } 3009 3112 else 3010 3113 { c = cat(c,offsetinreg(e,&retregs)); 3011 c = gen1(c,0x50+findreg(retregs)); /* PUSH reg */ 3114 unsigned reg = findreg(retregs); 3115 c = genpush(c,reg); // PUSH reg 3012 3116 genadjesp(c,REGSIZE); 3013 3117 } 3014 3118 goto ret; 3015 3119 } 3016 3120 if (config.target_cpu >= TARGET_80286 && !e->Ecount) 3017 3121 { 3018 3122 stackpush += sz; 3019 3123 if (tysize[tym] == tysize[TYfptr]) 3020 { code *c1; 3021 3124 { 3022 3125 /* PUSH SEG e */ 3023 c 1 = gencs(CNIL,0x68,0,FLextern,s);3126 code *c1 = gencs(CNIL,0x68,0,FLextern,s); 3024 3127 c1->Iflags = CFseg; 3025 3128 genadjesp(c1,REGSIZE); 3026 3129 c = cat(c,c1); 3027 3130 } 3028 3131 ce = getoffset(e,STACK); 3029 3132 goto L2; 3030 3133 } 3031 3134 #endif 3032 3135 break; /* else must evaluate expression */ 3033 3136 case OPvar: 3034 3137 L1: 3035 3138 if (0 && I32 && sz == 2) 3036 3139 { /* 32 bit code, but pushing 16 bit values anyway */ 3037 3140 ce = loadea(e,&cs,0xFF,6,0,0,0); /* PUSH EA */ 3038 3141 // BUG: 0x66 fails with scheduler 3039 3142 ce = cat(gen1(CNIL,0x66),ce); /* 16 bit override */ 3040 3143 stackpush += sz; 3041 3144 genadjesp(ce,sz); 3042 3145 } 3043 3146 else if (config.flags4 & CFG4speed && 3044 3147 (config.target_cpu >= TARGET_80486 && 3045 3148 config.target_cpu <= TARGET_PentiumMMX) && 3046 3149 sz <= 2 * REGSIZE && 3047 3150 !tyfloating(tym)) 3048 3151 { // Avoid PUSH MEM on the Pentium when optimizing for speed 3049 3152 break; 3050 3153 } 3051 3154 else 3052 3155 { int regsize = REGSIZE; 3053 3156 unsigned flag = 0; 3054 3157 3055 if ( !I32&& config.target_cpu >= TARGET_80386 && sz > 2 &&3158 if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 && 3056 3159 !e->Ecount) 3057 3160 { regsize = 4; 3058 3161 flag |= CFopsize; 3059 3162 } 3060 3163 ce = 
loadea(e,&cs,0xFF,6,sz - regsize,RMload,0); // PUSH EA+sz-2 3061 3164 code_orflag(ce,flag); 3062 3165 ce = genadjesp(ce,REGSIZE); 3063 3166 stackpush += sz; 3064 3167 while ((targ_int)(sz -= regsize) > 0) 3065 3168 { ce = cat(ce,loadea(e,&cs,0xFF,6,sz - regsize,RMload,0)); 3066 3169 code_orflag(ce,flag); 3067 3170 ce = genadjesp(ce,REGSIZE); 3068 3171 } 3069 3172 } 3070 3173 L2: 3071 3174 freenode(e); 3072 3175 c = cat(c,ce); 3073 3176 goto ret; 3074 3177 case OPconst: 3075 3178 { targ_int *pi; … … 3091 3194 ce = genadjesp(NULL,sz); 3092 3195 for (i = 2; i >= 0; i--) 3093 3196 { 3094 3197 if (reghasvalue(allregs, value, &reg)) 3095 3198 ce = gen1(ce,0x50 + reg); // PUSH reg 3096 3199 else 3097 3200 ce = genc2(ce,0x68,0,value); // PUSH value 3098 3201 value = ((unsigned *)&e->EV.Vldouble)[i - 1]; 3099 3202 } 3100 3203 goto L2; 3101 3204 } 3102 3205 3103 3206 assert(sz <= LNGDBLSIZE); 3104 3207 i = sz; 3105 3208 if (I32 && i == 2) 3106 3209 flag = CFopsize; 3107 3210 3108 3211 if (config.target_cpu >= TARGET_80286) 3109 3212 // && (e->Ecount == 0 || e->Ecount != e->Ecomsub)) 3110 3213 { pushi = 1; 3111 if ( !I32&& config.target_cpu >= TARGET_80386 && i >= 4)3214 if (I16 && config.target_cpu >= TARGET_80386 && i >= 4) 3112 3215 { regsize = 4; 3113 3216 flag = CFopsize; 3114 3217 } 3115 3218 } 3116 3219 else if (i == REGSIZE) 3117 3220 break; 3118 3221 3119 3222 stackpush += sz; 3120 3223 ce = genadjesp(NULL,sz); 3121 3224 pi = (targ_long *) &e->EV.Vdouble; 3122 3225 ps = (targ_short *) pi; 3123 3226 i /= regsize; 3124 3227 do 3125 3228 { code *cp; 3126 3229 3127 3230 if (i) /* be careful not to go negative */ 3128 3231 i--; 3129 3232 value = (regsize == 4) ? pi[i] : ps[i]; 3130 3233 if (pushi) 3131 3234 { 3132 3235 if (regsize == REGSIZE && reghasvalue(allregs,value,&reg)) 3133 3236 goto Preg; 3134 3237 ce = genc2(ce,(szb == 1) ? 
0x6A : 0x68,0,value); // PUSH value 3135 3238 } 3136 3239 else 3137 3240 { 3138 3241 ce = regwithvalue(ce,allregs,value,&reg,0); 3139 3242 Preg: 3140 ce = gen 1(ce,0x50 + reg); /* PUSH reg */3243 ce = genpush(ce,reg); // PUSH reg 3141 3244 } 3142 3245 code_orflag(ce,flag); /* operand size */ 3143 3246 } while (i); 3144 3247 goto L2; 3145 3248 } 3146 3249 default: 3147 3250 break; 3148 3251 } 3149 3252 retregs = tybyte(tym) ? BYTEREGS : allregs; 3150 3253 if (tyfloating(tym)) 3151 3254 { if (config.inline8087) 3152 3255 { code *c1,*c2; 3153 3256 unsigned op; 3154 3257 unsigned r; 3155 3258 3156 3259 retregs = tycomplex(tym) ? mST01 : mST0; 3157 3260 c = cat(c,codelem(e,&retregs,FALSE)); 3158 3261 stackpush += sz; 3159 3262 c = genadjesp(c,sz); 3160 c = genc2(c,0x81, modregrm(3,5,SP),sz); /* SUB SP,sz */3263 c = genc2(c,0x81,grex | modregrm(3,5,SP),sz); // SUB SP,sz 3161 3264 switch (tym) 3162 3265 { 3163 3266 case TYfloat: 3164 3267 case TYifloat: 3165 3268 case TYcfloat: 3166 3269 op = 0xD9; 3167 3270 r = 3; 3168 3271 break; 3169 3272 3170 3273 case TYdouble: 3171 3274 case TYidouble: 3172 3275 case TYdouble_alias: 3173 3276 case TYcdouble: 3174 3277 op = 0xDD; 3175 3278 r = 3; 3176 3279 break; 3177 3280 3178 3281 case TYldouble: 3179 3282 case TYildouble: 3180 3283 case TYcldouble: 3181 3284 op = 0xDB; 3182 3285 r = 7; 3183 3286 break; 3184 3287 3185 3288 default: 3186 3289 assert(0); 3187 3290 } 3188 if ( I32)3291 if (!I16) 3189 3292 { 3190 3293 c1 = NULL; 3191 3294 c2 = NULL; 3192 3295 if (tycomplex(tym)) 3193 3296 { 3194 3297 // FSTP sz/2[ESP] 3195 c2 = genc1(CNIL,op,modregrm(2,r,4),FLconst,sz/2); 3196 c2->Isib = modregrm(0,4,SP); 3298 c2 = genc1(CNIL,op,(modregrm(0,4,SP) << 8) | modregxrm(2,r,4),FLconst,sz/2); 3197 3299 pop87(); 3198 3300 } 3199 3301 pop87(); 3200 3302 c2 = gen2sib(c2,op,modregrm(0,r,4),modregrm(0,4,SP)); // FSTP [ESP] 3201 3303 } 3202 3304 else 3203 3305 { 3204 3306 retregs = IDXREGS; /* get an index reg */ 3205 3307 c1 = 
allocreg(&retregs,&reg,TYoffset); 3206 3308 c1 = genregs(c1,0x89,SP,reg); /* MOV reg,SP */ 3207 3309 pop87(); 3208 3310 c2 = gen2(CNIL,op,modregrm(0,r,regtorm[reg])); // FSTP [reg] 3209 3311 } 3210 3312 if (LARGEDATA) 3211 3313 c2->Iflags |= CFss; /* want to store into stack */ 3212 3314 genfwait(c2); // FWAIT 3213 3315 c = cat3(c,c1,c2); 3214 3316 goto ret; 3215 3317 } 3216 3318 else if (!I32 && (tym == TYdouble || tym == TYdouble_alias)) 3217 3319 retregs = mSTACK; 3218 3320 } 3219 3321 #if LONGLONG 3220 3322 else if (!I32 && sz == 8) // if long long 3221 3323 retregs = mSTACK; 3222 3324 #endif 3223 3325 c = cat(c,scodelem(e,&retregs,0,TRUE)); 3224 3326 if (retregs != mSTACK) /* if stackpush not already inc'd */ 3225 3327 stackpush += sz; 3226 3328 if (sz <= REGSIZE) 3227 3329 { 3228 c = gen 1(c,0x50+findreg(retregs)); /* PUSH reg */3330 c = genpush(c,findreg(retregs)); // PUSH reg 3229 3331 genadjesp(c,REGSIZE); 3230 3332 } 3231 3333 else if (sz == REGSIZE * 2) 3232 { c = gen 1(c,0x50+findregmsw(retregs)); /* PUSH msreg */3233 gen 1(c,0x50+findreglsw(retregs)); /* PUSH lsreg */3334 { c = genpush(c,findregmsw(retregs)); // PUSH msreg 3335 genpush(c,findreglsw(retregs)); // PUSH lsreg 3234 3336 genadjesp(c,sz); 3235 3337 } 3236 3338 ret: 3237 3339 return cat(cp,c); 3238 3340 } 3239 3341 3240 3342 3241 3343 /******************************* 3242 3344 * Get offset portion of e, and store it in an index 3243 3345 * register. Return mask of index register in *pretregs. 
3244 3346 */ 3245 3347 3246 3348 code *offsetinreg( elem *e, regm_t *pretregs) 3247 3349 { regm_t retregs; 3248 3350 code *c; 3249 3351 unsigned reg; 3250 3352 3251 3353 retregs = mLSW; /* want only offset */ 3252 3354 if (e->Ecount && e->Ecount != e->Ecomsub) 3253 3355 { unsigned i; … … 3303 3405 if (config.inline8087) 3304 3406 { if (*pretregs & mST0) 3305 3407 return load87(e,0,pretregs,NULL,-1); 3306 3408 else if (tycomplex(tym)) 3307 3409 return cload87(e, pretregs); 3308 3410 } 3309 3411 } 3310 3412 sz = tysize[tym]; 3311 3413 cs.Iflags = 0; 3312 3414 cs.Irex = 0; 3313 3415 if (*pretregs == mPSW) 3314 3416 { 3315 3417 regm = allregs; 3316 3418 if (e->Eoper == OPconst) 3317 3419 { /* TRUE: OR SP,SP (SP is never 0) */ 3318 3420 /* FALSE: CMP SP,SP (always equal) */ 3319 3421 c = genregs(CNIL,(boolres(e)) ? 0x09 : 0x39,SP,SP); 3320 3422 } 3321 3423 else if (sz <= REGSIZE) 3322 3424 { 3323 if ( I32&& (tym == TYfloat || tym == TYifloat))3425 if (!I16 && (tym == TYfloat || tym == TYifloat)) 3324 3426 { c = allocreg(&regm,&reg,TYoffset); /* get a register */ 3325 3427 ce = loadea(e,&cs,0x8B,reg,0,0,0); // MOV reg,data 3326 3428 c = cat(c,ce); 3327 ce = gen2(CNIL,0xD1,modregrm (3,4,reg)); /* SHL reg,1 */3429 ce = gen2(CNIL,0xD1,modregrmx(3,4,reg)); /* SHL reg,1 */ 3328 3430 c = cat(c,ce); 3329 3431 } 3330 3432 else 3331 3433 { cs.IFL2 = FLconst; 3332 3434 cs.IEV2.Vint = 0; 3333 3435 op = (sz == 1) ? 
0x80 : 0x81; 3334 3436 c = loadea(e,&cs,op,7,0,0,0); /* CMP EA,0 */ 3335 3437 3336 3438 // Convert to TEST instruction if EA is a register 3337 3439 // (to avoid register contention on Pentium) 3338 3440 if ((c->Iop & 0xFE) == 0x38 && 3339 3441 (c->Irm & modregrm(3,0,0)) == modregrm(3,0,0) 3340 3442 ) 3341 3443 { c->Iop = (c->Iop & 1) | 0x84; 3342 c->Irm = (c->Irm & modregrm(3,0,7)) | modregrm(0,c->Irm & 7,0); 3444 code_newreg(c, c->Irm & 7); 3445 if (c->Irex & REX_B) 3446 c->Irex = (c->Irex & ~REX_B) | REX_R; 3343 3447 } 3344 3448 } 3345 3449 } 3346 3450 else if (sz < 8) 3347 3451 { 3348 3452 c = allocreg(&regm,&reg,TYoffset); /* get a register */ 3349 3453 if (I32) // it's a 48 bit pointer 3350 3454 ce = loadea(e,&cs,0x0FB7,reg,REGSIZE,0,0); /* MOVZX reg,data+4 */ 3351 3455 else 3352 3456 { ce = loadea(e,&cs,0x8B,reg,REGSIZE,0,0); /* MOV reg,data+2 */ 3353 3457 if (tym == TYfloat || tym == TYifloat) // dump sign bit 3354 3458 gen2(ce,0xD1,modregrm(3,4,reg)); /* SHL reg,1 */ 3355 3459 } 3356 3460 c = cat(c,ce); 3357 3461 ce = loadea(e,&cs,0x0B,reg,0,regm,0); /* OR reg,data */ 3358 3462 c = cat(c,ce); 3359 3463 } 3360 3464 else if (sz == 8) 3361 3465 { code *c1; 3362 3466 int i; … … 3383 3487 return c; 3384 3488 } 3385 3489 /* not for flags only */ 3386 3490 flags = *pretregs & mPSW; /* save original */ 3387 3491 forregs = *pretregs & (mBP | ALLREGS | mES); 3388 3492 if (*pretregs & mSTACK) 3389 3493 forregs |= DOUBLEREGS; 3390 3494 if (e->Eoper == OPconst) 3391 3495 { regm_t save; 3392 3496 3393 3497 if (sz == REGSIZE && reghasvalue(forregs,e->EV.Vint,&reg)) 3394 3498 forregs = mask[reg]; 3395 3499 3396 3500 save = regcon.immed.mval; 3397 3501 c = allocreg(&forregs,&reg,tym); /* allocate registers */ 3398 3502 regcon.immed.mval = save; // KLUDGE! 
3399 3503 if (sz <= REGSIZE) 3400 3504 { 3401 3505 if (sz == 1) 3402 3506 flags |= 1; 3403 else if ( I32&& sz == SHORTSIZE &&3507 else if (!I16 && sz == SHORTSIZE && 3404 3508 !(mask[reg] & regcon.mvar) && 3405 3509 !(config.flags4 & CFG4speed) 3406 3510 ) 3407 3511 flags |= 2; 3512 if (sz == 8) 3513 flags |= 64; 3408 3514 ce = movregconst(CNIL,reg,e->EV.Vint,flags); 3409 3515 flags = 0; // flags are already set 3410 3516 } 3411 3517 else if (sz < 8) // far pointers, longs for 16 bit targets 3412 3518 { 3413 3519 targ_int msw,lsw; 3414 3520 regm_t mswflags; 3415 3521 3416 3522 msw = I32 ? e->EV.Vfp.Vseg 3417 3523 : (e->EV.Vulong >> 16); 3418 3524 lsw = e->EV.Vfp.Voff; 3419 3525 mswflags = 0; 3420 3526 if (forregs & mES) 3421 3527 { 3422 3528 ce = movregconst(CNIL,reg,msw,0); // MOV reg,segment 3423 3529 genregs(ce,0x8E,0,reg); // MOV ES,reg 3424 3530 msw = lsw; // MOV reg,offset 3425 3531 } 3426 3532 else 3427 3533 { … … 3467 3573 { 3468 3574 reg = e->EV.sp.Vsym->Spreg; 3469 3575 forregs = mask[reg]; 3470 3576 mfuncreg &= ~forregs; 3471 3577 regcon.used |= forregs; 3472 3578 return fixresult(e,forregs,pretregs); 3473 3579 } 3474 3580 3475 3581 c = allocreg(&forregs,&reg,tym); /* allocate registers */ 3476 3582 3477 3583 if (sz == 1) 3478 3584 { regm_t nregm; 3479 3585 3480 3586 #ifdef DEBUG 3481 3587 if (!(forregs & BYTEREGS)) 3482 3588 { elem_print(e); 3483 3589 printf("forregs = x%x\n",forregs); 3484 3590 } 3485 3591 #endif 3486 3592 assert(forregs & BYTEREGS); 3487 if ( I32)3593 if (!I16) 3488 3594 c = cat(c,loadea(e,&cs,0x8A,reg,0,0,0)); // MOV regL,data 3489 3595 else 3490 3596 { nregm = tyuns(tym) ? 
BYTEREGS : mAX; 3491 3597 if (*pretregs & nregm) 3492 3598 nreg = reg; /* already allocated */ 3493 3599 else 3494 3600 c = cat(c,allocreg(&nregm,&nreg,tym)); 3495 3601 ce = loadea(e,&cs,0x8A,nreg,0,0,0); /* MOV nregL,data */ 3496 3602 c = cat(c,ce); 3497 3603 if (reg != nreg) 3498 3604 { genmovreg(c,reg,nreg); /* MOV reg,nreg */ 3499 3605 cssave(e,mask[nreg],FALSE); 3500 3606 } 3501 3607 } 3502 3608 } 3503 3609 else if (sz <= REGSIZE) 3504 3610 { 3505 3611 ce = loadea(e,&cs,0x8B,reg,0,RMload,0); // MOV reg,data 3506 3612 c = cat(c,ce); 3507 3613 } trunk/src/backend/cod2.c
r552 r577 19 19 #include "oper.h" 20 20 #include "el.h" 21 21 #include "code.h" 22 22 #include "global.h" 23 23 #include "type.h" 24 24 #if SCPP 25 25 #include "exh.h" 26 26 #endif 27 27 28 28 static char __file__[] = __FILE__; /* for tassert.h */ 29 29 #include "tassert.h" 30 30 31 31 int cdcmp_flag; 32 32 extern signed char regtorm[8]; 33 33 34 34 /******************************** 35 35 * Return mask of index registers used by addressing mode. 36 36 * Index is rm of modregrm field. 37 37 */ 38 38 39 regm_t idxregm( unsigned rm,unsigned sib)39 regm_t idxregm(code *c) 40 40 { 41 41 static const unsigned char idxsib[8] = { mAX,mCX,mDX,mBX,0,mBP,mSI,mDI }; 42 42 static const unsigned char idxrm[8] = {mBX|mSI,mBX|mDI,mSI,mDI,mSI,mDI,0,mBX}; 43 regm_t idxm; 44 45 idxm = 0;43 44 unsigned rm = c->Irm; 45 regm_t idxm = 0; 46 46 if ((rm & 0xC0) != 0xC0) /* if register is not the destination */ 47 47 { 48 if (I32) 48 if (I16) 49 idxm = idxrm[rm & 7]; 50 else 49 51 { 50 52 if ((rm & 7) == 4) /* if sib byte */ 51 53 { 52 idxm = idxsib[(sib >> 3) & 7]; /* scaled index reg */ 54 unsigned sib = c->Isib; 55 unsigned idxreg = (sib >> 3) & 7; 56 if (c->Irex & REX_X) 57 { idxreg |= 8; 58 idxm = mask[idxreg]; // scaled index reg 59 } 60 else 61 idxm = idxsib[idxreg]; // scaled index reg 53 62 if ((sib & 7) == 5 && (rm & 0xC0) == 0) 54 63 ; 55 64 else 56 idxm |= idxsib[sib & 7]; 65 { unsigned base = sib & 7; 66 if (c->Irex & REX_B) 67 idxm |= mask[base | 8]; 68 else 69 idxm |= idxsib[base]; 70 } 57 71 } 58 72 else 59 idxm |= idxsib[rm & 7]; 60 } 61 else 62 idxm = idxrm[rm & 7]; 73 { unsigned base = rm & 7; 74 if (c->Irex & REX_B) 75 idxm |= mask[base | 8]; 76 else 77 idxm |= idxsib[base]; 78 } 79 } 63 80 } 64 81 return idxm; 65 82 } 66 83 67 84 #if TARGET_WINDOS 68 85 /*************************** 69 86 * Gen code for call to floating point routine. 
70 87 */ 71 88 72 89 code *opdouble(elem *e,regm_t *pretregs,unsigned clib) 73 90 { 74 91 regm_t retregs1,retregs2; 75 92 code *cl, *cr, *c; 76 93 77 94 if (config.inline8087) 78 95 return orth87(e,pretregs); 79 96 80 97 if (tybasic(e->E1->Ety) == TYfloat) 81 98 { 82 99 clib += CLIBfadd - CLIBdadd; /* convert to float operation */ … … 95 113 } 96 114 } 97 115 cl = codelem(e->E1, &retregs1,FALSE); 98 116 if (retregs1 & mSTACK) 99 117 cgstate.stackclean++; 100 118 cr = scodelem(e->E2, &retregs2, retregs1 & ~mSTACK, FALSE); 101 119 if (retregs1 & mSTACK) 102 120 cgstate.stackclean--; 103 121 c = callclib(e, clib, pretregs, 0); 104 122 return cat3(cl, cr, c); 105 123 } 106 124 #endif 107 125 108 126 109 127 /***************************** 110 128 * Handle operators which are more or less orthogonal 111 129 * ( + - & | ^ ) 112 130 */ 113 131 114 132 code *cdorth(elem *e,regm_t *pretregs) 115 { tym_t ty ,ty1,ty2;133 { tym_t ty1; 116 134 regm_t retregs,rretregs,posregs; 117 unsigned reg,rreg,op1,op2,mode ,test,byte;135 unsigned reg,rreg,op1,op2,mode; 118 136 int rval; 119 code *c,*cg,*cl ,*cr,cs;120 targ_ int i;137 code *c,*cg,*cl; 138 targ_size_t i; 121 139 elem *e1,*e2; 122 140 int numwords; /* # of words to be operated on */ 123 unsigned char word; /* if word operands */124 int e2oper;125 unsigned sz;126 141 static int nest; 127 142 128 143 //printf("cdorth(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); 129 144 e1 = e->E1; 130 145 e2 = e->E2; 131 146 if (*pretregs == 0) /* if don't want result */ 132 147 { c = codelem(e1,pretregs,FALSE); /* eval left leaf */ 133 148 *pretregs = 0; /* in case they got set */ 134 149 return cat(c,codelem(e2,pretregs,FALSE)); 135 150 } 136 151 137 152 ty1 = tybasic(e1->Ety); 138 153 if (tyfloating(ty1)) 139 154 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 140 155 return orth87(e,pretregs); 141 156 #else 142 157 return opdouble(e,pretregs,(e->Eoper == OPadd) ? 
CLIBdadd 143 158 : CLIBdsub); 144 159 #endif 145 ty2 = tybasic(e2->Ety); 146 e2oper = e2->Eoper; 147 ty = tybasic(e->Ety); 148 sz = tysize[ty]; 149 byte = (sz == 1); 150 word = (I32 && sz == SHORTSIZE) ? CFopsize : 0; 160 tym_t ty2 = tybasic(e2->Ety); 161 int e2oper = e2->Eoper; 162 tym_t ty = tybasic(e->Ety); 163 unsigned sz = tysize[ty]; 164 unsigned byte = (sz == 1); 165 unsigned char word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0; 166 unsigned test = FALSE; // assume we destroyed lvalue 167 code cs; 151 168 cs.Iflags = 0; 152 169 cs.Irex = 0; 153 test = FALSE; /* assume we destroyed lvalue */ 154 cr = CNIL; /* initialize */ 170 code *cr = CNIL; 155 171 156 172 switch (e->Eoper) 157 173 { case OPadd: mode = 0; 158 174 op1 = 0x03; op2 = 0x13; break; /* ADD, ADC */ 159 175 case OPmin: mode = 5; 160 176 op1 = 0x2B; op2 = 0x1B; break; /* SUB, SBB */ 161 177 case OPor: mode = 1; 162 178 op1 = 0x0B; op2 = 0x0B; break; /* OR , OR */ 163 179 case OPxor: mode = 6; 164 180 op1 = 0x33; op2 = 0x33; break; /* XOR, XOR */ 165 181 case OPand: mode = 4; 166 182 op1 = 0x23; op2 = 0x23; /* AND, AND */ 167 183 if (tyreg(ty1) && 168 184 *pretregs == mPSW) /* if flags only */ 169 185 { test = TRUE; 170 186 op1 = 0x85; /* TEST */ 171 187 mode = 0; 172 188 } 173 189 break; 174 190 default: 175 191 assert(0); 176 192 } 177 193 op1 ^= byte; /* if byte operation */ 178 194 179 195 /* Compute number of words to operate on. */ 180 196 numwords = 1; 181 if ( I32)197 if (!I16) 182 198 { /* Cannot operate on longs and then do a 'paint' to a far */ 183 199 /* pointer, because far pointers are 48 bits and longs are 32. */ 184 200 /* Therefore, numwords can never be 2. */ 185 201 assert(!(tyfv(ty1) && tyfv(ty2))); 186 202 if (sz == 2 * REGSIZE) 187 203 { 188 204 numwords++; 189 205 } 190 206 } 191 207 else 192 208 { /* If ty is a TYfptr, but both operands are long, treat the */ 193 209 /* operation as a long. 
*/ 194 210 if ((tylong(ty1) || ty1 == TYhptr) && 195 211 (tylong(ty2) || ty2 == TYhptr)) 196 212 numwords++; 197 213 } 198 214 199 215 // Special cases where only flags are set 200 216 if (test && tysize[ty1] <= REGSIZE && 201 217 (e1->Eoper == OPvar || (e1->Eoper == OPind && !e1->Ecount))) 202 218 { 203 219 // Handle the case of (var & const) 204 220 if (e2->Eoper == OPconst) 205 { targ_int value; 206 221 { 207 222 c = getlvalue(&cs,e1,0); 208 value = e2->EV.Vint;223 targ_size_t value = e2->EV.Vpointer; 209 224 if (sz == 2) 210 225 value &= 0xFFFF; 226 else if (sz == 4) 227 value &= 0xFFFFFFFF; 211 228 if (reghasvalue(byte ? BYTEREGS : ALLREGS,value,&reg)) 212 229 goto L11; 230 if (sz == 8) 231 { 232 assert(value == (int)value); // sign extend imm32 233 } 213 234 op1 = 0xF7; 214 235 cs.IEV2.Vint = value; 215 236 cs.IFL2 = FLconst; 216 237 goto L10; 217 238 } 218 239 219 240 // Handle (exp & reg) 220 241 if (isregvar(e2,&retregs,&reg)) 221 242 { 222 243 c = getlvalue(&cs,e1,0); 223 244 L11: 224 c s.Irm |= modregrm(0,reg,0);245 code_newreg(&cs, reg); 225 246 L10: 226 247 cs.Iop = op1 ^ byte; 227 248 cs.Iflags |= word | CFpsw; 228 249 freenode(e1); 229 250 freenode(e2); 230 251 return gen(c,&cs); 231 252 } 232 253 } 233 254 234 255 // Look for possible uses of LEA 235 256 if (e->Eoper == OPadd && 236 257 !(*pretregs & mPSW) && /* flags aren't set by LEA */ 237 258 !nest && // could cause infinite recursion if e->Ecount 238 sz == REGSIZE) // far pointers aren't handled 239 { int e1oper; 240 259 (sz == REGSIZE || (I64 && sz == 4))) // far pointers aren't handled 260 { 241 261 // Handle the case of (e + &var) 242 e1oper = e1->Eoper;262 int e1oper = e1->Eoper; 243 263 if ((e2oper == OPrelconst && (config.target_cpu >= TARGET_Pentium || (!e2->Ecount && stackfl[el_fl(e2)]))) 244 264 || // LEA costs too much for simple EAs on older CPUs 245 265 (e2oper == OPconst && (e1->Eoper == OPcall || e1->Eoper == OPcallns) && !(*pretregs & mAX)) || 246 ( I32&& (isscaledindex(e1) || 
isscaledindex(e2))) ||247 ( I32&& e1oper == OPvar && e1->EV.sp.Vsym->Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2->EV.sp.Vsym->Sfl == FLreg))) ||266 (!I16 && (isscaledindex(e1) || isscaledindex(e2))) || 267 (!I16 && e1oper == OPvar && e1->EV.sp.Vsym->Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2->EV.sp.Vsym->Sfl == FLreg))) || 248 268 (e2oper == OPconst && e1oper == OPeq && e1->E1->Eoper == OPvar) || 249 ( I32&& e2oper == OPrelconst && !e1->Ecount &&269 (!I16 && e2oper == OPrelconst && !e1->Ecount && 250 270 (e1oper == OPmul || e1oper == OPshl) && 251 271 e1->E2->Eoper == OPconst && 252 272 ssindex(e1oper,e1->E2->EV.Vuns) 253 273 ) || 254 ( I32&& e1->Ecount)274 (!I16 && e1->Ecount) 255 275 ) 256 { int inc; 257 258 inc = e->Ecount != 0; 276 { 277 int inc = e->Ecount != 0; 259 278 nest += inc; 260 279 c = getlvalue(&cs,e,0); 261 280 nest -= inc; 281 unsigned reg; 262 282 c = cat(c,allocreg(pretregs,&reg,ty)); 263 283 cs.Iop = 0x8D; 264 c s.Irm |= modregrm(0,reg,0);284 code_newreg(&cs, reg); 265 285 return gen(c,&cs); /* LEA reg,EA */ 266 286 } 267 287 268 288 // Handle the case of ((e + c) + e2) 269 if ( I32&&289 if (!I16 && 270 290 e1oper == OPadd && 271 291 (e1->E2->Eoper == OPconst || e2oper == OPconst) && 272 292 !e1->Ecount 273 293 ) 274 294 { elem *e11; 275 295 elem *ebase; 276 296 elem *edisp; 277 297 int ss; 278 298 int ss2; 279 299 unsigned reg1,reg2; 280 300 code *c1,*c2,*c3; 281 301 282 302 if (e2oper == OPconst) 283 303 { edisp = e2; 284 304 ebase = e1->E2; 285 305 } 286 306 else 287 307 { edisp = e1->E2; 288 308 ebase = e2; 289 309 } 290 310 291 311 e11 = e1->E1; 292 312 retregs = *pretregs & ALLREGS; 293 313 if (!retregs) 294 314 retregs = ALLREGS; 295 315 ss = 0; 296 316 ss2 = 0; 297 317 298 318 // Handle the case of (((e * c1) + c2) + e2) 299 319 // Handle the case of (((e << c1) + c2) + e2) 300 320 if ((e11->Eoper == OPmul || e11->Eoper == OPshl) && 301 321 e11->E2->Eoper == OPconst && 302 322 !e11->Ecount 303 323 ) 304 { 
targ_size_t co1; 305 306 co1 = el_tolong(e11->E2); 324 { 325 targ_size_t co1 = el_tolong(e11->E2); 307 326 if (e11->Eoper == OPshl) 308 327 { 309 328 if (co1 > 3) 310 329 goto L13; 311 330 ss = co1; 312 331 } 313 332 else 314 333 { 315 334 ss2 = 1; 316 335 switch (co1) 317 336 { 318 337 case 6: ss = 1; break; 319 338 case 12: ss = 1; ss2 = 2; break; 320 339 case 24: ss = 1; ss2 = 3; break; 321 340 case 10: ss = 2; break; 322 341 case 20: ss = 2; ss2 = 2; break; 323 342 case 40: ss = 2; ss2 = 3; break; 324 343 case 18: ss = 3; break; 325 344 case 36: ss = 3; ss2 = 2; break; 326 345 case 72: ss = 3; ss2 = 3; break; 327 346 default: 328 347 ss2 = 0; 329 348 goto L13; 330 349 } 331 350 } 332 351 freenode(e11->E2); 333 352 freenode(e11); 334 353 e11 = e11->E1; 335 354 goto L13; 336 355 } 337 356 else 338 { regm_t regm;357 { 339 358 L13: 359 regm_t regm; 340 360 if (e11->Eoper == OPvar && isregvar(e11,®m,®1)) 341 361 { 342 362 retregs = regm; 343 363 c1 = NULL; 344 364 freenode(e11); 345 365 } 346 366 else 347 367 c1 = codelem(e11,&retregs,FALSE); 348 368 } 349 369 rretregs = ALLREGS & ~retregs; 350 370 c2 = scodelem(ebase,&rretregs,retregs,TRUE); 351 { regm_t sregs; 352 353 sregs = *pretregs & ~rretregs; 371 { 372 regm_t sregs = *pretregs & ~rretregs; 354 373 if (!sregs) 355 374 sregs = ALLREGS & ~rretregs; 356 375 c3 = allocreg(&sregs,®,ty); 357 376 } 358 377 359 378 reg1 = findreg(retregs); 360 379 reg2 = findreg(rretregs); 361 380 362 381 if (ss2) 363 382 { 364 383 assert(reg != reg2); 365 384 if (reg1 == BP) 366 385 { static unsigned imm32[4] = {1+1,2+1,4+1,8+1}; 367 386 368 387 // IMUL reg,imm32 369 c = genc2(CNIL,0x69,modreg rm(3,reg,reg1),imm32[ss]);388 c = genc2(CNIL,0x69,modregxrm(3,reg,BP),imm32[ss]); 370 389 } 371 390 else 372 391 { // LEA reg,[reg1*ss][reg1] 373 c = gen2sib(CNIL,0x8D,modregrm(0,reg,4),modregrm(ss,reg1,reg1)); 392 c = gen2sib(CNIL,0x8D,modregxrm(0,reg,4),modregrm(ss,reg1 & 7,reg1 & 7)); 393 if (reg1 & 8) 394 code_orrex(c, REX_X | REX_B); 374 
395 } 375 396 reg1 = reg; 376 397 ss = ss2; // use *2 for scale 377 398 } 378 399 else 379 400 c = NULL; 380 401 c = cat4(c1,c2,c3,c); 381 402 382 403 cs.Iop = 0x8D; // LEA reg,c[reg1*ss][reg2] 383 cs.Irm = modregrm(2,reg ,4);384 cs.Isib = modregrm(ss,reg1 ,reg2);404 cs.Irm = modregrm(2,reg & 7,4); 405 cs.Isib = modregrm(ss,reg1 & 7,reg2 & 7); 385 406 cs.Iflags = CFoff; 386 407 cs.Irex = 0; 408 if (reg & 8) 409 cs.Irex |= REX_R; 410 if (reg1 & 8) 411 cs.Irex |= REX_X; 412 if (reg2 & 8) 413 cs.Irex |= REX_B; 387 414 cs.IFL1 = FLconst; 388 415 cs.IEV1.Vuns = edisp->EV.Vuns; 389 416 390 417 freenode(edisp); 391 418 freenode(e1); 392 419 c = gen(c,&cs); 393 420 return cat(c,fixresult(e,mask[reg],pretregs)); 394 421 } 395 422 } 396 423 397 424 posregs = (byte) ? BYTEREGS : (mES | ALLREGS | mBP); 398 425 retregs = *pretregs & posregs; 399 426 if (retregs == 0) /* if no return regs speced */ 400 427 /* (like if wanted flags only) */ 401 428 retregs = ALLREGS & posregs; // give us some 402 429 403 430 if (tysize[ty1] > REGSIZE && numwords == 1) 404 431 { /* The only possibilities are (TYfptr + tyword) or (TYfptr - tyword) */ 405 432 #if DEBUG 406 433 if (tysize[ty2] != REGSIZE) … … 489 516 490 517 /* if retregs doesn't have any regs in it that aren't reg vars */ 491 518 if ((retregs & ~regcon.mvar) == 0) 492 519 retregs |= mAX; 493 520 } 494 521 else if (numwords == 2 && retregs & mES) 495 522 retregs = (retregs | mMSW) & ALLREGS; 496 523 497 524 // Determine if we should swap operands, because 498 525 // mov EAX,x 499 526 // add EAX,reg 500 527 // is faster than: 501 528 // mov EAX,reg 502 529 // add EAX,x 503 530 else if (e2oper == OPvar && 504 531 e1->Eoper == OPvar && 505 532 e->Eoper != OPmin && 506 533 isregvar(e1,®m,NULL) && 507 534 regm != retregs && 508 535 tysize[ty1] == tysize[ty2]) 509 { elem *es; 510 511 es = e1; 536 { 537 elem *es = e1; 512 538 e1 = e2; 513 539 e2 = es; 514 540 } 515 541 cl = codelem(e1,&retregs,test); /* eval left leaf */ 516 542 reg = 
findreg(retregs); 517 543 } 518 544 switch (e2oper) 519 545 { 520 546 case OPind: /* if addressing mode */ 521 547 if (!e2->Ecount) /* if not CSE */ 522 548 goto L1; /* try OP reg,EA */ 523 549 /* FALL-THROUGH */ 524 550 default: /* operator node */ 525 551 L2: 526 552 rretregs = ALLREGS & ~retregs; 527 553 /* Be careful not to do arithmetic on ES */ 528 554 if (tysize[ty1] == REGSIZE && tysize[ty2] > REGSIZE && *pretregs != mPSW) 529 555 rretregs = *pretregs & (mES | ALLREGS | mBP) & ~retregs; 530 556 else if (byte) 531 557 rretregs &= BYTEREGS; 532 558 533 559 cr = scodelem(e2,&rretregs,retregs,TRUE); /* get rvalue */ 534 560 rreg = (tysize[ty2] > REGSIZE) ? findreglsw(rretregs) : findreg(rretregs); 535 561 c = CNIL; 536 562 if (numwords == 1) /* ADD reg,rreg */ 537 563 { 538 564 /* reverse operands to avoid moving around the segment value */ 539 565 if (tysize[ty2] > REGSIZE) 540 566 { c = cat(c,getregs(rretregs)); 541 567 c = genregs(c,op1,rreg,reg); 542 568 retregs = rretregs; /* reverse operands */ 543 569 } 544 570 else 545 571 { c = genregs(c,op1,reg,rreg); 546 if ( I32&& *pretregs & mPSW)572 if (!I16 && *pretregs & mPSW) 547 573 c->Iflags |= word; 548 574 } 575 if (I64 && sz == 8) 576 code_orrex(c, REX_W); 549 577 } 550 578 else /* numwords == 2 */ /* ADD lsreg,lsrreg */ 551 579 { 552 580 reg = findreglsw(retregs); 553 581 rreg = findreglsw(rretregs); 554 582 c = genregs(c,op1,reg,rreg); 555 583 if (e->Eoper == OPadd || e->Eoper == OPmin) 556 584 code_orflag(c,CFpsw); 557 585 reg = findregmsw(retregs); 558 586 rreg = findregmsw(rretregs); 559 587 if (!(e2oper == OPu16_32 && // if second operand is 0 560 588 (op2 == 0x0B || op2 == 0x33)) // and OR or XOR 561 589 ) 562 590 genregs(c,op2,reg,rreg); // ADC msreg,msrreg 563 591 } 564 592 break; 565 593 566 594 case OPrelconst: 567 595 if (sz != REGSIZE) 568 596 goto L2; 569 597 if (segfl[el_fl(e2)] != 3) /* if not in data segment */ 570 598 goto L2; 571 599 if (evalinregister(e2)) 572 600 goto L2; 573 601 
cs.IEVoffset2 = e2->EV.sp.Voffset; 574 602 cs.IEVsym2 = e2->EV.sp.Vsym; 575 603 cs.Iflags |= CFoff; 576 604 i = 0; /* no INC or DEC opcode */ 577 605 rval = 0; 578 606 goto L3; 579 607 580 608 case OPconst: 581 609 if (tyfv(ty2)) 582 610 goto L2; 583 611 if (numwords == 1) 584 612 { 585 i = e2->EV.V int;613 i = e2->EV.Vpointer; 586 614 if (word) 587 615 { 588 616 if (!(*pretregs & mPSW) && 589 617 config.flags4 & CFG4speed && 590 618 (e->Eoper == OPor || e->Eoper == OPxor || test || 591 619 (e1->Eoper != OPvar && e1->Eoper != OPind))) 592 620 { word = 0; 593 621 i &= 0xFFFF; 594 622 } 595 623 } 596 624 rval = reghasvalue(byte ? BYTEREGS : ALLREGS,i,&rreg); 597 625 cs.IEV2.Vint = i; 598 626 L3: 599 627 op1 ^= byte; 600 628 cs.Iflags |= word; 601 629 if (rval) 602 630 { cs.Iop = op1 ^ 2; 603 631 mode = rreg; 604 632 } 605 633 else 606 634 cs.Iop = 0x81; 607 cs.Irm = modregrm(3,mode,reg); 635 cs.Irm = modregrm(3,mode&7,reg&7); 636 if (mode & 8) 637 cs.Irex |= REX_R; 638 if (reg & 8) 639 cs.Irex |= REX_B; 608 640 cs.IFL2 = (e2->Eoper == OPconst) ? FLconst : el_fl(e2); 609 641 /* Modify instruction for special cases */ 610 642 switch (e->Eoper) 611 643 { case OPadd: 612 644 { int iop; 613 645 614 646 if (i == 1) 615 647 iop = 0; /* INC reg */ 616 648 else if (i == -1) 617 649 iop = 8; /* DEC reg */ 618 650 else 619 651 break; 620 652 cs.Iop = (0x40 | iop | reg) ^ byte; 621 if ( byte && *pretregs & mPSW)622 { cs.Irm = modregrm(3,0,reg ) | iop;653 if ((byte && *pretregs & mPSW) || I64) 654 { cs.Irm = modregrm(3,0,reg & 7) | iop; 623 655 cs.Iop = 0xFF; 624 656 } 625 657 break; 626 658 } 627 659 case OPand: 628 660 if (test) 629 661 cs.Iop = rval ? 
op1 : 0xF7; // TEST 630 662 break; 631 663 } 632 664 if (*pretregs & mPSW) 633 665 cs.Iflags |= CFpsw; 634 666 cs.Iop ^= byte; 635 667 c = gen(CNIL,&cs); 636 668 cs.Iflags &= ~CFpsw; 637 669 } 638 670 else if (numwords == 2) 639 671 { unsigned lsreg; 640 672 targ_int msw; 641 673 642 674 c = getregs(retregs); … … 657 689 cs.Iflags &= ~CFpsw; 658 690 659 691 cs.Irm = (cs.Irm & modregrm(3,7,0)) | reg; 660 692 cs.IEV2.Vint = msw; 661 693 if (e->Eoper == OPadd) 662 694 cs.Irm |= modregrm(0,2,0); /* ADC */ 663 695 c = gen(c,&cs); 664 696 } 665 697 else 666 698 assert(0); 667 699 freenode(e2); 668 700 break; 669 701 670 702 case OPvar: 671 703 L1: 672 704 if (tyfv(ty2)) 673 705 goto L2; 674 706 c = loadea(e2,&cs,op1, 675 707 ((numwords == 2) ? findreglsw(retregs) : reg), 676 708 0,retregs,retregs); 677 if ( I32&& word)709 if (!I16 && word) 678 710 { if (*pretregs & mPSW) 679 711 code_orflag(c,word); 680 712 else 681 { code *ce; 682 683 ce = code_last(c); 713 { 714 code *ce = code_last(c); 684 715 ce->Iflags &= ~word; 685 716 } 686 717 } 687 718 else if (numwords == 2) 688 719 { 689 720 if (e->Eoper == OPadd || e->Eoper == OPmin) 690 721 code_orflag(c,CFpsw); 691 722 reg = findregmsw(retregs); 692 723 if (EOP(e2)) 693 724 { getlvalue_msw(&cs); 694 725 cs.Iop = op2; 695 726 NEWREG(cs.Irm,reg); 696 727 c = gen(c,&cs); /* ADC reg,data+2 */ 697 728 } 698 729 else 699 730 c = cat(c,loadea(e2,&cs,op2,reg,REGSIZE,retregs,0)); 700 731 } 701 732 freenode(e2); 702 733 break; 703 734 } … … 733 764 int opunslng; 734 765 int pow2; 735 766 736 767 if (*pretregs == 0) // if don't want result 737 768 { c = codelem(e->E1,pretregs,FALSE); // eval left leaf 738 769 *pretregs = 0; // in case they got set 739 770 return cat(c,codelem(e->E2,pretregs,FALSE)); 740 771 } 741 772 742 773 keepregs = 0; 743 774 cs.Iflags = 0; 744 775 cs.Irex = 0; 745 776 c = cg = cr = CNIL; // initialize 746 777 e2 = e->E2; 747 778 e1 = e->E1; 748 779 tyml = tybasic(e1->Ety); 749 780 sz = tysize[tyml]; 750 781 byte 
= tybyte(e->Ety) != 0; 751 782 uns = tyuns(tyml) || tyuns(e2->Ety); 752 783 oper = e->Eoper; 784 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 785 unsigned grex = rex << 16; 753 786 754 787 if (tyfloating(tyml)) 755 788 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 756 789 return orth87(e,pretregs); 757 790 #else 758 791 return opdouble(e,pretregs,(oper == OPmul) ? CLIBdmul : CLIBddiv); 759 792 #endif 760 793 761 opunslng = I 32 ? OPu32_64 : OPu16_32;794 opunslng = I16 ? OPu16_32 : OPu32_64; 762 795 switch (oper) 763 796 { 764 797 case OPmul: 765 798 resreg = mAX; 766 799 op = 5 - uns; 767 800 lib = CLIBlmul; 768 801 break; 769 802 770 803 case OPdiv: 771 804 resreg = mAX; 772 805 op = 7 - uns; 773 806 lib = uns ? CLIBuldiv : CLIBldiv; 774 807 if (I32) 775 808 keepregs |= mSI | mDI; 776 809 break; 777 810 778 811 case OPmod: 779 812 resreg = mDX; 780 813 op = 7 - uns; 781 814 lib = uns ? CLIBulmod : CLIBlmod; … … 815 848 case OPulngllng: 816 849 case OPlngllng: 817 850 if (sz != 2 * REGSIZE || oper != OPmul || e1->Eoper != e2->Eoper || 818 851 e1->Ecount || e2->Ecount) 819 852 goto L2; 820 853 op = (e2->Eoper == opunslng) ? 
4 : 5; 821 854 retregs = mAX; 822 855 cl = codelem(e1->E1,&retregs,FALSE); /* eval left leaf */ 823 856 if (e2->E1->Eoper == OPvar || 824 857 (e2->E1->Eoper == OPind && !e2->E1->Ecount) 825 858 ) 826 859 { 827 860 cr = loadea(e2->E1,&cs,0xF7,op,0,mAX,mAX | mDX); 828 861 } 829 862 else 830 863 { 831 864 rretregs = ALLREGS & ~mAX; 832 865 cr = scodelem(e2->E1,&rretregs,retregs,TRUE); // get rvalue 833 866 cg = getregs(mAX | mDX); 834 867 rreg = findreg(rretregs); 835 cg = gen2(cg,0xF7, modregrm(3,op,rreg)); // OP AX,rreg868 cg = gen2(cg,0xF7,grex | modregrmx(3,op,rreg)); // OP AX,rreg 836 869 } 837 870 freenode(e->E1); 838 871 freenode(e2); 839 872 c = fixresult(e,mAX | mDX,pretregs); 840 873 break; 841 874 842 875 case OPconst: 843 876 e2factor = el_tolong(e2); 844 877 845 878 if (oper == OPmul && I32 && sz == REGSIZE * 2) 846 879 { targ_int msw,lsw; 847 880 regm_t scratch; 848 881 unsigned reg; 849 882 targ_llong e2factor; 850 883 851 884 cl = codelem(e1,&retregs,FALSE); // eval left leaf 852 885 /* IMUL EDX,EDX,lsw 853 886 IMUL reg,EAX,msw 854 887 ADD reg,EDX 855 888 MOV EDX,lsw … … 871 904 msw = e2factor >> (REGSIZE * 8); 872 905 873 906 if (msw) 874 907 { cg = genmulimm(cg,DX,DX,lsw); 875 908 cg = genmulimm(cg,reg,AX,msw); 876 909 cg = gen2(cg,0x03,modregrm(3,reg,DX)); 877 910 } 878 911 else 879 912 cg = genmulimm(cg,reg,DX,lsw); 880 913 881 914 cg = movregconst(cg,DX,lsw,0); // MOV EDX,lsw 882 915 cg = cat(cg,getregs(mDX)); 883 916 cg = gen2(cg,0xF7,modregrm(3,4,DX)); // MUL EDX 884 917 gen2(cg,0x03,modregrm(3,DX,reg)); // ADD EDX,reg 885 918 886 919 resreg = mDX | mAX; 887 920 freenode(e2); 888 921 goto L3; 889 922 } 890 923 891 if (oper != OPmul && e2factor == 10 && sz == REGSIZE && 924 if (oper != OPmul && e2factor == 10 && 925 (!I16 && sz == 4) && 892 926 config.flags4 & CFG4speed && !uns) 893 927 { 894 928 /* R1 / 10 895 929 * 896 930 * MOV EAX,0x66666667 897 931 * IMUL R1 898 932 * MOV EAX,R1 899 933 * SAR EAX,31 900 934 * SAR EDX,2 901 935 * SUB EDX,EAX 
902 936 * IMUL EAX,EDX,10 903 937 * SUB R1,EAX 904 938 * 905 939 * EDX = quotient 906 940 * R1 = remainder 907 941 */ 908 942 regm_t regm; 909 943 unsigned reg; 910 944 911 945 regm = allregs & ~(mAX | mDX); 912 946 cl = codelem(e1,®m,FALSE); // eval left leaf 913 947 reg = findreg(regm); 914 948 cg = getregs(regm | mDX | mAX); 915 949 916 950 cg = movregconst(cg, AX, 0x66666667, 0); // MOV EAX,0x66666667 917 cg = gen2(cg,0xF7,modregrm (3,5,reg));// IMUL R1951 cg = gen2(cg,0xF7,modregrmx(3,5,reg)); // IMUL R1 918 952 genmovreg(cg, AX, reg); // MOV EAX,R1 919 953 genc2(cg,0xC1,modregrm(3,7,AX),31); // SAR EAX,31 920 954 genc2(cg,0xC1,modregrm(3,7,DX),2); // SAR EDX,2 921 955 gen2(cg,0x2B,modregrm(3,DX,AX)); // SUB EDX,EAX 922 956 923 957 switch (oper) 924 958 { case OPdiv: 925 959 resreg = mDX; 926 960 break; 927 961 928 962 case OPmod: 929 963 genmulimm(cg,AX,DX,10); // IMUL EAX,EDX,10 930 gen2(cg,0x2B,modreg rm(3,reg,AX));// SUB R1,EAX964 gen2(cg,0x2B,modregxrm(3,reg,AX)); // SUB R1,EAX 931 965 resreg = regm; 932 966 break; 933 967 934 968 case OPremquo: 935 969 genmulimm(cg,AX,DX,10); // IMUL EAX,EDX,10 936 gen2(cg,0x2B,modreg rm(3,reg,AX));// SUB R1,EAX970 gen2(cg,0x2B,modregxrm(3,reg,AX)); // SUB R1,EAX 937 971 genmovreg(cg, AX, DX); // MOV EAX,EDX 938 972 genmovreg(cg, DX, reg); // MOV EDX,R1 939 973 resreg = mDX | mAX; 940 974 break; 941 975 942 976 default: 943 977 assert(0); 944 978 } 945 979 freenode(e2); 946 980 goto L3; 947 981 } 948 982 949 983 if (sz > REGSIZE) 950 984 goto L2; 951 985 952 986 if (oper == OPmul && config.target_cpu >= TARGET_80286) 953 987 { unsigned reg; 954 988 int ss; 955 989 956 990 freenode(e2); 957 991 retregs = byte ? 
BYTEREGS : ALLREGS; 958 992 resreg = *pretregs & (ALLREGS | mBP); 959 993 if (!resreg) 960 994 resreg = retregs; 961 995 962 if ( I32)996 if (!I16) 963 997 { // See if we can use an LEA instruction 964 998 int ss2 = 0; 965 999 int shift; 966 1000 967 1001 switch (e2factor) 968 1002 { 969 1003 case 12: ss = 1; ss2 = 2; goto L4; 970 1004 case 24: ss = 1; ss2 = 3; goto L4; 971 1005 972 1006 case 6: 973 1007 case 3: ss = 1; goto L4; 974 1008 975 1009 case 20: ss = 2; ss2 = 2; goto L4; 976 1010 case 40: ss = 2; ss2 = 3; goto L4; 977 1011 978 1012 case 10: 979 1013 case 5: ss = 2; goto L4; 980 1014 981 1015 case 36: ss = 3; ss2 = 2; goto L4; 982 1016 case 72: ss = 3; ss2 = 3; goto L4; 983 1017 984 1018 case 18: 985 1019 case 9: ss = 3; goto L4; 986 1020 987 1021 L4: 988 1022 { 989 1023 #if 1 990 regm_t regm; 991 int r; 992 993 regm = byte ? BYTEREGS : ALLREGS; // don't use EBP 1024 regm_t regm = byte ? BYTEREGS : ALLREGS; // don't use EBP 994 1025 cl = codelem(e->E1,®m,TRUE); 995 r = findreg(regm);1026 unsigned r = findreg(regm); 996 1027 997 1028 if (ss2) 998 1029 { // Don't use EBP 999 1030 resreg &= ~mBP; 1000 1031 if (!resreg) 1001 1032 resreg = retregs; 1002 1033 } 1003 1034 cg = allocreg(&resreg,®,tyml); 1004 1035 1005 c = gen2sib(CNIL,0x8D, modregrm(0,reg,4),1006 modreg rm(ss,r,r));1036 c = gen2sib(CNIL,0x8D,grex | modregxrm(0,reg,4), 1037 modregxrmx(ss,r,r)); 1007 1038 if (ss2) 1008 1039 { 1009 gen2sib(c,0x8D, modregrm(0,reg,4),1010 modreg rm(ss2,reg,5));1040 gen2sib(c,0x8D,grex | modregxrm(0,reg,4), 1041 modregxrm(ss2,reg,5)); 1011 1042 code_last(c)->IFL1 = FLconst; 1012 1043 code_last(c)->IEV1.Vint = 0; 1013 1044 } 1014 1045 else if (!(e2factor & 1)) // if even factor 1015 genregs(c,0x03,reg,reg); // ADD reg,reg 1046 { genregs(c,0x03,reg,reg); // ADD reg,reg 1047 code_orrex(c,rex); 1048 } 1016 1049 cg = cat(cg,c); 1017 1050 goto L3; 1018 1051 #else 1019 1052 1020 1053 // Don't use EBP 1021 1054 resreg &= ~mBP; 1022 1055 if (!resreg) 1023 1056 resreg = retregs; 
1024 1057 1025 1058 cl = codelem(e->E1,&resreg,FALSE); 1026 1059 reg = findreg(resreg); 1027 1060 cg = getregs(resreg); 1028 1061 c = gen2sib(CNIL,0x8D,modregrm(0,reg,4), 1029 1062 modregrm(ss,reg,reg)); 1030 1063 if (ss2) 1031 1064 { 1032 1065 gen2sib(c,0x8D,modregrm(0,reg,4), 1033 1066 modregrm(ss2,reg,5)); 1034 1067 code_last(c)->IFL1 = FLconst; 1035 1068 code_last(c)->IEV1.Vint = 0; 1036 1069 } 1037 1070 else if (!(e2factor & 1)) // if even factor 1038 1071 genregs(c,0x03,reg,reg); // ADD reg,reg 1039 1072 cg = cat(cg,c); 1040 1073 goto L3; 1041 1074 #endif 1042 1075 } 1043 1076 case 37: 1044 1077 case 74: shift = 2; 1045 1078 goto L5; 1046 1079 case 13: 1047 1080 case 26: shift = 0; 1048 1081 goto L5; 1049 1082 L5: 1050 { regm_t sregm; 1051 unsigned sreg; 1052 1083 { 1053 1084 // Don't use EBP 1054 1085 resreg &= ~mBP; 1055 1086 if (!resreg) 1056 1087 resreg = retregs; 1057 1088 cl = allocreg(&resreg,®,TYint); 1058 1089 1059 sregm = ALLREGS & ~resreg;1090 regm_t sregm = ALLREGS & ~resreg; 1060 1091 cl = cat(cl,codelem(e->E1,&sregm,FALSE)); 1061 sreg = findreg(sregm);1092 unsigned sreg = findreg(sregm); 1062 1093 cg = getregs(resreg | sregm); 1063 1094 // LEA reg,[sreg * 4][sreg] 1064 1095 // SHL sreg,shift 1065 1096 // LEA reg,[sreg * 8][reg] 1066 c = gen2sib(CNIL,0x8D, modregrm(0,reg,4),1067 modreg rm(2,sreg,sreg));1097 c = gen2sib(CNIL,0x8D,grex | modregxrm(0,reg,4), 1098 modregxrmx(2,sreg,sreg)); 1068 1099 if (shift) 1069 genc2(c,0xC1, modregrm(3,4,sreg),shift);1070 gen2sib(c,0x8D, modregrm(0,reg,4),1071 modreg rm(3,sreg,reg));1100 genc2(c,0xC1,grex | modregrmx(3,4,sreg),shift); 1101 gen2sib(c,0x8D,grex | modregxrm(0,reg,4), 1102 modregxrmx(3,sreg,reg)); 1072 1103 if (!(e2factor & 1)) // if even factor 1073 genregs(c,0x03,reg,reg); // ADD reg,reg 1104 { genregs(c,0x03,reg,reg); // ADD reg,reg 1105 code_orrex(c,rex); 1106 } 1074 1107 cg = cat(cg,c); 1075 1108 goto L3; 1076 1109 } 1077 1110 } 1078 1111 } 1079 1112 1080 1113 cl = 
scodelem(e->E1,&retregs,0,TRUE); // eval left leaf 1081 1114 reg = findreg(retregs); 1082 1115 cg = allocreg(&resreg,&rreg,e->Ety); 1083 1116 1084 1117 /* IMUL reg,imm16 */ 1085 cg = genc2(cg,0x69, modregrm(3,rreg,reg),e2factor);1118 cg = genc2(cg,0x69,grex | modregxrmx(3,rreg,reg),e2factor); 1086 1119 goto L3; 1087 1120 } 1088 1121 1089 1122 // Special code for signed divide or modulo by power of 2 1090 if (sz == REGSIZE && (oper == OPdiv || oper == OPmod) && !uns && 1123 if ((sz == REGSIZE || (I64 && sz == 4)) && 1124 (oper == OPdiv || oper == OPmod) && !uns && 1091 1125 (pow2 = ispow2(e2factor)) != -1 && 1092 1126 !(config.target_cpu < TARGET_80286 && pow2 != 1 && oper == OPdiv) 1093 1127 ) 1094 1128 { 1095 1129 if (pow2 == 1 && oper == OPdiv && config.target_cpu > TARGET_80386) 1096 1130 { 1097 1131 // test eax,eax 1098 1132 // jns L1 1099 1133 // add eax,1 1100 1134 // L1: sar eax,1 1101 1135 1102 1136 code *cnop; 1103 unsigned reg;1104 1137 1105 1138 retregs = allregs; 1106 1139 cl = codelem(e->E1,&retregs,FALSE); // eval left leaf 1107 reg = findreg(retregs);1140 unsigned reg = findreg(retregs); 1108 1141 freenode(e2); 1109 1142 cg = getregs(retregs); 1110 1143 cg = gentstreg(cg,reg); // TEST reg,reg 1144 code_orrex(cg, rex); 1111 1145 cnop = gennop(CNIL); 1112 1146 genjmp(cg,JNS,FLcode,(block *)cnop); // JNS cnop 1113 gen1(cg,0x40 + reg); // INC reg 1147 if (I64) 1148 { 1149 gen2(cg,0xFF,modregrmx(3,0,reg)); // INC reg 1150 code_orrex(cg,rex); 1151 } 1152 else 1153 gen1(cg,0x40 + reg); // INC reg 1114 1154 cg = cat(cg,cnop); 1115 gen2(cg,0xD1, modregrm(3,7,reg)); // SAR reg,11155 gen2(cg,0xD1,grex | modregrmx(3,7,reg)); // SAR reg,1 1116 1156 resreg = retregs; 1117 1157 goto L3; 1118 1158 } 1119 1159 cl = codelem(e->E1,&retregs,FALSE); // eval left leaf 1120 1160 freenode(e2); 1121 1161 cg = getregs(mAX | mDX); // trash these regs 1122 1162 cg = gen1(cg,0x99); // CWD 1163 code_orrex(cg, rex); 1123 1164 if (pow2 == 1) 1124 1165 { 1125 1166 if (oper == OPdiv) 
1126 { gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1127 gen2(cg,0xD1, modregrm(3,7,AX)); // SAR AX,11167 { gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1168 gen2(cg,0xD1,grex | modregrm(3,7,AX)); // SAR AX,1 1128 1169 } 1129 1170 else // OPmod 1130 { gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1131 genc2(cg,0x81, modregrm(3,4,AX),1); // AND AX,11132 gen2(cg,0x03, modregrm(3,DX,AX)); // ADD DX,AX1171 { gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1172 genc2(cg,0x81,grex | modregrm(3,4,AX),1); // AND AX,1 1173 gen2(cg,0x03,grex | modregrm(3,DX,AX)); // ADD DX,AX 1133 1174 } 1134 1175 } 1135 1176 else 1136 1177 { targ_ulong m; 1137 1178 1138 1179 m = (1 << pow2) - 1; 1139 1180 if (oper == OPdiv) 1140 { genc2(cg,0x81, modregrm(3,4,DX),m); // AND DX,m1141 gen2(cg,0x03, modregrm(3,AX,DX)); // ADD AX,DX1181 { genc2(cg,0x81,grex | modregrm(3,4,DX),m); // AND DX,m 1182 gen2(cg,0x03,grex | modregrm(3,AX,DX)); // ADD AX,DX 1142 1183 // Be careful not to generate this for 8088 1143 1184 assert(config.target_cpu >= TARGET_80286); 1144 genc2(cg,0xC1, modregrm(3,7,AX),pow2); // SAR AX,pow21185 genc2(cg,0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2 1145 1186 } 1146 1187 else // OPmod 1147 { gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1148 gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1149 genc2(cg,0x81, modregrm(3,4,AX),m); // AND AX,mask1150 gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1151 gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1188 { gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1189 gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1190 genc2(cg,0x81,grex | modregrm(3,4,AX),m); // AND AX,mask 1191 gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1192 gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1152 1193 resreg = mAX; 1153 1194 } 1154 1195 } 1155 1196 goto L3; 1156 1197 } 1157 1198 goto L2; 1158 1199 case OPind: 1159 1200 if (!e2->Ecount) /* if not CSE */ 1160 1201 goto L1; /* try OP reg,EA */ 1161 1202 goto L2; 1162 
1203 default: /* OPconst and operators */ 1163 1204 L2: 1164 1205 cl = codelem(e1,&retregs,FALSE); /* eval left leaf */ 1165 1206 cr = scodelem(e2,&rretregs,retregs,TRUE); /* get rvalue */ 1166 1207 if (sz <= REGSIZE) 1167 1208 { cg = getregs(mAX | mDX); /* trash these regs */ 1168 1209 if (op == 7) /* signed divide */ 1169 cg = gen1(cg,0x99); /* CWD */ 1210 { cg = gen1(cg,0x99); // CWD 1211 code_orrex(cg,rex); 1212 } 1170 1213 else if (op == 6) /* unsigned divide */ 1171 { cg = movregconst(cg,DX,0,0); // MOV DX,0 1214 { 1215 cg = movregconst(cg,DX,0,(sz == 8) ? 64 : 0); // MOV DX,0 1172 1216 cg = cat(cg,getregs(mDX)); 1173 1217 } 1174 1218 rreg = findreg(rretregs); 1175 cg = gen2(cg,0xF7 ^ byte, modregrm(3,op,rreg)); /* OP AX,rreg */1219 cg = gen2(cg,0xF7 ^ byte,grex | modregrmx(3,op,rreg)); // OP AX,rreg 1176 1220 L3: 1177 1221 c = fixresult(e,resreg,pretregs); 1178 1222 } 1179 1223 else if (sz == 2 * REGSIZE) 1180 1224 { 1181 1225 if (config.target_cpu >= TARGET_PentiumPro && oper == OPmul) 1182 1226 { 1183 1227 /* IMUL ECX,EAX 1184 1228 IMUL EDX,EBX 1185 1229 ADD ECX,EDX 1186 1230 MUL EBX 1187 1231 ADD EDX,ECX 1188 1232 */ 1189 1233 cg = getregs(mAX|mDX|mCX); 1190 1234 cg = gen2(cg,0x0FAF,modregrm(3,CX,AX)); 1191 1235 gen2(cg,0x0FAF,modregrm(3,DX,BX)); 1192 1236 gen2(cg,0x03,modregrm(3,CX,DX)); 1193 1237 gen2(cg,0xF7,modregrm(3,4,BX)); 1194 1238 gen2(cg,0x03,modregrm(3,DX,CX)); 1195 1239 c = fixresult(e,mDX|mAX,pretregs); 1196 1240 } 1197 1241 else 1198 1242 c = callclib(e,lib,pretregs,keepregs); 1199 1243 } 1200 1244 else 1201 1245 assert(0); 1202 1246 break; 1203 1247 case OPvar: 1204 1248 L1: 1205 if ( I32&& sz <= REGSIZE)1249 if (!I16 && sz <= REGSIZE) 1206 1250 { 1207 1251 if (oper == OPmul && sz > 1) /* no byte version */ 1208 1252 { 1209 1253 /* Generate IMUL r32,r/m32 */ 1210 1254 retregs = *pretregs & (ALLREGS | mBP); 1211 1255 if (!retregs) 1212 1256 retregs = ALLREGS; 1213 1257 cl = codelem(e1,&retregs,FALSE); /* eval left leaf */ 1214 1258 resreg = 
retregs; 1215 1259 cr = loadea(e2,&cs,0x0FAF,findreg(resreg),0,retregs,retregs); 1216 1260 freenode(e2); 1217 1261 goto L3; 1218 1262 } 1219 1263 } 1220 1264 else 1221 1265 { 1222 1266 if (sz == 2 * REGSIZE) 1223 1267 { int reg; 1224 1268 1225 1269 if (oper != OPmul || e->E1->Eoper != opunslng || … … 1278 1322 code *cdnot(elem *e,regm_t *pretregs) 1279 1323 { unsigned reg; 1280 1324 tym_t forflags; 1281 1325 code *c1,*c,*cfalse,*ctrue,*cnop; 1282 1326 unsigned sz; 1283 1327 regm_t retregs; 1284 1328 elem *e1; 1285 1329 int op; 1286 1330 1287 1331 e1 = e->E1; 1288 1332 if (*pretregs == 0) 1289 1333 goto L1; 1290 1334 if (*pretregs == mPSW) 1291 1335 { /*assert(e->Eoper != OPnot && e->Eoper != OPbool);*/ /* should've been optimized */ 1292 1336 L1: 1293 1337 return codelem(e1,pretregs,FALSE); /* evaluate e1 for cc */ 1294 1338 } 1295 1339 1296 1340 op = e->Eoper; 1297 1341 sz = tysize(e1->Ety); 1342 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 1343 unsigned grex = rex << 16; 1298 1344 if (!tyfloating(e1->Ety)) 1299 1345 { 1300 1346 if (sz <= REGSIZE && e1->Eoper == OPvar) 1301 1347 { code cs; 1302 1348 1303 1349 c = getlvalue(&cs,e1,0); 1304 1350 freenode(e1); 1305 if ( I32&& sz == 2)1351 if (!I16 && sz == 2) 1306 1352 cs.Iflags |= CFopsize; 1307 1353 1308 1354 retregs = *pretregs & (ALLREGS | mBP); 1309 1355 if (config.target_cpu >= TARGET_80486 && 1310 1356 tysize(e->Ety) == 1) 1311 1357 { 1312 1358 if (reghasvalue((sz == 1) ? 
BYTEREGS : ALLREGS,0,®)) 1313 1359 cs.Iop = 0x39; 1314 1360 else 1315 1361 { cs.Iop = 0x81; 1316 1362 reg = 7; 1317 1363 cs.IFL2 = FLconst; 1318 1364 cs.IEV2.Vint = 0; 1319 1365 } 1320 1366 cs.Iop ^= (sz == 1); 1321 c s.Irm |= modregrm(0,reg,0);1367 code_newreg(&cs,reg); 1322 1368 c = gen(c,&cs); // CMP e1,0 1323 1369 1324 1370 retregs &= BYTEREGS; 1325 1371 if (!retregs) 1326 1372 retregs = BYTEREGS; 1327 1373 c1 = allocreg(&retregs,®,TYint); 1328 1374 1329 1375 int iop; 1330 1376 if (op == OPbool) 1331 1377 { 1332 1378 iop = 0x0F95; // SETNZ rm8 1333 1379 } 1334 1380 else 1335 1381 { 1336 1382 iop = 0x0F94; // SETZ rm8 1337 1383 } 1338 c1 = gen2(c1,iop, modregrm(3,0,reg));1384 c1 = gen2(c1,iop,grex | modregrmx(3,0,reg)); 1339 1385 if (op == OPbool) 1340 1386 *pretregs &= ~mPSW; 1341 1387 goto L4; 1342 1388 } 1343 1389 1344 1390 if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,1,®)) 1345 1391 cs.Iop = 0x39; 1346 1392 else 1347 1393 { cs.Iop = 0x81; 1348 1394 reg = 7; 1349 1395 cs.IFL2 = FLconst; 1350 1396 cs.IEV2.Vint = 1; 1351 1397 } 1352 1398 cs.Iop ^= (sz == 1); 1353 c s.Irm |= modregrm(0,reg,0);1399 code_newreg(&cs,reg); 1354 1400 c = gen(c,&cs); // CMP e1,1 1355 1401 1356 1402 c1 = allocreg(&retregs,®,TYint); 1357 1403 op ^= (OPbool ^ OPnot); // switch operators 1358 1404 goto L2; 1359 1405 } 1360 1406 else if (sz <= REGSIZE && 1361 1407 // NEG bytereg is too expensive 1362 1408 (sz != 1 || config.target_cpu < TARGET_PentiumPro)) 1363 1409 { 1364 1410 retregs = *pretregs & (ALLREGS | mBP); 1365 1411 if (sz == 1 && !(retregs &= BYTEREGS)) 1366 1412 retregs = BYTEREGS; 1367 1413 c = codelem(e->E1,&retregs,FALSE); 1368 1414 reg = findreg(retregs); 1369 1415 c1 = getregs(retregs); 1370 c1 = gen2(c1,0xF7 ^ (sz == 1), modregrm(3,3,reg));// NEG reg1416 c1 = gen2(c1,0xF7 ^ (sz == 1),grex | modregrmx(3,3,reg)); // NEG reg 1371 1417 code_orflag(c1,CFpsw); 1372 1418 if (I32 && sz == SHORTSIZE) 1373 1419 code_orflag(c1,CFopsize); 1374 1420 L2: 1375 1421 c1 = 
genregs(c1,0x19,reg,reg); // SBB reg,reg 1422 code_orrex(c1, rex); 1376 1423 // At this point, reg==0 if e1==0, reg==-1 if e1!=0 1377 1424 if (op == OPnot) 1378 gen1(c1,0x40 + reg); // INC reg 1425 { 1426 if (I64) 1427 gen2(c1,0xFF,grex | modregrmx(3,0,reg)); // INC reg 1428 else 1429 gen1(c1,0x40 + reg); // INC reg 1430 } 1379 1431 else 1380 gen2(c1,0xF7, modregrm(3,3,reg));// NEG reg1432 gen2(c1,0xF7,grex | modregrmx(3,3,reg)); // NEG reg 1381 1433 if (*pretregs & mPSW) 1382 1434 { code_orflag(c1,CFpsw); 1383 1435 *pretregs &= ~mPSW; // flags are always set anyway 1384 1436 } 1385 1437 L4: 1386 1438 return cat3(c,c1,fixresult(e,retregs,pretregs)); 1387 1439 } 1388 1440 } 1389 1441 cnop = gennop(CNIL); 1390 1442 ctrue = gennop(CNIL); 1391 1443 c = logexp(e->E1,(op == OPnot) ? FALSE : TRUE,FLcode,ctrue); 1392 1444 forflags = *pretregs & mPSW; 1445 if (I64 && sz == 8) 1446 forflags |= 64; 1393 1447 assert(tysize(e->Ety) <= REGSIZE); // result better be int 1394 1448 cfalse = allocreg(pretregs,®,e->Ety); // allocate reg for result 1395 1449 for (c1 = cfalse; c1; c1 = code_next(c1)) 1396 1450 gen(ctrue,c1); // duplicate reg save code 1397 1451 cfalse = movregconst(cfalse,reg,0,forflags); // mov 0 into reg 1398 1452 regcon.immed.mval &= ~mask[reg]; // mark reg as unavail 1399 1453 ctrue = movregconst(ctrue,reg,1,forflags); // mov 1 into reg 1400 1454 regcon.immed.mval &= ~mask[reg]; // mark reg as unavail 1401 1455 genjmp(cfalse,JMP,FLcode,(block *) cnop); // skip over ctrue 1402 1456 c = cat4(c,cfalse,ctrue,cnop); 1403 1457 return c; 1404 1458 } 1405 1459 1406 1460 1407 1461 /************************ 1408 1462 * Complement operator 1409 1463 */ 1410 1464 1411 1465 code *cdcom(elem *e,regm_t *pretregs) 1412 1466 { unsigned reg,op; 1413 1467 regm_t retregs,possregs; 1414 1468 code *c,*c1,*cg; 1415 1469 tym_t tym; 1416 1470 int sz; 1417 1471 1418 1472 if (*pretregs == 0) 1419 1473 return codelem(e->E1,pretregs,FALSE); 1420 1474 tym = tybasic(e->Ety); 1421 1475 sz = 
tysize[tym]; 1476 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 1477 unsigned grex = rex << 16; 1422 1478 possregs = (sz == 1) ? BYTEREGS : allregs; 1423 1479 retregs = *pretregs & possregs; 1424 1480 if (retregs == 0) 1425 1481 retregs = possregs; 1426 1482 c1 = codelem(e->E1,&retregs,FALSE); 1427 1483 cg = getregs(retregs); /* retregs will be destroyed */ 1428 1484 #if 0 1429 1485 if (sz == 4 * REGSIZE) 1430 1486 { 1431 1487 c = gen2(CNIL,0xF7,modregrm(3,2,AX)); // NOT AX 1432 1488 gen2(c,0xF7,modregrm(3,2,BX)); // NOT BX 1433 1489 gen2(c,0xF7,modregrm(3,2,CX)); // NOT CX 1434 1490 gen2(c,0xF7,modregrm(3,2,DX)); // NOT DX 1435 1491 } 1436 1492 else 1437 1493 #endif 1438 1494 { 1439 1495 reg = (sz <= REGSIZE) ? findreg(retregs) : findregmsw(retregs); 1440 1496 op = (sz == 1) ? 0xF6 : 0xF7; 1441 1497 c = genregs(CNIL,op,2,reg); // NOT reg 1498 code_orrex(c, rex); 1442 1499 if (sz == 2 * REGSIZE) 1443 1500 { reg = findreglsw(retregs); 1444 1501 genregs(c,op,2,reg); // NOT reg+1 1445 1502 } 1446 1503 } 1447 1504 return cat4(c1,cg,c,fixresult(e,retregs,pretregs)); 1448 1505 } 1449 1506 1450 1507 /************************ 1451 1508 * Bswap operator 1452 1509 */ 1453 1510 1454 1511 code *cdbswap(elem *e,regm_t *pretregs) 1455 1512 { unsigned reg,op; 1456 1513 regm_t retregs; 1457 1514 code *c,*c1,*cg; 1458 1515 tym_t tym; 1459 1516 int sz; 1460 1517 1461 1518 if (*pretregs == 0) 1462 1519 return codelem(e->E1,pretregs,FALSE); 1463 1520 1464 1521 tym = tybasic(e->Ety); 1465 1522 assert(tysize[tym] == 4); 1466 1523 retregs = *pretregs & allregs; 1467 1524 if (retregs == 0) 1468 1525 retregs = allregs; 1469 1526 c1 = codelem(e->E1,&retregs,FALSE); 1470 1527 cg = getregs(retregs); // retregs will be destroyed 1471 1528 reg = findreg(retregs); 1472 c = gen2(CNIL,0x0FC8 + reg,0); // BSWAP reg 1529 c = gen2(CNIL,0x0FC8 + (reg & 7),0); // BSWAP reg 1530 if (reg & 8) 1531 code_orrex(c, REX_B); 1473 1532 return cat4(c1,cg,c,fixresult(e,retregs,pretregs)); 1474 1533 } 1475 1534 
1476 1535 /************************* 1477 1536 * ?: operator 1478 1537 */ 1479 1538 1480 1539 code *cdcond(elem *e,regm_t *pretregs) 1481 1540 { regm_t psw; 1482 1541 code *cc,*c,*c1,*cnop1,*c2,*cnop2; 1483 1542 con_t regconold,regconsave; 1484 1543 unsigned stackpushold,stackpushsave; 1485 1544 int ehindexold,ehindexsave; 1486 1545 unsigned jop; 1487 1546 unsigned op1; 1488 1547 unsigned sz1; 1489 1548 unsigned sz2; 1490 1549 elem *e1; 1491 1550 elem *e2; 1492 1551 elem *e21; 1493 1552 elem *e22; 1494 1553 1495 1554 /* vars to save state of 8087 */ 1496 1555 int stackusedold,stackusedsave; 1497 1556 NDP _8087old[arraysize(_8087elems)]; 1498 1557 NDP _8087save[arraysize(_8087elems)]; 1499 1558 1500 1559 _chkstack(); 1501 1560 1502 1561 //dbg_printf("cdcond(e = %p, *pretregs = x%x)\n",e,*pretregs); 1503 1562 e1 = e->E1; 1504 1563 e2 = e->E2; 1505 1564 e21 = e2->E1; 1506 1565 e22 = e2->E2; 1507 1566 cc = docommas(&e1); 1508 1567 cgstate.stackclean++; 1509 1568 psw = *pretregs & mPSW; /* save PSW bit */ 1510 1569 op1 = e1->Eoper; 1511 1570 sz1 = tysize(e1->Ety); 1571 unsigned rex = (I64 && sz1 == 8) ? REX_W : 0; 1572 unsigned grex = rex << 16; 1512 1573 jop = jmpopcode(e1); 1513 1574 1514 1575 if (!OTrel(op1) && e1 == e21 && 1515 1576 sz1 <= REGSIZE && !tyfloating(e1->Ety)) 1516 1577 { // Recognize (e ? 
e : f) 1517 1578 regm_t retregs; 1518 1579 1519 1580 cnop1 = gennop(CNIL); 1520 1581 retregs = *pretregs | mPSW; 1521 1582 c = codelem(e1,&retregs,FALSE); 1522 1583 1523 1584 c = cat(c,cse_flush(1)); // flush CSEs to memory 1524 1585 c = genjmp(c,jop,FLcode,(block *)cnop1); 1525 1586 freenode(e21); 1526 1587 1527 1588 regconsave = regcon; 1528 1589 stackpushsave = stackpush; 1529 1590 1530 1591 retregs |= psw; 1531 1592 if (retregs & (mBP | ALLREGS)) … … 1561 1622 reg = findreg(retregs); 1562 1623 v1 = e21->EV.Vlong; 1563 1624 v2 = e22->EV.Vlong; 1564 1625 if (jop == JNC) 1565 1626 { v1 = v2; 1566 1627 v2 = e21->EV.Vlong; 1567 1628 } 1568 1629 1569 1630 opcode = 0x81; 1570 1631 switch (sz2) 1571 1632 { case 1: opcode--; 1572 1633 v1 = (signed char) v1; 1573 1634 v2 = (signed char) v2; 1574 1635 break; 1575 1636 case 2: v1 = (short) v1; 1576 1637 v2 = (short) v2; 1577 1638 break; 1578 1639 } 1579 1640 1580 1641 if (v1 == 0 && v2 == -1L) 1581 c = gen2(c,0xF6 + (opcode & 1), modregrm(3,2,reg)); // NOT reg1642 c = gen2(c,0xF6 + (opcode & 1),grex | modregrmx(3,2,reg)); // NOT reg 1582 1643 else 1583 1644 { 1584 1645 v1 -= v2; 1585 c = genc2(c,opcode, modregrm(3,4,reg),v1); // AND reg,v1-v21586 if (v2 == 1 )1646 c = genc2(c,opcode,grex | modregrmx(3,4,reg),v1); // AND reg,v1-v2 1647 if (v2 == 1 && !I64) 1587 1648 gen1(c,0x40 + reg); // INC reg 1588 else if (v2 == -1L )1649 else if (v2 == -1L && !I64) 1589 1650 gen1(c,0x48 + reg); // DEC reg 1590 1651 else 1591 genc2(c,opcode, modregrm(3,0,reg),v2); // ADD reg,v21652 genc2(c,opcode,grex | modregrmx(3,0,reg),v2); // ADD reg,v2 1592 1653 } 1593 1654 1594 1655 freenode(e21); 1595 1656 freenode(e22); 1596 1657 freenode(e2); 1597 1658 1598 1659 c = cat(c,fixresult(e,retregs,pretregs)); 1599 1660 goto Lret; 1600 1661 } 1601 1662 1602 1663 if (op1 != OPcond && op1 != OPandand && op1 != OPoror && 1603 1664 op1 != OPnot && op1 != OPbool && 1604 1665 e21->Eoper == OPconst && 1605 1666 sz1 <= REGSIZE && 1606 1667 *pretregs & (mBP | 
ALLREGS) && 1607 1668 tysize(e21->Ety) <= REGSIZE && !tyfloating(e21->Ety)) 1608 1669 { // Recognize (e ? c : f) 1609 1670 unsigned reg; 1610 1671 regm_t retregs; 1611 1672 1612 1673 cnop1 = gennop(CNIL); 1613 1674 retregs = mPSW; 1614 1675 jop = jmpopcode(e1); // get jmp condition 1615 1676 c = codelem(e1,&retregs,FALSE); 1616 1677 1617 1678 // Set the register with e21 without affecting the flags 1618 1679 retregs = *pretregs & (ALLREGS | mBP); 1619 1680 if (retregs & ~regcon.mvar) 1620 1681 retregs &= ~regcon.mvar; // don't disturb register variables 1621 c = regwithvalue(c,retregs,e21->EV.Vint,&reg, 8);1682 c = regwithvalue(c,retregs,e21->EV.Vint,&reg,sz1 == 8 ? 64|8 : 8); 1622 1683 retregs = mask[reg]; 1623 1684 1624 1685 c = cat(c,cse_flush(1)); // flush CSE's to memory 1625 1686 c = genjmp(c,jop,FLcode,(block *)cnop1); 1626 1687 freenode(e21); 1627 1688 1628 1689 regconsave = regcon; 1629 1690 stackpushsave = stackpush; 1630 1691 1631 1692 c2 = codelem(e22,&retregs,FALSE); 1632 1693 1633 1694 andregcon(&regconsave); 1634 1695 assert(stackpushsave == stackpush); 1635 1696 1636 1697 freenode(e2); 1637 1698 c = cat6(cc,c,c2,cnop1,fixresult(e,retregs,pretregs),NULL); 1638 1699 goto Lret; 1639 1700 } 1640 1701 1641 1702 cnop1 = gennop(CNIL); … … 1716 1777 * cg: [save reg code] ;if we must preserve reg 1717 1778 * CLR reg ;FALSE result (set Z also) 1718 1779 * JMP cnop2 1719 1780 * 1720 1781 * cnop1: NOP ;if e1 evaluates to TRUE 1721 1782 * [save reg code] ;preserve reg 1722 1783 * 1723 1784 * MOV reg,1 ;TRUE result 1724 1785 * or 1725 1786 * CLR reg ;if return result in flags 1726 1787 * INC reg 1727 1788 * 1728 1789 * cnop2: NOP ;mark end of code 1729 1790 */ 1730 1791 1731 1792 code *cdloglog(elem *e,regm_t *pretregs) 1732 1793 { regm_t retregs; 1733 1794 unsigned reg; 1734 1795 code *c; 1735 1796 code *cl,*cr,*cg,*cnop1,*cnop2,*cnop3; 1736 registercode *c1;1797 code *c1; 1737 1798 con_t regconsave; 1738 1799 unsigned stackpushsave; 1739 1800 int jcond; 1740 1801 elem 
*e2; 1802 unsigned sz = tysize(e->Ety); 1741 1803 1742 1804 /* We can trip the assert with the following: */ 1743 1805 /* if ( (b<=a) ? (c<b || a<=c) : c>=a ) */ 1744 1806 /* We'll generate ugly code for it, but it's too obscure a case */ 1745 1807 /* to expend much effort on it. */ 1746 1808 /*assert(*pretregs != mPSW);*/ 1747 1809 1748 1810 cgstate.stackclean++; 1749 1811 cnop1 = gennop(CNIL); 1750 1812 cnop3 = gennop(CNIL); 1751 1813 jcond = 0; 1752 1814 e2 = e->E2; 1753 1815 cl = (e->Eoper == OPoror) 1754 1816 ? logexp(e->E1,jcond | 1,FLcode,cnop1) 1755 1817 : logexp(e->E1,jcond,FLcode,cnop3); 1756 1818 regconsave = regcon; 1757 1819 stackpushsave = stackpush; 1758 1820 if (*pretregs == 0) /* if don't want result */ 1759 1821 { int noreturn = el_noreturn(e2); 1760 1822 1761 1823 cr = codelem(e2,pretregs,FALSE); 1762 1824 if (noreturn) 1763 1825 { 1764 1826 regconsave.used |= regcon.used; 1765 1827 regcon = regconsave; 1766 1828 } 1767 1829 else 1768 1830 andregcon(&regconsave); 1769 1831 assert(stackpush == stackpushsave); 1770 1832 c = cat4(cl,cr,cnop3,cnop1); // eval code, throw away result 1771 1833 goto Lret; 1772 1834 } 1773 1835 cnop2 = gennop(CNIL); 1774 1836 if (tybasic(e2->Ety) == TYbool && 1775 tysize(e->Ety)== tysize(e2->Ety) &&1837 sz == tysize(e2->Ety) && 1776 1838 !(*pretregs & mPSW) && 1777 1839 e2->Eoper == OPcall) 1778 1840 { 1779 1841 cr = codelem(e2,pretregs,FALSE); 1780 1842 1781 1843 andregcon(&regconsave); 1782 1844 1783 1845 // stack depth should not change when evaluating E2 1784 1846 assert(stackpush == stackpushsave); 1785 1847 1786 assert( tysize(e->Ety) <= REGSIZE);// result better be int1848 assert(sz <= 4); // result better be int 1787 1849 retregs = *pretregs & allregs; 1788 1850 cnop1 = cat(cnop1,allocreg(&retregs,&reg,TYint)); // allocate reg for result 1789 1851 cg = genjmp(NULL,JMP,FLcode,(block *) cnop2); // JMP cnop2 1790 1852 cnop1 = movregconst(cnop1,reg,e->Eoper == OPoror,0); // reg = 1 1791 1853 regcon.immed.mval &= ~mask[reg]; 
// mark reg as unavail 1792 1854 *pretregs = retregs; 1793 1855 if (e->Eoper == OPoror) 1794 1856 c = cat6(cl,cr,cnop3,cg,cnop1,cnop2); 1795 1857 else 1796 1858 c = cat6(cl,cr,cg,cnop3,cnop1,cnop2); 1797 1859 1798 1860 goto Lret; 1799 1861 } 1800 1862 cr = logexp(e2,1,FLcode,cnop1); 1801 1863 andregcon(&regconsave); 1802 1864 1803 1865 /* stack depth should not change when evaluating E2 */ 1804 1866 assert(stackpush == stackpushsave); 1805 1867 1806 assert( tysize(e->Ety) <= REGSIZE);// result better be int1868 assert(sz <= 4); // result better be int 1807 1869 retregs = *pretregs & (ALLREGS | mBP); 1808 1870 if (!retregs) retregs = ALLREGS; // if mPSW only 1809 1871 cg = allocreg(&retregs,&reg,TYint); // allocate reg for result 1810 1872 for (c1 = cg; c1; c1 = code_next(c1)) // for each instruction 1811 1873 gen(cnop1,c1); // duplicate it 1812 1874 cg = movregconst(cg,reg,0,*pretregs & mPSW); // MOV reg,0 1813 1875 regcon.immed.mval &= ~mask[reg]; // mark reg as unavail 1814 1876 genjmp(cg,JMP,FLcode,(block *) cnop2); // JMP cnop2 1815 1877 cnop1 = movregconst(cnop1,reg,1,*pretregs & mPSW); // reg = 1 1816 1878 regcon.immed.mval &= ~mask[reg]; // mark reg as unavail 1817 1879 *pretregs = retregs; 1818 1880 c = cat6(cl,cr,cnop3,cg,cnop1,cnop2); 1819 1881 Lret: 1820 1882 cgstate.stackclean--; 1821 1883 return c; 1822 1884 } 1823 1885 1824 1886 1825 1887 /********************* 1826 1888 * Generate code for shift left or shift right (OPshl,OPshr,OPashr). 
… … 1834 1896 regm_t retregs,rretregs; 1835 1897 code *cg,*cl,*cr; 1836 1898 code *c; 1837 1899 elem *e1; 1838 1900 elem *e2; 1839 1901 regm_t forccs,forregs; 1840 1902 bool e2isconst; 1841 1903 1842 1904 e1 = e->E1; 1843 1905 if (*pretregs == 0) // if don't want result 1844 1906 { c = codelem(e1,pretregs,FALSE); // eval left leaf 1845 1907 *pretregs = 0; // in case they got set 1846 1908 return cat(c,codelem(e->E2,pretregs,FALSE)); 1847 1909 } 1848 1910 1849 1911 tyml = tybasic(e1->Ety); 1850 1912 sz = tysize[tyml]; 1851 1913 assert(!tyfloating(tyml)); 1852 1914 uns = tyuns(tyml); 1853 1915 oper = e->Eoper; 1916 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 1917 unsigned grex = rex << 16; 1854 1918 1855 1919 #if SCPP 1856 1920 // Do this until the rest of the compiler does OPshr/OPashr correctly 1857 1921 if (oper == OPshr) 1858 1922 oper = (uns) ? OPshr : OPashr; 1859 1923 #endif 1860 1924 1861 1925 switch (oper) 1862 1926 { case OPshl: 1863 1927 s1 = 4; // SHL 1864 1928 s2 = 2; // RCL 1865 1929 break; 1866 1930 case OPshr: 1867 1931 s1 = 5; // SHR 1868 1932 s2 = 3; // RCR 1869 1933 break; 1870 1934 case OPashr: 1871 1935 s1 = 7; // SAR 1872 1936 s2 = 3; // RCR 1873 1937 break; 1874 1938 default: 1875 1939 assert(0); 1876 1940 } 1877 1941 1878 1942 c = cg = cr = CNIL; /* initialize */ 1879 1943 e2 = e->E2; 1880 1944 forccs = *pretregs & mPSW; /* if return result in CCs */ 1881 1945 forregs = *pretregs & (ALLREGS | mBP); // mask of possible return regs 1882 1946 e2isconst = FALSE; /* assume for the moment */ 1883 1947 byte = (sz == 1); 1884 1948 switch (e2->Eoper) 1885 1949 { 1886 1950 case OPconst: 1887 1951 e2isconst = TRUE; /* e2 is a constant */ 1888 1952 shiftcnt = e2->EV.Vint; /* get shift count */ 1889 if (( I32&& sz <= REGSIZE) ||1953 if ((!I16 && sz <= REGSIZE) || 1890 1954 shiftcnt <= 4 || /* if sequence of shifts */ 1891 1955 (sz == 2 && 1892 1956 (shiftcnt == 8 || config.target_cpu >= TARGET_80286)) || 1893 1957 (sz == 2 * REGSIZE && shiftcnt == 8 * 
REGSIZE) 1894 1958 ) 1895 1959 { retregs = (forregs) ? forregs 1896 1960 : ALLREGS; 1897 1961 if (byte) 1898 1962 { retregs &= BYTEREGS; 1899 1963 if (!retregs) 1900 1964 retregs = BYTEREGS; 1901 1965 } 1902 1966 else if (sz > REGSIZE && sz <= 2 * REGSIZE && 1903 1967 !(retregs & mMSW)) 1904 1968 retregs |= mMSW & ALLREGS; 1905 1969 if (s1 == 7) /* if arithmetic right shift */ 1906 1970 { 1907 1971 if (shiftcnt == 8) 1908 1972 retregs = mAX; 1909 1973 else if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE) … … 1918 1982 ) 1919 1983 { // Handle (shtlng)s << 16 1920 1984 regm_t r; 1921 1985 1922 1986 r = retregs & mMSW; 1923 1987 cl = codelem(e1->E1,&r,FALSE); // eval left leaf 1924 1988 cl = regwithvalue(cl,retregs & mLSW,0,&resreg,0); 1925 1989 cg = getregs(r); 1926 1990 retregs = r | mask[resreg]; 1927 1991 if (forccs) 1928 1992 { sreg = findreg(r); 1929 1993 cg = gentstreg(cg,sreg); 1930 1994 *pretregs &= ~mPSW; // already set 1931 1995 } 1932 1996 freenode(e1); 1933 1997 freenode(e2); 1934 1998 break; 1935 1999 } 1936 2000 1937 2001 // See if we should use LEA reg,xxx instead of shift 1938 if (I32 && shiftcnt >= 1 && shiftcnt <= 3 && 1939 sz == REGSIZE && oper == OPshl && 2002 if (!I16 && shiftcnt >= 1 && shiftcnt <= 3 && 2003 (sz == REGSIZE || (I64 && sz == 4)) && 2004 oper == OPshl && 1940 2005 e1->Eoper == OPvar && 1941 2006 !(*pretregs & mPSW) && 1942 2007 config.flags4 & CFG4speed 1943 2008 ) 1944 2009 { Symbol *s1 = e1->EV.sp.Vsym; 1945 2010 unsigned reg; 1946 2011 regm_t regm; 1947 2012 code cs; 1948 2013 1949 2014 if (isregvar(e1,&regm,&reg) && !(regm & retregs)) 1950 2015 { 1951 2016 cl = allocreg(&retregs,&resreg,e->Ety); 1952 2017 buildEA(&cs,-1,reg,1 << shiftcnt,0); 1953 2018 cs.Iop = 0x8D; 1954 c s.Irm |= modregrm(0,resreg,0);2019 code_newreg(&cs,resreg); 1955 2020 cs.Iflags = 0; 1956 2021 cg = gen(NULL,&cs); // LEA resreg,[reg * ss] 1957 2022 freenode(e1); 1958 2023 freenode(e2); 1959 2024 break; 1960 2025 } 1961 2026 } 1962 2027 1963 2028 cl = 
codelem(e1,&retregs,FALSE); // eval left leaf 1964 2029 //assert((retregs & regcon.mvar) == 0); 1965 2030 cg = getregs(retregs); // trash these regs 1966 2031 1967 2032 { 1968 2033 if (sz == 2 * REGSIZE) 1969 2034 { resreg = findregmsw(retregs); 1970 2035 sreg = findreglsw(retregs); 1971 2036 } 1972 2037 else 1973 2038 resreg = findreg(retregs); 1974 2039 if (config.target_cpu >= TARGET_80286 && 1975 2040 sz <= REGSIZE) 1976 2041 { 1977 2042 /* SHL resreg,shiftcnt */ 1978 2043 assert(!(sz == 1 && (mask[resreg] & ~BYTEREGS))); 1979 c = genc2(CNIL,0xC1 ^ byte, modregrm(3,s1,resreg),shiftcnt);2044 c = genc2(CNIL,0xC1 ^ byte,grex | modregxrmx(3,s1,resreg),shiftcnt); 1980 2045 if (shiftcnt == 1) 1981 2046 c->Iop += 0x10; /* short form of shift */ 1982 2047 // See if we need operand size prefix 1983 if ( I32&& oper != OPshl && sz == 2)2048 if (!I16 && oper != OPshl && sz == 2) 1984 2049 c->Iflags |= CFopsize; 1985 2050 if (forccs) 1986 2051 c->Iflags |= CFpsw; // need flags result 1987 2052 } 1988 2053 else if (shiftcnt == 8) 1989 { if (!(retregs & BYTEREGS) )2054 { if (!(retregs & BYTEREGS) || resreg >= 4) 1990 2055 { 1991 2056 cl = cat(cl,cg); 1992 2057 goto L1; 1993 2058 } 1994 2059 1995 2060 if (pass != PASSfinal && (!forregs || forregs & (mSI | mDI))) 1996 2061 { 1997 2062 // e1 might get into SI or DI in a later pass, 1998 2063 // so don't put CX into a register 1999 2064 cg = cat(cg, getregs(mCX)); 2000 2065 } 2001 2066 2002 2067 if (oper == OPshl) 2003 2068 { /* MOV regH,regL XOR regL,regL */ 2069 assert(resreg < 4 && !rex); 2004 2070 c = genregs(CNIL,0x8A,resreg+4,resreg); 2005 2071 genregs(c,0x32,resreg,resreg); 2006 2072 } 2007 2073 else // OPshr/OPashr 2008 2074 { 2009 2075 /* MOV regL,regH */ 2010 2076 c = genregs(CNIL,0x8A,resreg,resreg+4); 2011 2077 if (oper == OPashr) 2012 2078 gen1(c,0x98); /* CBW */ 2013 2079 else 2014 2080 genregs(c,0x32,resreg+4,resreg+4); /* CLR regH */ 2015 2081 } 2016 2082 if (forccs) 2017 2083 gentstreg(c,resreg); 2018 2084 } 2019 
2085 else if (shiftcnt == REGSIZE * 8) // it's an lword 2020 2086 { 2021 2087 if (oper == OPshl) 2022 2088 swap((int *) &resreg,(int *) &sreg); 2023 2089 c = genmovreg(CNIL,sreg,resreg); // MOV sreg,resreg … … 2066 2132 else 2067 2133 { 2068 2134 if (!(retregs & mMSW)) 2069 2135 retregs = ALLREGS & ~mCX; 2070 2136 } 2071 2137 cl = codelem(e->E1,&retregs,FALSE); /* eval left leaf */ 2072 2138 2073 2139 if (sz <= REGSIZE) 2074 2140 resreg = findreg(retregs); 2075 2141 else 2076 2142 { 2077 2143 resreg = findregmsw(retregs); 2078 2144 sreg = findreglsw(retregs); 2079 2145 } 2080 2146 L1: 2081 2147 rretregs = mCX; /* CX is shift count */ 2082 2148 if (sz <= REGSIZE) 2083 2149 { 2084 2150 cr = scodelem(e2,&rretregs,retregs,FALSE); /* get rvalue */ 2085 2151 cg = getregs(retregs); /* trash these regs */ 2086 c = gen2(CNIL,0xD3 ^ byte, modregrm(3,s1,resreg)); /* Sxx resreg,CX */2152 c = gen2(CNIL,0xD3 ^ byte,grex | modregrmx(3,s1,resreg)); /* Sxx resreg,CX */ 2087 2153 2088 2154 // Note that a shift by CL does not set the flags if 2089 2155 // CL == 0. If e2 is a constant, we know it isn't 0 2090 2156 // (it would have been optimized out). 2091 2157 if (e2isconst) 2092 2158 *pretregs &= mBP | ALLREGS; // flags already set with result 2093 2159 } 2094 2160 else if (sz == 2 * REGSIZE && 2095 2161 config.target_cpu >= TARGET_80386) 2096 2162 { unsigned hreg,lreg; 2097 2163 2098 2164 hreg = resreg; 2099 2165 lreg = sreg; 2100 2166 if (e2isconst) 2101 2167 { 2102 2168 cr = NULL; 2103 2169 cg = getregs(retregs); 2104 2170 if (shiftcnt & (REGSIZE * 8)) 2105 2171 { 2106 2172 if (oper == OPshr) … … 2309 2375 return cdind87(e, pretregs); 2310 2376 } 2311 2377 } 2312 2378 2313 2379 e1 = e->E1; 2314 2380 assert(e1); 2315 2381 switch (tym) 2316 2382 { case TYstruct: 2317 2383 case TYarray: 2318 2384 // This case should never happen, why is it here? 
2319 2385 tym = TYnptr; // don't confuse allocreg() 2320 2386 #if !TARGET_FLAT 2321 2387 if (*pretregs & (mES | mCX) || e->Ety & mTYfar) 2322 2388 tym = TYfptr; 2323 2389 #endif 2324 2390 2325 2391 #if 0 2326 2392 c = getlvalue(&cs,e,RMload); // get addressing mode 2327 2393 if (*pretregs == 0) 2328 2394 return c; 2329 idxregs = idxregm( cs.Irm,cs.Isib); /* mask of index regs used */2395 idxregs = idxregm(&cs); // mask of index regs used 2330 2396 c = cat(c,fixresult(e,idxregs,pretregs)); 2331 2397 return c; 2332 2398 #endif 2333 2399 break; 2334 2400 } 2335 2401 sz = tysize[tym]; 2336 2402 byte = tybyte(tym) != 0; 2337 2403 2338 2404 c = getlvalue(&cs,e,RMload); // get addressing mode 2339 2405 /*fprintf(stderr,"cd2 :\n"); WRcodlst(c);*/ 2340 2406 if (*pretregs == 0) 2341 2407 return c; 2342 2408 2343 idxregs = idxregm( cs.Irm,cs.Isib); /* mask of index regs used */2409 idxregs = idxregm(&cs); // mask of index regs used 2344 2410 2345 2411 if (*pretregs == mPSW) 2346 2412 { 2347 if ( I32&& tym == TYfloat)2413 if (!I16 && tym == TYfloat) 2348 2414 { retregs = ALLREGS & ~idxregs; 2349 2415 c = cat(c,allocreg(&retregs,&reg,TYfloat)); 2350 2416 cs.Iop = 0x8B; 2351 c s.Irm |= modregrm(0,reg,0);2352 ce = gen(CNIL,&cs); / * MOV reg,lsw */2353 gen2(ce,0xD1,modregrm (3,4,reg)); /* SHL reg,1 */2417 code_newreg(&cs,reg); 2418 ce = gen(CNIL,&cs); // MOV reg,lsw 2419 gen2(ce,0xD1,modregrmx(3,4,reg)); // SHL reg,1 2354 2420 } 2355 2421 else if (sz <= REGSIZE) 2356 2422 { 2357 2423 cs.Iop = 0x81 ^ byte; 2358 2424 cs.Irm |= modregrm(0,7,0); 2359 2425 cs.IFL2 = FLconst; 2360 2426 cs.IEV2.Vint = 0; 2361 2427 ce = gen(CNIL,&cs); /* CMP [idx],0 */ 2362 2428 } 2363 else if ( I32&& sz == REGSIZE + 2) // if far pointer2429 else if (!I16 && sz == REGSIZE + 2) // if far pointer 2364 2430 { retregs = ALLREGS & ~idxregs; 2365 2431 c = cat(c,allocreg(&retregs,&reg,TYint)); 2366 2432 cs.Iop = 0x0F; 2367 2433 cs.Iop2 = 0xB7; 2368 2434 cs.Irm |= modregrm(0,reg,0); 2369 2435 getlvalue_msw(&cs); 2370 
2436 ce = gen(CNIL,&cs); /* MOVZX reg,msw */ 2371 2437 goto L4; 2372 2438 } 2373 2439 else if (sz <= 2 * REGSIZE) 2374 2440 { retregs = ALLREGS & ~idxregs; 2375 2441 c = cat(c,allocreg(&retregs,&reg,TYint)); 2376 2442 cs.Iop = 0x8B; 2377 2443 cs.Irm |= modregrm(0,reg,0); 2378 2444 getlvalue_msw(&cs); 2379 2445 ce = gen(CNIL,&cs); /* MOV reg,msw */ 2380 2446 if (I32) 2381 2447 { if (tym == TYdouble || tym == TYdouble_alias) 2382 2448 gen2(ce,0xD1,modregrm(3,4,reg)); // SHL reg,1 2383 2449 } … … 2407 2473 2408 2474 /* Optimizer should not CSE these, as the result is worse code! */ 2409 2475 assert(!e->Ecount); 2410 2476 2411 2477 cs.Iop = 0xFF; 2412 2478 cs.Irm |= modregrm(0,6,0); 2413 2479 cs.IEVoffset1 += 8 - REGSIZE; 2414 2480 stackchanged = 1; 2415 2481 i = 8 - REGSIZE; 2416 2482 do 2417 2483 { 2418 2484 c = gen(c,&cs); /* PUSH EA+i */ 2419 2485 c = genadjesp(c,REGSIZE); 2420 2486 cs.IEVoffset1 -= REGSIZE; 2421 2487 stackpush += REGSIZE; 2422 2488 i -= REGSIZE; 2423 2489 } 2424 2490 while (i >= 0); 2425 2491 goto L3; 2426 2492 } 2427 if ( !I32&& sz == 8)2493 if (I16 && sz == 8) 2428 2494 retregs = DOUBLEREGS_16; 2429 2495 2430 2496 /* Watch out for loading an lptr from an lptr! We must have */ 2431 2497 /* the offset loaded into a different register. 
*/ 2432 2498 /*if (retregs & mES && (cs.Iflags & CFSEG) == CFes) 2433 2499 retregs = ALLREGS;*/ 2434 2500 2435 2501 { 2436 2502 assert(!byte || retregs & BYTEREGS); 2437 2503 c = cat(c,allocreg(&retregs,&reg,tym)); /* alloc registers */ 2438 2504 } 2439 2505 if (sz <= REGSIZE) 2440 2506 { 2441 2507 cs.Iop = 0x8B ^ byte; 2442 L2: c s.Irm |= modregrm(0,reg,0);2508 L2: code_newreg(&cs,reg); 2443 2509 ce = gen(CNIL,&cs); /* MOV reg,[idx] */ 2444 2510 } 2445 2511 else if ((tym == TYfptr || tym == TYhptr) && retregs & mES) 2446 2512 { 2447 2513 cs.Iop = 0xC4; /* LES reg,[idx] */ 2448 2514 goto L2; 2449 2515 } 2450 2516 else if (sz <= 2 * REGSIZE) 2451 2517 { unsigned lsreg; 2452 2518 2453 2519 cs.Iop = 0x8B; 2454 2520 /* Be careful not to interfere with index registers */ 2455 2521 if (I32) 2456 2522 { 2457 2523 /* Can't handle if both result registers are used in */ 2458 2524 /* the addressing mode. */ 2459 2525 if ((retregs & idxregs) == retregs) 2460 2526 { 2461 2527 retregs = mMSW & allregs & ~idxregs; 2462 2528 if (!retregs) … … 2501 2567 ce = gen(CNIL,&cs); // MOV reg,msw 2502 2568 if (sz == REGSIZE + 2) 2503 2569 ce->Iflags |= CFopsize; 2504 2570 getlvalue_lsw(&cs); // MOV lsreg,lsw 2505 2571 } 2506 2572 NEWREG(cs.Irm,lsreg); 2507 2573 gen(ce,&cs); 2508 2574 } 2509 2575 else 2510 2576 { 2511 2577 /* Index registers are always the lsw! 
*/ 2512 2578 cs.Irm |= modregrm(0,reg,0); 2513 2579 getlvalue_msw(&cs); 2514 2580 ce = gen(CNIL,&cs); /* MOV reg,msw */ 2515 2581 lsreg = findreglsw(retregs); 2516 2582 NEWREG(cs.Irm,lsreg); 2517 2583 getlvalue_lsw(&cs); /* MOV lsreg,lsw */ 2518 2584 gen(ce,&cs); 2519 2585 } 2520 2586 } 2521 else if ( !I32&& sz == 8)2587 else if (I16 && sz == 8) 2522 2588 { 2523 2589 assert(reg == AX); 2524 2590 cs.Iop = 0x8B; 2525 2591 cs.IEVoffset1 += 6; 2526 2592 ce = gen(CNIL,&cs); /* MOV AX,EA+6 */ 2527 2593 cs.Irm |= modregrm(0,CX,0); 2528 2594 cs.IEVoffset1 -= 4; 2529 2595 gen(ce,&cs); /* MOV CX,EA+2 */ 2530 2596 NEWREG(cs.Irm,DX); 2531 2597 cs.IEVoffset1 -= 2; 2532 2598 gen(ce,&cs); /* MOV DX,EA */ 2533 2599 cs.IEVoffset1 += 4; 2534 2600 NEWREG(cs.Irm,BX); 2535 2601 gen(ce,&cs); /* MOV BX,EA+4 */ 2536 2602 } 2537 2603 else 2538 2604 assert(0); 2539 2605 c = cat(c,ce); 2540 2606 L3: 2541 2607 c = cat(c,fixresult(e,retregs,pretregs)); 2542 2608 } 2543 2609 /*fprintf(stderr,"cdafter :\n"); WRcodlst(c);*/ 2544 2610 return c; 2545 2611 } 2546 2612 2547 2613 2548 2614 2549 2615 #if TARGET_FLAT 2550 2616 #define cod2_setES(ty) NULL 2551 2617 #else 2552 2618 /******************************** 2553 2619 * Generate code to load ES with the right segment value, 2554 2620 * do nothing if e is a far pointer. 
2555 2621 */ 2556 2622 2557 STATIC code * cod2_setES(tym_t ty)2623 STATIC code *cod2_setES(tym_t ty) 2558 2624 { code *c2; 2559 2625 int push; 2560 2626 2561 2627 c2 = CNIL; 2562 2628 switch (tybasic(ty)) 2563 2629 { 2564 2630 case TYnptr: 2565 2631 if (!(config.flags3 & CFG3eseqds)) 2566 2632 { push = 0x1E; /* PUSH DS */ 2567 2633 goto L1; 2568 2634 } 2569 2635 break; 2570 2636 case TYcptr: 2571 2637 push = 0x0E; /* PUSH CS */ 2572 2638 goto L1; 2573 2639 case TYsptr: 2574 2640 if ((config.wflags & WFssneds) || !(config.flags3 & CFG3eseqds)) 2575 2641 { push = 0x16; /* PUSH SS */ 2576 2642 L1: 2577 2643 /* Must load ES */ 2578 2644 c2 = getregs(mES); 2579 2645 c2 = gen1(c2,push); 2580 2646 gen1(c2,0x07); /* POP ES */ 2581 2647 } 2582 2648 break; 2583 2649 } 2584 2650 return c2; 2585 2651 } 2586 2652 #endif 2587 2653 2588 2654 /******************************** 2589 2655 * Generate code for intrinsic strlen(). 2590 2656 */ 2591 2657 2592 2658 code *cdstrlen( elem *e, regm_t *pretregs) 2593 2659 { code *c1,*c2,*c3,*c4; 2594 regm_t retregs;2595 tym_t ty1;2596 2660 2597 2661 /* Generate strlen in CX: 2598 2662 LES DI,e1 2599 2663 CLR AX ;scan for 0 2600 2664 MOV CX,-1 ;largest possible string 2601 2665 REPNE SCASB 2602 2666 NOT CX 2603 2667 DEC CX 2604 2668 */ 2605 2669 2606 re tregs = mDI;2607 ty 1 = e->E1->Ety;2670 regm_t retregs = mDI; 2671 tym_t ty1 = e->E1->Ety; 2608 2672 if (!tyreg(ty1)) 2609 2673 retregs |= mES; 2610 2674 c1 = codelem(e->E1,&retregs,FALSE); 2611 2675 2612 2676 /* Make sure ES contains proper segment value */ 2613 2677 c2 = cod2_setES(ty1); 2614 2678 2679 unsigned char rex = I64 ? REX_W : 0; 2680 2615 2681 c3 = getregs_imm(mAX | mCX); 2616 2682 c3 = movregconst(c3,AX,0,1); /* MOV AL,0 */ 2617 c3 = movregconst(c3,CX,-1 ,0); /* MOV CX,-1 */2683 c3 = movregconst(c3,CX,-1LL,I64 ? 
64 : 0); // MOV CX,-1 2618 2684 c3 = cat(c3,getregs(mDI|mCX)); 2619 2685 c3 = gen1(c3,0xF2); /* REPNE */ 2620 2686 gen1(c3,0xAE); /* SCASB */ 2621 2687 genregs(c3,0xF7,2,CX); /* NOT CX */ 2622 c4 = gen1(CNIL,0x48 + CX); /* DEC CX */ 2688 code_orrex(c3,rex); 2689 if (I64) 2690 c4 = gen2(CNIL,0xFF,(rex << 16) | modregrm(3,1,CX)); // DEC reg 2691 else 2692 c4 = gen1(CNIL,0x48 + CX); // DEC CX 2623 2693 2624 2694 if (*pretregs & mPSW) 2625 2695 { 2626 2696 c4->Iflags |= CFpsw; 2627 2697 *pretregs &= ~mPSW; 2628 2698 } 2629 2699 return cat6(c1,c2,c3,c4,fixresult(e,mCX,pretregs),CNIL); 2630 2700 } 2631 2701 2632 2702 2633 2703 /********************************* 2634 2704 * Generate code for strcmp(s1,s2) intrinsic. 2635 2705 */ 2636 2706 2637 2707 code *cdstrcmp( elem *e, regm_t *pretregs) 2638 2708 { code *c1,*c1a,*c2,*c3,*c4; 2639 regm_t retregs1;2640 regm_t retregs;2641 tym_t ty1,ty2;2642 2709 char need_DS; 2643 2710 int segreg; 2644 2711 2645 2712 /* 2646 2713 MOV SI,s1 ;get destination pointer (s1) 2647 2714 MOV CX,s1+2 2648 2715 LES DI,s2 ;get source pointer (s2) 2649 2716 PUSH DS 2650 2717 MOV DS,CX 2651 2718 CLR AX ;scan for 0 2652 2719 MOV CX,-1 ;largest possible string 2653 2720 REPNE SCASB 2654 2721 NOT CX ;CX = string length of s2 2655 2722 SUB DI,CX ;point DI back to beginning 2656 2723 REPE CMPSB ;compare string 2657 2724 POP DS 2658 2725 JE L1 ;strings are equal 2659 2726 SBB AX,AX 2660 2727 SBB AX,-1 2661 2728 L1: 2662 2729 */ 2663 2730 2664 re tregs1 = mSI;2665 ty 1 = e->E1->Ety;2731 regm_t retregs1 = mSI; 2732 tym_t ty1 = e->E1->Ety; 2666 2733 if (!tyreg(ty1)) 2667 2734 retregs1 |= mCX; 2668 2735 c1 = codelem(e->E1,&retregs1,FALSE); 2669 2736 2670 re tregs = mDI;2671 ty 2 = e->E2->Ety;2737 regm_t retregs = mDI; 2738 tym_t ty2 = e->E2->Ety; 2672 2739 if (!tyreg(ty2)) 2673 2740 retregs |= mES; 2674 2741 c1 = cat(c1,scodelem(e->E2,&retregs,retregs1,FALSE)); 2675 2742 2676 2743 /* Make sure ES contains proper segment value */ 2677 2744 c2 = cod2_setES(ty2); 
2678 2745 c3 = getregs_imm(mAX | mCX); 2746 2747 unsigned char rex = I64 ? REX_W : 0; 2679 2748 2680 2749 /* Load DS with right value */ 2681 2750 switch (tybasic(ty1)) 2682 2751 { 2683 2752 case TYnptr: 2684 2753 need_DS = FALSE; 2685 2754 break; 2686 2755 case TYsptr: 2687 2756 if (config.wflags & WFssneds) /* if sptr can't use DS segment */ 2688 2757 segreg = SEG_SS; 2689 2758 else 2690 2759 segreg = SEG_DS; 2691 2760 goto L1; 2692 2761 case TYcptr: 2693 2762 segreg = SEG_CS; 2694 2763 L1: 2695 2764 c3 = gen1(c3,0x1E); /* PUSH DS */ 2696 2765 gen1(c3,0x06 + (segreg << 3)); /* PUSH segreg */ 2697 2766 gen1(c3,0x1F); /* POP DS */ 2698 2767 need_DS = TRUE; 2699 2768 break; 2700 2769 case TYfptr: 2701 2770 case TYvptr: 2702 2771 case TYhptr: 2703 2772 c3 = gen1(c3,0x1E); /* PUSH DS */ 2704 2773 gen2(c3,0x8E,modregrm(3,SEG_DS,CX)); /* MOV DS,CX */ 2705 2774 need_DS = TRUE; 2706 2775 break; 2707 2776 default: 2708 2777 assert(0); 2709 2778 } 2710 2779 2711 2780 c3 = movregconst(c3,AX,0,0); /* MOV AX,0 */ 2712 c3 = movregconst(c3,CX,-1 ,0); /* MOV CX,-1 */2781 c3 = movregconst(c3,CX,-1LL,I64 ? 
64 : 0); // MOV CX,-1 2713 2782 c3 = cat(c3,getregs(mSI|mDI|mCX)); 2714 2783 c3 = gen1(c3,0xF2); /* REPNE */ 2715 2784 gen1(c3,0xAE); /* SCASB */ 2716 2785 genregs(c3,0xF7,2,CX); /* NOT CX */ 2786 code_orrex(c3,rex); 2717 2787 genregs(c3,0x2B,DI,CX); /* SUB DI,CX */ 2788 code_orrex(c3,rex); 2718 2789 gen1(c3,0xF3); /* REPE */ 2719 2790 gen1(c3,0xA6); /* CMPSB */ 2720 2791 if (need_DS) 2721 2792 gen1(c3,0x1F); /* POP DS */ 2722 2793 c4 = gennop(CNIL); 2723 2794 if (*pretregs != mPSW) /* if not flags only */ 2724 2795 { 2725 2796 genjmp(c3,JE,FLcode,(block *) c4); /* JE L1 */ 2726 2797 c3 = cat(c3,getregs(mAX)); 2727 2798 genregs(c3,0x1B,AX,AX); /* SBB AX,AX */ 2728 genc2(c3,0x81,modregrm(3,3,AX),(targ_uns)-1); /* SBB AX,-1 */ 2799 code_orrex(c3,rex); 2800 genc2(c3,0x81,(rex << 16) | modregrm(3,3,AX),(targ_uns)-1); // SBB AX,-1 2729 2801 } 2730 2802 2731 2803 *pretregs &= ~mPSW; 2732 2804 return cat6(c1,c2,c3,c4,fixresult(e,mAX,pretregs),CNIL); 2733 2805 } 2734 2806 2735 2807 /********************************* 2736 2808 * Generate code for memcmp(s1,s2,n) intrinsic. 
2737 2809 */ 2738 2810 2739 2811 code *cdmemcmp(elem *e,regm_t *pretregs) 2740 2812 { code *c1,*c2,*c3,*c4; 2741 regm_t retregs1;2742 regm_t retregs;2743 regm_t retregs3;2744 tym_t ty1,ty2;2745 2813 char need_DS; 2746 2814 int segreg; 2747 elem *e1;2748 2815 2749 2816 /* 2750 2817 MOV SI,s1 ;get destination pointer (s1) 2751 2818 MOV DX,s1+2 2752 2819 LES DI,s2 ;get source pointer (s2) 2753 2820 MOV CX,n ;get number of bytes to compare 2754 2821 PUSH DS 2755 2822 MOV DS,DX 2756 2823 XOR AX,AX 2757 2824 REPE CMPSB ;compare string 2758 2825 POP DS 2759 2826 JE L1 ;strings are equal 2760 2827 SBB AX,AX 2761 2828 SBB AX,-1 2762 2829 L1: 2763 2830 */ 2764 2831 2765 e 1 = e->E1;2832 elem *e1 = e->E1; 2766 2833 assert(e1->Eoper == OPparam); 2767 2834 2768 2835 // Get s1 into DX:SI 2769 re tregs1 = mSI;2770 ty 1 = e1->E1->Ety;2836 regm_t retregs1 = mSI; 2837 tym_t ty1 = e1->E1->Ety; 2771 2838 if (!tyreg(ty1)) 2772 2839 retregs1 |= mDX; 2773 2840 c1 = codelem(e1->E1,&retregs1,FALSE); 2774 2841 2775 2842 // Get s2 into ES:DI 2776 re tregs = mDI;2777 ty 2 = e1->E2->Ety;2843 regm_t retregs = mDI; 2844 tym_t ty2 = e1->E2->Ety; 2778 2845 if (!tyreg(ty2)) 2779 2846 retregs |= mES; 2780 2847 c1 = cat(c1,scodelem(e1->E2,&retregs,retregs1,FALSE)); 2781 2848 freenode(e1); 2782 2849 2783 2850 // Get nbytes into CX 2784 re tregs3 = mCX;2851 regm_t retregs3 = mCX; 2785 2852 c1 = cat(c1,scodelem(e->E2,&retregs3,retregs | retregs1,FALSE)); 2786 2853 2787 2854 /* Make sure ES contains proper segment value */ 2788 2855 c2 = cod2_setES(ty2); 2789 2856 2790 2857 /* Load DS with right value */ 2791 2858 c3 = NULL; 2792 2859 switch (tybasic(ty1)) 2793 2860 { 2794 2861 case TYnptr: 2795 2862 need_DS = FALSE; 2796 2863 break; 2797 2864 case TYsptr: 2798 2865 if (config.wflags & WFssneds) /* if sptr can't use DS segment */ 2799 2866 segreg = SEG_SS; 2800 2867 else 2801 2868 segreg = SEG_DS; 2802 2869 goto L1; 2803 2870 case TYcptr: 2804 2871 segreg = SEG_CS; … … 2863 2930 CLR AX ;scan for 0 2864 
2931 MOV CX,-1 ;largest possible string 2865 2932 REPNE SCASB ;find end of s2 2866 2933 NOT CX ;CX = strlen(s2) + 1 (for EOS) 2867 2934 SUB DI,CX 2868 2935 MOV SI,DI 2869 2936 PUSH DS 2870 2937 PUSH ES 2871 2938 LES DI,s1 2872 2939 POP DS 2873 2940 MOV AX,DI ;return value is s1 2874 2941 REP MOVSB 2875 2942 POP DS 2876 2943 */ 2877 2944 2878 2945 stackchanged = 1; 2879 2946 retregs = mDI; 2880 2947 ty2 = tybasic(e->E2->Ety); 2881 2948 if (!tyreg(ty2)) 2882 2949 retregs |= mES; 2950 unsigned char rex = I64 ? REX_W : 0; 2883 2951 c1 = codelem(e->E2,&retregs,FALSE); 2884 2952 2885 2953 /* Make sure ES contains proper segment value */ 2886 2954 c2 = cod2_setES(ty2); 2887 2955 c3 = getregs_imm(mAX | mCX); 2888 2956 c3 = movregconst(c3,AX,0,1); /* MOV AL,0 */ 2889 c3 = movregconst(c3,CX,-1, 0); /* MOV CX,-1 */2957 c3 = movregconst(c3,CX,-1,I64?64:0); // MOV CX,-1 2890 2958 c3 = cat(c3,getregs(mAX|mCX|mSI|mDI)); 2891 2959 c3 = gen1(c3,0xF2); /* REPNE */ 2892 2960 gen1(c3,0xAE); /* SCASB */ 2893 2961 genregs(c3,0xF7,2,CX); /* NOT CX */ 2962 code_orrex(c3,rex); 2894 2963 genregs(c3,0x2B,DI,CX); /* SUB DI,CX */ 2964 code_orrex(c3,rex); 2895 2965 genmovreg(c3,SI,DI); /* MOV SI,DI */ 2966 code_orrex(c3,rex); 2896 2967 2897 2968 /* Load DS with right value */ 2898 2969 switch (ty2) 2899 2970 { 2900 2971 case TYnptr: 2901 2972 need_DS = FALSE; 2902 2973 break; 2903 2974 case TYsptr: 2904 2975 if (config.wflags & WFssneds) /* if sptr can't use DS segment */ 2905 2976 segreg = SEG_SS; 2906 2977 else 2907 2978 segreg = SEG_DS; 2908 2979 goto L1; 2909 2980 case TYcptr: 2910 2981 segreg = SEG_CS; 2911 2982 L1: 2912 2983 c3 = gen1(c3,0x1E); /* PUSH DS */ 2913 2984 gen1(c3,0x06 + (segreg << 3)); /* PUSH segreg */ 2914 2985 genadjesp(c3,REGSIZE * 2); 2915 2986 need_DS = TRUE; … … 2923 2994 default: 2924 2995 assert(0); 2925 2996 } 2926 2997 2927 2998 retregs = mDI; 2928 2999 ty1 = tybasic(e->E1->Ety); 2929 3000 if (!tyreg(ty1)) 2930 3001 retregs |= mES; 2931 3002 c3 = 
cat(c3,scodelem(e->E1,&retregs,mCX|mSI,FALSE)); 2932 3003 c3 = cat(c3,getregs(mAX|mCX|mSI|mDI)); 2933 3004 2934 3005 /* Make sure ES contains proper segment value */ 2935 3006 if (ty2 != TYnptr || ty1 != ty2) 2936 3007 c4 = cod2_setES(ty1); 2937 3008 else 2938 3009 c4 = CNIL; /* ES is already same as DS */ 2939 3010 2940 3011 if (need_DS) 2941 3012 c4 = gen1(c4,0x1F); /* POP DS */ 2942 3013 if (*pretregs) 2943 c4 = genmovreg(c4,AX,DI); /* MOV AX,DI */ 3014 { c4 = genmovreg(c4,AX,DI); /* MOV AX,DI */ 3015 code_orrex(c4,rex); 3016 } 2944 3017 c4 = gen1(c4,0xF3); /* REP */ 2945 3018 gen1(c4,0xA4); /* MOVSB */ 2946 3019 2947 3020 if (need_DS) 2948 3021 { gen1(c4,0x1F); /* POP DS */ 2949 3022 genadjesp(c4,-(REGSIZE * 2)); 2950 3023 } 2951 3024 return cat6(c1,c2,c3,c4,fixresult(e,mAX | mES,pretregs),CNIL); 2952 3025 } 2953 3026 2954 3027 /********************************* 2955 3028 * Generate code for memcpy(s1,s2,n) intrinsic. 2956 3029 * OPmemcpy 2957 3030 * / \ 2958 3031 * s1 OPparam 2959 3032 * / \ 2960 3033 * s2 n 2961 3034 */ 2962 3035 2963 3036 code *cdmemcpy(elem *e,regm_t *pretregs) … … 2986 3059 assert(e2->Eoper == OPparam); 2987 3060 2988 3061 // Get s2 into DX:SI 2989 3062 retregs2 = mSI; 2990 3063 ty2 = e2->E1->Ety; 2991 3064 if (!tyreg(ty2)) 2992 3065 retregs2 |= mDX; 2993 3066 c1 = codelem(e2->E1,&retregs2,FALSE); 2994 3067 2995 3068 // Get nbytes into CX 2996 3069 retregs3 = mCX; 2997 3070 c1 = cat(c1,scodelem(e2->E2,&retregs3,retregs2,FALSE)); 2998 3071 freenode(e2); 2999 3072 3000 3073 // Get s1 into ES:DI 3001 3074 retregs1 = mDI; 3002 3075 ty1 = e->E1->Ety; 3003 3076 if (!tyreg(ty1)) 3004 3077 retregs1 |= mES; 3005 3078 c1 = cat(c1,scodelem(e->E1,&retregs1,retregs2 | retregs3,FALSE)); 3079 3080 unsigned char rex = I64 ? 
REX_W : 0; 3006 3081 3007 3082 /* Make sure ES contains proper segment value */ 3008 3083 c2 = cod2_setES(ty1); 3009 3084 3010 3085 /* Load DS with right value */ 3011 3086 c3 = NULL; 3012 3087 switch (tybasic(ty2)) 3013 3088 { 3014 3089 case TYnptr: 3015 3090 need_DS = FALSE; 3016 3091 break; 3017 3092 case TYsptr: 3018 3093 if (config.wflags & WFssneds) /* if sptr can't use DS segment */ 3019 3094 segreg = SEG_SS; 3020 3095 else 3021 3096 segreg = SEG_DS; 3022 3097 goto L1; 3023 3098 case TYcptr: 3024 3099 segreg = SEG_CS; 3025 3100 L1: 3026 3101 c3 = gen1(c3,0x1E); /* PUSH DS */ 3027 3102 gen1(c3,0x06 + (segreg << 3)); /* PUSH segreg */ 3028 3103 gen1(c3,0x1F); /* POP DS */ 3029 3104 need_DS = TRUE; 3030 3105 break; 3031 3106 case TYfptr: 3032 3107 case TYvptr: 3033 3108 case TYhptr: 3034 3109 c3 = gen1(c3,0x1E); /* PUSH DS */ 3035 3110 gen2(c3,0x8E,modregrm(3,SEG_DS,DX)); /* MOV DS,DX */ 3036 3111 need_DS = TRUE; 3037 3112 break; 3038 3113 default: 3039 3114 assert(0); 3040 3115 } 3041 3116 3042 3117 if (*pretregs) // if need return value 3043 3118 { c3 = cat(c3,getregs(mAX)); 3044 3119 c3 = genmovreg(c3,AX,DI); 3120 code_orrex(c3, rex); 3045 3121 } 3046 3122 3047 3123 if (0 && I32 && config.flags4 & CFG4speed) 3048 3124 { 3049 3125 /* This is only faster if the memory is dword aligned, if not 3050 3126 * it is significantly slower than just a rep movsb. 
3051 3127 */ 3052 3128 /* mov EDX,ECX 3053 3129 * shr ECX,2 3054 3130 * jz L1 3055 3131 * repe movsd 3056 3132 * L1: and EDX,3 3057 3133 * jz L2 3058 3134 * mov ECX,EDX 3059 3135 * repe movsb 3060 3136 * L2: nop 3061 3137 */ 3062 3138 c3 = cat(c3,getregs(mSI | mDI | mCX | mDX)); 3063 3139 c3 = genmovreg(c3,DX,CX); // MOV EDX,ECX 3064 3140 c3 = genc2(c3,0xC1,modregrm(3,5,CX),2); // SHR ECX,2 3065 3141 code *cx = genc2(CNIL, 0x81, modregrm(3,4,DX),3); // AND EDX,3 3066 3142 genjmp(c3, JE, FLcode, (block *)cx); // JZ L1 3067 3143 gen1(c3,0xF3); // REPE 3068 3144 gen1(c3,0xA5); // MOVSW 3069 3145 c3 = cat(c3,cx); 3070 3146 3071 3147 code *cnop = gennop(CNIL); 3072 3148 genjmp(c3, JE, FLcode, (block *)cnop); // JZ L2 3073 3149 genmovreg(c3,CX,DX); // MOV ECX,EDX 3074 3150 gen1(c3,0xF3); // REPE 3075 3151 gen1(c3,0xA4); // MOVSB 3076 3152 c3 = cat(c3, cnop); 3077 3153 } 3078 3154 else 3079 3155 { 3080 3156 c3 = cat(c3,getregs(mSI | mDI | mCX)); 3081 3157 if (!I32 && config.flags4 & CFG4speed) // if speed optimization 3082 { c3 = gen2(c3,0xD1, modregrm(3,5,CX)); // SHR CX,13158 { c3 = gen2(c3,0xD1,(rex << 16) | modregrm(3,5,CX)); // SHR CX,1 3083 3159 gen1(c3,0xF3); // REPE 3084 3160 gen1(c3,0xA5); // MOVSW 3085 gen2(c3,0x11, modregrm(3,CX,CX)); // ADC CX,CX3161 gen2(c3,0x11,(rex << 16) | modregrm(3,CX,CX)); // ADC CX,CX 3086 3162 } 3087 3163 c3 = gen1(c3,0xF3); // REPE 3088 3164 gen1(c3,0xA4); // MOVSB 3089 3165 if (need_DS) 3090 3166 gen1(c3,0x1F); // POP DS 3091 3167 } 3092 3168 return cat4(c1,c2,c3,fixresult(e,mES|mAX,pretregs)); 3093 3169 } 3094 3170 3095 3171 3096 3172 /********************************* 3097 3173 * Generate code for memset(s,val,n) intrinsic. 
3098 3174 * (s OPmemset (n OPparam val)) 3099 3175 */ 3100 3176 3101 3177 #if 1 3102 3178 code *cdmemset(elem *e,regm_t *pretregs) 3103 3179 { code *c1,*c2,*c3 = NULL,*c4; 3104 3180 regm_t retregs1; 3105 3181 regm_t retregs2; 3106 3182 regm_t retregs3; 3107 3183 unsigned reg,vreg; 3108 3184 tym_t ty1; 3109 3185 elem *e2,*e1; 3110 3186 int segreg; 3111 3187 unsigned remainder; 3112 3188 targ_uns numbytes,numwords; 3113 3189 int op; 3114 targ_ unsvalue;3190 targ_size_t value; 3115 3191 3116 3192 //printf("cdmemset(*pretregs = x%x)\n", *pretregs); 3117 3193 e1 = e->E1; 3118 3194 e2 = e->E2; 3119 3195 assert(e2->Eoper == OPparam); 3196 3197 unsigned char rex = I64 ? REX_W : 0; 3120 3198 3121 3199 if (e2->E2->Eoper == OPconst) 3122 3200 { 3123 3201 value = el_tolong(e2->E2); 3124 3202 value &= 0xFF; 3125 3203 value |= value << 8; 3126 3204 value |= value << 16; 3205 value |= value << 32; 3127 3206 } 3128 3207 3129 3208 if (e2->E1->Eoper == OPconst) 3130 3209 { 3131 3210 numbytes = el_tolong(e2->E1); 3132 if (numbytes <= REP_THRESHOLD && I32 && // doesn't work for 16 bits 3211 if (numbytes <= REP_THRESHOLD && 3212 !I16 && // doesn't work for 16 bits 3133 3213 e2->E2->Eoper == OPconst) 3134 3214 { 3135 3215 targ_uns offset = 0; 3136 3216 retregs1 = *pretregs; 3137 3217 if (!retregs1) 3138 3218 retregs1 = ALLREGS; 3139 3219 c1 = codelem(e->E1,&retregs1,FALSE); 3140 3220 reg = findreg(retregs1); 3141 3221 if (e2->E2->Eoper == OPconst) 3142 3222 { 3143 3223 switch (numbytes) 3144 3224 { 3145 3225 case 4: // MOV [reg],imm32 3146 c3 = genc2(CNIL,0xC7,modregrm (0,0,reg),value);3226 c3 = genc2(CNIL,0xC7,modregrmx(0,0,reg),value); 3147 3227 goto fixres; 3148 3228 case 2: // MOV [reg],imm16 3149 c3 = genc2(CNIL,0xC7,modregrm (0,0,reg),value);3229 c3 = genc2(CNIL,0xC7,modregrmx(0,0,reg),value); 3150 3230 c3->Iflags = CFopsize; 3151 3231 goto fixres; 3152 3232 case 1: // MOV [reg],imm8 3153 c3 = genc2(CNIL,0xC6,modregrm (0,0,reg),value);3233 c3 = 
genc2(CNIL,0xC6,modregrmx(0,0,reg),value); 3154 3234 goto fixres; 3155 3235 } 3156 3236 } 3157 3237 3158 c1 = regwithvalue(c1, BYTEREGS & ~retregs1, value, &vreg, 0);3238 c1 = regwithvalue(c1, BYTEREGS & ~retregs1, value, &vreg, I64 ? 64 : 0); 3159 3239 freenode(e2->E2); 3160 3240 freenode(e2); 3161 3241 3162 3242 while (numbytes >= REGSIZE) 3163 3243 { // MOV dword ptr offset[reg],vreg 3164 c2 = gen2(CNIL,0x89, modregrm(2,vreg,reg));3244 c2 = gen2(CNIL,0x89,(rex << 16) | modregxrmx(2,vreg,reg)); 3165 3245 c2->IEVoffset1 = offset; 3166 3246 c2->IFL1 = FLconst; 3167 3247 numbytes -= REGSIZE; 3168 3248 offset += REGSIZE; 3169 3249 c3 = cat(c3,c2); 3170 3250 } 3251 if (numbytes & 4) 3252 { // MOV dword ptr offset[reg],vreg 3253 c2 = gen2(CNIL,0x89,modregxrmx(2,vreg,reg)); 3254 c2->IEVoffset1 = offset; 3255 c2->IFL1 = FLconst; 3256 offset += 4; 3257 c3 = cat(c3,c2); 3258 } 3171 3259 if (numbytes & 2) 3172 3260 { // MOV word ptr offset[reg],vreg 3173 c2 = gen2(CNIL,0x89,modreg rm(2,vreg,reg));3261 c2 = gen2(CNIL,0x89,modregxrmx(2,vreg,reg)); 3174 3262 c2->IEVoffset1 = offset; 3175 3263 c2->IFL1 = FLconst; 3176 3264 c2->Iflags = CFopsize; 3177 3265 offset += 2; 3178 3266 c3 = cat(c3,c2); 3179 3267 } 3180 3268 if (numbytes & 1) 3181 3269 { // MOV byte ptr offset[reg],vreg 3182 c2 = gen2(CNIL,0x88,modreg rm(2,vreg,reg));3270 c2 = gen2(CNIL,0x88,modregxrmx(2,vreg,reg)); 3183 3271 c2->IEVoffset1 = offset; 3184 3272 c2->IFL1 = FLconst; 3185 3273 c3 = cat(c3,c2); 3186 3274 } 3187 3275 fixres: 3188 3276 return cat3(c1,c3,fixresult(e,retregs1,pretregs)); 3189 3277 } 3190 3278 } 3191 3279 3192 3280 // Get nbytes into CX 3193 3281 retregs2 = mCX; 3194 if ( I32&& e2->E1->Eoper == OPconst && e2->E2->Eoper == OPconst)3282 if (!I16 && e2->E1->Eoper == OPconst && e2->E2->Eoper == OPconst) 3195 3283 { 3196 3284 remainder = numbytes & (REGSIZE - 1); 3197 3285 numwords = numbytes / REGSIZE; // number of words 3198 3286 op = 0xAB; // moving by words 3199 3287 c1 = getregs(mCX); 3200 c1 = 
movregconst(c1,CX,numwords, 0); // # of bytes/words3288 c1 = movregconst(c1,CX,numwords,I64?64:0); // # of bytes/words 3201 3289 } 3202 3290 else 3203 3291 { 3204 3292 remainder = 0; 3205 3293 op = 0xAA; // must move by bytes 3206 3294 c1 = codelem(e2->E1,&retregs2,FALSE); 3207 3295 } 3208 3296 3209 3297 // Get val into AX 3210 3298 3211 3299 retregs3 = mAX; 3212 if ( I32&& e2->E2->Eoper == OPconst)3300 if (!I16 && e2->E2->Eoper == OPconst) 3213 3301 { 3214 c1 = regwithvalue(c1, mAX, value, NULL, 0);3302 c1 = regwithvalue(c1, mAX, value, NULL, I64?64:0); 3215 3303 freenode(e2->E2); 3216 3304 } 3217 3305 else 3218 3306 { 3219 3307 c1 = cat(c1,scodelem(e2->E2,&retregs3,retregs2,FALSE)); 3220 3308 #if 0 3221 3309 if (I32) 3222 3310 { 3223 3311 c1 = gen2(c1,0x8A,modregrm(3,AH,AL)); // MOV AH,AL 3224 3312 c1 = genc2(c1,0xC1,modregrm(3,4,AX),8); // SHL EAX,8 3225 3313 c1 = gen2(c1,0x8A,modregrm(3,AL,AH)); // MOV AL,AH 3226 3314 c1 = genc2(c1,0xC1,modregrm(3,4,AX),8); // SHL EAX,8 3227 3315 c1 = gen2(c1,0x8A,modregrm(3,AL,AH)); // MOV AL,AH 3228 3316 } 3229 3317 #endif 3230 3318 } 3231 3319 freenode(e2); 3232 3320 3233 3321 // Get s into ES:DI 3234 3322 retregs1 = mDI; 3235 3323 ty1 = e->E1->Ety; 3236 3324 if (!tyreg(ty1)) 3237 3325 retregs1 |= mES; 3238 3326 c1 = cat(c1,scodelem(e->E1,&retregs1,retregs2 | retregs3,FALSE)); 3239 3327 reg = DI; //findreg(retregs1); 3240 3328 3241 3329 // Make sure ES contains proper segment value 3242 3330 c2 = cod2_setES(ty1); 3243 3331 3244 3332 c3 = NULL; 3245 3333 if (*pretregs) // if need return value 3246 3334 { c3 = getregs(mBX); 3247 3335 c3 = genmovreg(c3,BX,DI); 3336 code_orrex(c3,rex); 3248 3337 } 3249 3338 3250 3339 c3 = cat(c3,getregs(mDI | mCX)); 3251 if ( !I32&& config.flags4 & CFG4speed) // if speed optimization3340 if (I16 && config.flags4 & CFG4speed) // if speed optimization 3252 3341 { 3253 3342 c3 = cat(c3,getregs(mAX)); 3254 3343 c3 = gen2(c3,0x8A,modregrm(3,AH,AL)); // MOV AH,AL 3255 3344 
gen2(c3,0xD1,modregrm(3,5,CX)); // SHR CX,1 3256 3345 gen1(c3,0xF3); // REP 3257 3346 gen1(c3,0xAB); // STOSW 3258 3347 gen2(c3,0x11,modregrm(3,CX,CX)); // ADC CX,CX 3259 3348 op = 0xAA; 3260 3349 } 3261 3350 3262 3351 c3 = gen1(c3,0xF3); // REP 3263 3352 gen1(c3,op); // STOSD 3353 if (remainder & 4) 3354 { 3355 code *ctmp; 3356 ctmp = gen2(CNIL,0x89,modregrmx(2,AX,reg)); 3357 ctmp->IFL1 = FLconst; 3358 c3 = cat(c3,ctmp); 3359 } 3264 3360 if (remainder & 2) 3265 3361 { 3266 3362 code *ctmp; 3267 ctmp = gen2(CNIL,0x89,modregrm (2,AX,reg));3363 ctmp = gen2(CNIL,0x89,modregrmx(2,AX,reg)); 3268 3364 ctmp->Iflags = CFopsize; 3365 ctmp->IEVoffset1 = remainder & 4; 3269 3366 ctmp->IFL1 = FLconst; 3270 3367 c3 = cat(c3,ctmp); 3271 3368 } 3272 3369 if (remainder & 1) 3273 3370 { 3274 3371 code *ctmp; 3275 ctmp = gen2(CNIL,0x88,modregrm (2,AX,reg));3276 ctmp->IEVoffset1 = (remainder & 2) ? 2 : 0;3372 ctmp = gen2(CNIL,0x88,modregrmx(2,AX,reg)); 3373 ctmp->IEVoffset1 = remainder & ~1; 3277 3374 ctmp->IFL1 = FLconst; 3278 3375 c3 = cat(c3,ctmp); 3279 3376 } 3280 3377 regimmed_set(CX,0); 3281 3378 return cat4(c1,c2,c3,fixresult(e,mES|mBX,pretregs)); 3282 3379 } 3283 3380 #else 3284 3381 // BUG: Pat made many improvements in the linux version, I need 3285 3382 // to verify they work for 16 bits and fold them in. -Walter 3286 3383 3287 3384 code *cdmemset(elem *e,regm_t *pretregs) 3288 3385 { code *c1,*c2,*c3 = NULL,*c4; 3289 3386 regm_t retregs1; 3290 3387 regm_t retregs2; 3291 3388 regm_t retregs3; 3292 3389 tym_t ty1; 3293 3390 elem *e2; 3294 3391 targ_size_t value; 3295 3392 3296 3393 /* … … 3349 3445 return cat4(c1,c2,c3,fixresult(e,mES|mBX,pretregs)); 3350 3446 } 3351 3447 #endif 3352 3448 3353 3449 3354 3450 /********************** 3355 3451 * Do structure assignments. 3356 3452 * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2). 3357 3453 * Mebbe call cdstreq() for double assignments??? 
3358 3454 */ 3359 3455 3360 3456 code *cdstreq(elem *e,regm_t *pretregs) 3361 3457 { code *c1,*c2,*c3; 3362 3458 code *c1a; 3363 3459 regm_t srcregs,dstregs; /* source & destination reg masks */ 3364 3460 targ_uns numbytes; 3365 3461 char need_DS = FALSE; 3366 3462 elem *e1 = e->E1,*e2 = e->E2; 3367 3463 int segreg; 3368 3464 3369 numbytes = e->Enumbytes; /* # of bytes in structure/union */ 3465 numbytes = e->Enumbytes; // # of bytes in structure/union 3466 unsigned char rex = I64 ? REX_W : 0; 3370 3467 3371 3468 //printf("cdstreq(e = %p, *pretregs = x%x)\n", e, *pretregs); 3372 3469 3373 3470 /* First, load pointer to rvalue into SI */ 3374 3471 srcregs = mSI; /* source is DS:SI */ 3375 3472 c1 = docommas(&e2); 3376 3473 if (e2->Eoper == OPind) /* if (.. = *p) */ 3377 3474 { elem *e21 = e2->E1; 3378 3475 3379 3476 segreg = SEG_DS; 3380 3477 switch (tybasic(e21->Ety)) 3381 3478 { 3382 3479 case TYsptr: 3383 3480 if (config.wflags & WFssneds) /* if sptr can't use DS segment */ 3384 3481 segreg = SEG_SS; 3385 3482 break; 3386 3483 case TYcptr: 3387 3484 if (!(config.exe & EX_flat)) 3388 3485 segreg = SEG_CS; 3389 3486 break; … … 3445 3542 if (e1->Eoper == OPind) /* if (*p = ..) 
*/ 3446 3543 { 3447 3544 if (tyreg(e1->E1->Ety)) 3448 3545 dstregs = mDI; 3449 3546 c2 = cod2_setES(e1->E1->Ety); 3450 3547 c2 = cat(c2,scodelem(e1->E1,&dstregs,srcregs,FALSE)); 3451 3548 } 3452 3549 else 3453 3550 c2 = cdrelconst(e1,&dstregs); 3454 3551 freenode(e1); 3455 3552 3456 3553 c3 = getregs((srcregs | dstregs) & (mLSW | mDI)); 3457 3554 if (need_DS) 3458 3555 { assert(!(config.exe & EX_flat)); 3459 3556 c3 = gen1(c3,0x1E); /* PUSH DS */ 3460 3557 gen2(c3,0x8E,modregrm(3,SEG_DS,CX)); /* MOV DS,CX */ 3461 3558 } 3462 3559 if (numbytes <= REGSIZE * (6 + (REGSIZE == 4))) 3463 3560 { while (numbytes >= REGSIZE) 3464 3561 { c3 = gen1(c3,0xA5); /* MOVSW */ 3562 code_orrex(c3, rex); 3465 3563 numbytes -= REGSIZE; 3466 3564 } 3467 3565 //if (numbytes) 3468 3566 // printf("cdstreq numbytes %d\n",numbytes); 3469 3567 while (numbytes--) 3470 3568 c3 = gen1(c3,0xA4); /* MOVSB */ 3471 3569 } 3472 3570 else 3473 3571 { 3474 3572 #if 1 3475 3573 unsigned remainder; 3476 3574 3477 3575 remainder = numbytes & (REGSIZE - 1); 3478 3576 numbytes /= REGSIZE; // number of words 3479 3577 c3 = cat(c3,getregs_imm(mCX)); 3480 3578 c3 = movregconst(c3,CX,numbytes,0); // # of bytes/words 3481 3579 gen1(c3,0xF3); // REP 3482 3580 gen1(c3,0xA5); // REP MOVSD 3483 3581 regimmed_set(CX,0); // note that CX == 0 3484 3582 for (; remainder; remainder--) … … 3491 3589 if (numbytes & (REGSIZE - 1)) /* if odd */ 3492 3590 movs = 0xA4; /* MOVSB */ 3493 3591 else 3494 3592 { movs = 0xA5; /* MOVSW */ 3495 3593 numbytes /= REGSIZE; /* # of words */ 3496 3594 } 3497 3595 c3 = cat(c3,getregs_imm(mCX)); 3498 3596 c3 = movregconst(c3,CX,numbytes,0); /* # of bytes/words */ 3499 3597 gen1(c3,0xF3); /* REP */ 3500 3598 gen1(c3,movs); 3501 3599 regimmed_set(CX,0); /* note that CX == 0 */ 3502 3600 #endif 3503 3601 } 3504 3602 if (need_DS) 3505 3603 gen1(c3,0x1F); // POP DS 3506 3604 assert(!(*pretregs & mPSW)); 3507 3605 if (*pretregs) 3508 3606 { /* ES:DI points past what we want */ 3509 3607 regm_t 
retregs; 3510 3608 3511 genc2(c3,0x81, modregrm(3,5,DI),e->Enumbytes); /* SUB DI,numbytes */3609 genc2(c3,0x81,(rex << 16) | modregrm(3,5,DI),e->Enumbytes); // SUB DI,numbytes 3512 3610 retregs = mDI; 3513 3611 if (*pretregs & mMSW && !(config.exe & EX_flat)) 3514 3612 retregs |= mES; 3515 3613 c3 = cat(c3,fixresult(e,retregs,pretregs)); 3516 3614 } 3517 3615 return cat3(c1,c2,c3); 3518 3616 } 3519 3617 3520 3618 3521 3619 /********************** 3522 3620 * Get the address of. 3523 3621 * Is also called by cdstreq() to set up pointer to a structure. 3524 3622 */ 3525 3623 3526 3624 code *cdrelconst(elem *e,regm_t *pretregs) 3527 3625 { code *c,*c1; 3528 3626 enum SC sclass; 3529 3627 unsigned mreg, /* segment of the address (TYfptrs only) */ 3530 3628 lreg; /* offset of the address */ 3531 3629 tym_t tym; … … 3620 3718 fl = s->Sfl; 3621 3719 if (s->ty() & mTYcs) 3622 3720 fl = FLcsdata; 3623 3721 c = gen2(c,0x8C, /* MOV mreg,SEG REGISTER */ 3624 3722 modregrm(3,segfl[fl],mreg)); 3625 3723 } 3626 3724 if (*pretregs & mES) 3627 3725 gen2(c,0x8E,modregrm(3,0,mreg)); /* MOV ES,mreg */ 3628 3726 } 3629 3727 return cat(c,getoffset(e,lreg)); 3630 3728 } 3631 3729 3632 3730 /********************************* 3633 3731 * Load the offset portion of the address represented by e into 3634 3732 * reg. 3635 3733 */ 3636 3734 3637 3735 code *getoffset(elem *e,unsigned reg) 3638 3736 { code cs; 3639 3737 code *c; 3640 enum FL fl;3641 3738 3642 3739 cs.Iflags = 0; 3643 cs.Irex = 0; 3740 unsigned char rex = I64 ? 
REX_W : 0; 3741 cs.Irex = rex; 3644 3742 assert(e->Eoper == OPvar || e->Eoper == OPrelconst); 3645 fl = el_fl(e);3743 enum FL fl = el_fl(e); 3646 3744 switch (fl) 3647 3745 { 3648 3746 case FLdatseg: 3649 3747 cs.IEV2._EP.Vpointer = e->EV.Vpointer; 3650 3748 goto L3; 3651 3749 3652 3750 case FLfardata: 3653 3751 assert(!TARGET_FLAT); 3654 3752 goto L4; 3655 3753 3656 3754 case FLtlsdata: 3657 3755 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 3658 3756 { /* Generate: 3659 3757 * MOV reg,GS:[00000000] 3660 3758 * ADD reg, offset s@TLS_LE 3661 3759 * for locals, and for globals: 3662 3760 * MOV reg,GS:[00000000] 3663 3761 * ADD reg, s@TLS_IE 3664 3762 * note different fixup 3665 3763 */ 3666 3764 L5: 3667 3765 int stack = 0; 3668 3766 c = NULL; 3669 3767 if (reg == STACK) 3670 3768 { regm_t retregs = ALLREGS; 3671 3769 3672 3770 c = allocreg(&retregs,®,TYoffset); 3673 3771 reg = findreg(retregs); 3674 3772 stack = 1; 3675 3773 } 3676 3774 3677 3775 code css; 3776 css.Irex = rex; 3678 3777 css.Iop = 0x8B; 3679 css.Irm = modregrm(0, reg, BPRM); 3778 css.Irm = modregrm(0, 0, BPRM); 3779 code_newreg(&css, reg); 3680 3780 css.Iflags = CFgs; 3681 css.Irex = 0;3682 3781 css.IFL1 = FLconst; 3683 3782 css.IEV1.Vuns = 0; 3684 3783 c = gen(c, &css); // MOV reg,GS:[00000000] 3685 3784 3686 3785 if (e->EV.sp.Vsym->Sclass == SCstatic || e->EV.sp.Vsym->Sclass == SClocstat) 3687 3786 { // ADD reg, offset s 3787 cs.Irex = rex; 3688 3788 cs.Iop = 0x81; 3689 cs.Irm = modregrm(3,0,reg); 3789 cs.Irm = modregrm(3,0,reg & 7); 3790 if (reg & 8) 3791 cs.Irex |= REX_B; 3690 3792 cs.Iflags = CFoff; 3691 css.Irex = 0;3692 3793 cs.IFL2 = fl; 3693 3794 cs.IEVsym2 = e->EV.sp.Vsym; 3694 3795 cs.IEVoffset2 = e->EV.sp.Voffset; 3695 3796 } 3696 3797 else 3697 3798 { // ADD reg, s 3799 cs.Irex = rex; 3698 3800 cs.Iop = 0x03; 3699 cs.Irm = modregrm(0,reg,BPRM); 3801 cs.Irm = modregrm(0,0,BPRM); 3802 code_newreg(&cs, reg); 3700 3803 cs.Iflags = CFoff; 3701 css.Irex = 0;3702 3804 
cs.IFL1 = fl; 3703 3805 cs.IEVsym1 = e->EV.sp.Vsym; 3704 3806 cs.IEVoffset1 = e->EV.sp.Voffset; 3705 3807 } 3706 3808 c = gen(c, &cs); // ADD reg, xxxx 3707 3809 3708 3810 if (stack) 3709 3811 { 3710 c = gen1(c,0x50 + reg); /* PUSH reg */ 3812 c = gen1(c,0x50 + (reg & 7)); // PUSH reg 3813 if (reg & 8) 3814 code_orrex(c, REX_B); 3711 3815 c = genadjesp(c,REGSIZE); 3712 3816 stackchanged = 1; 3713 3817 } 3714 3818 break; 3715 3819 } 3716 3820 #else 3717 3821 goto L4; 3718 3822 #endif 3719 3823 3720 3824 case FLfunc: 3721 3825 fl = FLextern; /* don't want PC relative addresses */ 3722 3826 goto L4; 3723 3827 3724 3828 case FLextern: 3725 3829 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 3726 3830 if (e->EV.sp.Vsym->ty() & mTYthread) 3727 3831 goto L5; 3728 3832 #endif 3729 3833 case FLdata: 3730 3834 case FLudata: 3731 3835 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 3732 3836 case FLgot: 3733 3837 case FLgotoff: 3734 3838 #endif 3735 3839 case FLcsdata: 3736 3840 L4: 3737 3841 cs.IEVsym2 = e->EV.sp.Vsym; 3738 3842 cs.IEVoffset2 = e->EV.sp.Voffset; 3739 3843 L3: 3740 3844 if (reg == STACK) 3741 3845 { stackchanged = 1; 3742 3846 cs.Iop = 0x68; /* PUSH immed16 */ 3743 3847 c = genadjesp(NULL,REGSIZE); 3744 3848 } 3745 3849 else 3746 { cs.Iop = 0xB8 + reg; /* MOV reg,immed16 */ 3850 { cs.Iop = 0xB8 + (reg & 7); // MOV reg,immed16 3851 if (reg & 8) 3852 cs.Irex |= REX_B; 3747 3853 c = NULL; 3748 3854 } 3749 3855 cs.Iflags = CFoff; /* want offset only */ 3750 3856 cs.IFL2 = fl; 3751 3857 c = gen(c,&cs); 3752 3858 break; 3753 3859 3754 3860 #if 0 && TARGET_LINUX 3755 3861 case FLgot: 3756 3862 case FLgotoff: 3757 3863 { 3758 3864 gotref = 1; 3759 3865 symbol *s = e->EV.sp.Vsym; 3760 3866 // When using 8B (MOV), indicating that rm is used 3761 3867 // rm operands are always placed in IEV1 not IEV2 3762 3868 cs.IEVsym1 = s; 3763 3869 cs.IEVoffset1 = e->EV.sp.Voffset; 3764 3870 cs.Irm = modregrm(2,reg,BX); // reg,disp32[EBX] 3765 
3871 cs.IFL1 = fl; 3766 3872 cs.Iop = (fl == FLgotoff) … … 3774 3880 3775 3881 case FLreg: 3776 3882 /* Allow this since the tree optimizer puts & in front of */ 3777 3883 /* register doubles. */ 3778 3884 goto L2; 3779 3885 case FLauto: 3780 3886 case FLtmp: 3781 3887 case FLbprel: 3782 3888 case FLfltreg: 3783 3889 reflocal = TRUE; 3784 3890 goto L2; 3785 3891 case FLpara: 3786 3892 refparam = TRUE; 3787 3893 L2: 3788 3894 if (reg == STACK) 3789 3895 { regm_t retregs = ALLREGS; 3790 3896 3791 3897 c = allocreg(&retregs,®,TYoffset); 3792 3898 reg = findreg(retregs); 3793 3899 c = cat(c,loadea(e,&cs,0x8D,reg,0,0,0)); /* LEA reg,EA */ 3794 c = gen1(c,0x50 + reg); /* PUSH reg */ 3900 c = gen1(c,0x50 + (reg & 7)); // PUSH reg 3901 if (reg & 8) 3902 code_orrex(c, REX_B); 3795 3903 c = genadjesp(c,REGSIZE); 3796 3904 stackchanged = 1; 3797 3905 } 3798 3906 else 3799 3907 c = loadea(e,&cs,0x8D,reg,0,0,0); /* LEA reg,EA */ 3800 3908 break; 3801 3909 default: 3802 3910 #ifdef DEBUG 3803 3911 elem_print(e); 3804 3912 debugx(WRFL(fl)); 3805 3913 #endif 3806 3914 assert(0); 3807 3915 } 3808 3916 return c; 3809 3917 } 3810 3918 3811 3919 3812 3920 /****************** 3813 3921 * Negate, sqrt operator 3814 3922 */ 3815 3923 3816 3924 code *cdneg(elem *e,regm_t *pretregs) 3817 3925 { unsigned byte; 3818 3926 regm_t retregs,possregs; 3819 3927 int reg; 3820 3928 int sz; 3821 3929 tym_t tyml; 3822 3930 code *c,*c1,*cg; 3823 3931 3824 3932 //printf("cdneg()\n"); 3825 3933 //elem_print(e); 3826 3934 if (*pretregs == 0) 3827 3935 return codelem(e->E1,pretregs,FALSE); 3828 3936 tyml = tybasic(e->E1->Ety); 3829 3937 sz = tysize[tyml]; 3830 3938 if (tyfloating(tyml)) 3831 3939 { if (tycomplex(tyml)) 3832 3940 return neg_complex87(e, pretregs); 3833 if (config.inline8087 && ((*pretregs & (ALLREGS | mBP)) == 0 || e->Eoper == OPsqrt)) 3941 if (config.inline8087 && 3942 ((*pretregs & (ALLREGS | mBP)) == 0 || e->Eoper == OPsqrt || I64)) 3834 3943 return neg87(e,pretregs); 3835 retregs = ( 
!I32&& sz == 8) ? DOUBLEREGS_16 : ALLREGS;3944 retregs = (I16 && sz == 8) ? DOUBLEREGS_16 : ALLREGS; 3836 3945 c1 = codelem(e->E1,&retregs,FALSE); 3837 3946 c1 = cat(c1,getregs(retregs)); 3838 3947 if (I32) 3839 3948 { reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs); 3840 3949 c1 = genc2(c1,0x81,modregrm(3,6,reg),0x80000000); /* XOR EDX,sign bit */ 3841 3950 } 3842 3951 else 3843 3952 { reg = (sz == 8) ? AX : findregmsw(retregs); 3844 3953 c1 = genc2(c1,0x81,modregrm(3,6,reg),0x8000); /* XOR AX,0x8000 */ 3845 3954 } 3846 3955 return cat(c1,fixresult(e,retregs,pretregs)); 3847 3956 } 3848 3957 3849 3958 byte = sz == 1; 3850 3959 possregs = (byte) ? BYTEREGS : allregs; 3851 3960 retregs = *pretregs & possregs; 3852 3961 if (retregs == 0) 3853 3962 retregs = possregs; 3854 3963 c1 = codelem(e->E1,&retregs,FALSE); 3855 3964 cg = getregs(retregs); /* retregs will be destroyed */ 3856 3965 if (sz <= REGSIZE) 3857 { unsigned reg;3858 3859 reg = findreg(retregs);3860 c = gen2(CNIL,0xF7 ^ byte, modregrm(3,3,reg)); /* NEG reg */3861 if ( I32&& tysize[tyml] == SHORTSIZE && *pretregs & mPSW)3966 { 3967 unsigned reg = findreg(retregs); 3968 unsigned rex = (I64 && sz == 8) ? 
REX_W : 0; 3969 c = gen2(CNIL,0xF7 ^ byte,(rex << 16) | modregrmx(3,3,reg)); // NEG reg 3970 if (!I16 && tysize[tyml] == SHORTSIZE && *pretregs & mPSW) 3862 3971 c->Iflags |= CFopsize | CFpsw; 3863 3972 *pretregs &= mBP | ALLREGS; // flags already set 3864 3973 } 3865 3974 else if (sz == 2 * REGSIZE) 3866 3975 { unsigned msreg,lsreg; 3867 3976 3868 3977 msreg = findregmsw(retregs); 3869 3978 c = gen2(CNIL,0xF7,modregrm(3,3,msreg)); /* NEG msreg */ 3870 3979 lsreg = findreglsw(retregs); 3871 3980 gen2(c,0xF7,modregrm(3,3,lsreg)); /* NEG lsreg */ 3872 3981 genc2(c,0x81,modregrm(3,3,msreg),0); /* SBB msreg,0 */ 3873 3982 } 3874 3983 else 3875 3984 assert(0); 3876 3985 return cat4(c1,cg,c,fixresult(e,retregs,pretregs)); 3877 3986 } 3878 3987 3879 3988 3880 3989 /****************** 3881 3990 * Absolute value operator 3882 3991 */ 3883 3992 3884 3993 code *cdabs( elem *e, regm_t *pretregs) 3885 3994 { unsigned byte; 3886 3995 regm_t retregs,possregs; 3887 3996 int reg; 3888 3997 tym_t tyml; 3889 3998 code *c,*c1,*cg; 3890 int sz;3891 3999 3892 4000 if (*pretregs == 0) 3893 4001 return codelem(e->E1,pretregs,FALSE); 3894 4002 tyml = tybasic(e->E1->Ety); 3895 sz = tysize[tyml]; 4003 int sz = tysize[tyml]; 4004 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 3896 4005 if (tyfloating(tyml)) 3897 { if (config.inline8087 && ( *pretregs & (ALLREGS | mBP)) == 0)4006 { if (config.inline8087 && ((*pretregs & (ALLREGS | mBP)) == 0 || I64)) 3898 4007 return neg87(e,pretregs); 3899 4008 retregs = (!I32 && sz == 8) ? DOUBLEREGS_16 : ALLREGS; 3900 4009 c1 = codelem(e->E1,&retregs,FALSE); 3901 4010 /*cg = callclib(e,CLIBdneg,pretregs,0);*/ 3902 4011 c1 = cat(c1,getregs(retregs)); 3903 4012 if (I32) 3904 4013 { reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs); 3905 4014 c1 = genc2(c1,0x81,modregrm(3,4,reg),0x7FFFFFFF); /* AND EDX,~sign bit */ 3906 4015 } 3907 4016 else 3908 4017 { reg = (sz == 8) ? 
AX : findregmsw(retregs); 3909 4018 c1 = genc2(c1,0x81,modregrm(3,4,reg),0x7FFF); /* AND AX,0x7FFF */ 3910 4019 } 3911 4020 return cat(c1,fixresult(e,retregs,pretregs)); 3912 4021 } 3913 4022 3914 4023 byte = sz == 1; 3915 4024 assert(byte == 0); 3916 4025 byte = 0; 3917 4026 possregs = (sz <= REGSIZE) ? mAX : allregs; 3918 4027 retregs = *pretregs & possregs; 3919 4028 if (retregs == 0) 3920 4029 retregs = possregs; 3921 4030 c1 = codelem(e->E1,&retregs,FALSE); 3922 4031 cg = getregs(retregs); /* retregs will be destroyed */ 3923 4032 if (sz <= REGSIZE) 3924 4033 { unsigned reg; 3925 4034 code *c2; 3926 4035 3927 4036 /* cwd 3928 4037 xor AX,DX 3929 4038 sub AX,DX 3930 4039 */ 3931 4040 3932 4041 cg = cat(cg,getregs(mDX)); 3933 4042 reg = findreg(retregs); 3934 if ( I32&& sz == SHORTSIZE)4043 if (!I16 && sz == SHORTSIZE) 3935 4044 cg = gen1(cg,0x98); // CWDE 3936 4045 cg = gen1(cg,0x99); // CWD 3937 gen2(cg,0x33 ^ byte,modregrm(3,AX,DX)); // XOR EAX,EDX 3938 c = gen2(CNIL,0x2B ^ byte,modregrm(3,AX,DX)); // SUB EAX,EDX 3939 if (I32 && sz == SHORTSIZE && *pretregs & mPSW) 4046 code_orrex(cg, rex); 4047 gen2(cg,0x33 ^ byte,(rex << 16) | modregrm(3,AX,DX)); // XOR EAX,EDX 4048 c = gen2(CNIL,0x2B ^ byte,(rex << 16) | modregrm(3,AX,DX)); // SUB EAX,EDX 4049 if (!I16 && sz == SHORTSIZE && *pretregs & mPSW) 3940 4050 c->Iflags |= CFopsize | CFpsw; 3941 4051 if (*pretregs & mPSW) 3942 4052 c->Iflags |= CFpsw; 3943 4053 *pretregs &= ~mPSW; // flags already set 3944 4054 } 3945 4055 else if (sz == 2 * REGSIZE) 3946 4056 { unsigned msreg,lsreg; 3947 4057 code *cnop; 3948 4058 3949 4059 /* tst DX 3950 4060 jns L2 3951 4061 neg DX 3952 4062 neg AX 3953 4063 sbb DX,0 3954 4064 L2: 3955 4065 */ 3956 4066 3957 4067 cnop = gennop(CNIL); 3958 4068 msreg = findregmsw(retregs); 3959 4069 lsreg = findreglsw(retregs); … … 3975 4084 */ 3976 4085 3977 4086 code *cdpost(elem *e,regm_t *pretregs) 3978 4087 { code cs,*c1,*c2,*c3,*c4,*c5,*c6; 3979 4088 unsigned reg,op,byte; 3980 4089 tym_t 
tyml; 3981 4090 regm_t retregs,possregs,idxregs; 3982 4091 targ_int n; 3983 4092 elem *e2; 3984 4093 int sz; 3985 4094 int stackpushsave; 3986 4095 3987 4096 retregs = *pretregs; 3988 4097 op = e->Eoper; /* OPxxxx */ 3989 4098 if (retregs == 0) /* if nothing to return */ 3990 4099 return cdaddass(e,pretregs); 3991 4100 c4 = c5 = CNIL; 3992 4101 tyml = tybasic(e->E1->Ety); 3993 4102 sz = tysize[tyml]; 3994 4103 e2 = e->E2; 4104 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 3995 4105 3996 4106 if (tyfloating(tyml)) 3997 4107 { 3998 4108 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 3999 4109 return post87(e,pretregs); 4000 4110 #else 4001 4111 if (config.inline8087) 4002 4112 return post87(e,pretregs); 4003 4113 assert(sz <= 8); 4004 4114 c1 = getlvalue(&cs,e->E1,DOUBLEREGS); 4005 4115 freenode(e->E1); 4006 idxregs = idxregm( cs.Irm,cs.Isib); /* mask of index regs used */4116 idxregs = idxregm(&cs); // mask of index regs used 4007 4117 cs.Iop = 0x8B; /* MOV DOUBLEREGS,EA */ 4008 4118 c2 = fltregs(&cs,tyml); 4009 4119 stackchanged = 1; 4010 4120 stackpushsave = stackpush; 4011 4121 if (sz == 8) 4012 4122 { 4013 4123 if (I32) 4014 4124 { 4015 4125 gen1(c2,0x50 + DX); /* PUSH DOUBLEREGS */ 4016 4126 gen1(c2,0x50 + AX); 4017 4127 stackpush += DOUBLESIZE; 4018 4128 retregs = DOUBLEREGS2_32; 4019 4129 } 4020 4130 else 4021 4131 { 4022 4132 gen1(c2,0x50 + AX); 4023 4133 gen1(c2,0x50 + BX); 4024 4134 gen1(c2,0x50 + CX); 4025 4135 gen1(c2,0x50 + DX); /* PUSH DOUBLEREGS */ 4026 4136 stackpush += DOUBLESIZE + DOUBLESIZE; … … 4081 4191 } 4082 4192 } 4083 4193 else 4084 4194 { gen1(c5,0x58 + AX); 4085 4195 if (!I32) 4086 4196 gen1(c5,0x58 + DX); 4087 4197 stackpush -= FLOATSIZE; 4088 4198 retregs = FLOATREGS; 4089 4199 } 4090 4200 c5 = genadjesp(c5,stackpush - stackpushsave); 4091 4201 c6 = fixresult(e,retregs,pretregs); 4092 4202 return cat6(c1,c2,c3,c4,c5,c6); 4093 4203 #endif 4094 4204 } 4095 4205 4096 4206 assert(e2->Eoper == OPconst); 4097 4207 byte = (sz 
== 1); 4098 4208 possregs = byte ? BYTEREGS : allregs; 4099 4209 c1 = getlvalue(&cs,e->E1,0); 4100 4210 freenode(e->E1); 4101 idxregs = idxregm( cs.Irm,cs.Isib); /* mask of index regs used */4211 idxregs = idxregm(&cs); // mask of index regs used 4102 4212 if (sz <= REGSIZE && *pretregs == mPSW && (cs.Irm & 0xC0) == 0xC0 && 4103 ( I32|| (idxregs & (mBX | mSI | mDI | mBP))))4213 (!I16 || (idxregs & (mBX | mSI | mDI | mBP)))) 4104 4214 { // Generate: 4105 4215 // TEST reg,reg 4106 4216 // LEA reg,n[reg] // don't affect flags 4107 4217 int rm; 4108 4218 4109 4219 reg = cs.Irm & 7; 4220 if (cs.Irex & REX_B) 4221 reg |= 8; 4110 4222 cs.Iop = 0x85 ^ byte; 4111 c s.Irm |= modregrm(0,reg,0);4223 code_newreg(&cs, reg); 4112 4224 cs.Iflags |= CFpsw; 4113 4225 c2 = gen(NULL,&cs); // TEST reg,reg 4114 4226 4115 4227 // If lvalue is a register variable, we must mark it as modified 4116 c3 = modEA( cs.Irm);4228 c3 = modEA(&cs); 4117 4229 4118 4230 n = e2->EV.Vint; 4119 4231 if (op == OPpostdec) 4120 4232 n = -n; 4121 4233 rm = reg; 4122 if ( !I32)4234 if (I16) 4123 4235 rm = regtorm[reg]; 4124 c4 = genc1(NULL,0x8D, modregrm(2,reg,rm),FLconst,n);// LEA reg,n[reg]4236 c4 = genc1(NULL,0x8D,(rex << 16) | modregxrmx(2,reg,rm),FLconst,n); // LEA reg,n[reg] 4125 4237 return cat4(c1,c2,c3,c4); 4126 4238 } 4127 4239 else if (sz <= REGSIZE || tyfv(tyml)) 4128 4240 { code cs2; 4129 4241 4130 4242 cs.Iop = 0x8B ^ byte; 4131 4243 retregs = possregs & ~idxregs & *pretregs; 4132 4244 if (!tyfv(tyml)) 4133 4245 { if (retregs == 0) 4134 4246 retregs = possregs & ~idxregs; 4135 4247 } 4136 4248 else /* tyfv(tyml) */ 4137 4249 { if ((retregs &= mLSW) == 0) 4138 4250 retregs = mLSW & ~idxregs; 4139 4251 /* Can't use LES if the EA uses ES as a seg override */ 4140 4252 if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes) 4141 4253 { cs.Iop = 0xC4; /* LES */ 4142 4254 c1 = cat(c1,getregs(mES)); /* allocate ES */ 4143 4255 } 4144 4256 } 4145 4257 c2 = allocreg(&retregs,®,TYint); 4146 c s.Irm |= 
modregrm(0,reg,0);4258 code_newreg(&cs, reg); 4147 4259 c3 = gen(CNIL,&cs); /* MOV reg,EA */ 4148 4260 cs2 = cs; 4149 4261 4150 4262 /* If lvalue is a register variable, we must mark it as modified */ 4151 c3 = cat(c3,modEA( cs.Irm));4263 c3 = cat(c3,modEA(&cs)); 4152 4264 4153 4265 cs.Iop = 0x81 ^ byte; 4154 4266 cs.Irm &= ~modregrm(0,7,0); /* reg field = 0 */ 4267 cs.Irex &= ~REX_R; 4155 4268 if (op == OPpostdec) 4156 4269 cs.Irm |= modregrm(0,5,0); /* SUB */ 4157 4270 cs.IFL2 = FLconst; 4158 4271 n = e2->EV.Vint; 4159 4272 cs.IEV2.Vint = n; 4160 4273 if (n == 1) /* can use INC or DEC */ 4161 4274 { cs.Iop |= 0xFE; /* xFE is dec byte, xFF is word */ 4162 4275 if (op == OPpostdec) 4163 4276 NEWREG(cs.Irm,1); // DEC EA 4164 4277 else 4165 4278 NEWREG(cs.Irm,0); // INC EA 4166 4279 } 4167 4280 else if (n == -1) // can use INC or DEC 4168 4281 { cs.Iop |= 0xFE; // xFE is dec byte, xFF is word 4169 4282 if (op == OPpostinc) 4170 4283 NEWREG(cs.Irm,1); // DEC EA 4171 4284 else 4172 4285 NEWREG(cs.Irm,0); // INC EA 4173 4286 } 4174 4287 4175 4288 // For scheduling purposes, we wish to replace: 4176 4289 // MOV reg,EA 4177 4290 // OP EA 4178 4291 // with: 4179 4292 // MOV reg,EA 4180 4293 // OP reg 4181 4294 // MOV EA,reg 4182 4295 // ~OP reg 4183 4296 if (sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 && 4184 4297 config.target_cpu >= TARGET_Pentium && 4185 4298 config.flags4 & CFG4speed) 4186 4299 { 4187 4300 // Replace EA in cs with reg 4188 cs.Irm = (cs.Irm & ~modregrm(3,0,7)) | modregrm(3,0,reg); 4301 cs.Irm = (cs.Irm & ~modregrm(3,0,7)) | modregrm(3,0,reg & 7); 4302 if (reg & 8) 4303 { cs.Irex &= ~REX_R; 4304 cs.Irex |= REX_B; 4305 } 4189 4306 gen(c3,&cs); // ADD/SUB reg,const 4190 4307 4191 4308 // Reverse MOV direction 4192 4309 cs2.Iop ^= 2; 4193 4310 gen(c3,&cs2); // MOV EA,reg 4194 4311 4195 4312 // Toggle INC <-> DEC, ADD <-> SUB 4196 4313 cs.Irm ^= (n == 1 || n == -1) ? 
modregrm(0,1,0) : modregrm(0,5,0); 4197 4314 gen(c3,&cs); 4198 4315 4199 4316 if (*pretregs & mPSW) 4200 4317 { *pretregs &= ~mPSW; // flags already set 4201 4318 code_orflag(c3,CFpsw); 4202 4319 } 4203 4320 } 4204 4321 else 4205 4322 gen(c3,&cs); // ADD/SUB EA,const 4206 4323 4207 4324 freenode(e2); 4208 4325 if (tyfv(tyml)) … … 4496 4613 /***************************************** 4497 4614 */ 4498 4615 4499 4616 code *cdhalt(elem *e,regm_t *pretregs) 4500 4617 { 4501 4618 assert(*pretregs == 0); 4502 4619 return gen1(NULL, 0xF4); // HLT 4503 4620 } 4504 4621 4505 4622 /**************************************** 4506 4623 * Check to see if pointer is NULL. 4507 4624 */ 4508 4625 4509 4626 code *cdnullcheck(elem *e,regm_t *pretregs) 4510 4627 { regm_t retregs; 4511 4628 regm_t scratch; 4512 4629 unsigned reg; 4513 4630 code *c; 4514 4631 code *cs; 4515 4632 4516 assert( I32);4633 assert(!I16); 4517 4634 retregs = *pretregs; 4518 4635 if ((retregs & allregs) == 0) 4519 4636 retregs |= allregs; 4520 4637 c = codelem(e->E1,&retregs,FALSE); 4521 4638 scratch = allregs & ~retregs; 4522 4639 cs = allocreg(&scratch,&reg,TYint); 4523 cs = genc1(cs,0x8B,modregrm(2,reg,findreg(retregs)),FLconst,0); // MOV reg,0[e] 4640 unsigned rex = I64 ? REX_W : 0; 4641 cs = genc1(cs,0x8B,(rex << 16) | modregxrmx(2,reg,findreg(retregs)),FLconst,0); // MOV reg,0[e] 4524 4642 return cat3(c,cs,fixresult(e,retregs,pretregs)); 4525 4643 } 4526 4644 4527 4645 #endif // !SPP trunk/src/backend/cod3.c
r569 r577 269 269 DOUBLEREGS = DOUBLEREGS_16; 270 270 } 271 271 #endif 272 272 } 273 273 274 274 /******************************** 275 275 * Fix global variables for I64. 276 276 */ 277 277 278 278 void cod3_set64() 279 279 { 280 280 inssize[0xA0] = T|5; // MOV AL,mem 281 281 inssize[0xA1] = T|5; // MOV RAX,mem 282 282 inssize[0xA2] = T|5; // MOV mem,AL 283 283 inssize[0xA3] = T|5; // MOV mem,RAX 284 284 BPRM = 5; // [RBP] addressing mode 285 285 fregsaved = mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES; // saved across function calls 286 286 FLOATREGS = FLOATREGS_64; 287 287 FLOATREGS2 = FLOATREGS2_64; 288 288 DOUBLEREGS = DOUBLEREGS_64; 289 STACKALIGN = 16; 289 290 290 291 for (unsigned i = 0x80; i < 0x90; i++) 291 292 inssize2[i] = W|T|6; 292 293 } 293 294 294 295 /********************************* 295 296 * Word or dword align start of function. 296 297 */ 297 298 298 299 void cod3_align() 299 300 { 300 301 static char nops[7] = { 0x90,0x90,0x90,0x90,0x90,0x90,0x90 }; 301 302 unsigned nbytes; 302 303 #if OMFOBJ 303 304 if (config.flags4 & CFG4speed) // if optimized for speed 304 305 { 305 306 // Pick alignment based on CPU target 306 307 if (config.target_cpu == TARGET_80486 || 307 308 config.target_cpu >= TARGET_PentiumPro) 308 309 { // 486 does reads on 16 byte boundaries, so if we are near … … 353 354 354 355 e = b->Belem; 355 356 elem_debug(e); 356 357 cc = docommas(&e); 357 358 cgstate.stackclean++; 358 359 tys = tybasic(e->Ety); 359 360 sz = tysize[tys]; 360 361 dword = (sz == 2 * REGSIZE); 361 362 mswsame = 1; // assume all msw's are the same 362 363 p = b->BS.Bswitch; /* pointer to case data */ 363 364 assert(p); 364 365 ncases = *p++; /* number of cases */ 365 366 366 367 vmax = MINLL; // smallest possible llong 367 368 vmin = MAXLL; // largest possible llong 368 369 for (n = 0; n < ncases; n++) // find max and min case values 369 370 { val = *p++; 370 371 if (val > vmax) vmax = val; 371 372 if (val < vmin) vmin = val; 372 373 if (REGSIZE == 2) 373 { 
unsigned short ms; 374 375 #if __DMC__ 376 ms = ((unsigned short *)&val)[1]; 377 #else 378 ms = (val >> 16) & 0xFFFF; 379 #endif 374 { 375 unsigned short ms = (val >> 16) & 0xFFFF; 380 376 if (n == 0) 381 377 msw = ms; 382 378 else if (msw != ms) 383 379 mswsame = 0; 384 380 } 385 381 else // REGSIZE == 4 386 { targ_ulong ms; 387 388 #if __DMC__ 389 /* This statement generates garbage for ms under g++, 390 * I don't know why. 391 */ 392 ms = ((targ_ulong *)&val)[1]; 393 #else 394 ms = (val >> 32) & 0xFFFFFFFF; 395 #endif 382 { 383 targ_ulong ms = (val >> 32) & 0xFFFFFFFF; 396 384 if (n == 0) 397 385 msw = ms; 398 386 else if (msw != ms) 399 387 mswsame = 0; 400 388 } 401 389 } 402 390 p -= ncases; 403 391 //dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin); 404 392 flags = (config.flags & CFGromable) ? CFcs : 0; // table is in code seg 405 393 394 if (I64) 395 { // For now, just generate basic if-then sequence to get us running 396 retregs = ALLREGS; 397 b->BC = BCifthen; 398 c = scodelem(e,&retregs,0,TRUE); 399 assert(!dword); // 128 bit switches not supported 400 reg = findreg(retregs); // reg that result is in 401 bl = b->Bsucc; 402 for (n = 0; n < ncases; n++) 403 { code *cx; 404 val = *p; 405 if (sz == 4) 406 cx = genc2(CNIL,0x81,modregrmx(3,7,reg),val); // CMP reg,val 407 else if (sz == 8) 408 { 409 if (val == (int)val) // if val is a 64 bit value sign-extended from 32 bits 410 { 411 cx = genc2(CNIL,0x81,modregrmx(3,7,reg),val); // CMP reg,value32 412 cx->Irex |= REX_W; // 64 bit operand 413 } 414 else 415 { unsigned sreg; 416 // MOV sreg,value64 417 cx = regwithvalue(CNIL, ALLREGS & ~mask[reg], val, &sreg, 64); 418 cx = genregs(cx,0x3B,reg,sreg); // CMP reg,sreg 419 code_orrex(cx, REX_W); 420 } 421 } 422 else 423 assert(0); 424 bl = list_next(bl); 425 genjmp(cx,JE,FLblock,list_block(bl)); // JE caseaddr 426 c = cat(c,cx); 427 p++; 428 } 429 if (list_block(b->Bsucc) != b->Bnext) /* if default is not next block */ 430 c = 
cat(c,genjmp(CNIL,JMP,FLblock,list_block(b->Bsucc))); 431 ce = NULL; 432 } 406 433 // Need to do research on MACHOBJ to see about better methods 407 if (MACHOBJ || ncases <= 3) // generate if-then sequence408 { 434 else if (MACHOBJ || ncases <= 3) 435 { // generate if-then sequence 409 436 retregs = ALLREGS; 410 437 L1: 411 438 b->BC = BCifthen; 412 439 c = scodelem(e,&retregs,0,TRUE); 413 440 if (dword) 414 441 { reg = findreglsw(retregs); 415 442 reg2 = findregmsw(retregs); 416 443 } 417 444 else 418 445 reg = findreg(retregs); /* reg that result is in */ 419 446 bl = b->Bsucc; 420 447 if (dword && mswsame) 421 448 { /* CMP reg2,MSW */ 422 449 c = genc2(c,0x81,modregrm(3,7,reg2),msw); 423 450 genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */ 424 451 } 425 452 for (n = 0; n < ncases; n++) 426 453 { code *cnext = CNIL; 427 454 /* CMP reg,casevalue */ 428 455 c = cat(c,ce = genc2(CNIL,0x81,modregrm(3,7,reg),(targ_int)*p)); … … 833 860 L1: 834 861 #if DEBUG 835 862 if ((jp & 0xF0) != 0x70) 836 863 WROP(op), 837 864 printf("i %d zero %d op x%x jp x%x\n",i,zero,op,jp); 838 865 #endif 839 866 assert((jp & 0xF0) == 0x70); 840 867 return jp; 841 868 } 842 869 843 870 844 871 /********************************** 845 872 * Append code to *pc which validates pointer described by 846 873 * addressing mode in *pcs. Modify addressing mode in *pcs. 
847 874 * Input: 848 875 * keepmsk mask of registers we must not destroy or use 849 876 * if (keepmsk & RMstore), this will be only a store operation 850 877 * into the lvalue 851 878 */ 852 879 853 void cod3_ptrchk(code * __ss *pc,code __ss*pcs,regm_t keepmsk)880 void cod3_ptrchk(code **pc,code *pcs,regm_t keepmsk) 854 881 { code *c; 855 882 code *cs2; 856 883 unsigned char rm,sib; 857 884 unsigned reg; 858 885 unsigned flagsave; 859 886 unsigned opsave; 860 887 regm_t idxregs; 861 888 regm_t tosave; 862 889 regm_t used; 863 890 int i; 864 891 892 assert(!I64); 865 893 if (!I16 && pcs->Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs)) 866 894 return; // not designed to deal with 48 bit far pointers 867 895 868 896 c = *pc; 869 897 870 898 rm = pcs->Irm; 871 899 assert(!(rm & 0x40)); // no disp8 or reg addressing modes 872 900 873 901 // If the addressing mode is already a register 874 902 reg = rm & 7; 875 903 if (I16) 876 904 { static const unsigned char imode[8] = { BP,BP,BP,BP,SI,DI,BP,BX }; 877 905 878 906 reg = imode[reg]; // convert [SI] to SI, etc. 
879 907 } 880 908 idxregs = mask[reg]; 881 909 if ((rm & 0x80 && (pcs->IFL1 != FLoffset || pcs->IEV1.Vuns)) || 882 910 !(idxregs & ALLREGS) 883 911 ) 884 912 { … … 937 965 case CFcs: segreg = 0x0E; break; 938 966 case 0: segreg = 0x1E; break; // DS 939 967 default: 940 968 assert(0); 941 969 } 942 970 943 971 // See if we should default to SS: 944 972 // (Happens when BP is part of the addressing mode) 945 973 if (segreg == 0x1E && (rm & 0xC0) != 0xC0 && 946 974 rm & 2 && (rm & 7) != 7) 947 975 { segreg = 0x16; 948 976 if (config.wflags & WFssneds) 949 977 pcs->Iflags |= CFss; // because BP won't be there anymore 950 978 } 951 979 c = gen1(c,segreg); // PUSH segreg 952 980 } 953 981 954 982 c = gen1(c,0x50 + reg); // PUSH reg 955 983 956 984 // Rewrite the addressing mode in *pcs so it is just 0[reg] 957 pcs->Irm = getaddrmode(idxregs);985 setaddrmode(pcs, idxregs); 958 986 pcs->IFL1 = FLoffset; 959 987 pcs->IEV1.Vuns = 0; 960 988 961 989 // Call the validation function 962 990 { 963 991 makeitextern(rtlsym[RTLSYM_PTRCHK]); 964 992 965 993 used &= ~(keepmsk | idxregs); // regs destroyed by this exercise 966 994 c = cat(c,getregs(used)); 967 995 // CALL __ptrchk 968 996 gencs(c,(LARGECODE) ? 0x9A : 0xE8,0,FLfunc,rtlsym[RTLSYM_PTRCHK]); 969 997 } 970 998 971 999 *pc = cat(c,cs2); 972 1000 } 973 1001 974 1002 975 1003 976 1004 /*********************************** 977 1005 * Determine if BP can be used as a general purpose register. 
… … 1017 1045 config.flags & CFGstack || 1018 1046 localsize >= 0x100 || // arbitrary value < 0x1000 1019 1047 (usednteh & ~NTEHjmonitor) || 1020 1048 usedalloca 1021 1049 ) 1022 1050 goto Lcant; 1023 1051 } 1024 1052 Lcan: 1025 1053 return mBP; 1026 1054 1027 1055 Lcant: 1028 1056 return 0; 1029 1057 } 1030 1058 1031 1059 /*************************************** 1032 1060 * Gen code for OPframeptr 1033 1061 */ 1034 1062 1035 1063 code *cdframeptr(elem *e, regm_t *pretregs) 1036 1064 { 1037 regm_t retregs;1038 1065 unsigned reg; 1039 code *cg;1040 code *c1;1041 1066 code cs; 1042 1067 1043 re tregs = *pretregs & allregs;1068 regm_t retregs = *pretregs & allregs; 1044 1069 if (!retregs) 1045 1070 retregs = allregs; 1046 cg = allocreg(&retregs, &reg, TYint); 1047 //c1 = genmovreg(cg, reg, BP); 1071 code *cg = allocreg(&retregs, &reg, TYint); 1048 1072 1049 1073 cs.Iop = ESCAPE; 1050 1074 cs.Iop2 = ESCframeptr; 1051 1075 cs.Iflags = 0; 1052 1076 cs.Irex = 0; 1053 1077 cs.Irm = reg; 1054 c 1= gen(cg,&cs);1055 1056 return cat(c 1,fixresult(e,retregs,pretregs));1078 cg = gen(cg,&cs); 1079 1080 return cat(cg,fixresult(e,retregs,pretregs)); 1057 1081 } 1058 1082 1059 1083 /*************************************** 1060 1084 * Gen code for load of _GLOBAL_OFFSET_TABLE_. 1061 1085 * This value gets cached in the local variable 'localgot'. 
1062 1086 */ 1063 1087 1064 1088 code *cdgot(elem *e, regm_t *pretregs) 1065 1089 { 1066 1090 #if TARGET_OSX 1067 1091 regm_t retregs; 1068 1092 unsigned reg; 1069 1093 code *c; 1070 1094 1071 1095 retregs = *pretregs & allregs; 1072 1096 if (!retregs) 1073 1097 retregs = allregs; 1074 1098 c = allocreg(&retregs, &reg, TYnptr); 1075 1099 1076 1100 c = genc(c,0xE8,0,0,0,FLgot,0); // CALL L1 … … 1843 1867 static unsigned char ops0[] = { 0x07,0x1F,0x5F,0x5E, 1844 1868 0x5D,0x5B,0x5B,0x5A, 1845 1869 0x59,0x58,0xCF,0 }; 1846 1870 unsigned char *p; 1847 1871 1848 1872 c = genregs(c,0x8B,SP,BP); // MOV SP,BP 1849 1873 p = (config.target_cpu >= TARGET_80286) ? ops2 : ops0; 1850 1874 do 1851 1875 gen1(c,*p); 1852 1876 while (*++p); 1853 1877 goto Lopt; 1854 1878 } 1855 1879 1856 1880 if (config.flags & CFGtrace && 1857 1881 (!(config.flags4 & CFG4allcomdat) || 1858 1882 funcsym_p->Sclass == SCcomdat || 1859 1883 funcsym_p->Sclass == SCglobal || 1860 1884 (config.flags2 & CFG2comdat && SymInline(funcsym_p)) 1861 1885 ) 1862 1886 ) 1863 { symbol *s; 1864 1865 s = rtlsym[farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N]; 1887 { 1888 symbol *s = rtlsym[farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N]; 1866 1889 makeitextern(s); 1867 1890 c = gencs(c,I16 ? 0x9A : 0xE8,0,FLfunc,s); // CALLF _trace 1868 1891 if (!I16) 1869 1892 code_orflag(c,CFoff | CFselfrel); 1870 1893 useregs((ALLREGS | mBP | mES) & ~s->Sregsaved); 1871 1894 } 1872 1895 1873 1896 if (usednteh & ~NTEHjmonitor && (config.exe == EX_NT || MARS)) 1874 1897 c = cat(c,nteh_epilog()); 1875 1898 1876 1899 cpopds = CNIL; 1877 1900 if (tyf & mTYloadds) 1878 1901 { cpopds = gen1(cpopds,0x1F); // POP DS 1879 1902 c = cat(c,cpopds); 1880 1903 spoff += intsize; 1881 1904 } 1882 1905 1883 reg = 7; 1884 regm = 1 << 7; 1906 /* Pop all the general purpose registers saved on the stack 1907 * by the prolog code. Remember to do them in the reverse 1908 * order they were pushed. 1909 */ 1910 reg = I64 ? 
R15 : DI; 1911 regm = 1 << reg; 1885 1912 topop = fregsaved & ~mfuncreg; 1886 1913 #ifdef DEBUG 1887 if (topop & ~0xFF )1914 if (topop & ~0xFFFF) 1888 1915 printf("fregsaved = x%x, mfuncreg = x%x\n",fregsaved,mfuncreg); 1889 1916 #endif 1890 assert(!(topop & ~0xFF ));1917 assert(!(topop & ~0xFFFF)); 1891 1918 while (topop) 1892 1919 { if (topop & regm) 1893 { c = gen1(c,0x58 + reg); /* POP reg */1894 if (reg & 8)1895 code_orrex(c, REX_B);1896 topop &= ~regm;1897 spoff += intsize;1920 { c = gen1(c,0x58 + reg); // POP reg 1921 if (reg & 8) 1922 code_orrex(c, REX_B); 1923 topop &= ~regm; 1924 spoff += intsize; 1898 1925 } 1899 1926 regm >>= 1; 1900 1927 reg--; 1901 1928 } 1902 1929 1903 1930 #if MARS 1904 1931 if (usednteh & NTEHjmonitor) 1905 1932 { 1906 1933 regm_t retregs = 0; 1907 1934 if (b->BC == BCretexp) 1908 1935 retregs = regmask(b->Belem->Ety, tym); 1909 1936 code *cn = nteh_monitor_epilog(retregs); 1910 1937 c = cat(c,cn); 1911 1938 xlocalsize += 8; 1912 1939 } 1913 1940 #endif 1914 1941 1915 1942 if (config.wflags & WFwindows && farfunc) 1916 1943 { 1917 1944 int wflags = config.wflags; … … 1983 2010 op = tyfarfunc(tym) ? 0xCA : 0xC2; 1984 2011 if (tym == TYhfunc) 1985 2012 { 1986 2013 c = genc2(c,0xC2,0,4); // RET 4 1987 2014 } 1988 2015 else if (!typfunc(tym) || Poffset == 0) 1989 2016 { op++; // to a regular RET 1990 2017 c = gen1(c,op); 1991 2018 } 1992 2019 else 1993 2020 { // Stack is always aligned on register size boundary 1994 2021 Poffset = (Poffset + (REGSIZE - 1)) & ~(REGSIZE - 1); 1995 2022 c = genc2(c,op,0,Poffset); // RET Poffset 1996 2023 } 1997 2024 } 1998 2025 1999 2026 Lopt: 2000 2027 // If last instruction in ce is ADD SP,imm, and first instruction 2001 2028 // in c sets SP, we can dump the ADD. 
2002 2029 cr = code_last(ce); 2003 if (cr && c )2030 if (cr && c && !I64) 2004 2031 { 2005 2032 if (cr->Iop == 0x81 && cr->Irm == modregrm(3,0,SP)) // if ADD SP,imm 2006 2033 { 2007 2034 if ( 2008 2035 c->Iop == 0xC9 || // LEAVE 2009 2036 (c->Iop == 0x8B && c->Irm == modregrm(3,SP,BP)) || // MOV SP,BP 2010 2037 (c->Iop == 0x8D && c->Irm == modregrm(1,SP,6)) // LEA SP,-imm[BP] 2011 2038 ) 2012 2039 cr->Iop = NOP; 2013 2040 else if (c->Iop == 0x58 + BP) // if POP BP 2014 2041 { cr->Iop = 0x8B; 2015 2042 cr->Irm = modregrm(3,SP,BP); // MOV SP,BP 2016 2043 } 2017 2044 } 2018 2045 #if 0 // These optimizations don't work if the called function 2019 2046 // cleans off the stack. 2020 2047 else if (c->Iop == 0xC3 && cr->Iop == 0xE8) // CALL near 2021 2048 { cr->Iop = 0xE9; // JMP near 2022 2049 c->Iop = NOP; 2023 2050 } … … 2375 2401 break; 2376 2402 2377 2403 default: 2378 2404 goto L3; 2379 2405 } 2380 2406 2381 2407 if (disp == 0) // bra to next instruction 2382 2408 { bytesaved += csize; 2383 2409 c->Iop = NOP; // del branch instruction 2384 2410 c->IEV2.Vcode = NULL; 2385 2411 c = cn; 2386 2412 if (!c) 2387 2413 break; 2388 2414 continue; 2389 2415 } 2390 2416 else if ((targ_size_t)(targ_schar)(disp - 2) == (disp - 2) && 2391 2417 (targ_size_t)(targ_schar)disp == disp) 2392 2418 { 2393 2419 if (op == JMP) 2394 2420 { c->Iop = JMPS; // JMP SHORT 2395 bytesaved += I 32 ? 3 : 1;2421 bytesaved += I16 ? 1 : 3; 2396 2422 } 2397 2423 else // else Jcond 2398 2424 { c->Iflags &= ~CFjmp16; // a branch is ok 2399 bytesaved += I 32 ? 4 : 3;2425 bytesaved += I16 ? 3 : 4; 2400 2426 2401 2427 // Replace a cond jump around a call to a function that 2402 2428 // never returns with a cond jump to that function. 2403 2429 if (config.flags4 & CFG4optimized && 2404 2430 config.target_cpu >= TARGET_80386 && 2405 disp == (I 32 ? 5 : 3) &&2431 disp == (I16 ? 
3 : 5) && 2406 2432 cn && 2407 2433 cn->Iop == 0xE8 && 2408 2434 cn->IFL2 == FLfunc && 2409 2435 cn->IEVsym2->Sflags & SFLexit && 2410 2436 !(cn->Iflags & (CFtarg | CFtarg2)) 2411 2437 ) 2412 2438 { 2413 2439 cn->Iop = 0x0F; 2414 2440 cn->Iop2 = (c->Iop & 0x0F) ^ 0x81; 2415 2441 c->Iop = NOP; 2416 2442 c->IEV2.Vcode = NULL; 2417 2443 bytesaved++; 2418 2444 2419 2445 // If nobody else points to ct, we can remove the CFtarg 2420 2446 if (flag && ct) 2421 2447 { code *cx; 2422 2448 2423 2449 for (cx = bl->Bcode; 1; cx = code_next(cx)) 2424 2450 { 2425 2451 if (!cx) … … 2533 2559 #ifdef DEBUG 2534 2560 if (0) 2535 2561 { printf("assignaddrc()\n"); 2536 2562 c->print(); 2537 2563 } 2538 2564 if (code_next(c) && code_next(code_next(c)) == c) 2539 2565 assert(0); 2540 2566 #endif 2541 2567 if (c->Iop == 0x0F) 2542 2568 ins = inssize2[c->Iop2]; 2543 2569 else if (c->Iop == ESCAPE) 2544 2570 { 2545 2571 if (c->Iop2 == ESCadjesp) 2546 2572 { 2547 2573 //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,c->IEV2.Vint); 2548 2574 EBPtoESP += c->IEV2.Vint; 2549 2575 c->Iop = NOP; 2550 2576 } 2551 2577 if (c->Iop2 == ESCframeptr) 2552 2578 { // Convert to load of frame pointer 2579 // c->Irm is the register to use 2553 2580 if (hasframe) 2554 2581 { // MOV reg,EBP 2555 2582 c->Iop = 0x89; 2556 c->Irm = modregrm(3,BP,c->Irm); 2583 if (c->Irm & 8) 2584 c->Irex |= REX_B; 2585 c->Irm = modregrm(3,BP,c->Irm & 7); 2557 2586 } 2558 2587 else 2559 2588 { // LEA reg,EBPtoESP[ESP] 2560 2589 c->Iop = 0x8D; 2561 c->Irm = modregrm(2,c->Irm,4); 2590 if (c->Irm & 8) 2591 c->Irex |= REX_R; 2592 c->Irm = modregrm(2,c->Irm & 7,4); 2562 2593 c->Isib = modregrm(0,4,SP); 2563 2594 c->Iflags = CFoff; 2564 2595 c->IFL1 = FLconst; 2565 2596 c->IEV1.Vuns = EBPtoESP; 2566 2597 } 2567 2598 } 2599 if (I64) 2600 c->Irex |= REX_W; 2568 2601 continue; 2569 2602 } 2570 2603 else 2571 2604 ins = inssize[c->Iop]; 2572 2605 if (!(ins & M) || 2573 2606 ((rm = c->Irm) & 0xC0) == 0xC0) 2574 2607 goto do2; /* if no 
first operand */ 2575 2608 if (is32bitaddr(I32,c->Iflags)) 2576 2609 { 2577 2610 2578 2611 if ( 2579 2612 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5)) 2580 2613 ) 2581 2614 goto do2; /* if no first operand */ 2582 2615 } 2583 2616 else 2584 2617 { 2585 2618 if ( 2586 2619 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 2587 2620 ) … … 2637 2670 2638 2671 case FLreg: 2639 2672 case FLauto: 2640 2673 soff = Aoff; 2641 2674 L1: 2642 2675 if (s->Sflags & SFLunambig && !(s->Sflags & SFLread) && // if never loaded 2643 2676 !anyiasm && 2644 2677 // if not optimized, leave it in for debuggability 2645 2678 (config.flags4 & CFG4optimized || !config.fulltypes)) 2646 2679 { c->Iop = NOP; // remove references to it 2647 2680 continue; 2648 2681 } 2649 2682 if (s->Sfl == FLreg && c->IEVpointer1 < 2) 2650 2683 { int reg = s->Sreglsw; 2651 2684 2652 2685 assert(!(s->Sregm & ~mask[reg])); 2653 2686 if (c->IEVpointer1 == 1) 2654 2687 { assert(reg < 4); /* must be a BYTEREGS */ 2655 2688 reg |= 4; /* convert to high byte reg */ 2656 2689 } 2690 if (reg & 8) 2691 { assert(I64); 2692 c->Irex |= REX_B; 2693 reg &= 7; 2694 } 2657 2695 c->Irm = (c->Irm & modregrm(0,7,0)) 2658 2696 | modregrm(3,0,reg); 2659 2697 assert(c->Iop != LES && c->Iop != LEA); 2660 2698 goto do2; 2661 2699 } 2662 2700 else 2663 2701 { c->IEVpointer1 += s->Soffset + soff + BPoff; 2664 2702 if (s->Sflags & SFLunambig) 2665 2703 c->Iflags |= CFunambig; 2666 2704 L2: 2667 2705 if (!hasframe) 2668 2706 { /* Convert to ESP relative address instead of EBP */ 2669 2707 unsigned char rm; 2670 2708 2671 assert( I32);2709 assert(!I16); 2672 2710 c->IEVpointer1 += EBPtoESP; 2673 2711 rm = c->Irm; 2674 2712 if ((rm & 7) == 4) // if SIB byte 2675 2713 { 2676 2714 assert((c->Isib & 7) == BP); 2677 2715 assert((rm & 0xC0) != 0); 2678 2716 c->Isib = (c->Isib & ~7) | modregrm(0,0,SP); 2679 2717 } 2680 2718 else 2681 2719 { 2682 2720 assert((rm & 7) == 5); 2683 2721 c->Irm = (rm & modregrm(0,7,0)) 2684 2722 
| modregrm(2,0,4); 2685 2723 c->Isib = modregrm(0,4,SP); 2686 2724 } 2687 2725 } 2688 2726 } 2689 2727 break; 2690 2728 case FLpara: 2691 2729 soff = Poff - BPoff; // cancel out add of BPoff … … 2877 2915 void pinholeopt(code *c,block *b) 2878 2916 { targ_size_t a; 2879 2917 unsigned op,mod,rm,reg,ereg; 2880 2918 unsigned char ins; 2881 2919 int usespace; 2882 2920 int useopsize; 2883 2921 int space; 2884 2922 block *bn; 2885 2923 2886 2924 #if 0 2887 2925 code *cstart = c; 2888 2926 if (debugc) 2889 2927 { 2890 2928 printf("+pinholeopt(%p)\n",c); 2891 2929 } 2892 2930 #endif 2893 2931 2894 2932 if (b) 2895 2933 { bn = b->Bnext; 2896 2934 usespace = (config.flags4 & CFG4space && b->BC != BCasm); 2897 useopsize = ( !I32|| (config.flags4 & CFG4space && b->BC != BCasm));2935 useopsize = (I16 || (config.flags4 & CFG4space && b->BC != BCasm)); 2898 2936 } 2899 2937 else 2900 2938 { bn = NULL; 2901 2939 usespace = (config.flags4 & CFG4space); 2902 useopsize = ( !I32|| config.flags4 & CFG4space);2940 useopsize = (I16 || config.flags4 & CFG4space); 2903 2941 } 2904 2942 for (; c; c = code_next(c)) 2905 2943 { 2906 2944 L1: 2907 2945 op = c->Iop; 2908 2946 if (op == 0x0F) 2909 2947 ins = inssize2[c->Iop2]; 2910 2948 else 2911 2949 ins = inssize[c->Iop]; 2912 if ( ins & M) /* if modregrm byte */2913 { int longop = (c->Iflags & CFopsize) ? !I32: I32;2950 if (!I64 && ins & M) // if modregrm byte 2951 { int longop = (c->Iflags & CFopsize) ? 
I16 : I32; 2914 2952 int local_BPRM = BPRM; 2915 2953 2916 2954 if (c->Iflags & CFaddrsize) 2917 2955 local_BPRM ^= 5 ^ 6; // toggle between 5 and 6 2918 2956 2919 2957 rm = c->Irm; 2920 2958 reg = rm & (7<<3); // isolate reg field 2921 2959 ereg = rm & 7; 2922 2960 2923 2961 /* If immediate second operand */ 2924 2962 if ((ins & T || op == 0xF6 || op == 0xF7) && 2925 2963 c->IFL2 == FLconst) 2926 2964 { int flags; 2927 2965 targ_long u; 2928 2966 2929 2967 flags = c->Iflags & CFpsw; /* if want result in flags */ 2930 2968 u = c->IEV2.Vuns; 2931 2969 if (ins & E) 2932 2970 u = (signed char) u; 2933 2971 else if (!longop) 2934 2972 u = (short) u; 2935 2973 2936 2974 // Replace CMP reg,0 with TEST reg,reg 2937 #if 02938 // BUG: is this the right one?2939 if ((op & 0xFC) == 0x80 &&2940 #else2941 2975 if ((op & 0xFE) == 0x80 && 2942 #endif2943 2976 rm >= modregrm(3,7,AX) && 2944 2977 u == 0) 2945 2978 { c->Iop = (op & 1) | 0x84; 2946 2979 c->Irm = modregrm(3,ereg,ereg); 2947 2980 goto L1; 2948 2981 } 2949 2982 2950 2983 /* Optimize ANDs with an immediate constant */ 2951 2984 if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0)) 2952 2985 { 2953 2986 if (rm >= modregrm(3,4,AX)) 2954 2987 { 2955 2988 if (u == 0) 2956 2989 { /* Replace with XOR reg,reg */ 2957 2990 c->Iop = 0x30 | (op & 1); 2958 2991 NEWREG(c->Irm,rm & 7); 2959 2992 goto L1; 2960 2993 } 2961 2994 if (u == 0xFFFFFFFF && !flags) 2962 2995 { c->Iop = NOP; … … 4023 4056 return offset; /* ending address */ 4024 4057 } 4025 4058 4026 4059 4027 4060 STATIC void do64bit(enum FL fl,union evc *uev,int flags) 4028 4061 { char *p; 4029 4062 symbol *s; 4030 4063 targ_size_t ad; 4031 4064 long tmp; 4032 4065 4033 4066 assert(I64); 4034 4067 switch (fl) 4035 4068 { 4036 4069 case FLconst: 4037 4070 ad = * (targ_size_t *) uev; 4038 4071 L1: 4039 4072 GENP(8,&ad); 4040 4073 return; 4041 4074 case FLdatseg: 4042 4075 FLUSH(); 4043 reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg, flags);4076 
reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg,CFoffset64 | flags); 4044 4077 break; 4045 4078 case FLframehandler: 4046 4079 framehandleroffset = OFFSET(); 4047 4080 ad = 0; 4048 4081 goto L1; 4049 4082 case FLswitch: 4050 4083 FLUSH(); 4051 4084 ad = uev->Vswitch->Btableoffset; 4052 4085 if (config.flags & CFGromable) 4053 4086 reftocodseg(cseg,offset,ad); 4054 4087 else 4055 4088 reftodatseg(cseg,offset,ad,JMPSEG,CFoff); 4056 4089 break; 4057 4090 case FLcsdata: 4058 4091 case FLfardata: 4059 4092 #if DEBUG 4060 4093 symbol_print(uev->sp.Vsym); 4061 4094 #endif 4062 4095 assert(!TARGET_FLAT); 4063 4096 // NOTE: In ELFOBJ all symbol refs have been tagged FLextern 4064 4097 // strings and statics are treated like offsets from a 4065 4098 // un-named external with is the start of .rodata or .data 4066 4099 case FLextern: /* external data symbol */ 4067 4100 case FLtlsdata: 4068 4101 #if TARGET_LINUX || TARGET_FREEBSD || TARGET_SOLARIS 4069 4102 case FLgot: 4070 4103 case FLgotoff: 4071 4104 #endif 4072 4105 FLUSH(); 4073 4106 s = uev->sp.Vsym; /* symbol pointer */ 4074 reftoident(cseg,offset,s,uev->sp.Voffset, flags);4107 reftoident(cseg,offset,s,uev->sp.Voffset,CFoffset64 | flags); 4075 4108 break; 4076 4109 4077 4110 #if TARGET_OSX 4078 4111 case FLgot: 4079 4112 funcsym_p->Slocalgotoffset = OFFSET(); 4080 4113 ad = 0; 4081 4114 goto L1; 4082 4115 #endif 4083 4116 4084 4117 case FLfunc: /* function call */ 4085 4118 s = uev->sp.Vsym; /* symbol pointer */ 4086 4119 assert(!(TARGET_FLAT && tyfarfunc(s->ty()))); 4087 4120 FLUSH(); 4088 reftoident(cseg,offset,s,0, flags);4121 reftoident(cseg,offset,s,0,CFoffset64 | flags); 4089 4122 break; 4090 4123 4091 4124 case FLblock: /* displacement to another block */ 4092 4125 ad = uev->Vblock->Boffset - OFFSET() - 4; 4093 4126 //printf("FLblock: funcoffset = %x, OFFSET = %x, Boffset = %x, ad = %x\n", funcoffset, OFFSET(), uev->Vblock->Boffset, ad); 4094 4127 goto L1; 4095 4128 4096 4129 case FLblockoff: 4097 4130 
FLUSH(); 4098 4131 assert(uev->Vblock); 4099 4132 //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", offset, uev->Vblock->Boffset, funcoffset); 4100 4133 reftocodseg(cseg,offset,uev->Vblock->Boffset); 4101 4134 break; 4102 4135 4103 4136 default: 4104 4137 #ifdef DEBUG 4105 4138 WRFL(fl); 4106 4139 #endif 4107 4140 assert(0); 4108 4141 } trunk/src/backend/cod4.c
r569 r577 33 33 * Return number of times symbol s appears in tree e. 34 34 */ 35 35 36 36 STATIC int intree(symbol *s,elem *e) 37 37 { 38 38 if (EOP(e)) 39 39 return intree(s,e->E1) + (EBIN(e) ? intree(s,e->E2) : 0); 40 40 return e->Eoper == OPvar && e->EV.sp.Vsym == s; 41 41 } 42 42 43 43 /*********************************** 44 44 * Determine if expression e can be evaluated directly into register 45 45 * variable s. 46 46 * Have to be careful about things like x=x+x+x, and x=a+x. 47 47 * Returns: 48 48 * !=0 can 49 49 * 0 can't 50 50 */ 51 51 52 52 STATIC int doinreg(symbol *s, elem *e) 53 { int in ;53 { int in = 0; 54 54 int op; 55 55 56 56 L1: 57 57 op = e->Eoper; 58 58 if (op == OPind || 59 59 OTcall(op) || 60 60 OTleaf(op) || 61 61 (in = intree(s,e)) == 0 || 62 62 (OTunary(op) && !EOP(e->E1)) 63 63 ) 64 64 return 1; 65 65 if (in == 1) 66 66 { 67 67 switch (op) 68 68 { 69 69 case OPadd: 70 70 case OPmin: 71 71 case OPand: 72 72 case OPor: 73 73 case OPxor: 74 74 case OPshl: 75 75 case OPmul: 76 76 if (!intree(s,e->E2)) 77 77 { 78 78 e = e->E1; 79 79 goto L1; 80 80 } 81 81 } 82 82 } 83 83 return 0; 84 84 } 85 85 86 86 /**************************** 87 87 * Return code for saving common subexpressions if EA 88 88 * turns out to be a register. 89 89 * This is called just before modifying an EA. 90 90 */ 91 91 92 code *modEA( unsigned Irm)92 code *modEA(code *c) 93 93 { 94 return ((Irm & 0xC0) == 0xC0) ? getregs(mask[Irm & 7]) : CNIL; 94 if ((c->Irm & 0xC0) == 0xC0) // addressing mode refers to a register 95 { 96 unsigned reg = c->Irm & 7; 97 if (c->Irex & REX_B) 98 { reg |= 8; 99 assert(I64); 100 } 101 return getregs(mask[reg]); 102 } 103 return CNIL; 95 104 } 96 105 97 106 #if TARGET_WINDOS 98 107 // This code is for CPUs that do not support the 8087 99 108 100 109 /**************************** 101 110 * Gen code for op= for doubles. 
102 111 */ 103 112 104 113 STATIC code * opassdbl(elem *e,regm_t *pretregs,unsigned op) 105 114 { code *c1,*c2,*c3,*c4,*c5,*c6,cs; 106 115 unsigned clib; 107 116 regm_t retregs2,retregs,idxregs; 108 117 tym_t tym; 109 118 elem *e1; 110 119 111 120 static unsigned clibtab[OPdivass - OPpostinc + 1] = 112 121 /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass */ 113 122 { CLIBdadd, CLIBdsub, (unsigned)-1, CLIBdadd,CLIBdsub,CLIBdmul,CLIBddiv }; 114 123 … … 121 130 122 131 if (tym == TYfloat) 123 132 { 124 133 clib += CLIBfadd - CLIBdadd; /* convert to float operation */ 125 134 126 135 /* Load EA into FLOATREGS */ 127 136 c1 = cat(c1,getregs(FLOATREGS)); 128 137 cs.Iop = 0x8B; 129 138 cs.Irm |= modregrm(0,AX,0); 130 139 c1 = gen(c1,&cs); 131 140 132 141 if (!I32) 133 142 { 134 143 cs.Irm |= modregrm(0,DX,0); 135 144 getlvalue_msw(&cs); 136 145 c1 = gen(c1,&cs); 137 146 getlvalue_lsw(&cs); 138 147 139 148 } 140 149 retregs2 = FLOATREGS2; 141 idxregs = FLOATREGS | idxregm( cs.Irm,cs.Isib);150 idxregs = FLOATREGS | idxregm(&cs); 142 151 retregs = FLOATREGS; 143 152 } 144 153 else 145 154 { 146 155 if (I32) 147 156 { 148 157 /* Load EA into DOUBLEREGS */ 149 158 c1 = cat(c1,getregs(DOUBLEREGS_32)); 150 159 cs.Iop = 0x8B; 151 160 cs.Irm |= modregrm(0,AX,0); 152 161 c1 = gen(c1,&cs); 153 162 cs.Irm |= modregrm(0,DX,0); 154 163 getlvalue_msw(&cs); 155 164 c1 = gen(c1,&cs); 156 165 getlvalue_lsw(&cs); 157 166 158 167 retregs2 = DOUBLEREGS2_32; 159 idxregs = DOUBLEREGS_32 | idxregm( cs.Irm,cs.Isib);168 idxregs = DOUBLEREGS_32 | idxregm(&cs); 160 169 } 161 170 else 162 171 { 163 172 /* Push EA onto stack */ 164 173 cs.Iop = 0xFF; 165 174 cs.Irm |= modregrm(0,6,0); 166 175 cs.IEVoffset1 += DOUBLESIZE - REGSIZE; 167 176 c1 = gen(c1,&cs); 168 177 getlvalue_lsw(&cs); 169 178 gen(c1,&cs); 170 179 getlvalue_lsw(&cs); 171 180 gen(c1,&cs); 172 181 getlvalue_lsw(&cs); 173 182 gen(c1,&cs); 174 183 stackpush += DOUBLESIZE; 175 184 176 185 retregs2 = DOUBLEREGS_16; 177 idxregs 
= idxregm( cs.Irm,cs.Isib);186 idxregs = idxregm(&cs); 178 187 } 179 188 retregs = DOUBLEREGS; 180 189 } 181 190 182 191 if ((cs.Iflags & CFSEG) == CFes) 183 192 idxregs |= mES; 184 193 cgstate.stackclean++; 185 194 c3 = scodelem(e->E2,&retregs2,idxregs,FALSE); 186 195 cgstate.stackclean--; 187 196 c4 = callclib(e,clib,&retregs,0); 188 197 if (e1->Ecount) 189 198 cssave(e1,retregs,EOP(e1)); /* if lvalue is a CSE */ 190 199 freenode(e1); 191 200 cs.Iop = 0x89; /* MOV EA,DOUBLEREGS */ 192 201 c5 = fltregs(&cs,tym); 193 202 c6 = fixresult(e,retregs,pretregs); 194 203 return cat6(c1,CNIL,c3,c4,c5,c6); 195 204 } 196 205 197 206 /**************************** 198 207 * Gen code for OPnegass for doubles. 199 208 */ 200 209 201 210 STATIC code * opnegassdbl(elem *e,regm_t *pretregs) 202 211 { code *c1,*c2,*c3,*c,*cl,*cr,cs; 203 212 unsigned clib; 204 213 regm_t retregs2,retregs,idxregs; 205 214 tym_t tym; 206 215 elem *e1; 207 216 int sz; 208 217 209 218 if (config.inline8087) 210 219 return cdnegass87(e,pretregs); 211 220 e1 = e->E1; 212 221 tym = tybasic(e1->Ety); 213 222 sz = tysize[tym]; 214 223 215 224 cl = getlvalue(&cs,e1,*pretregs ? 
DOUBLEREGS | mBX | mCX : 0); 216 cr = modEA( cs.Irm);225 cr = modEA(&cs); 217 226 cs.Irm |= modregrm(0,6,0); 218 227 cs.Iop = 0x80; 219 228 cs.IEVoffset1 += sz - 1; 220 229 cs.IFL2 = FLconst; 221 230 cs.IEV2.Vuns = 0x80; 222 231 c = gen(NULL,&cs); // XOR 7[EA],0x80 223 232 if (tycomplex(tym)) 224 233 { 225 234 cs.IEVoffset1 -= sz / 2; 226 235 gen(c,&cs); // XOR 7[EA],0x80 227 236 } 228 237 c = cat3(cl,cr,c); 229 238 230 239 if (*pretregs || e1->Ecount) 231 240 { 232 241 cs.IEVoffset1 -= sz - 1; 233 242 234 243 if (tym == TYfloat) 235 244 { 236 245 // Load EA into FLOATREGS … … 353 362 sz = tysize[tyml]; 354 363 assert((int)sz > 0); 355 364 356 365 if (retregs == 0) /* if no return value */ 357 366 { int fl; 358 367 359 368 if ((e2oper == OPconst || /* if rvalue is a constant */ 360 369 e2oper == OPrelconst && 361 370 ((fl = el_fl(e2)) == FLdata || 362 371 fl==FLudata || fl == FLextern) && 363 372 !(e2->EV.sp.Vsym->ty() & mTYcs) 364 373 ) && 365 374 !evalinregister(e2) && 366 375 !e1->Ecount) /* and no CSE headaches */ 367 376 { 368 377 // Look for special case of (*p++ = ...), where p is a register variable 369 378 if (e1->Eoper == OPind && 370 379 ((e11 = e1->E1)->Eoper == OPpostinc || e11->Eoper == OPpostdec) && 371 380 e11->E1->Eoper == OPvar && 372 381 e11->E1->EV.sp.Vsym->Sfl == FLreg && 373 ( I32|| e11->E1->EV.sp.Vsym->Sregm & IDXREGS)382 (!I16 || e11->E1->EV.sp.Vsym->Sregm & IDXREGS) 374 383 ) 375 384 { 376 385 postinc = e11->E2->EV.Vint; 377 386 if (e11->Eoper == OPpostdec) 378 387 postinc = -postinc; 379 388 cl = getlvalue(&cs,e11,RMstore); 380 389 freenode(e11->E2); 381 390 } 382 391 else 383 392 { postinc = 0; 384 393 cl = getlvalue(&cs,e1,RMstore); 385 394 386 395 if (e2oper == OPconst && 387 396 config.flags4 & CFG4speed && 388 397 (config.target_cpu == TARGET_Pentium || 389 398 config.target_cpu == TARGET_PentiumMMX) && 390 399 (cs.Irm & 0xC0) == 0x80 391 400 ) 392 401 { 393 if (sz == REGSIZE && e2->EV.Vint) 394 { regm_t rregm; 395 unsigned rreg; 396 
402 if (I64 && sz == 8 && e2->EV.Vpointer) 403 { 404 // MOV reg,imm64 405 // MOV EA,reg 406 regm_t rregm = allregs & ~idxregm(&cs); 407 unsigned reg; 408 cl = regwithvalue(cl,rregm,e2->EV.Vpointer,&reg,CFoffset64); 409 cs.Iop = 0x89; 410 cs.Irm |= modregrm(0,reg & 7,0); 411 if (reg & 8) 412 cs.Irex |= REX_R; 413 c = gen(cl,&cs); 414 freenode(e2); 415 goto Lp; 416 } 417 if ((sz == REGSIZE || (I64 && sz == 4)) && e2->EV.Vint) 418 { 397 419 // MOV reg,imm 398 420 // MOV EA,reg 399 rregm = allregs & ~idxregm(cs.Irm,cs.Isib); 421 regm_t rregm = allregs & ~idxregm(&cs); 422 unsigned reg; 400 423 cl = regwithvalue(cl,rregm,e2->EV.Vint,&reg,0); 401 424 cs.Iop = 0x89; 402 cs.Irm |= modregrm(0,reg,0); 425 cs.Irm |= modregrm(0,reg & 7,0); 426 if (reg & 8) 427 cs.Irex |= REX_R; 403 428 c = gen(cl,&cs); 404 429 freenode(e2); 405 430 goto Lp; 406 431 } 407 432 if (sz == 2 * REGSIZE && e2->EV.Vllong == 0) 408 433 { regm_t rregm; 409 unsigned r reg;434 unsigned reg; 410 435 411 436 // MOV reg,imm 412 437 // MOV EA,reg 413 438 // MOV EA+2,reg 414 rregm = getscratch() & ~idxregm( cs.Irm,cs.Isib);439 rregm = getscratch() & ~idxregm(&cs); 415 440 if (rregm) 416 441 { cl = regwithvalue(cl,rregm,e2->EV.Vint,&reg,0); 417 442 cs.Iop = 0x89; 418 443 cs.Irm |= modregrm(0,reg,0); 419 444 c = gen(cl,&cs); 420 445 getlvalue_msw(&cs); 421 446 c = gen(c,&cs); 422 447 freenode(e2); 423 448 goto Lp; 424 449 } 425 450 } 426 451 } 427 452 } 428 453 429 454 /* If loading result into a register */ 430 455 if ((cs.Irm & 0xC0) == 0xC0) 431 { cl = cat(cl, getregs(mask[cs.Irm & 7]));456 { cl = cat(cl,modEA(&cs)); 432 457 if (sz == 2 * REGSIZE && cs.IFL1 == FLreg) 433 458 cl = cat(cl,getregs(cs.IEVsym1->Sregm)); 434 459 } 435 460 cs.Iop = (sz == 1) ? 
0xC6 : 0xC7; 436 461 437 462 if (e2oper == OPrelconst) 438 463 { 439 464 cs.IEVoffset2 = e2->EV.sp.Voffset; 440 465 cs.IFL2 = fl; 441 466 cs.IEVsym2 = e2->EV.sp.Vsym; 442 467 cs.Iflags |= CFoff; 443 468 cl = gen(cl,&cs); /* MOV EA,&variable */ 469 if (I64 && sz == 8) 470 code_orrex(cl, REX_W); 444 471 if (sz > REGSIZE) 445 472 { 446 473 cs.Iop = 0x8C; 447 474 getlvalue_msw(&cs); 448 475 cs.Irm |= modregrm(0,3,0); 449 476 cl = gen(cl,&cs); /* MOV EA+2,DS */ 450 477 } 451 478 } 452 479 else 453 { targ_int *p;454 480 { 481 assert(e2oper == OPconst); 455 482 cs.IFL2 = FLconst; 456 p = (targ_int *) &(e2->EV);457 cs.IEV2.V int = *p;483 targ_size_t *p = (targ_size_t *) &(e2->EV); 484 cs.IEV2.Vsize_t = *p; 458 485 // Look for loading a register variable 459 486 if ((cs.Irm & 0xC0) == 0xC0) 460 { 461 cl = movregconst(cl,cs.Irm & 7,*p,1 ^ (cs.Iop & 1)); 462 if (sz == 2 * REGSIZE) 463 { getlvalue_msw(&cs); 464 cl = movregconst(cl,cs.Irm & 7,p[1],0); 465 } 487 { unsigned reg = cs.Irm & 7; 488 489 if (cs.Irex & REX_B) 490 reg |= 8; 491 if (I64 && sz == 8) 492 cl = movregconst(cl,reg,*p,CFoffset64); 493 else 494 cl = movregconst(cl,reg,*p,1 ^ (cs.Iop & 1)); 495 if (sz == 2 * REGSIZE) 496 { getlvalue_msw(&cs); 497 cl = movregconst(cl,cs.Irm & 7,p[1],0); 498 } 466 499 } 467 500 else 468 501 { int regsize; 469 502 470 i = sz; 471 do 472 { regsize = REGSIZE; 473 retregs = (sz == 1) ? BYTEREGS : allregs; 474 if (i >= 4 && !I32 && I386) 503 i = sz; 504 do 505 { regsize = REGSIZE; 506 retregs = (sz == 1) ? 
BYTEREGS : allregs; 507 if (i >= 4 && I16 && I386) 508 { 509 regsize = 4; 510 cs.Iflags |= CFopsize; // use opsize to do 32 bit operation 511 } 512 else 513 { 514 if (reghasvalue(retregs,*p,&reg)) 475 515 { 476 regsize = 4; 516 cs.Iop = (cs.Iop & 1) | 0x88; 517 cs.Irm |= modregrm(0,reg & 7,0); // MOV EA,reg 518 if (reg & 8) 519 cs.Irex |= REX_R; 520 } 521 if (!I16 && i == 2) // if 16 bit operand 477 522 cs.Iflags |= CFopsize; 478 } 479 else 480 { 481 if (reghasvalue(retregs,*p,&reg)) 482 { 483 cs.Iop = (cs.Iop & 1) | 0x88; 484 cs.Irm |= reg << 3; /* MOV EA,reg */ 485 } 486 if (I32 && i == 2) // if 16 bit operand 487 cs.Iflags |= CFopsize; 488 } 489 cl = gen(cl,&cs); /* MOV EA,const */ 490 491 p = (targ_int *)((char *) p + regsize); 492 cs.Iop = (cs.Iop & 1) | 0xC6; 493 cs.Irm &= ~(7 << 3); 494 cs.IEVoffset1 += regsize; 495 cs.IEV2.Vint = *p; 496 i -= regsize; 497 } while (i > 0); 523 if (I64 && sz == 8) 524 assert(cs.Irex & REX_W); 525 } 526 cl = gen(cl,&cs); /* MOV EA,const */ 527 528 p = (targ_size_t *)((char *) p + regsize); 529 cs.Iop = (cs.Iop & 1) | 0xC6; 530 cs.Irm &= ~modregrm(0,7,0); 531 cs.Irex &= ~REX_R; 532 cs.IEVoffset1 += regsize; 533 cs.IEV2.Vint = *p; 534 i -= regsize; 535 } while (i > 0); 498 536 } 499 537 } 500 538 freenode(e2); 501 539 c = cl; 502 540 goto Lp; 503 541 } 504 542 retregs = allregs; /* pick a reg, any reg */ 505 543 } 506 544 if (retregs == mPSW) 507 545 retregs = allregs; 508 546 cs.Iop = 0x89; 509 547 if (sz == 1) // must have byte regs 510 548 { cs.Iop = 0x88; 511 549 retregs &= BYTEREGS; 512 550 if (!retregs) 513 551 retregs = BYTEREGS; 514 552 } 515 553 else if (retregs & mES && 516 554 ((e1->Eoper == OPind && 517 555 ((tymll = tybasic(e1->E1->Ety)) == TYfptr || tymll == TYhptr)) … … 542 580 if (e1->EV.sp.Voffset) 543 581 retregs &= mMSW; 544 582 else 545 583 retregs &= mLSW; 546 584 reg = findreg(retregs); 547 585 } 548 586 } 549 587 } 550 588 if (*pretregs & mPSW && !EOP(e1)) /* if evaluating e1 couldn't change flags */ 551 589 { 
/* Be careful that this lines up with jmpopcode() */ 552 590 retregs |= mPSW; 553 591 *pretregs &= ~mPSW; 554 592 } 555 593 cr = scodelem(e2,&retregs,0,TRUE); /* get rvalue */ 556 594 557 595 // Look for special case of (*p++ = ...), where p is a register variable 558 596 if (e1->Eoper == OPind && 559 597 ((e11 = e1->E1)->Eoper == OPpostinc || e11->Eoper == OPpostdec) && 560 598 e11->E1->Eoper == OPvar && 561 599 e11->E1->EV.sp.Vsym->Sfl == FLreg && 562 ( I32|| e11->E1->EV.sp.Vsym->Sregm & IDXREGS)600 (!I16 || e11->E1->EV.sp.Vsym->Sregm & IDXREGS) 563 601 ) 564 602 { 565 603 postinc = e11->E2->EV.Vint; 566 604 if (e11->Eoper == OPpostdec) 567 605 postinc = -postinc; 568 606 cl = getlvalue(&cs,e11,RMstore | retregs); 569 607 freenode(e11->E2); 570 608 } 571 609 else 572 610 { postinc = 0; 573 611 cl = getlvalue(&cs,e1,RMstore | retregs); // get lvalue (cl == CNIL if regvar) 574 612 } 575 613 576 614 c = getregs_imm(varregm); 577 615 578 616 assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes)); 579 617 if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES) 580 618 { 581 619 reg = findreglsw(retregs); 582 620 cs.Irm |= modregrm(0,reg,0); 583 621 c = gen(c,&cs); /* MOV EA,reg */ 584 622 getlvalue_msw(&cs); // point to where segment goes 585 623 cs.Iop = 0x8C; 586 624 NEWREG(cs.Irm,0); 587 625 gen(c,&cs); /* MOV EA+2,ES */ 588 626 } 589 627 else 590 628 { 591 if ( I32)629 if (!I16) 592 630 { 593 631 reg = findreg(retregs & 594 632 ((sz > REGSIZE) ? 
mBP | mLSW : mBP | ALLREGS)); 595 cs.Irm |= modregrm(0,reg,0); 633 cs.Irm |= modregrm(0,reg & 7,0); 634 if (reg & 8) 635 cs.Irex |= REX_R; 596 636 for (; TRUE; sz -= REGSIZE) 597 637 { 598 638 // Do not generate mov from register onto itself 599 639 if (regvar && reg == (cs.Irm & 7)) 600 640 break; 601 641 if (sz == 2) // if 16 bit operand 602 642 cs.Iflags |= CFopsize; 603 643 c = gen(c,&cs); // MOV EA+offset,reg 604 644 if (sz <= REGSIZE) 605 645 break; 606 646 getlvalue_msw(&cs); 607 647 reg = findregmsw(retregs); 608 NEWREG(cs.Irm,reg);648 code_newreg(&cs, reg); 609 649 } 610 650 } 611 651 else 612 652 { 613 653 if (sz > REGSIZE) 614 654 cs.IEVoffset1 += sz - REGSIZE; /* 0,2,6 */ 615 655 reg = findreg(retregs & 616 656 (sz > REGSIZE ? mMSW : ALLREGS)); 617 657 if (tyml == TYdouble || tyml == TYdouble_alias) 618 658 reg = AX; 619 659 cs.Irm |= modregrm(0,reg,0); 620 660 /* Do not generate mov from register onto itself */ 621 661 if (!regvar || reg != (cs.Irm & 7)) 622 662 for (; TRUE; sz -= REGSIZE) /* 1,2,4 */ 623 663 { 624 664 c = gen(c,&cs); /* MOV EA+offset,reg */ 625 665 if (sz <= REGSIZE) 626 666 break; 627 667 cs.IEVoffset1 -= REGSIZE; 628 668 if (tyml == TYdouble || tyml == TYdouble_alias) 629 669 reg = dblreg[reg]; 630 670 else 631 671 reg = findreglsw(retregs); 632 672 NEWREG(cs.Irm,reg); 633 673 } 634 674 } 635 675 } 636 676 if (e1->Ecount || /* if lvalue is a CSE or */ 637 677 regvar) /* rvalue can't be a CSE */ 638 678 { 639 679 c = cat(c,getregs_imm(retregs)); // necessary if both lvalue and 640 680 // rvalue are CSEs (since a reg 641 681 // can hold only one e at a time) 642 682 cssave(e1,retregs,EOP(e1)); /* if lvalue is a CSE */ 643 683 } 644 684 645 685 c = cat4(cr,cl,c,fixresult(e,retregs,pretregs)); 646 686 Lp: 647 687 if (postinc) 648 { int reg; 649 650 reg = findreg(idxregm(cs.Irm,cs.Isib)); 688 { 689 int reg = findreg(idxregm(&cs)); 651 690 if (*pretregs & mPSW) 652 691 { // Use LEA to avoid touching the flags 653 c = 
genc1(c,0x8D,modregrm(2,reg,cs.Irm & 7),FLconst,postinc); 692 unsigned rm = cs.Irm & 7; 693 if (cs.Irex & REX_B) 694 rm |= 8; 695 c = genc1(c,0x8D,modregxrmx(2,reg,rm),FLconst,postinc); 696 if (sz == 8) 697 code_orrex(c, REX_W); 698 } 699 else if (I64) 700 { 701 c = genc2(c,0x81,modregrmx(3,0,reg),postinc); 702 if (sz == 8) 703 code_orrex(c, REX_W); 654 704 } 655 705 else 656 706 { 657 707 if (postinc == 1) 658 708 c = gen1(c,0x40 + reg); // INC reg 659 709 else if (postinc == -(targ_int)1) 660 710 c = gen1(c,0x48 + reg); // DEC reg 661 711 else 662 712 { 663 713 c = genc2(c,0x81,modregrm(3,0,reg),postinc); 664 714 } 665 715 } 666 716 } 667 717 freenode(e1); 668 718 return c; 669 719 } 670 720 671 721 672 722 /************************ 673 723 * Generate code for += -= &= |= ^= negass … … 695 745 reverse = 0; 696 746 e1 = e->E1; 697 747 tyml = tybasic(e1->Ety); // type of lvalue 698 748 sz = tysize[tyml]; 699 749 byte = (sz == 1); // 1 for byte operation, else 0 700 750 if (tyfloating(tyml)) 701 751 { 702 752 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 703 753 if (op == OPnegass) 704 754 c = cdnegass87(e,pretregs); 705 755 else 706 756 c = opass87(e,pretregs); 707 757 #else 708 758 if (op == OPnegass) 709 759 c = opnegassdbl(e,pretregs); 710 760 else 711 761 c = opassdbl(e,pretregs,op); 712 762 #endif 713 763 return c; 714 764 } 715 opsize = ( !I32&& tylong(tyml) && config.target_cpu >= TARGET_80386)765 opsize = (I16 && tylong(tyml) && config.target_cpu >= TARGET_80386) 716 766 ? 
CFopsize : 0; 717 767 cflags = 0; 718 768 forccs = *pretregs & mPSW; // return result in flags 719 forregs = *pretregs & (mBP | ALLREGS | mES);// return result in regs769 forregs = *pretregs & ~mPSW; // return result in regs 720 770 /* TRUE if we want the result in a register */ 721 771 wantres = forregs || (e1->Ecount && EOP(e1)); 722 772 723 773 switch (op) /* select instruction opcodes */ 724 774 { case OPpostinc: op = OPaddass; /* i++ => += */ 725 775 case OPaddass: op1 = 0x01; op2 = 0x11; 726 776 cflags = CFpsw; 727 777 mode = 0; break; /* ADD, ADC */ 728 778 case OPpostdec: op = OPminass; /* i-- => -= */ 729 779 case OPminass: op1 = 0x29; op2 = 0x19; 730 780 cflags = CFpsw; 731 781 mode = 5; break; /* SUB, SBC */ 732 782 case OPandass: op1 = op2 = 0x21; 733 783 mode = 4; break; /* AND, AND */ 734 784 case OPorass: op1 = op2 = 0x09; 735 785 mode = 1; break; /* OR , OR */ 736 786 case OPxorass: op1 = op2 = 0x31; 737 787 mode = 6; break; /* XOR, XOR */ 738 788 case OPnegass: op1 = 0xF7; // NEG 739 789 break; 740 790 default: 741 791 assert(0); 742 792 } 743 793 op1 ^= byte; /* bit 0 is 0 for byte operation */ 744 794 745 795 if (op == OPnegass) 746 796 { 747 797 cl = getlvalue(&cs,e1,0); 748 cr = modEA( cs.Irm);798 cr = modEA(&cs); 749 799 cs.Irm |= modregrm(0,3,0); 750 800 cs.Iop = op1; 751 801 switch (tysize[tyml]) 752 802 { case CHARSIZE: 753 803 c = gen(CNIL,&cs); 754 804 break; 755 805 case SHORTSIZE: 756 806 c = gen(CNIL,&cs); 757 if ( I32&& *pretregs & mPSW)808 if (!I16 && *pretregs & mPSW) 758 809 c->Iflags |= CFopsize | CFpsw; 759 810 break; 760 811 case LONGSIZE: 761 if ( I32|| opsize)813 if (!I16 || opsize) 762 814 { c = gen(CNIL,&cs); 763 815 c->Iflags |= opsize; 764 816 break; 765 817 } 766 818 neg_2reg: 767 819 getlvalue_msw(&cs); 768 820 c = gen(CNIL,&cs); // NEG EA+2 769 821 getlvalue_lsw(&cs); 770 822 gen(c,&cs); // NEG EA 771 823 code_orflag(c,CFpsw); 772 824 cs.Iop = 0x81; 773 825 getlvalue_msw(&cs); 774 826 cs.IFL2 = FLconst; 775 827 
cs.IEV2.Vuns = 0; 776 828 gen(c,&cs); // SBB EA+2,0 777 829 break; 778 830 case LLONGSIZE: 779 if (I 32)780 goto neg_2reg;781 assert(0); // not implemented yet782 break; 832 if (I16) 833 assert(0); // not implemented yet 834 goto neg_2reg; 835 783 836 default: 784 837 assert(0); 785 838 } 786 839 c = cat3(cl,cr,c); 787 840 forccs = 0; // flags already set by NEG 788 841 *pretregs &= ~mPSW; 789 842 } 790 843 else if ((e2 = e->E2)->Eoper == OPconst && // if rvalue is a const 791 844 // Don't evaluate e2 in register if we can use an INC or DEC 792 845 (((sz <= REGSIZE || tyfv(tyml)) && 793 846 (op == OPaddass || op == OPminass) && 794 847 ((val = el_tolong(e2)) == 1 || val == -1) 795 848 ) || 796 849 (!evalinregister(e2) && tyml != TYhptr) 797 850 ) 798 851 ) 799 852 { 800 853 cl = getlvalue(&cs,e1,0); 801 cl = cat(cl,modEA( cs.Irm));854 cl = cat(cl,modEA(&cs)); 802 855 cs.IFL2 = FLconst; 803 856 cs.IEV2.Vint = e2->EV.Vint; 804 857 if (sz <= REGSIZE || tyfv(tyml) || opsize) 805 { targ_int i; 806 807 i = cs.IEV2.Vint; 858 { 859 targ_size_t i = cs.IEV2.Vsize_t; 808 860 809 861 /* Handle shortcuts. Watch out for if result has */ 810 862 /* to be in flags. 
*/ 811 863 812 864 if (reghasvalue(ALLREGS,i,&reg) && i != 1 && i != -1 && 813 865 !opsize) 814 866 { 815 867 cs.Iop = op1; 816 868 cs.Irm |= modregrm(0,reg,0); 817 869 } 818 870 else 819 871 { 820 872 cs.Iop = 0x81; 821 873 cs.Irm |= modregrm(0,mode,0); 822 874 switch (op) 823 875 { case OPminass: /* convert to += */ 824 876 cs.Irm ^= modregrm(0,5,0); 825 877 i = -i; 826 cs.IEV2.V int = i;878 cs.IEV2.Vsize_t = i; 827 879 /* FALL-THROUGH */ 828 880 case OPaddass: 829 881 if (i == 1) /* INC EA */ 830 882 goto L1; 831 883 else if (i == -1) /* DEC EA */ 832 884 { cs.Irm |= modregrm(0,1,0); 833 885 L1: cs.Iop = 0xFF; 834 886 } 835 887 break; 836 888 } 837 889 } 838 890 cs.Iop ^= byte; /* for byte operations */ 839 891 cs.Iflags |= opsize; 840 892 if (forccs) 841 893 cs.Iflags |= CFpsw; 842 else if ( I32&& cs.Iflags & CFopsize)894 else if (!I16 && cs.Iflags & CFopsize) 843 895 { 844 896 switch (op) 845 897 { case OPorass: 846 898 case OPxorass: 847 cs.IEV2.V int &= 0xFFFF;899 cs.IEV2.Vsize_t &= 0xFFFF; 848 900 cs.Iflags &= ~CFopsize; // don't worry about MSW 849 901 break; 850 902 case OPandass: 851 cs.IEV2.V int |= ~0xFFFFL;903 cs.IEV2.Vsize_t |= ~0xFFFFLL; 852 904 cs.Iflags &= ~CFopsize; // don't worry about MSW 853 905 break; 854 906 case OPminass: 855 907 case OPaddass: 856 908 #if 1 857 909 if ((cs.Irm & 0xC0) == 0xC0) // EA is register 858 910 cs.Iflags &= ~CFopsize; 859 911 #else 860 912 if ((cs.Irm & 0xC0) == 0xC0 && // EA is register and 861 913 e1->Eoper == OPind) // not a register var 862 914 cs.Iflags &= ~CFopsize; 863 915 #endif 864 916 break; 865 917 default: 866 918 assert(0); 867 919 break; 868 920 } 869 921 } 870 922 871 923 // For scheduling purposes, we wish to replace: 872 924 // OP EA 873 925 // with: 874 926 // MOV reg,EA 875 927 // OP reg 876 928 // MOV EA,reg 877 929 if (forregs && sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 && 878 930 (config.target_cpu == TARGET_Pentium || 879 931 config.target_cpu == TARGET_PentiumMMX) && 880 932 config.flags4 & 
CFG4speed) 881 933 { regm_t sregm; 882 934 code cs2; 883 935 884 936 // Determine which registers to use 885 sregm = allregs & ~idxregm( cs.Irm,cs.Isib);937 sregm = allregs & ~idxregm(&cs); 886 938 if (byte) 887 939 sregm &= BYTEREGS; 888 940 if (sregm & forregs) 889 941 sregm &= forregs; 890 942 891 943 cr = allocreg(&sregm,&reg,tyml); // allocate register 892 944 893 945 cs2 = cs; 894 946 cs2.Iflags &= ~CFpsw; 895 947 cs2.Iop = 0x8B ^ byte; 896 c s2.Irm = (cs2.Irm & modregrm(3,0,7)) | modregrm(0,reg,0);948 code_newreg(&cs2, reg); 897 949 cr = gen(cr,&cs2); // MOV reg,EA 898 950 899 cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg); 951 cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7); 952 if (reg & 8) 953 cs.Irex |= REX_B; 900 954 gen(cr,&cs); // OP reg 901 955 902 956 cs2.Iop ^= 2; 903 957 gen(cr,&cs2); // MOV EA,reg 904 958 905 959 c = cat(cl,cr); 906 960 retregs = sregm; 907 961 wantres = 0; 908 962 if (e1->Ecount) 909 963 cssave(e1,retregs,EOP(e1)); 910 964 } 911 965 else 912 966 { 913 967 c = gen(cl,&cs); 914 968 cs.Iflags &= ~opsize; 915 969 cs.Iflags &= ~CFpsw; 916 if ( !I32&& opsize) // if DWORD operand970 if (I16 && opsize) // if DWORD operand 917 971 cs.IEVoffset1 += 2; // compensate for wantres code 918 972 } 919 973 } 920 974 else if (sz == 2 * REGSIZE) 921 975 { targ_uns msw; 922 976 923 977 cs.Iop = 0x81; 924 978 cs.Irm |= modregrm(0,mode,0); 925 979 c = cl; 926 980 cs.Iflags |= cflags; 927 981 c = gen(c,&cs); 928 982 cs.Iflags &= ~CFpsw; 929 983 930 984 getlvalue_msw(&cs); // point to msw 931 985 msw = MSREG(e->E2->EV.Vllong); 932 986 cs.IEV2.Vuns = msw; /* msw of constant */ 933 987 switch (op) 934 988 { case OPminass: 935 989 cs.Irm ^= modregrm(0,6,0); /* SUB => SBB */ 936 990 break; 937 991 case OPaddass: 938 992 cs.Irm |= modregrm(0,2,0); /* ADD => ADC */ 939 993 break; 940 994 } 941 995 c = gen(c,&cs); 942 996 } 943 997 freenode(e->E2); /* don't need it anymore */ 944 998 } 945 999 else if (isregvar(e1,&varregm,&varreg) && 946 
1000 (e2->Eoper == OPvar || e2->Eoper == OPind) && 947 1001 !evalinregister(e2) && 948 1002 sz <= REGSIZE) // deal with later 949 1003 { 950 1004 cr = getlvalue(&cs,e2,0); 951 1005 freenode(e2); 952 1006 cl = getregs(varregm); 953 c s.Irm |= modregrm(0,varreg,0);1007 code_newreg(&cs, varreg); 954 1008 cs.Iop = op1 ^ 2; // toggle direction bit 955 1009 if (forccs) 956 1010 cs.Iflags |= CFpsw; 957 1011 reverse = 2; // remember we toggled it 958 1012 cl = gen(cl,&cs); 959 1013 c = cat(cr,cl); 960 1014 retregs = 0; /* to trigger a bug if we attempt to use it */ 961 1015 } 962 1016 else // evaluate e2 into register 963 1017 { 964 1018 retregs = (byte) ? BYTEREGS : ALLREGS; // pick working reg 965 1019 if (tyml == TYhptr) 966 1020 retregs &= ~mCX; // need CX for shift count 967 1021 cr = scodelem(e->E2,&retregs,0,TRUE); // get rvalue 968 1022 cl = getlvalue(&cs,e1,retregs); // get lvalue 969 cl = cat(cl,modEA( cs.Irm));1023 cl = cat(cl,modEA(&cs)); 970 1024 cs.Iop = op1; 971 1025 if (sz <= REGSIZE || tyfv(tyml)) 972 1026 { reg = findreg(retregs); 973 c s.Irm |= modregrm(0,reg,0);// OP1 EA,reg1027 code_newreg(&cs, reg); // OP1 EA,reg 974 1028 } 975 1029 else if (tyml == TYhptr) 976 1030 { unsigned mreg,lreg; 977 1031 978 1032 mreg = findregmsw(retregs); 979 1033 lreg = findreglsw(retregs); 980 1034 cl = cat(cl,getregs(retregs | mCX)); 981 1035 982 1036 // If h -= l, convert to h += -l 983 1037 if (e->Eoper == OPminass) 984 1038 { 985 1039 cl = gen2(cl,0xF7,modregrm(3,3,mreg)); // NEG mreg 986 1040 gen2(cl,0xF7,modregrm(3,3,lreg)); // NEG lreg 987 1041 code_orflag(cl,CFpsw); 988 1042 genc2(cl,0x81,modregrm(3,3,mreg),0); // SBB mreg,0 989 1043 } 990 1044 cs.Iop = 0x01; 991 1045 cs.Irm |= modregrm(0,lreg,0); 992 1046 cl = gen(cl,&cs); // ADD EA,lreg 993 1047 code_orflag(cl,CFpsw); … … 1015 1069 1016 1070 /* See if we need to reload result into a register. 
*/ 1017 1071 /* Need result in registers in case we have a 32 bit */ 1018 1072 /* result and we want the flags as a result. */ 1019 1073 if (wantres || (sz > REGSIZE && forccs)) 1020 1074 { 1021 1075 if (sz <= REGSIZE) 1022 1076 { regm_t possregs; 1023 1077 1024 1078 possregs = ALLREGS; 1025 1079 if (byte) 1026 1080 possregs = BYTEREGS; 1027 1081 retregs = forregs & possregs; 1028 1082 if (!retregs) 1029 1083 retregs = possregs; 1030 1084 1031 1085 // If reg field is destination 1032 1086 if (cs.Iop & 2 && cs.Iop < 0x40 && (cs.Iop & 7) <= 5) 1033 1087 { 1034 1088 reg = (cs.Irm >> 3) & 7; 1089 if (cs.Irex & REX_R) 1090 reg |= 8; 1035 1091 retregs = mask[reg]; 1036 1092 ce = allocreg(&retregs,&reg,tyml); 1037 1093 } 1038 1094 // If lvalue is a register, just use that register 1039 1095 else if ((cs.Irm & 0xC0) == 0xC0) 1040 1096 { 1041 1097 reg = cs.Irm & 7; 1098 if (cs.Irex & REX_B) 1099 reg |= 8; 1042 1100 retregs = mask[reg]; 1043 1101 ce = allocreg(&retregs,&reg,tyml); 1044 1102 } 1045 1103 else 1046 1104 { 1047 1105 ce = allocreg(&retregs,&reg,tyml); 1048 1106 cs.Iop = 0x8B ^ byte ^ reverse; 1049 NEWREG(cs.Irm,reg);1107 code_newreg(&cs, reg); 1050 1108 ce = gen(ce,&cs); // MOV reg,EA 1051 1109 } 1052 1110 } 1053 1111 else if (tyfv(tyml) || tyml == TYhptr) 1054 1112 { regm_t idxregs; 1055 1113 1056 1114 if (tyml == TYhptr) 1057 1115 getlvalue_lsw(&cs); 1058 idxregs = idxregm( cs.Irm,cs.Isib);1116 idxregs = idxregm(&cs); 1059 1117 retregs = forregs & ~idxregs; 1060 1118 if (!(retregs & IDXREGS)) 1061 1119 retregs |= IDXREGS & ~idxregs; 1062 1120 if (!(retregs & mMSW)) 1063 1121 retregs |= mMSW & ALLREGS; 1064 1122 ce = allocreg(&retregs,&reg,tyml); 1065 1123 NEWREG(cs.Irm,findreglsw(retregs)); 1066 1124 if (retregs & mES) /* if want ES loaded */ 1067 1125 { cs.Iop = 0xC4; 1068 1126 ce = gen(ce,&cs); /* LES lreg,EA */ 1069 1127 } 1070 1128 else 1071 1129 { cs.Iop = 0x8B; 1072 1130 ce = gen(ce,&cs); /* MOV lreg,EA */ 1073 1131 getlvalue_msw(&cs); 1074 1132 if (I32) 1075 1133 
cs.Iflags |= CFopsize; 1076 1134 NEWREG(cs.Irm,reg); 1077 1135 gen(ce,&cs); /* MOV mreg,EA+2 */ 1078 1136 } 1079 1137 } 1080 1138 else if (sz == 2 * REGSIZE) 1081 1139 { regm_t idx; 1082 1140 code *cm,*cl; 1083 1141 1084 idx = idxregm( cs.Irm,cs.Isib);1142 idx = idxregm(&cs); 1085 1143 retregs = forregs; 1086 1144 if (!retregs) 1087 1145 retregs = ALLREGS; 1088 1146 ce = allocreg(&retregs,&reg,tyml); 1089 1147 cs.Iop = 0x8B; 1090 1148 NEWREG(cs.Irm,reg); 1091 1149 cm = gen(NULL,&cs); // MOV reg,EA+2 1092 1150 NEWREG(cs.Irm,findreglsw(retregs)); 1093 1151 getlvalue_lsw(&cs); 1094 1152 cl = gen(NULL,&cs); // MOV reg+1,EA 1095 1153 if (mask[reg] & idx) 1096 1154 ce = cat3(ce,cl,cm); 1097 1155 else 1098 1156 ce = cat3(ce,cm,cl); 1099 1157 } 1100 1158 c = cat(c,ce); 1101 1159 if (e1->Ecount) /* if we gen a CSE */ 1102 1160 cssave(e1,retregs,EOP(e1)); 1103 1161 } 1104 1162 freenode(e1); 1105 1163 if (sz <= REGSIZE) 1106 *pretregs &= mES | ALLREGS | mBP;// flags are already set1164 *pretregs &= ~mPSW; // flags are already set 1107 1165 return cat(c,fixresult(e,retregs,pretregs)); 1108 1166 } 1109 1167 1110 1168 1111 1169 /******************************** 1112 1170 * Generate code for *= /= %= 1113 1171 */ 1114 1172 1115 1173 code *cdmulass(elem *e,regm_t *pretregs) 1116 1174 { elem *e1,*e2; 1117 1175 code *cr,*cl,*cg,*c,cs; 1118 1176 tym_t tym,tyml; 1119 1177 regm_t retregs; 1120 1178 char uns; 1121 1179 unsigned op,resreg,reg,opr,lib,byte; 1122 1180 unsigned sz; 1123 1181 1124 1182 e1 = e->E1; 1125 1183 e2 = e->E2; 1126 1184 op = e->Eoper; /* OPxxxx */ 1127 1185 1128 1186 tyml = tybasic(e1->Ety); /* type of lvalue */ 1129 1187 uns = tyuns(tyml) || tyuns(e2->Ety); 1130 1188 tym = tybasic(e->Ety); /* type of result */ 1131 1189 sz = tysize[tyml]; 1189 1190 unsigned rex = (I64 && sz == 8) ? 
REX_W : 0; 1191 unsigned grex = rex << 16; // 64 bit operands 1192 1132 1193 1133 1194 if (tyfloating(tyml)) 1134 1195 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1135 1196 return opass87(e,pretregs); 1136 1197 #else 1137 1198 return opassdbl(e,pretregs,op); 1138 1199 #endif 1139 1200 1140 1201 if (sz <= REGSIZE) /* if word or byte */ 1141 1202 { byte = (sz == 1); /* 1 for byte operation */ 1142 1203 resreg = AX; /* result register for * or / */ 1143 1204 if (uns) /* if unsigned operation */ 1144 1205 opr = 4; /* MUL */ 1145 1206 else /* else signed */ 1146 1207 opr = 5; /* IMUL */ 1147 1208 if (op != OPmulass) /* if /= or %= */ 1148 1209 { opr += 2; /* MUL => DIV, IMUL => IDIV */ 1149 1210 if (op == OPmodass) 1150 1211 resreg = DX; /* remainder is in DX */ 1151 1212 } 1152 1213 if (op == OPmulass) /* if multiply */ 1153 1214 { 1154 1215 if (config.target_cpu >= TARGET_80286 && 1155 1216 e2->Eoper == OPconst && !byte) 1156 { targ_int e2factor;1157 regm_t idxregs;1158 1159 e2factor = el_tolong(e2);1217 { 1218 targ_size_t e2factor = el_tolong(e2); 1219 if (I64 && sz == 8 && e2factor != (int)e2factor) 1220 goto L1; 1160 1221 freenode(e2); 1161 1222 cr = CNIL; 1162 1223 cl = getlvalue(&cs,e1,0); /* get EA */ 1163 idxregs = idxregm(cs.Irm,cs.Isib);1224 regm_t idxregs = idxregm(&cs); 1164 1225 retregs = *pretregs & (ALLREGS | mBP) & ~idxregs; 1165 1226 if (!retregs) 1166 1227 retregs = ALLREGS & ~idxregs; 1167 1228 cg = allocreg(&retregs,&resreg,tyml); 1168 1229 cs.Iop = 0x69; /* IMUL reg,EA,e2value */ 1169 1230 cs.IFL2 = FLconst; 1170 1231 cs.IEV2.Vint = e2factor; 1171 1232 opr = resreg; 1172 1233 } 1173 else if (I32 && !byte) 1174 { 1234 else if (!I16 && !byte) 1235 { 1236 L1: 1175 1237 retregs = *pretregs & (ALLREGS | mBP); 1176 1238 if (!retregs) 1177 1239 retregs = ALLREGS; 1178 1240 cr = codelem(e2,&retregs,FALSE); /* load rvalue in reg */ 1179 1241 cl = getlvalue(&cs,e1,retregs); /* get EA */ 1180 1242 cg = getregs(retregs); /* destroy these 
regs */ 1181 1243 cs.Iop = 0x0F; /* IMUL resreg,EA */ 1182 1244 cs.Iop2 = 0xAF; 1183 1245 resreg = findreg(retregs); 1184 1246 opr = resreg; 1185 1247 } 1186 1248 else 1187 1249 { 1188 1250 retregs = mAX; 1189 1251 cr = codelem(e2,&retregs,FALSE); // load rvalue in AX 1190 1252 cl = getlvalue(&cs,e1,mAX); // get EA 1191 1253 cg = getregs(byte ? mAX : mAX | mDX); // destroy these regs 1192 1254 cs.Iop = 0xF7 ^ byte; // [I]MUL EA 1193 1255 } 1194 1256 cs.Irm |= modregrm(0,opr,0); 1195 1257 c = gen(CNIL,&cs); 1196 1258 } 1197 1259 else // /= or %= 1198 { targ_ int e2factor;1260 { targ_size_t e2factor; 1199 1261 int pow2; 1200 1262 targ_ulong m; 1201 1263 1202 1264 assert(!byte); // should never happen 1203 assert( !I32|| sz != SHORTSIZE);1265 assert(I16 || sz != SHORTSIZE); 1204 1266 if (config.flags4 & CFG4speed && 1205 e2->Eoper == OPconst && sz == REGSIZE && !uns && 1267 e2->Eoper == OPconst && !uns && 1268 (sz == REGSIZE || (I64 && sz == 4)) && 1206 1269 (pow2 = ispow2(e2factor = el_tolong(e2))) != -1 && 1270 e2factor == (int)e2factor && 1207 1271 !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass) 1208 1272 ) 1209 1273 { 1210 1274 // Signed divide or modulo by power of 2 1211 1275 cr = NULL; 1212 1276 c = NULL; 1213 1277 cl = getlvalue(&cs,e1,mAX | mDX); 1214 1278 cs.Iop = 0x8B; 1215 NEWREG(cs.Irm,AX);1279 code_newreg(&cs, AX); 1216 1280 cl = gen(cl,&cs); // MOV AX,EA 1217 1281 freenode(e2); 1218 1282 cg = getregs(mAX | mDX); // trash these regs 1219 1283 cg = gen1(cg,0x99); // CWD 1284 code_orrex(cg, rex); 1220 1285 if (pow2 == 1) 1221 1286 { 1222 1287 if (op == OPdivass) 1223 { gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1224 gen2(cg,0xD1, modregrm(3,7,AX)); // SAR AX,11288 { gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1289 gen2(cg,0xD1,grex | modregrm(3,7,AX)); // SAR AX,1 1225 1290 resreg = AX; 1226 1291 } 1227 1292 else // OPmod 1228 { gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1229 genc2(cg,0x81, modregrm(3,4,AX),1); // AND 
AX,11230 gen2(cg,0x03, modregrm(3,DX,AX)); // ADD DX,AX1293 { gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1294 genc2(cg,0x81,grex | modregrm(3,4,AX),1); // AND AX,1 1295 gen2(cg,0x03,grex | modregrm(3,DX,AX)); // ADD DX,AX 1231 1296 resreg = DX; 1232 1297 } 1233 1298 } 1234 1299 else 1235 { targ_ulong m;1236 1237 m = (1 << pow2) - 1;1300 { 1301 assert(pow2 < 32); 1302 targ_ulong m = (1 << pow2) - 1; 1238 1303 if (op == OPdivass) 1239 { genc2(cg,0x81, modregrm(3,4,DX),m); // AND DX,m1240 gen2(cg,0x03, modregrm(3,AX,DX)); // ADD AX,DX1304 { genc2(cg,0x81,grex | modregrm(3,4,DX),m); // AND DX,m 1305 gen2(cg,0x03,grex | modregrm(3,AX,DX)); // ADD AX,DX 1241 1306 // Be careful not to generate this for 8088 1242 1307 assert(config.target_cpu >= TARGET_80286); 1243 genc2(cg,0xC1, modregrm(3,7,AX),pow2); // SAR AX,pow21308 genc2(cg,0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2 1244 1309 resreg = AX; 1245 1310 } 1246 1311 else // OPmodass 1247 { gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1248 gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1249 genc2(cg,0x81, modregrm(3,4,AX),m); // AND AX,m1250 gen2(cg,0x33, modregrm(3,AX,DX)); // XOR AX,DX1251 gen2(cg,0x2B, modregrm(3,AX,DX)); // SUB AX,DX1312 { gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1313 gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1314 genc2(cg,0x81,grex | modregrm(3,4,AX),m); // AND AX,m 1315 gen2(cg,0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1316 gen2(cg,0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1252 1317 resreg = AX; 1253 1318 } 1254 1319 } 1255 1320 } 1256 1321 else 1257 1322 { 1258 1323 retregs = ALLREGS & ~(mAX|mDX); // DX gets sign extension 1259 1324 cr = codelem(e2,&retregs,FALSE); // load rvalue in retregs 1260 1325 reg = findreg(retregs); 1261 1326 cl = getlvalue(&cs,e1,mAX | mDX | retregs); // get EA 1262 1327 cg = getregs(mAX | mDX); // destroy these regs 1263 1328 cs.Irm |= modregrm(0,AX,0); 1264 1329 cs.Iop = 0x8B; 1265 1330 c = gen(CNIL,&cs); // MOV AX,EA 1266 
1331 if (uns) // if unsigned 1267 1332 movregconst(c,DX,0,0); // CLR DX 1268 1333 else // else signed 1269 gen1(c,0x99); // CWD 1334 { gen1(c,0x99); // CWD 1335 code_orrex(c,rex); 1336 } 1270 1337 c = cat(c,getregs(mDX | mAX)); // DX and AX will be destroyed 1271 1338 genregs(c,0xF7,opr,reg); // OPR reg 1339 code_orrex(c,rex); 1272 1340 } 1273 1341 } 1274 1342 cs.Iop = 0x89 ^ byte; 1275 1343 NEWREG(cs.Irm,resreg); 1276 1344 c = gen(c,&cs); // MOV EA,resreg 1277 1345 if (e1->Ecount) // if we gen a CSE 1278 1346 cssave(e1,mask[resreg],EOP(e1)); 1279 1347 freenode(e1); 1280 1348 c = cat(c,fixresult(e,mask[resreg],pretregs)); 1281 1349 return cat4(cr,cl,cg,c); 1282 1350 } 1283 1351 else if (sz == 2 * REGSIZE) 1284 1352 { 1285 1353 lib = CLIBlmul; 1286 1354 if (op == OPdivass || op == OPmodass) 1287 1355 { lib = (uns) ? CLIBuldiv : CLIBldiv; 1288 1356 if (op == OPmodass) 1289 1357 lib++; 1290 1358 } 1291 1359 retregs = mCX | mBX; … … 1299 1367 gen(cl,&cs); /* MOV DX,EA+2 */ 1300 1368 getlvalue_lsw(&cs); 1301 1369 retregs = 0; 1302 1370 if (config.target_cpu >= TARGET_PentiumPro && op == OPmulass) 1303 1371 { 1304 1372 /* IMUL ECX,EAX 1305 1373 IMUL EDX,EBX 1306 1374 ADD ECX,EDX 1307 1375 MUL EBX 1308 1376 ADD EDX,ECX 1309 1377 */ 1310 1378 c = getregs(mAX|mDX|mCX); 1311 1379 c = gen2(c,0x0FAF,modregrm(3,CX,AX)); 1312 1380 gen2(c,0x0FAF,modregrm(3,DX,BX)); 1313 1381 gen2(c,0x03,modregrm(3,CX,DX)); 1314 1382 gen2(c,0xF7,modregrm(3,4,BX)); 1315 1383 gen2(c,0x03,modregrm(3,DX,CX)); 1316 1384 retregs = mDX | mAX; 1317 1385 } 1318 1386 else 1319 c = callclib(e,lib,&retregs,idxregm( cs.Irm,cs.Isib));1387 c = callclib(e,lib,&retregs,idxregm(&cs)); 1320 1388 reg = (op == OPmodass) ? BX : AX; 1321 1389 retregs = mask[reg]; 1322 1390 cs.Iop = 0x89; 1323 1391 NEWREG(cs.Irm,reg); 1324 1392 gen(c,&cs); /* MOV EA,lsreg */ 1325 1393 reg = (op == OPmodass) ? 
CX : DX; 1326 1394 retregs |= mask[reg]; 1327 1395 NEWREG(cs.Irm,reg); 1328 1396 getlvalue_msw(&cs); 1329 1397 gen(c,&cs); /* MOV EA+2,msreg */ 1330 1398 if (e1->Ecount) /* if we gen a CSE */ 1331 1399 cssave(e1,retregs,EOP(e1)); 1332 1400 freenode(e1); 1333 1401 cg = fixresult(e,retregs,pretregs); 1334 1402 return cat4(cr,cl,c,cg); 1335 1403 } 1336 1404 else 1337 1405 { assert(0); 1338 1406 /* NOTREACHED */ 1339 1407 return 0; … … 1348 1416 code *cdshass(elem *e,regm_t *pretregs) 1349 1417 { elem *e1,*e2; 1350 1418 code *cr,*cl,*cg,*c,cs,*ce; 1351 1419 tym_t tym,tyml,uns; 1352 1420 regm_t retregs; 1353 1421 unsigned shiftcnt,op1,op2,reg,v,oper,byte,conste2; 1354 1422 unsigned loopcnt; 1355 1423 unsigned sz; 1356 1424 1357 1425 e1 = e->E1; 1358 1426 e2 = e->E2; 1359 1427 1360 1428 tyml = tybasic(e1->Ety); /* type of lvalue */ 1361 1429 sz = tysize[tyml]; 1362 1430 byte = tybyte(e->Ety) != 0; /* 1 for byte operations */ 1363 1431 uns = tyuns(tyml); 1364 1432 tym = tybasic(e->Ety); /* type of result */ 1365 1433 oper = e->Eoper; 1366 1434 assert(tysize(e2->Ety) <= REGSIZE); 1367 1435 1436 unsigned rex = (I64 && sz == 8) ? REX_W : 0; 1437 unsigned grex = rex << 16; // 64 bit operands 1438 1368 1439 // if our lvalue is a cse, make sure we evaluate for result in register 1369 1440 if (e1->Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,&retregs,&reg)) 1370 1441 *pretregs |= ALLREGS; 1371 1442 1372 1443 #if SCPP 1373 1444 // Do this until the rest of the compiler does OPshr/OPashr correctly 1374 1445 if (oper == OPshrass) 1375 1446 oper = (uns) ? OPshrass : OPashrass; 1376 1447 #endif 1377 1448 1378 1449 // Select opcodes. op2 is used for msw for long shifts. 
1379 1450 1380 1451 switch (oper) 1381 1452 { case OPshlass: 1382 1453 op1 = 4; // SHL 1383 1454 op2 = 2; // RCL 1384 1455 break; 1385 1456 case OPshrass: 1386 1457 op1 = 5; // SHR 1387 1458 op2 = 3; // RCR … … 1402 1473 if (cnst(e2)) 1403 1474 { 1404 1475 conste2 = TRUE; /* e2 is a constant */ 1405 1476 shiftcnt = e2->EV.Vint; /* byte ordering of host */ 1406 1477 if (config.target_cpu >= TARGET_80286 && 1407 1478 sz <= REGSIZE && 1408 1479 shiftcnt != 1) 1409 1480 v = 0xC1; // SHIFT xx,shiftcnt 1410 1481 else if (shiftcnt <= 3) 1411 1482 { loopcnt = shiftcnt; 1412 1483 v = 0xD1; // SHIFT xx,1 1413 1484 } 1414 1485 } 1415 1486 if (v == 0xD3) /* if COUNT == CL */ 1416 1487 { retregs = mCX; 1417 1488 cr = codelem(e2,&retregs,FALSE); 1418 1489 } 1419 1490 else 1420 1491 freenode(e2); 1421 1492 cl = getlvalue(&cs,e1,mCX); /* get lvalue, preserve CX */ 1422 cl = cat(cl,modEA( cs.Irm)); /* check for modifying register */1493 cl = cat(cl,modEA(&cs)); // check for modifying register 1423 1494 1424 1495 if (*pretregs == 0 || /* if don't return result */ 1425 1496 (*pretregs == mPSW && conste2 && tysize[tym] <= REGSIZE) || 1426 1497 sz > REGSIZE 1427 1498 ) 1428 1499 { retregs = 0; // value not returned in a register 1429 1500 cs.Iop = v ^ byte; 1430 1501 c = CNIL; 1431 1502 while (loopcnt--) 1432 1503 { 1433 1504 NEWREG(cs.Irm,op1); /* make sure op1 is first */ 1434 1505 if (sz <= REGSIZE) 1435 1506 { cs.IFL2 = FLconst; 1436 1507 cs.IEV2.Vint = shiftcnt; 1437 1508 c = gen(c,&cs); /* SHIFT EA,[CL|1] */ 1438 1509 if (*pretregs & mPSW && !loopcnt && conste2) 1439 1510 code_orflag(c,CFpsw); 1440 1511 } 1441 1512 else /* TYlong */ 1442 1513 { cs.Iop = 0xD1; /* plain shift */ … … 1477 1548 1478 1549 if (sz == 2 * REGSIZE && *pretregs) 1479 1550 { retregs = *pretregs & (ALLREGS | mBP); 1480 1551 if (retregs) 1481 1552 { ce = allocreg(&retregs,&reg,tym); 1482 1553 cs.Iop = 0x8B; 1483 1554 1484 1555 /* be careful not to trash any index regs */ 1485 1556 /* do MSW first (which can't be
an index reg) */ 1486 1557 getlvalue_msw(&cs); 1487 1558 NEWREG(cs.Irm,reg); 1488 1559 cg = gen(CNIL,&cs); 1489 1560 getlvalue_lsw(&cs); 1490 1561 reg = findreglsw(retregs); 1491 1562 NEWREG(cs.Irm,reg); 1492 1563 gen(cg,&cs); 1493 1564 if (*pretregs & mPSW) 1494 1565 cg = cat(cg,tstresult(retregs,tyml,TRUE)); 1495 1566 } 1496 1567 else /* flags only */ 1497 { retregs = ALLREGS & ~idxregm( cs.Irm,cs.Isib);1568 { retregs = ALLREGS & ~idxregm(&cs); 1498 1569 ce = allocreg(&retregs,&reg,TYint); 1499 1570 cs.Iop = 0x8B; 1500 1571 NEWREG(cs.Irm,reg); 1501 1572 cg = gen(CNIL,&cs); /* MOV reg,EA */ 1502 1573 cs.Iop = 0x0B; /* OR reg,EA+2 */ 1503 1574 cs.Iflags |= CFpsw; 1504 1575 getlvalue_msw(&cs); 1505 1576 gen(cg,&cs); 1506 1577 } 1507 1578 c = cat3(c,ce,cg); 1508 1579 } 1509 1580 cg = CNIL; 1510 1581 } 1511 1582 1512 1583 1513 1584 else /* else must evaluate in register */ 1514 1585 { 1515 1586 if (sz <= REGSIZE) 1516 { regm_t possregs; 1517 1518 possregs = ALLREGS & ~mCX & ~idxregm(cs.Irm,cs.Isib); 1585 { 1586 regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs); 1519 1587 if (byte) 1520 1588 possregs &= BYTEREGS; 1521 1589 retregs = *pretregs & possregs; 1522 1590 if (retregs == 0) 1523 1591 retregs = possregs; 1524 1592 cg = allocreg(&retregs,&reg,tym); 1525 1593 cs.Iop = 0x8B ^ byte; 1526 c s.Irm |= modregrm(0,reg,0);1594 code_newreg(&cs, reg); 1527 1595 c = ce = gen(CNIL,&cs); /* MOV reg,EA */ 1528 if ( I32)1596 if (!I16) 1529 1597 { 1530 1598 assert(!byte || (mask[reg] & BYTEREGS)); 1531 ce = genc2(CNIL,v ^ byte,modregrm(3,op1,reg),shiftcnt); 1599 ce = genc2(CNIL,v ^ byte,modregrmx(3,op1,reg),shiftcnt); 1600 code_orrex(ce, rex); 1532 1601 /* We can do a 32 bit shift on a 16 bit operand if */ 1533 1602 /* it's a left shift and we're not concerned about */ 1534 1603 /* the flags. Remember that flags are not set if */ 1535 1604 /* a shift of 0 occurs.
*/ 1536 1605 if (tysize[tym] == SHORTSIZE && 1537 1606 (oper == OPshrass || oper == OPashrass || 1538 1607 (*pretregs & mPSW && conste2))) 1539 1608 ce->Iflags |= CFopsize; /* 16 bit operand */ 1540 1609 cat(c,ce); 1541 1610 } 1542 1611 else 1543 1612 { 1544 1613 while (loopcnt--) 1545 1614 { /* Generate shift instructions. */ 1546 1615 genc2(ce,v ^ byte,modregrm(3,op1,reg),shiftcnt); 1547 1616 } 1548 1617 } 1549 1618 if (*pretregs & mPSW && conste2) 1550 1619 { assert(shiftcnt); 1551 1620 *pretregs &= ~mPSW; // result is already in flags … … 1596 1665 e1 = e->E1; 1597 1666 e2 = e->E2; 1598 1667 if (*pretregs == 0) /* if don't want result */ 1599 1668 { cl = codelem(e1,pretregs,FALSE); 1600 1669 *pretregs = 0; /* in case e1 changed it */ 1601 1670 cr = codelem(e2,pretregs,FALSE); 1602 1671 return cat(cl,cr); 1603 1672 } 1604 1673 1605 1674 jop = jmpopcode(e); // must be computed before 1606 1675 // leaves are free'd 1607 1676 reverse = 0; 1608 1677 cl = cr = CNIL; 1609 1678 op = e->Eoper; 1610 1679 assert(OTrel(op)); 1611 1680 eqorne = (op == OPeqeq) || (op == OPne); 1612 1681 1613 1682 tym = tybasic(e1->Ety); 1614 1683 sz = tysize[tym]; 1615 1684 byte = sz == 1; 1685 1686 unsigned rex = (I64 && sz == 8) ? 
REX_W : 0; 1687 unsigned grex = rex << 16; // 64 bit operands 1688 1616 1689 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1617 1690 if (tyfloating(tym)) /* if floating operation */ 1618 1691 { 1619 1692 retregs = mPSW; 1620 1693 c = orth87(e,&retregs); 1621 1694 goto L3; 1622 1695 } 1623 1696 #else 1624 1697 if (tyfloating(tym)) /* if floating operation */ 1625 1698 { 1626 1699 if (config.inline8087) 1627 1700 { retregs = mPSW; 1628 1701 c = orth87(e,&retregs); 1629 1702 } 1630 1703 else 1631 1704 { int clib; 1632 1705 1633 1706 retregs = 0; /* skip result for now */ 1634 1707 if (iffalse(e2)) /* second operand is constant 0 */ 1635 1708 { assert(!eqorne); /* should be OPbool or OPnot */ … … 1646 1719 cl = codelem(e1,&retregs,FALSE); 1647 1720 retregs = 0; 1648 1721 c = callclib(e,clib,&retregs,0); 1649 1722 freenode(e2); 1650 1723 } 1651 1724 else 1652 1725 { clib = CLIBdcmp; 1653 1726 if (rel_exception(op)) 1654 1727 clib += CLIBdcmpexc - CLIBdcmp; 1655 1728 c = opdouble(e,&retregs,clib); 1656 1729 } 1657 1730 } 1658 1731 goto L3; 1659 1732 } 1660 1733 #endif 1661 1734 1662 1735 /* If it's a signed comparison of longs, we have to call a library */ 1663 1736 /* routine, because we don't know the target of the signed branch */ 1664 1737 /* (have to set up flags so that jmpopcode() will do it right) */ 1665 1738 if (!eqorne && 1666 ( !I32&& tym == TYlong && tybasic(e2->Ety) == TYlong ||1667 I32 && tym == TYllong && tybasic(e2->Ety) == TYllong)1739 (I16 && tym == TYlong && tybasic(e2->Ety) == TYlong || 1740 I32 && tym == TYllong && tybasic(e2->Ety) == TYllong) 1668 1741 ) 1669 1742 { retregs = mDX | mAX; 1670 1743 cl = codelem(e1,&retregs,FALSE); 1671 1744 retregs = mCX | mBX; 1672 1745 cr = scodelem(e2,&retregs,mDX | mAX,FALSE); 1673 1746 retregs = 0; 1674 1747 c = callclib(e,CLIBlcmp,&retregs,0); /* gross, but it works */ 1675 1748 goto L3; 1676 1749 } 1677 1750 1678 1751 /* See if we should swap operands */ 1679 1752 if (e1->Eoper == OPvar && 
e2->Eoper == OPvar && evalinregister(e2)) 1680 1753 { e1 = e->E2; 1681 1754 e2 = e->E1; 1682 1755 reverse = 2; 1683 1756 } 1684 1757 1685 1758 retregs = allregs; 1686 1759 if (byte) 1687 1760 retregs = BYTEREGS; 1688 1761 1689 1762 c = CNIL; 1690 1763 ce = CNIL; 1691 cs.Iflags = ( I32&& sz == SHORTSIZE) ? CFopsize : 0;1692 cs.Irex = 0;1764 cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0; 1765 cs.Irex = rex; 1693 1766 if (sz > REGSIZE) 1694 1767 ce = gennop(ce); 1695 1768 1696 1769 switch (e2->Eoper) 1697 1770 { 1698 1771 default: 1699 1772 L2: 1700 1773 cl = scodelem(e1,&retregs,0,TRUE); /* compute left leaf */ 1701 1774 L1: 1702 1775 rretregs = allregs & ~retregs; 1703 1776 if (byte) 1704 1777 rretregs &= BYTEREGS; 1705 1778 cr = scodelem(e2,&rretregs,retregs,TRUE); /* get right leaf */ 1706 1779 if (sz <= REGSIZE) /* CMP reg,rreg */ 1707 1780 { reg = findreg(retregs); /* get reg that e1 is in */ 1708 1781 rreg = findreg(rretregs); 1709 1782 c = genregs(CNIL,0x3B ^ byte ^ reverse,reg,rreg); 1710 if (I32 && sz == SHORTSIZE) 1783 code_orrex(c, rex); 1784 if (!I16 && sz == SHORTSIZE) 1711 1785 c->Iflags |= CFopsize; /* compare only 16 bits */ 1712 1786 } 1713 1787 else 1714 1788 { assert(sz <= 2 * REGSIZE); 1715 1789 1716 1790 /* Compare MSW, if they're equal then compare the LSW */ 1717 1791 reg = findregmsw(retregs); 1718 1792 rreg = findregmsw(rretregs); 1719 1793 c = genregs(CNIL,0x3B ^ reverse,reg,rreg); /* CMP reg,rreg */ 1720 if ( I32&& sz == 6)1794 if (!I16 && sz == 6) 1721 1795 c->Iflags |= CFopsize; /* seg is only 16 bits */ 1722 1796 genjmp(c,JNE,FLcode,(block *) ce); /* JNE nop */ 1723 1797 1724 1798 reg = findreglsw(retregs); 1725 1799 rreg = findreglsw(rretregs); 1726 1800 genregs(c,0x3B ^ reverse,reg,rreg); /* CMP reg,rreg */ 1727 1801 } 1728 1802 break; 1729 1803 case OPrelconst: 1730 1804 fl = el_fl(e2); 1731 1805 switch (fl) 1732 1806 { case FLfunc: 1733 1807 fl = FLextern; // so it won't be self-relative 1734 1808 break; 1735 1809 case FLdata: 
1736 1810 case FLudata: 1737 1811 case FLextern: 1738 1812 if (sz > REGSIZE) // compare against DS, not DGROUP 1739 1813 goto L2; 1740 1814 break; … … 1747 1821 cs.IEVsym2 = e2->EV.sp.Vsym; 1748 1822 offset2 = e2->EV.sp.Voffset; 1749 1823 if (sz > REGSIZE) 1750 1824 { cs.Iflags |= CFseg; 1751 1825 cs.IEVoffset2 = 0; 1752 1826 } 1753 1827 else 1754 1828 { cs.Iflags |= CFoff; 1755 1829 cs.IEVoffset2 = offset2; 1756 1830 } 1757 1831 goto L4; 1758 1832 1759 1833 case OPconst: 1760 1834 // If compare against 0 1761 1835 if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) && 1762 1836 isregvar(e1,&retregs,&reg) 1763 1837 ) 1764 1838 { // Just do a TEST instruction 1765 1839 c = genregs(NULL,0x85 ^ byte,reg,reg); // TEST reg,reg 1766 1840 c->Iflags |= (cs.Iflags & CFopsize) | CFpsw; 1841 code_orrex(c, rex); 1767 1842 retregs = mPSW; 1768 1843 break; 1769 1844 } 1770 1845 1771 1846 if (!tyuns(tym) && !tyuns(e2->Ety) && 1772 !boolres(e2) && !(*pretregs & mPSW) && sz == REGSIZE && 1773 (I32 || op == OPlt || op == OPge)) 1847 !boolres(e2) && !(*pretregs & mPSW) && 1848 (sz == REGSIZE || (I64 && sz == 4)) && 1849 (!I16 || op == OPlt || op == OPge)) 1774 1850 { unsigned regi; 1775 1851 1776 1852 assert(*pretregs & (allregs)); 1777 1853 cl = codelem(e1,pretregs,FALSE); 1778 1854 reg = findreg(*pretregs); 1779 1855 c = getregs(mask[reg]); 1780 1856 switch (op) 1781 1857 { case OPle: 1782 c = genc2(c,0x81, modregrm(3,0,reg),(unsigned)-1); /* ADD reg,-1 */1783 genc2(c,0x81, modregrm(3,2,reg),0); /* ADC reg,0 */1858 c = genc2(c,0x81,grex | modregrmx(3,0,reg & 7),(unsigned)-1); // ADD reg,-1 1859 genc2(c,0x81,grex | modregrmx(3,2,reg & 7),0); // ADC reg,0 1784 1860 goto oplt; 1785 1861 case OPgt: 1786 c = gen2(c,0xF7, modregrm(3,3,reg)); // NEG reg1862 c = gen2(c,0xF7,grex | modregrmx(3,3,reg & 7)); // NEG reg 1787 1863 #if TARGET_WINDOS 1788 1864 // What does the Windows platform do? 1789 1865 // lower INT_MIN by 1?
See test exe9.c 1790 1866 // BUG: fix later 1791 genc2(c,0x81, modregrm(3,3,reg),0); // SBB reg,01867 genc2(c,0x81,grex | modregrmx(3,3,reg & 7),0); // SBB reg,0 1792 1868 #endif 1793 1869 goto oplt; 1794 1870 case OPlt: 1795 1871 oplt: 1796 if ( I32)1797 c = genc2(c,0xC1, modregrm(3,5,reg),31); /* SHR reg,31 */1872 if (!I16) 1873 c = genc2(c,0xC1,grex | modregrmx(3,5,reg & 7),sz * 8 - 1); // SHR reg,31 1798 1874 else 1799 1875 { /* 8088-286 do not have a barrel shifter, so use this 1800 1876 faster sequence 1801 1877 */ 1802 1878 c = genregs(c,0xD1,0,reg); /* ROL reg,1 */ 1803 1879 if (reghasvalue(allregs,1,&regi)) 1804 1880 c = genregs(c,0x23,reg,regi); /* AND reg,regi */ 1805 1881 else 1806 1882 c = genc2(c,0x81,modregrm(3,4,reg),1); /* AND reg,1 */ 1807 1883 } 1808 1884 break; 1809 1885 case OPge: 1810 1886 c = genregs(c,0xD1,4,reg); /* SHL reg,1 */ 1887 code_orrex(c,rex); 1811 1888 genregs(c,0x19,reg,reg); /* SBB reg,reg */ 1812 gen1(c,0x40 + reg); /* INC reg */ 1889 code_orrex(c,rex); 1890 if (I64) 1891 { 1892 c = gen2(c,0xFF,modregrmx(3,0,reg)); // INC reg 1893 code_orrex(c, rex); 1894 } 1895 else 1896 c = gen1(c,0x40 + reg); // INC reg 1813 1897 break; 1814 1898 default: 1815 1899 assert(0); 1816 1900 } 1817 1901 freenode(e2); 1818 1902 goto ret; 1819 1903 } 1820 1904 1821 1905 if (sz > REGSIZE) 1822 1906 cs.IEV2.Vint = MSREG(e2->EV.Vllong); 1823 1907 else 1824 1908 cs.IEV2.Vint = e2->EV.Vint; 1825 1909 cs.IFL2 = FLconst; 1826 1910 L4: 1827 1911 cs.Iop = 0x81 ^ byte; 1828 1912 1829 1913 /* if ((e1 is data or a '*' reference) and it's not a 1830 1914 * common subexpression 1831 1915 */ 1832 1916 1833 1917 if ((e1->Eoper == OPvar && datafl[el_fl(e1)] || 1834 1918 e1->Eoper == OPind) && 1835 1919 !evalinregister(e1)) 1836 1920 { cl = getlvalue(&cs,e1,RMload); 1837 1921 freenode(e1); 1838 1922 if (evalinregister(e2)) 1839 1923 { 1840 retregs = idxregm( cs.Irm,cs.Isib);1925 retregs = idxregm(&cs); 1841 1926 if ((cs.Iflags & CFSEG) == CFes) 1842 1927 retregs |= mES;
/* take no chances */ 1843 1928 rretregs = allregs & ~retregs; 1844 1929 if (byte) 1845 1930 rretregs &= BYTEREGS; 1846 1931 cr = scodelem(e2,&rretregs,retregs,TRUE); 1847 1932 cs.Iop = 0x39 ^ byte ^ reverse; 1848 1933 if (sz > REGSIZE) 1849 1934 { 1850 1935 rreg = findregmsw(rretregs); 1851 1936 cs.Irm |= modregrm(0,rreg,0); 1852 1937 getlvalue_msw(&cs); 1853 1938 c = gen(CNIL,&cs); /* CMP EA+2,rreg */ 1854 1939 if (I32 && sz == 6) 1855 1940 c->Iflags |= CFopsize; /* seg is only 16 bits */ 1856 1941 genjmp(c,JNE,FLcode,(block *) ce); /* JNE nop */ 1857 1942 rreg = findreglsw(rretregs); 1858 1943 NEWREG(cs.Irm,rreg); 1859 1944 getlvalue_lsw(&cs); 1860 1945 } 1861 1946 else 1862 1947 { 1863 1948 rreg = findreg(rretregs); 1864 c s.Irm |= modregrm(0,rreg,0);1949 code_newreg(&cs, rreg); 1865 1950 } 1866 1951 } 1867 1952 else 1868 1953 { 1869 1954 cs.Irm |= modregrm(0,7,0); 1870 1955 if (sz > REGSIZE) 1871 1956 { 1872 1957 #if TARGET_FLAT 1873 1958 if (sz == 6) 1874 1959 assert(0); 1875 1960 #endif 1876 1961 if (e2->Eoper == OPrelconst) 1877 1962 { cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg; 1878 1963 cs.IEVoffset2 = 0; 1879 1964 } 1880 1965 getlvalue_msw(&cs); 1881 1966 c = gen(CNIL,&cs); /* CMP EA+2,const */ 1882 if ( I32&& sz == 6)1967 if (!I16 && sz == 6) 1883 1968 c->Iflags |= CFopsize; /* seg is only 16 bits */ 1884 1969 genjmp(c,JNE,FLcode,(block *) ce); /* JNE nop */ 1885 1970 if (e2->Eoper == OPconst) 1886 1971 cs.IEV2.Vint = e2->EV.Vllong; 1887 1972 else 1888 1973 { /* Turn off CFseg, on CFoff */ 1889 1974 cs.Iflags ^= CFseg | CFoff; 1890 1975 cs.IEVoffset2 = offset2; 1891 1976 } 1892 1977 getlvalue_lsw(&cs); 1893 1978 } 1894 1979 freenode(e2); 1895 1980 } 1896 1981 c = gen(c,&cs); 1897 1982 break; 1898 1983 } 1899 1984 1900 1985 if (evalinregister(e2) && !OTassign(e1->Eoper) && 1901 1986 !isregvar(e1,NULL,NULL)) 1902 1987 { regm_t m; … … 1909 1994 } 1910 1995 if ((e1->Eoper == OPstrcmp || (OTassign(e1->Eoper) && sz <= REGSIZE)) && 1911 1996 
!boolres(e2) && !evalinregister(e1)) 1912 1997 { 1913 1998 retregs = mPSW; 1914 1999 cl = scodelem(e1,&retregs,0,FALSE); 1915 2000 freenode(e2); 1916 2001 break; 1917 2002 } 1918 2003 if (sz <= REGSIZE && !boolres(e2) && e1->Eoper == OPadd && *pretregs == mPSW) 1919 2004 { 1920 2005 retregs |= mPSW; 1921 2006 cl = scodelem(e1,&retregs,0,FALSE); 1922 2007 freenode(e2); 1923 2008 break; 1924 2009 } 1925 2010 cl = scodelem(e1,&retregs,0,TRUE); /* compute left leaf */ 1926 2011 if (sz == 1) 1927 2012 { 1928 2013 reg = findreg(retregs & allregs); // get reg that e1 is in 1929 cs.Irm = modregrm(3,7,reg); 2014 cs.Irm = modregrm(3,7,reg & 7); 2015 if (reg & 8) 2016 cs.Irex |= REX_B; 1930 2017 if (e1->Eoper == OPvar && e1->EV.sp.Voffset == 1 && e1->EV.sp.Vsym->Sfl == FLreg) 2018 { assert(reg < 4); 1931 2019 cs.Irm |= 4; // use upper register half 2020 } 1932 2021 } 1933 2022 else if (sz <= REGSIZE) 1934 2023 { /* CMP reg,const */ 1935 2024 reg = findreg(retregs & allregs); // get reg that e1 is in 1936 2025 rretregs = allregs & ~retregs; 1937 2026 if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,&rreg)) 1938 { code *cc;1939 1940 c c = genregs(CNIL,0x3B,reg,rreg);1941 if ( I32)2027 { 2028 code *cc = genregs(CNIL,0x3B,reg,rreg); 2029 code_orrex(cc, rex); 2030 if (!I16) 1942 2031 cc->Iflags |= cs.Iflags & CFopsize; 1943 2032 c = cat(c,cc); 1944 2033 freenode(e2); 1945 2034 break; 1946 2035 } 1947 cs.Irm = modregrm(3,7,reg); 2036 cs.Irm = modregrm(3,7,reg & 7); 2037 if (reg & 8) 2038 cs.Irex |= REX_B; 1948 2039 } 1949 2040 else if (sz <= 2 * REGSIZE) 1950 2041 { 1951 2042 reg = findregmsw(retregs); // get reg that e1 is in 1952 2043 cs.Irm = modregrm(3,7,reg); 1953 2044 c = gen(CNIL,&cs); /* CMP reg,MSW */ 1954 2045 if (I32 && sz == 6) 1955 2046 c->Iflags |= CFopsize; /* seg is only 16 bits */ 1956 2047 genjmp(c,JNE,FLcode,(block *) ce); /* JNE ce */ 1957 2048 1958 2049 reg = findreglsw(retregs); 1959 2050 cs.Irm = modregrm(3,7,reg); 1960 2051 if (e2->Eoper == 
OPconst) 1961 2052 cs.IEV2.Vint = e2->EV.Vlong; 1962 2053 else 1963 2054 { /* Turn off CFseg, on CFoff */ 1964 2055 cs.Iflags ^= CFseg | CFoff; 1965 2056 cs.IEVoffset2 = offset2; 1966 2057 } 1967 2058 } … … 1975 2066 if (e2->Ecount) 1976 2067 goto L2; 1977 2068 goto L5; 1978 2069 1979 2070 case OPvar: 1980 2071 if ((e1->Eoper == OPvar && 1981 2072 isregvar(e2,&rretregs,&reg) && 1982 2073 sz <= REGSIZE 1983 2074 ) || 1984 2075 (e1->Eoper == OPind && 1985 2076 isregvar(e2,&rretregs,&reg) && 1986 2077 !evalinregister(e1) && 1987 2078 sz <= REGSIZE 1988 2079 ) 1989 2080 ) 1990 2081 { 1991 2082 // CMP EA,e2 1992 2083 cl = getlvalue(&cs,e1,RMload); 1993 2084 freenode(e1); 1994 2085 cs.Iop = 0x39 ^ byte ^ reverse; 1995 c s.Irm |= modregrm(0,reg,0);2086 code_newreg(&cs,reg); 1996 2087 c = gen(c,&cs); 1997 2088 freenode(e2); 1998 2089 break; 1999 2090 } 2000 2091 L5: 2001 2092 cl = scodelem(e1,&retregs,0,TRUE); /* compute left leaf */ 2002 2093 if (sz <= REGSIZE) /* CMP reg,EA */ 2003 { unsigned opsize; 2004 2094 { 2005 2095 reg = findreg(retregs & allregs); // get reg that e1 is in 2006 opsize = cs.Iflags & CFopsize;2096 unsigned opsize = cs.Iflags & CFopsize; 2007 2097 c = cat(c,loadea(e2,&cs,0x3B ^ byte ^ reverse,reg,0,RMload | retregs,0)); 2008 2098 code_orflag(c,opsize); 2009 2099 } 2010 2100 else if (sz <= 2 * REGSIZE) 2011 2101 { 2012 2102 reg = findregmsw(retregs); /* get reg that e1 is in */ 2013 2103 // CMP reg,EA 2014 2104 c = loadea(e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0); 2015 2105 if (I32 && sz == 6) 2016 2106 c->Iflags |= CFopsize; /* seg is only 16 bits */ 2017 2107 genjmp(c,JNE,FLcode,(block *) ce); /* JNE ce */ 2018 2108 reg = findreglsw(retregs); 2019 2109 if (e2->Eoper == OPind) 2020 2110 { 2021 2111 NEWREG(cs.Irm,reg); 2022 2112 getlvalue_lsw(&cs); 2023 2113 c = gen(c,&cs); 2024 2114 } 2025 2115 else 2026 2116 c = cat(c,loadea(e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0)); 2027 2117 } 2028 2118 else 2029 2119 assert(0); 2030 2120
freenode(e2); 2031 2121 break; 2032 2122 } 2033 2123 c = cat(c,ce); 2034 2124 2035 2125 L3: 2036 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) /* if return result in register*/ 2037 { code *nop; 2038 regm_t save; 2039 2040 nop = CNIL; 2041 save = regcon.immed.mval; 2126 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register 2127 { code *nop = CNIL; 2128 regm_t save = regcon.immed.mval; 2042 2129 cg = allocreg(&retregs,&reg,TYint); 2043 2130 regcon.immed.mval = save; 2044 2131 if ((*pretregs & mPSW) == 0 && 2045 2132 (jop == JC || jop == JNC)) 2046 2133 { 2047 2134 cg = cat(cg,getregs(retregs)); 2048 2135 cg = genregs(cg,0x19,reg,reg); /* SBB reg,reg */ 2136 if (rex) 2137 code_orrex(cg, rex); 2049 2138 if (flag) 2050 2139 ; // cdcond() will handle it 2051 2140 else if (jop == JNC) 2052 gen1(cg,0x40 + reg); /* INC reg */ 2141 { 2142 if (I64) 2143 { 2144 cg = gen2(cg,0xFF,modregrmx(3,0,reg)); // INC reg 2145 code_orrex(cg, rex); 2146 } 2147 else 2148 gen1(cg,0x40 + reg); // INC reg 2149 } 2053 2150 else 2054 gen2(cg,0xF7,modregrm(3,3,reg)); /* NEG reg */ 2151 { gen2(cg,0xF7,modregrmx(3,3,reg)); /* NEG reg */ 2152 code_orrex(cg, rex); 2153 } 2154 } 2155 else if (I64 && sz == 8) 2156 { 2157 assert(!flag); 2158 cg = movregconst(cg,reg,1,64|8); // MOV reg,1 2159 nop = gennop(nop); 2160 cg = genjmp(cg,jop,FLcode,(block *) nop); // Jtrue nop 2161 // MOV reg,0 2162 movregconst(cg,reg,0,(*pretregs & mPSW) ? 64|8 : 64); 2163 regcon.immed.mval &= ~mask[reg]; 2055 2164 } 2056 2165 else 2057 2166 { 2058 2167 assert(!flag); 2059 2168 cg = movregconst(cg,reg,1,8); // MOV reg,1 2060 2169 nop = gennop(nop); 2061 2170 cg = genjmp(cg,jop,FLcode,(block *) nop); // Jtrue nop 2062 2171 // MOV reg,0 2063 2172 movregconst(cg,reg,0,(*pretregs & mPSW) ?
8 : 0); 2064 2173 regcon.immed.mval &= ~mask[reg]; 2065 2174 } 2066 2175 *pretregs = retregs; 2067 2176 c = cat3(c,cg,nop); 2068 2177 } 2069 2178 ret: 2070 2179 return cat3(cl,cr,c); 2071 2180 } 2072 2181 2073 2182 2074 2183 /********************************** … … 2120 2229 reg = findreglsw(retregs); 2121 2230 rreg = findreglsw(rretregs); 2122 2231 clsw = genregs(CNIL,0x3B,reg,rreg); /* CMP reg,rreg */ 2123 2232 break; 2124 2233 case OPconst: 2125 2234 cs.IEV2.Vint = MSREG(e2->EV.Vllong); // MSW first 2126 2235 cs.IFL2 = FLconst; 2127 2236 cs.Iop = 0x81; 2128 2237 2129 2238 /* if ((e1 is data or a '*' reference) and it's not a 2130 2239 * common subexpression 2131 2240 */ 2132 2241 2133 2242 if ((e1->Eoper == OPvar && datafl[el_fl(e1)] || 2134 2243 e1->Eoper == OPind) && 2135 2244 !evalinregister(e1)) 2136 2245 { cl = getlvalue(&cs,e1,0); 2137 2246 freenode(e1); 2138 2247 if (evalinregister(e2)) 2139 2248 { 2140 retregs = idxregm( cs.Irm,cs.Isib);2249 retregs = idxregm(&cs); 2141 2250 if ((cs.Iflags & CFSEG) == CFes) 2142 2251 retregs |= mES; /* take no chances */ 2143 2252 rretregs = ALLREGS & ~retregs; 2144 2253 cr = scodelem(e2,&rretregs,retregs,TRUE); 2145 2254 rreg = findregmsw(rretregs); 2146 2255 cs.Iop = 0x39; 2147 2256 cs.Irm |= modregrm(0,rreg,0); 2148 2257 getlvalue_msw(&cs); 2149 2258 cmsw = gen(CNIL,&cs); /* CMP EA+2,rreg */ 2150 2259 rreg = findreglsw(rretregs); 2151 2260 NEWREG(cs.Irm,rreg); 2152 2261 } 2153 2262 else 2154 2263 { cs.Irm |= modregrm(0,7,0); 2155 2264 getlvalue_msw(&cs); 2156 2265 cmsw = gen(CNIL,&cs); /* CMP EA+2,const */ 2157 2266 cs.IEV2.Vint = e2->EV.Vlong; 2158 2267 freenode(e2); 2159 2268 } 2160 2269 getlvalue_lsw(&cs); … … 2280 2389 retregs = regmask(e->E1->Ety, e->E1->E1->Ety); 2281 2390 if (retregs & (mST01 | mST0)) // if return in ST0 2282 2391 { 2283 2392 c1 = codelem(e->E1,pretregs,FALSE); 2284 2393 if (*pretregs & mST0) 2285 2394 note87(e, 0, 0); 2286 2395 return c1; 2287 2396 } 2288 2397 else 2289 2398 break; 2290 2399 } 
2291 2400 if (tycomplex(e->E1->Ety)) 2292 2401 goto Lcomplex; 2293 2402 /* FALL-THROUGH */ 2294 2403 case OPs64_d: 2295 2404 case OPs32_d: 2296 2405 case OPs16_d: 2297 2406 case OPu16_d: 2298 2407 return load87(e,0,pretregs,NULL,-1); 2299 2408 case OPu32_d: 2300 if (I32) 2301 { unsigned reg; 2302 2303 retregs = ALLREGS; 2409 if (!I16) 2410 { 2411 unsigned retregs = ALLREGS; 2304 2412 c1 = codelem(e->E1, &retregs, FALSE); 2305 reg = findreg(retregs);2413 unsigned reg = findreg(retregs); 2306 2414 c1 = genfltreg(c1, 0x89, reg, 0); 2307 2415 regwithvalue(c1,ALLREGS,0,&reg,0); 2308 2416 genfltreg(c1, 0x89, reg, REGSIZE); 2309 2417 2310 2418 cat(c1, push87()); 2311 2419 genfltreg(c1,0xDF,5,0); // FILD m64int 2312 2420 2313 2421 retregs = mST0 | (*pretregs & mPSW); 2314 2422 c2 = fixresult87(e, retregs, pretregs); 2315 2423 return cat(c1, c2); 2316 2424 } 2317 2425 break; 2318 2426 case OPd_s16: 2319 2427 case OPd_s32: 2320 2428 case OPd_u16: 2321 2429 case OPd_s64: 2322 2430 return cnvt87(e,pretregs); 2323 2431 case OPd_u32: // use subroutine, not 8087 2324 2432 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 2325 2433 retregs = mST0; … … 2346 2454 c2 = callclib(e,CLIBld_u64,pretregs,0); 2347 2455 return cat(c1,c2); 2348 2456 } 2349 2457 } 2350 2458 retregs = regmask(e->E1->Ety, TYnfunc); 2351 2459 L1: 2352 2460 c1 = codelem(e->E1,&retregs,FALSE); 2353 2461 for (i = 0; 1; i++) 2354 2462 { assert(i < arraysize(clib)); 2355 2463 if (clib[i][0] == e->Eoper) 2356 2464 { c2 = callclib(e,clib[i][1],pretregs,0); 2357 2465 break; 2358 2466 } 2359 2467 } 2360 2468 return cat(c1,c2); 2361 2469 } 2362 2470 2363 2471 2364 2472 /*************************** 2365 2473 * Convert short to long.
2366 * For OPs htlng, OPu16_32, OPptrlptr, OPulngllng, OPlngllng2474 * For OPs16_32, OPu16_32, OPptrlptr, OPu32_64, OPs32_64 2367 2475 */ 2368 2476 2369 2477 code *cdshtlng(elem *e,regm_t *pretregs) 2370 2478 { code *c,*ce,*c1,*c2,*c3,*c4; 2371 2479 unsigned reg; 2372 2480 unsigned char op; 2373 2481 regm_t retregs; 2374 2482 int e1comsub; 2375 2483 2376 2484 e1comsub = e->E1->Ecount; 2377 2485 if ((*pretregs & (ALLREGS | mBP)) == 0) // if don't need result in regs 2378 2486 c = codelem(e->E1,pretregs,FALSE); /* then conversion isn't necessary */ 2379 2487 2380 2488 else if ((op = e->Eoper) == OPptrlptr || 2381 ( !I32&& op == OPu16_32) ||2382 ( I32 && op == OPulngllng)2489 (I16 && op == OPu16_32) || 2490 (I32 && op == OPu32_64) 2383 2491 ) 2384 2492 { 2385 2493 regm_t regm; 2386 2494 tym_t tym1; 2387 2495 2388 2496 retregs = *pretregs & mLSW; 2389 2497 assert(retregs); 2390 2498 tym1 = tybasic(e->E1->Ety); 2391 2499 c = codelem(e->E1,&retregs,FALSE); 2392 2500 2393 2501 regm = *pretregs & (mMSW & ALLREGS); 2394 2502 if (regm == 0) /* *pretregs could be mES */ 2395 2503 regm = mMSW & ALLREGS; 2396 2504 ce = allocreg(&regm,&reg,TYint); 2397 2505 if (e1comsub) 2398 2506 ce = cat(ce,getregs(retregs)); 2399 2507 if (op == OPptrlptr) 2400 2508 { int segreg; 2401 2509 2402 2510 /* BUG: what about pointers to functions?
*/ 2403 2511 switch (tym1) 2404 2512 { 2405 2513 case TYnptr: segreg = SEG_DS; break; 2406 2514 case TYcptr: segreg = SEG_CS; break; 2407 2515 case TYsptr: segreg = SEG_SS; break; 2408 2516 default: assert(0); 2409 2517 } 2410 2518 ce = gen2(ce,0x8C,modregrm(3,segreg,reg)); /* MOV reg,segreg */ 2411 2519 } 2412 2520 else 2413 2521 ce = movregconst(ce,reg,0,0); /* 0 extend */ 2414 2522 2415 2523 c = cat3(c,ce,fixresult(e,retregs | regm,pretregs)); 2416 2524 } 2417 else if ( I32 && (op == OPshtlng|| op == OPu16_32))2525 else if (!I16 && (op == OPs16_32 || op == OPu16_32)) 2418 2526 { 2419 /* OPshtlng || OPu16_32 */2420 elem *e1;2421 2527 elem *e11; 2422 2528 2423 e 1 = e->E1;2529 elem *e1 = e->E1; 2424 2530 2425 2531 if (e1->Eoper == OPu8_16 && !e1->Ecount && 2426 2532 ((e11 = e1->E1)->Eoper == OPvar || (e11->Eoper == OPind && !e11->Ecount)) 2427 2533 ) 2428 2534 { code cs; 2429 2535 2430 2536 retregs = *pretregs & BYTEREGS; 2431 2537 if (!retregs) 2432 2538 retregs = BYTEREGS; 2433 2539 c1 = allocreg(&retregs,&reg,TYint); 2434 2540 c2 = movregconst(NULL,reg,0,0); // XOR reg,reg 2435 2541 c3 = loadea(e11,&cs,0x8A,reg,0,retregs,retregs); // MOV regL,EA 2436 2542 freenode(e11); 2437 2543 freenode(e1); 2438 2544 } 2439 2545 else if (e1->Eoper == OPvar || 2440 2546 (e1->Eoper == OPind && !e1->Ecount)) 2441 2547 { code cs; 2442 2548 unsigned opcode; 2443 2549 … … 2453 2559 else 2454 2560 { 2455 2561 L2: 2456 2562 retregs = *pretregs; 2457 2563 *pretregs &= ~mPSW; /* flags are already set */ 2458 2564 c1 = codelem(e1,&retregs,FALSE); 2459 2565 c2 = getregs(retregs); 2460 2566 if (op == OPu16_32 && c1) 2461 2567 { code *cx; 2462 2568 2463 2569 cx = code_last(c1); 2464 2570 if (cx->Iop == 0x81 && (cx->Irm & modregrm(3,7,0)) == modregrm(3,4,0)) 2465 2571 { 2466 2572 // Convert AND of a word to AND of a dword, zeroing upper word 2467 2573 retregs = mask[cx->Irm & 7]; 2468 2574 cx->Iflags &= ~CFopsize; 2469 2575 cx->IEV2.Vint &= 0xFFFF; 2470 2576 goto L1; 2471 2577 } 2472 2578 }
2473 if (op == OPs htlng&& retregs == mAX)2579 if (op == OPs16_32 && retregs == mAX) 2474 2580 c2 = gen1(c2,0x98); /* CWDE */ 2475 2581 else 2476 2582 { 2477 2583 reg = findreg(retregs); 2478 2584 if (config.flags4 & CFG4speed && op == OPu16_32) 2479 2585 { // AND reg,0xFFFF 2480 2586 c3 = genc2(NULL,0x81,modregrm(3,4,reg),0xFFFFu); 2481 2587 } 2482 2588 else 2483 2589 { 2484 2590 c3 = genregs(CNIL,0x0F,reg,reg); 2485 2591 c3->Iop2 = (op == OPu16_32) ? 0xB7 : 0xBF; /* MOVZX/MOVSX reg,reg */ 2486 2592 } 2487 2593 c2 = cat(c2,c3); 2488 2594 } 2489 2595 L1: 2490 2596 c3 = e1comsub ? getregs(retregs) : CNIL; 2491 2597 } 2492 2598 c4 = fixresult(e,retregs,pretregs); 2493 2599 c = cat4(c1,c2,c3,c4); 2494 2600 } 2495 2601 else if (*pretregs & mPSW || config.target_cpu < TARGET_80286) 2496 2602 { 2497 // OPs htlng, OPlngllng2603 // OPs16_32, OPs32_64 2498 2604 // CWD doesn't affect flags, so we can depend on the integer 2499 2605 // math to provide the flags. 2500 2606 retregs = mAX | mPSW; // want integer result in AX 2501 2607 *pretregs &= ~mPSW; // flags are already set 2502 2608 c1 = codelem(e->E1,&retregs,FALSE); 2503 2609 c2 = getregs(mDX); // sign extend into DX 2504 2610 c2 = gen1(c2,0x99); // CWD/CDQ 2505 2611 c3 = e1comsub ? 
getregs(retregs) : CNIL; 2506 2612 c4 = fixresult(e,mDX | retregs,pretregs); 2507 2613 c = cat4(c1,c2,c3,c4); 2508 2614 } 2509 2615 else 2510 2616 { 2511 // OPs htlng, OPlngllng2617 // OPs16_32, OPs32_64 2512 2618 unsigned msreg,lsreg; 2513 2619 2514 2620 retregs = *pretregs & mLSW; 2515 2621 assert(retregs); 2516 2622 c1 = codelem(e->E1,&retregs,FALSE); 2517 2623 retregs |= *pretregs & mMSW; 2518 2624 c2 = allocreg(&retregs,&reg,e->Ety); 2519 2625 msreg = findregmsw(retregs); 2520 2626 lsreg = findreglsw(retregs); 2521 2627 c3 = genmovreg(NULL,msreg,lsreg); // MOV msreg,lsreg 2522 2628 assert(config.target_cpu >= TARGET_80286); // 8088 can't handle SAR reg,imm8 2523 2629 c3 = genc2(c3,0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1); // SAR msreg,31 2524 2630 c4 = fixresult(e,retregs,pretregs); 2525 2631 c = cat4(c1,c2,c3,c4); 2526 2632 } 2527 2633 return c; 2528 2634 } 2529 2635 2530 2636 2531 2637 /*************************** … … 2533 2639 * For OPu8int and OPs8int. 2534 2640 */ 2535 2641 2536 2642 code *cdbyteint(elem *e,regm_t *pretregs) 2537 2643 { code *c,*ce,*c0,*c1,*c2,*c3,*c4; 2538 2644 regm_t retregs; 2539 2645 unsigned reg; 2540 2646 char op; 2541 2647 char size; 2542 2648 elem *e1; 2543 2649 2544 2650 2545 2651 if ((*pretregs & (ALLREGS | mBP)) == 0) // if don't need result in regs 2546 2652 return codelem(e->E1,pretregs,FALSE); /* then conversion isn't necessary */ 2547 2653 2548 2654 op = e->Eoper; 2549 2655 e1 = e->E1; 2550 2656 c0 = NULL; 2551 2657 if (e1->Eoper == OPcomma) 2552 2658 c0 = docommas(&e1); 2553 if ( I32)2659 if (!I16) 2554 2660 { 2555 2661 if (e1->Eoper == OPvar || (e1->Eoper == OPind && !e1->Ecount)) 2556 2662 { code cs; 2557 2663 unsigned opcode; 2558 2664 2559 2665 retregs = *pretregs; 2560 2666 c1 = allocreg(&retregs,&reg,TYint); 2561 2667 if (config.flags4 & CFG4speed && 2562 2668 op == OPu8int && mask[reg] & BYTEREGS && 2563 2669 config.target_cpu < TARGET_PentiumPro) 2564 2670 { 2565 2671 c2 = movregconst(NULL,reg,0,0); // XOR reg,reg 2566
2672 c3 = loadea(e1,&cs,0x8A,reg,0,retregs,retregs); // MOV regL,EA 2567 2673 } 2568 2674 else 2569 2675 { 2570 2676 opcode = (op == OPu8int) ? 0x0FB6 : 0x0FBE; // MOVZX/MOVSX reg,EA 2571 2677 c2 = loadea(e1,&cs,opcode,reg,0,0,retregs); 2572 2678 c3 = CNIL; 2573 2679 } … … 2590 2696 retregs = BYTEREGS; 2591 2697 } 2592 2698 else 2593 2699 { 2594 2700 /* CBW doesn't affect flags, so we can depend on the integer */ 2595 2701 /* math to provide the flags. */ 2596 2702 retregs = mAX | (*pretregs & mPSW); /* want integer result in AX */ 2597 2703 } 2598 2704 } 2599 2705 2600 2706 c3 = CNIL; 2601 2707 c1 = codelem(e1,&retregs,FALSE); 2602 2708 reg = findreg(retregs); 2603 2709 if (!c1) 2604 2710 goto L1; 2605 2711 2606 2712 for (c = c1; c->next; c = c->next) 2607 2713 ; /* find previous instruction */ 2608 2714 2609 2715 /* If previous instruction is an AND bytereg,value */ 2610 if (c->Iop == 0x80 && c->Irm == modregrm(3,4,reg ) &&2716 if (c->Iop == 0x80 && c->Irm == modregrm(3,4,reg & 7) && 2611 2717 (op == OPu8int || (c->IEV2.Vuns & 0x80) == 0)) 2612 2718 { 2613 2719 if (*pretregs & mPSW) 2614 2720 c->Iflags |= CFpsw; 2615 2721 c->Iop |= 1; /* convert to word operation */ 2616 2722 c->IEV2.Vuns &= 0xFF; /* dump any high order bits */ 2617 2723 *pretregs &= ~mPSW; /* flags already set */ 2618 2724 } 2619 2725 else 2620 2726 { 2621 2727 L1: 2622 if ( I32)2728 if (!I16) 2623 2729 { 2624 2730 if (op == OPs8int && reg == AX && size == 2) 2625 2731 { c3 = gen1(c3,0x98); /* CBW */ 2626 2732 c3->Iflags |= CFopsize; /* don't do a CWDE */ 2627 2733 } 2628 2734 else 2629 2735 { 2630 2736 /* We could do better by not forcing the src and dst */ 2631 2737 /* registers to be the same. */ 2632 2738 2633 2739 if (config.flags4 & CFG4speed && op == OPu8_16) 2634 2740 { // AND reg,0xFF 2635 2741 c3 = genc2(c3,0x81,modregrm(3,4,reg),0xFF); 2636 2742 } 2637 2743 else 2638 2744 { c3 = genregs(c3,0x0F,reg,reg); 2639 2745 c3->Iop2 = (op == OPu8int) ? 
0xB6 : 0xBE; /* MOVZX/MOVSX reg,reg */ 2640 2746 } 2641 2747 } 2642 2748 } 2643 2749 else 2644 2750 { 2645 2751 if (op == OPu8int) 2646 2752 c3 = genregs(c3,0x30,reg+4,reg+4); // XOR regH,regH 2647 2753 else 2648 2754 { 2649 2755 c3 = gen1(c3,0x98); /* CBW */ 2650 2756 *pretregs &= ~mPSW; /* flags already set */ 2651 2757 } 2652 2758 } 2653 2759 } 2654 2760 c2 = getregs(retregs); 2655 2761 L2: 2656 2762 c4 = fixresult(e,retregs,pretregs); 2657 2763 return cat6(c0,c1,c2,c3,c4,NULL); 2658 2764 } 2659 2765 2660 2766 2661 2767 /*************************** 2662 * Convert long to short (OP lngsht).2768 * Convert long to short (OP32_16). 2663 2769 * Get offset of far pointer (OPoffset). 2664 * Convert int to byte (OP int8).2770 * Convert int to byte (OP16_8). 2665 2771 * Convert long long to long (OP64_32). 2666 2772 */ 2667 2773 2668 2774 code *cdlngsht(elem *e,regm_t *pretregs) 2669 2775 { regm_t retregs; 2670 2776 code *c; 2671 2777 2672 2778 #ifdef DEBUG 2673 2779 switch (e->Eoper) 2674 2780 { 2675 case OP lngsht:2781 case OP32_16: 2676 2782 case OPoffset: 2677 case OP int8:2783 case OP16_8: 2678 2784 case OP64_32: 2679 2785 break; 2680 2786 2681 2787 default: 2682 2788 assert(0); 2683 2789 } 2684 2790 #endif 2685 2791 2686 if (e->Eoper == OP int8)2792 if (e->Eoper == OP16_8) 2687 2793 { retregs = *pretregs ? BYTEREGS : 0; 2688 2794 c = codelem(e->E1,&retregs,FALSE); 2689 2795 } 2690 2796 else 2691 2797 { if (e->E1->Eoper == OPrelconst) 2692 2798 c = offsetinreg(e->E1,&retregs); 2693 2799 else 2694 2800 { retregs = *pretregs ? ALLREGS : 0; 2695 2801 c = codelem(e->E1,&retregs,FALSE); 2696 if ( !I32 || e->Eoper == OPoffset || e->Eoper == OP64_32)2802 if (I16 || I32 && (e->Eoper == OPoffset || e->Eoper == OP64_32)) 2697 2803 retregs &= mLSW; /* want LSW only */ 2698 2804 } 2699 2805 } 2700 2806 2701 2807 /* We "destroy" a reg by assigning it the result of a new e, even */ 2702 2808 /* though the values are the same. 
Weakness of our CSE strategy that */ 2703 2809 /* a register can only hold the contents of one elem at a time. */ 2704 2810 if (e->Ecount) 2705 2811 c = cat(c,getregs(retregs)); 2706 2812 else 2707 2813 useregs(retregs); 2708 2814 2709 2815 #ifdef DEBUG 2710 2816 if (!(!*pretregs || retregs)) 2711 2817 WROP(e->Eoper), 2712 2818 printf(" *pretregs = x%x, retregs = x%x, e = %p\n",*pretregs,retregs,e); 2713 2819 #endif 2714 2820 assert(!*pretregs || retregs); 2715 2821 return cat(c,fixresult(e,retregs,pretregs)); /* lsw only */ 2716 2822 } … … 2799 2905 c3->Iflags |= CFopsize; 2800 2906 retregs = mAX; 2801 2907 return cat4(c1,c2,c3,fixresult(e,retregs,pretregs)); 2802 2908 } 2803 2909 2804 2910 /************************ 2805 2911 * Generate code for an asm elem. 2806 2912 */ 2807 2913 2808 2914 code *cdasm(elem *e,regm_t *pretregs) 2809 2915 { code *c; 2810 2916 2811 2917 #if 1 2812 2918 /* Assume only regs normally destroyed by a function are destroyed */ 2813 2919 c = getregs((ALLREGS | mES) & ~fregsaved); 2814 2920 #else 2815 2921 /* Assume all regs are destroyed */ 2816 2922 c = getregs(ALLREGS | mES); 2817 2923 #endif 2818 2924 c = genasm(c,e->EV.ss.Vstring,e->EV.ss.Vstrlen); 2819 return cat(c,fixresult(e,(I 32 ? mAX : mDX |mAX),pretregs));2925 return cat(c,fixresult(e,(I16 ? mDX | mAX : mAX),pretregs)); 2820 2926 } 2821 2927 2822 2928 /************************ 2823 2929 * Generate code for OPtofar16 and OPfromfar16. 
2824 2930 */ 2825 2931 2826 2932 code *cdfar16( elem *e, regm_t *pretregs) 2827 2933 { code *c; 2828 2934 code *c1; 2829 2935 code *c3; 2830 2936 code *cnop; 2831 2937 code cs; 2832 2938 unsigned reg; 2833 2939 2834 2940 assert(I32); 2835 2941 c = codelem(e->E1,pretregs,FALSE); 2836 2942 reg = findreg(*pretregs); 2837 2943 c = cat(c,getregs(*pretregs)); /* we will destroy the regs */ 2838 2944 2839 2945 cs.Iop = 0xC1; … … 2921 3027 2922 3028 switch (e->Eoper) 2923 3029 { 2924 3030 case OPbt: op = 0xA3; mode = 4; break; 2925 3031 case OPbtc: op = 0xBB; mode = 7; break; 2926 3032 case OPbtr: op = 0xB3; mode = 6; break; 2927 3033 case OPbts: op = 0xAB; mode = 5; break; 2928 3034 2929 3035 default: 2930 3036 assert(0); 2931 3037 } 2932 3038 2933 3039 e1 = e->E1; 2934 3040 e2 = e->E2; 2935 3041 cs.Iflags = 0; 2936 3042 c = getlvalue(&cs, e, RMload); // get addressing mode 2937 3043 if (e->Eoper == OPbt && *pretregs == 0) 2938 3044 return cat(c, codelem(e2,pretregs,FALSE)); 2939 3045 2940 3046 ty1 = tybasic(e1->Ety); 2941 word = ( I32&& tysize[ty1] == SHORTSIZE) ? CFopsize : 0;2942 idxregs = idxregm( cs.Irm, cs.Isib); // mask if index regs used3047 word = (!I16 && tysize[ty1] == SHORTSIZE) ? CFopsize : 0; 3048 idxregs = idxregm(&cs); // mask if index regs used 2943 3049 2944 3050 // if (e2->Eoper == OPconst && e2->EV.Vuns < 0x100) // should do this instead? 
2945 3051 if (e2->Eoper == OPconst) 2946 3052 { 2947 3053 cs.Iop = 0x0F; 2948 3054 cs.Iop2 = 0xBA; // BT rm,imm8 2949 3055 cs.Irm |= modregrm(0,mode,0); 2950 3056 cs.Iflags |= CFpsw | word; 2951 3057 cs.IFL2 = FLconst; 2952 3058 if (tysize[ty1] == SHORTSIZE) 2953 3059 { 2954 3060 cs.IEVoffset1 += (e2->EV.Vuns & ~15) >> 3; 2955 3061 cs.IEV2.Vint = e2->EV.Vint & 15; 2956 3062 } 2957 else 3063 else if (tysize[ty1] == 4) 2958 3064 { 2959 3065 cs.IEVoffset1 += (e2->EV.Vuns & ~31) >> 3; 2960 3066 cs.IEV2.Vint = e2->EV.Vint & 31; 3067 } 3068 else 3069 { 3070 cs.IEVoffset1 += (e2->EV.Vuns & ~63) >> 3; 3071 cs.IEV2.Vint = e2->EV.Vint & 63; 2961 3072 } 2962 3073 c2 = gen(CNIL,&cs); 2963 3074 } 2964 3075 else 2965 3076 { 2966 3077 retregs = ALLREGS & ~idxregs; 2967 3078 c2 = scodelem(e2,&retregs,idxregs,TRUE); 2968 3079 reg = findreg(retregs); 2969 3080 2970 3081 cs.Iop = 0x0F; 2971 3082 cs.Iop2 = op; // BT rm,reg 2972 c s.Irm |= modregrm(0,reg,0);3083 code_newreg(&cs,reg); 2973 3084 cs.Iflags |= CFpsw | word; 2974 3085 c2 = gen(c2,&cs); 2975 3086 } 2976 3087 2977 3088 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register 2978 { code *nop; 2979 regm_t save; 2980 code *cg; 2981 2982 nop = CNIL; 2983 save = regcon.immed.mval; 2984 cg = allocreg(&retregs,®,TYint); 3089 { 3090 code *nop = CNIL; 3091 regm_t save = regcon.immed.mval; 3092 code *cg = allocreg(&retregs,®,TYint); 2985 3093 regcon.immed.mval = save; 2986 3094 if ((*pretregs & mPSW) == 0) 2987 3095 { 2988 3096 cg = cat(cg,getregs(retregs)); 2989 3097 cg = genregs(cg,0x19,reg,reg); // SBB reg,reg 2990 3098 } 2991 3099 else 2992 3100 { 2993 3101 cg = movregconst(cg,reg,1,8); // MOV reg,1 2994 3102 nop = gennop(nop); 2995 3103 cg = genjmp(cg,JC,FLcode,(block *) nop); // Jtrue nop 2996 3104 // MOV reg,0 2997 3105 movregconst(cg,reg,0,8); 2998 3106 regcon.immed.mval &= ~mask[reg]; 2999 3107 } 3000 3108 *pretregs = retregs; 3001 3109 c2 = cat3(c2,cg,nop); 3002 3110 } 3003 3111 3004 3112 return 
cat(c,c2); … … 3017 3125 code *cl,*cg; 3018 3126 code cs; 3019 3127 3020 3128 //printf("cdbscan()\n"); 3021 3129 //elem_print(e); 3022 3130 if (*pretregs == 0) 3023 3131 return codelem(e->E1,pretregs,FALSE); 3024 3132 tyml = tybasic(e->E1->Ety); 3025 3133 sz = tysize[tyml]; 3026 3134 assert(sz == 2 || sz == 4); 3027 3135 3028 3136 if ((e->E1->Eoper == OPind && !e->E1->Ecount) || e->E1->Eoper == OPvar) 3029 3137 { 3030 3138 cl = getlvalue(&cs, e->E1, RMload); // get addressing mode 3031 3139 } 3032 3140 else 3033 3141 { 3034 3142 retregs = allregs; 3035 3143 cl = codelem(e->E1, &retregs, FALSE); 3036 3144 reg = findreg(retregs); 3037 cs.Irm = modregrm(3,0,reg );3145 cs.Irm = modregrm(3,0,reg & 7); 3038 3146 cs.Iflags = 0; 3039 3147 cs.Irex = 0; 3148 if (reg & 8) 3149 cs.Irex |= REX_B; 3040 3150 } 3041 3151 3042 3152 retregs = *pretregs & allregs; 3043 3153 if (!retregs) 3044 3154 retregs = allregs; 3045 3155 cg = allocreg(&retregs, ®, e->Ety); 3046 3156 3047 3157 cs.Iop = 0x0F; 3048 3158 cs.Iop2 = (e->Eoper == OPbsf) ? 0xBC : 0xBD; // BSF/BSR reg,EA 3049 c s.Irm |= modregrm(0,reg,0);3050 if ( I32&& sz == SHORTSIZE)3159 code_newreg(&cs, reg); 3160 if (!I16 && sz == SHORTSIZE) 3051 3161 cs.Iflags |= CFopsize; 3052 3162 cg = gen(cg,&cs); 3053 3163 3054 3164 return cat3(cl,cg,fixresult(e,retregs,pretregs)); 3055 3165 } 3056 3166 3057 3167 /******************************************* 3058 3168 * Generate code for OPpair, OPrpair. 3059 3169 */ 3060 3170 3061 3171 code *cdpair(elem *e, regm_t *pretregs) 3062 3172 { 3063 3173 regm_t retregs; 3064 3174 regm_t regs1; 3065 3175 regm_t regs2; 3066 3176 unsigned reg; 3067 3177 code *cg; 3068 3178 code *c1; 3069 3179 code *c2; 3070 3180 trunk/src/backend/code.c
r428 r577 12 12 13 13 #if !SPP 14 14 15 15 #include <stdio.h> 16 16 #include <time.h> 17 17 #include "cc.h" 18 18 #include "el.h" 19 19 #include "code.h" 20 20 #include "global.h" 21 21 22 22 static code *code_list; 23 23 24 24 /***************** 25 25 * Allocate code 26 26 */ 27 27 28 28 #if SCPP && __SC__ && __INTSIZE == 4 && TX86 && !_DEBUG_TRACE && !MEM_DEBUG 29 29 30 30 __declspec(naked) code *code_calloc() 31 31 { 32 if (sizeof(code) != 0x2 0)32 if (sizeof(code) != 0x24) 33 33 util_assert("code",__LINE__); 34 34 __asm 35 35 { 36 36 mov EAX,code_list 37 37 test EAX,EAX 38 38 je L20 39 39 mov ECX,[EAX] 40 40 mov code_list,ECX 41 41 jmp L29 42 42 43 43 L20: push sizeof(code) 44 44 call mem_fmalloc 45 45 ;add ESP,4 46 46 L29: 47 47 xor ECX,ECX 48 48 mov DWORD PTR [EAX],0 49 49 50 50 mov 4[EAX],ECX ;these pair 51 51 mov 8[EAX],ECX 52 52 53 53 mov 12[EAX],ECX 54 54 mov 16[EAX],ECX 55 55 56 56 mov 20[EAX],ECX 57 57 mov 24[EAX],ECX 58 58 59 59 mov 28[EAX],ECX 60 mov 32[EAX],ECX 60 61 ret 61 62 } 62 63 } 63 64 64 65 #else 65 66 66 67 code *code_calloc() 67 68 { code *c; 68 69 static code czero; 69 70 71 //printf("code %x\n", sizeof(code)); 70 72 c = code_list; 71 73 if (c) 72 74 code_list = code_next(c); 73 75 else 74 76 c = (code *)mem_fmalloc(sizeof(*c)); 75 77 *c = czero; // zero it out 76 78 //dbg_printf("code_calloc: %p\n",c); 77 79 return c; 78 80 } 79 81 80 82 #endif 81 83 82 84 /***************** 83 85 * Free code 84 86 */ 85 87 86 88 void code_free(code *cstart) 87 89 { code **pc; 88 90 code *c; 89 91 trunk/src/backend/code.h
r572 r577 89 89 90 90 #define mXMM0 (1 << XMM0) 91 91 #define mXMM1 (1 << XMM1) 92 92 #define mXMM2 (1 << XMM2) 93 93 #define mXMM3 (1 << XMM3) 94 94 #define mXMM4 (1 << XMM4) 95 95 #define mXMM5 (1 << XMM5) 96 96 #define mXMM6 (1 << XMM6) 97 97 #define mXMM7 (1 << XMM7) 98 98 #define XMMREGS (mXMM0 |mXMM1 |mXMM2 |mXMM3 |mXMM4 |mXMM5 |mXMM6 |mXMM7) 99 99 100 100 #define mES (1 << ES) // 0x10000 101 101 #define mPSW (1 << PSW) // 0x20000 102 102 103 103 #define mSTACK (1 << STACK) // 0x40000 104 104 105 105 #define mST0 (1 << ST0) // 0x200000 106 106 #define mST01 (1 << ST01) // 0x400000 107 107 108 108 // Flags for getlvalue (must fit in regm_t) 109 #define RMload 0x4000110 #define RMstore 0x8000109 #define RMload (1 << 30) 110 #define RMstore (1 << 31) 111 111 112 112 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 113 113 // To support positional independent code, 114 114 // must be able to remove BX from available registers 115 115 extern regm_t ALLREGS; 116 116 #define ALLREGS_INIT (mAX|mBX|mCX|mDX|mSI|mDI) 117 117 #define ALLREGS_INIT_PIC (mAX|mCX|mDX|mSI|mDI) 118 118 extern regm_t BYTEREGS; 119 119 #define BYTEREGS_INIT (mAX|mBX|mCX|mDX) 120 120 #define BYTEREGS_INIT_PIC (mAX|mCX|mDX) 121 121 #else 122 122 #define ALLREGS (mAX|mBX|mCX|mDX|mSI|mDI) 123 123 #define ALLREGS_INIT ALLREGS 124 124 #undef BYTEREGS 125 125 #define BYTEREGS (mAX|mBX|mCX|mDX) 126 126 #endif 127 127 128 128 /* We use the same IDXREGS for the 386 as the 8088, because if 129 129 we used ALLREGS, it would interfere with mMSW 130 130 */ … … 241 241 #define ESCrelease2 9 // release eh stack 242 242 #define ESCframeptr 10 // replace with load of frame pointer 243 243 244 244 #define ASM 0x36 // string of asm bytes, actually an SS: opcode 245 245 246 246 /********************************* 247 247 * Macros to ease generating code 248 248 * modregrm: generate mod reg r/m field 249 249 * modregxrm: reg could be R8..R15 250 250 * modregrmx: rm could be R8..R15 251 251 * 
modregxrmx: reg or rm could be R8..R15 252 252 * NEWREG: change reg field of x to r 253 253 * genorreg: OR t,f 254 254 */ 255 255 256 256 #define modregrm(m,r,rm) (((m)<<6)|((r)<<3)|(rm)) 257 257 #define modregxrm(m,r,rm) ((((r)&8)<<15)|modregrm((m),(r)&7,rm)) 258 258 #define modregrmx(m,r,rm) ((((rm)&8)<<13)|modregrm((m),r,(rm)&7)) 259 259 #define modregxrmx(m,r,rm) ((((r)&8)<<15)|(((rm)&8)<<13)|modregrm((m),(r)&7,(rm)&7)) 260 260 261 #define NEWREXR(x,r) ((x)=((x)&~REX_R)|(((r)&8)>>1)) 261 262 #define NEWREG(x,r) ((x)=((x)&~(7<<3))|((r)<<3)) 263 #define code_newreg(c,r) (NEWREG((c)->Irm,(r)&7),NEWREXR((c)->Irex,(r))) 264 262 265 #define genorreg(c,t,f) genregs((c),0x09,(f),(t)) 263 266 264 267 #define REX 0x40 // REX prefix byte, OR'd with the following bits: 265 268 #define REX_W 8 // 0 = default operand size, 1 = 64 bit operand size 266 269 #define REX_R 4 // high bit of reg field of modregrm 267 270 #define REX_X 2 // high bit of sib index reg 268 271 #define REX_B 1 // high bit of rm field, sib base reg, or opcode reg 269 272 270 273 /********************** 271 274 * C library routines. 272 275 * See callclib(). 273 276 */ 274 277 275 278 enum CLIB 276 279 { 277 280 CLIBlcmp, 278 281 CLIBlmul, 279 282 CLIBldiv, 280 283 CLIBlmod, 281 284 CLIBuldiv, … … 374 377 // CFes at the same time, though!) 
375 378 #define CFpsw 0x40 // we need the flags result after this instruction 376 379 #define CFopsize 0x80 // prefix with operand size 377 380 #define CFaddrsize 0x100 // prefix with address size 378 381 #define CFds 0x200 // need DS override (not with es, ss, or cs ) 379 382 #define CFcs 0x400 // need CS override 380 383 #define CFfs 0x800 // need FS override 381 384 #define CFgs (CFcs | CFfs) // need GS override 382 385 #define CFwait 0x1000 // If I32 it indicates when to output a WAIT 383 386 #define CFselfrel 0x2000 // if self-relative 384 387 #define CFunambig 0x4000 // indicates cannot be accessed by other addressing 385 388 // modes 386 389 #define CFtarg2 0x8000 // like CFtarg, but we can't optimize this away 387 390 #define CFvolatile 0x10000 // volatile reference, do not schedule 388 391 #define CFclassinit 0x20000 // class init code 389 392 #define CFoffset64 0x40000 // offset is 64 bits 390 393 391 394 #define CFPREFIX (CFSEG | CFopsize | CFaddrsize) 392 395 #define CFSEG (CFes | CFss | CFds | CFcs | CFfs | CFgs) 393 396 394 unsigned char Irex; // REX prefix395 397 396 398 unsigned char Iop; 397 unsigned char Irm; // reg/mode398 399 399 unsigned char Iop2; // second opcode byte 400 unsigned char Isib; // SIB byte401 402 400 unsigned char Iop3; // third opcode byte 401 402 union 403 { unsigned _Iea; 404 struct 405 { 406 unsigned char _Irm; // reg/mode 407 unsigned char _Isib; // SIB byte 408 unsigned char _Irex; // REX prefix 409 } _ea; 410 } _EA; 411 412 #define Iea _EA._Iea 413 #define Irm _EA._ea._Irm 414 #define Isib _EA._ea._Isib 415 #define Irex _EA._ea._Irex 403 416 404 417 unsigned char IFL1,IFL2; // FLavors of 1st, 2nd operands 405 418 union evc IEV1; // 1st operand, if any 406 419 #define IEVpointer1 IEV1._EP.Vpointer 407 420 #define IEVseg1 IEV1._EP.Vseg 408 421 #define IEVsym1 IEV1.sp.Vsym 409 422 #define IEVdsym1 IEV1.dsp.Vsym 410 423 #define IEVoffset1 IEV1.sp.Voffset 411 424 #define IEVlsym1 IEV1.lab.Vsym 412 425 #define IEVint1 IEV1.Vint 
413 426 union evc IEV2; // 2nd operand, if any 414 427 #define IEVpointer2 IEV2._EP.Vpointer 415 428 #define IEVseg2 IEV2._EP.Vseg 416 429 #define IEVsym2 IEV2.sp.Vsym 417 430 #define IEVdsym2 IEV2.dsp.Vsym 418 431 #define IEVoffset2 IEV2.sp.Voffset 419 432 #define IEVlsym2 IEV2.lab.Vsym 420 433 #define IEVint2 IEV2.Vint 421 434 void print(); // pretty-printer 422 435 … … 528 541 /***********************************************************/ 529 542 530 543 extern regm_t msavereg,mfuncreg,allregs; 531 544 532 545 /*long cxmalloc,cxcalloc,cx1;*/ 533 546 534 547 typedef code *cd_t (elem *e , regm_t *pretregs ); 535 548 536 549 extern int BPRM; 537 550 extern regm_t FLOATREGS; 538 551 extern regm_t FLOATREGS2; 539 552 extern regm_t DOUBLEREGS; 540 553 extern const char datafl[],stackfl[],segfl[],flinsymtab[]; 541 554 extern char needframe,usedalloca,gotref; 542 555 extern targ_size_t localsize,Toff,Poff,Aoff, 543 556 Poffset,funcoffset, 544 557 framehandleroffset, 545 558 Aoffset,Toffset,EEoffset; 546 559 extern int Aalign; 547 560 extern int cseg; 561 extern int STACKALIGN; 548 562 #if TARGET_OSX 549 563 extern targ_size_t localgotoffset; 550 564 #endif 551 565 552 566 /* cgcod.c */ 553 567 extern int pass; 554 568 #define PASSinit 0 // initial pass through code generator 555 569 #define PASSreg 1 // register assignment pass 556 570 #define PASSfinal 2 // final pass 557 571 558 572 extern int dfoidx; 559 573 extern struct CSE *csextab; 560 574 extern unsigned cstop; 561 575 #if TX86 562 576 extern bool floatreg; 563 577 #endif 564 578 extern targ_size_t retoffset; 565 579 extern unsigned stackpush; 566 580 extern int stackchanged; 567 581 extern int refparam; … … 581 595 #endif 582 596 #define findregmsw(regm) findreg((regm) & mMSW) 583 597 #define findreglsw(regm) findreg((regm) & (mLSW | mBP)) 584 598 void freenode (elem *e ); 585 599 int isregvar (elem *e , regm_t *pregm , unsigned *preg ); 586 600 #ifdef DEBUG 587 601 code *allocreg (regm_t *pretregs , unsigned 
*preg , tym_t tym , int line , const char *file ); 588 602 #define allocreg(a,b,c) allocreg((a),(b),(c),__LINE__,__FILE__) 589 603 #else 590 604 code *allocreg (regm_t *pretregs , unsigned *preg , tym_t tym ); 591 605 #endif 592 606 void useregs (regm_t regm ); 593 607 code *getregs (regm_t r ); 594 608 code *getregs_imm (regm_t r ); 595 609 code *cse_flush(int); 596 610 void cssave (elem *e , regm_t regm , unsigned opsflag ); 597 611 bool evalinregister (elem *e ); 598 612 regm_t getscratch(); 599 613 code *codelem (elem *e , regm_t *pretregs , bool constflag ); 600 614 const char *regm_str(regm_t rm); 615 int numbitsset(regm_t); 601 616 602 617 /* cod1.c */ 603 618 extern int clib_inited; 604 619 605 620 int isscaledindex(elem *); 606 621 int ssindex(int op,targ_uns product); 607 622 void buildEA(code *c,int base,int index,int scale,targ_size_t disp); 608 623 void andregcon (con_t *pregconsave); 609 624 code *docommas (elem **pe ); 610 625 code *gencodelem(code *c,elem *e,regm_t *pretregs,bool constflag); 611 626 void gensaverestore(regm_t, code **, code **); 612 627 code *genstackclean(code *c,unsigned numpara,regm_t keepmsk); 613 628 code *logexp (elem *e , int jcond , unsigned fltarg , code *targ ); 614 code *loadea (elem *e , code __ss*cs , unsigned op , unsigned reg , targ_size_t offset , regm_t keepmsk , regm_t desmsk );629 code *loadea (elem *e , code *cs , unsigned op , unsigned reg , targ_size_t offset , regm_t keepmsk , regm_t desmsk ); 615 630 unsigned getaddrmode (regm_t idxregs ); 631 void setaddrmode(code *c, regm_t idxregs); 616 632 void getlvalue_msw(code *); 617 633 void getlvalue_lsw(code *); 618 code *getlvalue (code __ss*pcs , elem *e , regm_t keepmsk );634 code *getlvalue (code *pcs , elem *e , regm_t keepmsk ); 619 635 code *scodelem (elem *e , regm_t *pretregs , regm_t keepmsk , bool constflag ); 620 code *fltregs (code __ss*pcs , tym_t tym );636 code *fltregs (code *pcs , tym_t tym ); 621 637 code *tstresult (regm_t regm , tym_t tym , 
unsigned saveflag ); 622 638 code *fixresult (elem *e , regm_t retregs , regm_t *pretregs ); 623 639 code *callclib (elem *e , unsigned clib , regm_t *pretregs , regm_t keepmask ); 624 640 cd_t cdfunc; 625 641 cd_t cdstrthis; 626 642 code *params(elem *, unsigned); 627 643 code *offsetinreg (elem *e , regm_t *pretregs ); 628 644 code *loaddata (elem *e , regm_t *pretregs ); 629 645 630 646 /* cod2.c */ 631 regm_t idxregm (unsigned rm,unsigned sib);647 regm_t idxregm(code *c); 632 648 #if TARGET_WINDOS 633 649 code *opdouble (elem *e , regm_t *pretregs , unsigned clib ); 634 650 #endif 635 651 cd_t cdorth; 636 652 cd_t cdmul; 637 653 cd_t cdnot; 638 654 cd_t cdcom; 639 655 cd_t cdbswap; 640 656 cd_t cdcond; 641 657 void WRcodlst (code *c ); 642 658 cd_t cdcomma; 643 659 cd_t cdloglog; 644 660 cd_t cdshift; 645 661 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 646 662 cd_t cdindpic; 647 663 #endif 648 664 cd_t cdind; 649 665 cd_t cdstrlen; 650 666 cd_t cdstrcmp; 651 667 cd_t cdstrcpy; … … 662 678 cd_t cderr; 663 679 cd_t cdinfo; 664 680 cd_t cdctor; 665 681 cd_t cddtor; 666 682 cd_t cdmark; 667 683 cd_t cdnullcheck; 668 684 cd_t cdclassinit; 669 685 670 686 /* cod3.c */ 671 687 extern int BPoff; 672 688 673 689 int cod3_EA(code *c); 674 690 regm_t cod3_useBP(); 675 691 void cod3_set386 (void ); 676 692 void cod3_set64 (void ); 677 693 void cod3_align (void ); 678 694 void doswitch (block *b ); 679 695 void outjmptab (block *b ); 680 696 void outswitab (block *b ); 681 697 int jmpopcode (elem *e ); 682 void cod3_ptrchk(code * __ss *pc,code __ss*pcs,regm_t keepmsk);698 void cod3_ptrchk(code **pc,code *pcs,regm_t keepmsk); 683 699 code *prolog (void ); 684 700 void epilog (block *b); 685 701 cd_t cdframeptr; 686 702 cd_t cdgot; 687 703 targ_size_t cod3_spoff(); 688 704 code *cod3_load_got(); 689 705 void makeitextern (symbol *s ); 690 706 void fltused(void); 691 707 int branch(block *bl, int flag); 692 708 void cod3_adjSymOffsets(); 693 709 void 
assignaddr (block *bl ); 694 710 void assignaddrc (code *c ); 695 711 targ_size_t cod3_bpoffset(symbol *s); 696 712 void pinholeopt (code *c , block *bn ); 697 713 void jmpaddr (code *c ); 698 714 int code_match(code *c1,code *c2); 699 715 unsigned calcblksize (code *c); 700 716 unsigned calccodsize(code *c); 701 717 unsigned codout (code *c ); 702 718 void addtofixlist (symbol *s , targ_size_t soffset , int seg , targ_size_t val , int flags ); 703 719 void searchfixlist (symbol *s ); 704 720 void outfixlist (void ); 705 721 void code_hydrate(code **pc); 706 722 void code_dehydrate(code **pc); 707 723 708 724 /* cod4.c */ 709 725 extern const unsigned dblreg[]; 710 726 extern int cdcmp_flag; 711 727 712 code *modEA (unsigned Irm);728 code *modEA(code *c); 713 729 cd_t cdeq; 714 730 cd_t cdaddass; 715 731 cd_t cdmulass; 716 732 cd_t cdshass; 717 733 cd_t cdcmp; 718 734 cd_t cdcnvt; 719 735 cd_t cdshtlng; 720 736 cd_t cdbyteint; 721 737 cd_t cdlngsht; 722 738 cd_t cdmsw; 723 739 cd_t cdport; 724 740 cd_t cdasm; 725 741 cd_t cdsetjmp; 726 742 cd_t cdvoid; 727 743 cd_t cdhalt; 728 744 cd_t cdfar16; 729 745 cd_t cdbt; 730 746 cd_t cdbscan; 731 747 cd_t cdpair; 732 748 code *longcmp (elem *,bool,unsigned,code *); … … 786 802 code *nteh_filter(block *b); 787 803 void nteh_framehandler(symbol *); 788 804 code *nteh_gensindex(int); 789 805 #define GENSINDEXSIZE 7 790 806 code *nteh_monitor_prolog(Symbol *shandle); 791 807 code *nteh_monitor_epilog(regm_t retregs); 792 808 793 809 // cgen.c 794 810 code *code_last(code *c); 795 811 void code_orflag(code *c,unsigned flag); 796 812 void code_orrex(code *c,unsigned rex); 797 813 code * __pascal cat (code *c1 , code *c2 ); 798 814 code * cat3 (code *c1 , code *c2 , code *c3 ); 799 815 code * cat4 (code *c1 , code *c2 , code *c3 , code *c4 ); 800 816 code * cat6 (code *c1 , code *c2 , code *c3 , code *c4 , code *c5 , code *c6 ); 801 817 code *gen (code *c , code *cs ); 802 818 code *gen1 (code *c , unsigned op ); 803 819 code 
*gen2 (code *c , unsigned op , unsigned rm ); 804 820 code *gen2sib(code *c,unsigned op,unsigned rm,unsigned sib); 805 821 code *genregs (code *c , unsigned op , unsigned dstreg , unsigned srcreg ); 822 code *genpush (code *c , unsigned reg ); 823 code *genpop (code *c , unsigned reg ); 806 824 code *gentstreg (code *c , unsigned reg ); 807 825 code *genasm (code *c , char *s , unsigned slen ); 808 826 code *genmovreg (code *c , unsigned to , unsigned from ); 809 827 code *genjmp (code *c , unsigned op , unsigned fltarg , block *targ ); 810 828 code *gencsi (code *c , unsigned op , unsigned rm , unsigned FL2 , SYMIDX si ); 811 829 code *gencs (code *c , unsigned op , unsigned rm , unsigned FL2 , symbol *s ); 812 830 code *genc2 (code *c , unsigned op , unsigned rm , targ_size_t EV2 ); 813 831 code *genc1 (code *c , unsigned op , unsigned rm , unsigned FL1 , targ_size_t EV1 ); 814 832 code *genc (code *c , unsigned op , unsigned rm , unsigned FL1 , targ_size_t EV1 , unsigned FL2 , targ_size_t EV2 ); 815 833 code *genmulimm(code *c,unsigned r1,unsigned r2,targ_int imm); 816 834 code *genlinnum(code *,Srcpos); 817 835 void cgen_linnum(code **pc,Srcpos srcpos); 818 836 void cgen_prelinnum(code **pc,Srcpos srcpos); 819 837 code *genadjesp(code *c, int offset); 820 838 code *gennop(code *); 821 839 code *genshift(code *); 822 840 code *movregconst (code *c , unsigned reg , targ_size_t value , regm_t flags ); 823 841 bool reghasvalue (regm_t regm , targ_size_t value , unsigned *preg ); 824 842 code *regwithvalue (code *c , regm_t regm , targ_size_t value , unsigned *preg , regm_t flags ); 825 843 trunk/src/backend/iasm.h
r572 r577 71 71 // Operand flags - usOp1, usOp2, usOp3 72 72 // 73 73 74 74 typedef unsigned opflag_t; 75 75 76 76 // Operand flags for normal opcodes 77 77 78 78 #define _r8 CONSTRUCT_FLAGS( _8, _reg, _normal, 0 ) 79 79 #define _r16 CONSTRUCT_FLAGS(_16, _reg, _normal, 0 ) 80 80 #define _r32 CONSTRUCT_FLAGS(_32, _reg, _normal, 0 ) 81 81 #define _r64 CONSTRUCT_FLAGS(_64, _reg, _normal, 0 ) 82 82 #define _m8 CONSTRUCT_FLAGS(_8, _m, _normal, 0 ) 83 83 #define _m16 CONSTRUCT_FLAGS(_16, _m, _normal, 0 ) 84 84 #define _m32 CONSTRUCT_FLAGS(_32, _m, _normal, 0 ) 85 85 #define _m48 CONSTRUCT_FLAGS( _48, _m, _normal, 0 ) 86 86 #define _m64 CONSTRUCT_FLAGS( _64, _m, _normal, 0 ) 87 87 #define _m128 CONSTRUCT_FLAGS( _anysize, _m, _normal, 0 ) 88 88 #define _rm8 CONSTRUCT_FLAGS(_8, _rm, _normal, 0 ) 89 89 #define _rm16 CONSTRUCT_FLAGS(_16, _rm, _normal, 0 ) 90 90 #define _rm32 CONSTRUCT_FLAGS(_32, _rm, _normal, 0) 91 #define _rm64 CONSTRUCT_FLAGS(_64, _rm, _normal, 0) 91 92 #define _r32m16 CONSTRUCT_FLAGS(_32|_16, _rm, _normal, 0) 92 93 #define _imm8 CONSTRUCT_FLAGS(_8, _imm, _normal, 0 ) 93 94 #define _imm16 CONSTRUCT_FLAGS(_16, _imm, _normal, 0) 94 95 #define _imm32 CONSTRUCT_FLAGS(_32, _imm, _normal, 0) 95 96 #define _rel8 CONSTRUCT_FLAGS(_8, _rel, _normal, 0) 96 97 #define _rel16 CONSTRUCT_FLAGS(_16, _rel, _normal, 0) 97 98 #define _rel32 CONSTRUCT_FLAGS(_32, _rel, _normal, 0) 98 99 #define _p1616 CONSTRUCT_FLAGS(_32, _p, _normal, 0) 99 100 #define _m1616 CONSTRUCT_FLAGS(_32, _mnoi, _normal, 0) 100 101 #define _p1632 CONSTRUCT_FLAGS(_48, _p, _normal, 0 ) 101 102 #define _m1632 CONSTRUCT_FLAGS(_48, _mnoi, _normal, 0) 102 103 #define _special CONSTRUCT_FLAGS( 0, 0, _rspecial, 0 ) 103 104 #define _seg CONSTRUCT_FLAGS( 0, 0, _rseg, 0 ) 104 105 #define _a16 CONSTRUCT_FLAGS( 0, 0, _addr16, 0 ) 105 106 #define _a32 CONSTRUCT_FLAGS( 0, 0, _addr32, 0 ) 106 107 #define _f16 CONSTRUCT_FLAGS( 0, 0, _fn16, 0) 107 108 // Near function pointer 108 109 #define _f32 CONSTRUCT_FLAGS( 0, 0, 
_fn32, 0) 109 110 // Far function pointer 110 111 #define _lbl CONSTRUCT_FLAGS( 0, 0, _flbl, 0 ) trunk/src/backend/ptrntab.c
r572 r577 500 500 PTRNTAB1 aptb1LOOPZ[] = /* LOOPZ */ { 501 501 { 0xe1, _cb | _modcx,_rel8 }, 502 502 { ASM_END, 0, 0 } 503 503 }; 504 504 PTRNTAB1 aptb1LOOPNE[] = /* LOOPNE */ { 505 505 { 0xe0, _cb | _modcx,_rel8 }, 506 506 { ASM_END, 0, 0 } 507 507 }; 508 508 PTRNTAB1 aptb1LOOPNZ[] = /* LOOPNZ */ { 509 509 { 0xe0, _cb | _modcx,_rel8 }, 510 510 { ASM_END, 0, 0 } 511 511 }; 512 512 PTRNTAB1 aptb1LTR[] = /* LTR */ { 513 513 { 0x0f00, _3|_modnot1, _rm16 }, 514 514 { ASM_END, 0, 0 } 515 515 }; 516 516 PTRNTAB1 aptb1NEG[] = /* NEG */ { 517 517 { 0xf6, _3, _rm8 }, 518 518 { 0xf7, _3 | _16_bit, _rm16 }, 519 519 { 0xf7, _3 | _32_bit, _rm32 }, 520 { 0xf7, _3 | _64_bit, _rm64 }, 520 521 { ASM_END, 0, 0 } 521 522 }; 522 523 PTRNTAB1 aptb1NOT[] = /* NOT */ { 523 524 { 0xf6, _2, _rm8 }, 524 525 { 0xf7, _2 | _16_bit, _rm16 }, 525 526 { 0xf7, _2 | _32_bit, _rm32 }, 526 527 { ASM_END, 0, 0 } 527 528 }; 528 529 PTRNTAB1 aptb1POP[] = /* POP */ { 529 530 { 0x8f, _0 | _16_bit, _m16 }, 530 531 { 0x8f, _0 | _32_bit, _m32}, 531 532 { 0x58, _rw | _16_bit, _r16 | _plus_r }, 532 533 { 0x58, _rd | _32_bit, _r32 | _plus_r }, 534 { 0x58, _r | _64_bit, _r64 | _plus_r }, 533 535 { 0x1f, 0, _ds | _seg }, 534 536 { 0x07, _modes, _es | _seg}, 535 537 { 0x17, 0, _ss | _seg}, 536 538 { 0x0fa1, 0, _fs | _seg}, 537 539 { 0x0fa9, 0, _gs | _seg}, 538 540 { ASM_END, 0, 0 } 539 541 }; 540 542 PTRNTAB1 aptb1PUSH[] = /* PUSH */ { 541 543 { 0xff, _6 | _16_bit, _m16 }, 542 544 { 0xff, _6 | _32_bit, _m32 }, 545 { 0xff, _6 | _64_bit, _m64 }, 543 546 { 0x50, _r | _16_bit, _r16 | _plus_r }, 544 547 { 0x50, _r | _32_bit, _r32 | _plus_r }, 548 { 0x50, _r | _64_bit, _r64 | _plus_r }, 545 549 { 0x6a, 0,_imm8 }, 546 550 { 0x68, _16_bit,_imm16 }, 547 551 { 0x68, _16_bit,_rel16 }, 548 552 { 0x68, _32_bit,_imm32 }, 549 553 { 0x68, _32_bit,_rel32 }, 550 554 { 0x0e, 0,_cs | _seg}, 551 555 { 0x16, 0,_ss | _seg}, 552 556 { 0x1e, 0,_ds | _seg}, 553 557 { 0x06, 0,_es | _seg}, 554 558 { 0x0fa0, 0,_fs | _seg}, 555 559 { 0x0fa8, 
0,_gs | _seg}, 556 560 { ASM_END, 0, 0 } 557 561 }; 558 562 PTRNTAB1 aptb1RET[] = /* RET */ { 559 563 { 0xc3, 0, 0 }, 560 564 { 0xc2, _iw, _imm16 }, 561 565 { ASM_END, 0, 0 } 562 566 }; 563 567 PTRNTAB1 aptb1RETF[] = /* RETF */ { 564 568 { 0xcb, 0, 0 }, trunk/src/freebsd.mak
r473 r577 5 5 6 6 CC=g++ -m32 7 7 8 8 #OPT=-g -g3 9 9 #OPT=-O2 10 10 11 11 #COV=-fprofile-arcs -ftest-coverage 12 12 13 13 #GFLAGS = -Wno-deprecated -D__near= -D__pascal= -fno-exceptions -g -DDEBUG=1 $(COV) 14 14 GFLAGS = -Wno-deprecated -D__near= -D__pascal= -fno-exceptions -O2 15 15 16 16 CFLAGS = $(GFLAGS) -I$(ROOT) -D__I86__=1 -DMARS=1 -DTARGET_FREEBSD=1 -D_DH 17 17 MFLAGS = $(GFLAGS) -I$C -I$(TK) -D__I86__=1 -DMARS=1 -DTARGET_FREEBSD=1 -D_DH 18 18 19 19 CH= $C/cc.h $C/global.h $C/parser.h $C/oper.h $C/code.h $C/type.h \ 20 20 $C/dt.h $C/cgcv.h $C/el.h $C/iasm.h 21 21 TOTALH= 22 22 23 23 DMD_OBJS = \ 24 24 access.o array.o attrib.o bcomplex.o bit.o blockopt.o \ 25 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cg cv.o cgelem.o cgen.o \25 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cgelem.o cgen.o \ 26 26 cgreg.o cgsched.o class.o cod1.o cod2.o cod3.o cod4.o cod5.o \ 27 27 constfold.o irstate.o dchar.o cond.o debug.o \ 28 28 declaration.o dsymbol.o dt.o dump.o e2ir.o ee.o eh.o el.o \ 29 29 dwarf.o enum.o evalu8.o expression.o func.o gdag.o gflow.o \ 30 30 glocal.o gloop.o glue.o gnuc.o go.o gother.o html.o iasm.o id.o \ 31 31 identifier.o impcnvtab.o import.o inifile.o init.o inline.o \ 32 32 lexer.o link.o lstring.o mangle.o mars.o rmem.o module.o msc.o mtype.o \ 33 33 nteh.o cppmangle.o opover.o optimize.o os.o out.o outbuf.o \ 34 34 parse.o ph.o ptrntab.o root.o rtlsym.o s2ir.o scope.o statement.o \ 35 35 stringtable.o struct.o csymbol.o template.o tk.o tocsym.o todt.o \ 36 36 type.o typinf.o util.o var.o version.o strtold.o utf.o staticassert.o \ 37 37 unialpha.o toobj.o toctype.o toelfdebug.o entity.o doc.o macro.o \ 38 38 hdrgen.o delegatize.o aa.o ti_achar.o toir.o interpret.o traits.o \ 39 39 builtin.o clone.o aliasthis.o \ 40 40 man.o arrayop.o port.o response.o async.o json.o speller.o aav.o unittests.o \ 41 41 imphint.o \ 42 42 libelf.o elfobj.o 43 43 44 44 SRC = win32.mak linux.mak osx.mak freebsd.mak solaris.mak \ 45 45 mars.c enum.c struct.c dsymbol.c 
import.c idgen.c impcnvgen.c \ trunk/src/iasm.c
r572 r577 93 93 EM_num, 94 94 EM_float, 95 95 EM_char, 96 96 EM_label_expected, 97 97 EM_uplevel, 98 98 EM_type_as_operand, 99 99 }; 100 100 101 101 const char *asmerrmsgs[] = 102 102 { 103 103 "unknown operand for floating point instruction", 104 104 "bad addr mode", 105 105 "align %d must be a power of 2", 106 106 "opcode expected, not %s", 107 107 "prefix", 108 108 "end of instruction", 109 109 "bad operand", 110 110 "bad integral operand", 111 111 "identifier expected", 112 112 "not struct", 113 " nops expected",113 "%u operands found for %s instead of the expected %u", 114 114 "bad type/size of operands '%s'", 115 115 "constant initializer expected", 116 116 "undefined identifier '%s'", 117 117 "pointer", 118 118 "colon", 119 119 "] expected instead of '%s'", 120 120 ") expected instead of '%s'", 121 121 "ptr expected", 122 122 "integer expected", 123 123 "floating point expected", 124 124 "character is truncated", 125 125 "label expected", 126 126 "uplevel nested reference to variable %s", 127 127 "cannot use type %s as an operand" 128 128 }; 129 129 130 130 // Additional tokens for the inline assembler 131 131 typedef enum 132 132 { 133 133 ASMTKlocalsize = TOKMAX + 1, … … 1476 1476 if (usNumops == 1) 1477 1477 { pc->IFL2 = FLblock; 1478 1478 pc->IEVlsym2 = label; 1479 1479 } 1480 1480 else 1481 1481 { pc->IFL1 = FLblock; 1482 1482 pc->IEVlsym1 = label; 1483 1483 } 1484 1484 } 1485 1485 } 1486 1486 } 1487 1487 1488 1488 switch (usNumops) 1489 1489 { 1490 1490 case 0: 1491 1491 break; 1492 1492 case 1: 1493 1493 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1494 1494 amodTable1 == _normal && (uRegmaskTable1 & _rplus_r))) 1495 1495 { 1496 if (asmstate.ucItype == ITfloat) 1497 pc->Irm += popnd1->base->val; 1498 else if (pc->Iop == 0x0f) 1499 pc->Iop2 += popnd1->base->val; 1500 else 1501 pc->Iop += popnd1->base->val; 1496 unsigned reg = popnd1->base->val; 1497 if (reg & 8) 1498 { reg &= 7; 1499 pc->Irex |= REX_B; 1500 assert(I64); 1501 } 1502 if 
(asmstate.ucItype == ITfloat) 1503 pc->Irm += reg; 1504 else if (pc->Iop == 0x0f) 1505 pc->Iop2 += reg; 1506 else 1507 pc->Iop += reg; 1502 1508 #ifdef DEBUG 1503 auchOpcode[usIdx-1] += popnd1->base->val;1509 auchOpcode[usIdx-1] += reg; 1504 1510 #endif 1505 1511 } 1506 1512 else 1507 1513 { asm_make_modrm_byte( 1508 1514 #ifdef DEBUG 1509 1515 auchOpcode, &usIdx, 1510 1516 #endif 1511 1517 pc, 1512 1518 ptb.pptb1->usFlags, 1513 1519 popnd1, NULL); 1514 1520 } 1515 1521 popndTmp = popnd1; 1516 1522 aoptyTmp = aoptyTable1; 1517 1523 uSizemaskTmp = uSizemaskTable1; 1518 1524 L1: 1519 1525 if (aoptyTmp == _imm) 1520 1526 { 1521 1527 Declaration *d = popndTmp->s ? popndTmp->s->isDeclaration() 1522 1528 : NULL; 1523 1529 if (popndTmp->bSeg) … … 1607 1613 else 1608 1614 { 1609 1615 asm_make_modrm_byte( 1610 1616 #ifdef DEBUG 1611 1617 auchOpcode, &usIdx, 1612 1618 #endif 1613 1619 pc, 1614 1620 ptb.pptb1->usFlags, 1615 1621 popnd2, popnd1); 1616 1622 } 1617 1623 popndTmp = popnd1; 1618 1624 aoptyTmp = aoptyTable1; 1619 1625 uSizemaskTmp = uSizemaskTable1; 1620 1626 } 1621 1627 else 1622 1628 { 1623 1629 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1624 1630 amodTable1 == _normal && 1625 1631 (uRegmaskTable1 & _rplus_r))) 1626 1632 { 1627 if (asmstate.ucItype == ITfloat) 1628 pc->Irm += popnd1->base->val; 1629 else 1630 if (pc->Iop == 0x0f) 1631 pc->Iop2 += popnd1->base->val; 1632 else 1633 pc->Iop += popnd1->base->val; 1633 unsigned reg = popnd1->base->val; 1634 if (reg & 8) 1635 { reg &= 7; 1636 pc->Irex |= REX_B; 1637 assert(I64); 1638 } 1639 if (asmstate.ucItype == ITfloat) 1640 pc->Irm += reg; 1641 else if (pc->Iop == 0x0f) 1642 pc->Iop2 += reg; 1643 else 1644 pc->Iop += reg; 1634 1645 #ifdef DEBUG 1635 auchOpcode[usIdx-1] += popnd1->base->val;1646 auchOpcode[usIdx-1] += reg; 1636 1647 #endif 1637 1648 } 1638 1649 else 1639 1650 if (((aoptyTable2 == _reg || aoptyTable2 == _float) && 1640 1651 amodTable2 == _normal && 1641 1652 (uRegmaskTable2 & _rplus_r))) 
1642 1653 { 1643 if (asmstate.ucItype == ITfloat) 1644 pc->Irm += popnd2->base->val; 1645 else 1646 if (pc->Iop == 0x0f) 1647 pc->Iop2 += popnd2->base->val; 1648 else 1649 pc->Iop += popnd2->base->val; 1654 unsigned reg = popnd2->base->val; 1655 if (reg & 8) 1656 { reg &= 7; 1657 pc->Irex |= REX_B; 1658 assert(I64); 1659 } 1660 if (asmstate.ucItype == ITfloat) 1661 pc->Irm += reg; 1662 else if (pc->Iop == 0x0f) 1663 pc->Iop2 += reg; 1664 else 1665 pc->Iop += reg; 1650 1666 #ifdef DEBUG 1651 auchOpcode[usIdx-1] += popnd2->base->val;1667 auchOpcode[usIdx-1] += reg; 1652 1668 #endif 1653 1669 } 1654 1670 else if (ptb.pptb0->usOpcode == 0xF30FD6 || 1655 1671 ptb.pptb0->usOpcode == 0x0F12 || 1656 1672 ptb.pptb0->usOpcode == 0x0F16 || 1657 1673 ptb.pptb0->usOpcode == 0x660F50 || 1658 1674 ptb.pptb0->usOpcode == 0x0F50 || 1659 1675 ptb.pptb0->usOpcode == 0x660FD7 || 1660 1676 ptb.pptb0->usOpcode == 0x0FD7) 1661 1677 { 1662 1678 asm_make_modrm_byte( 1663 1679 #ifdef DEBUG 1664 1680 auchOpcode, &usIdx, 1665 1681 #endif 1666 1682 pc, 1667 1683 ptb.pptb1->usFlags, 1668 1684 popnd2, popnd1); 1669 1685 } 1670 1686 else 1671 1687 { … … 1697 1713 if (aoptyTable2 == _m || aoptyTable2 == _rm || 1698 1714 usOpcode == 0x0FC5) // PEXTRW 1699 1715 { 1700 1716 asm_make_modrm_byte( 1701 1717 #ifdef DEBUG 1702 1718 auchOpcode, &usIdx, 1703 1719 #endif 1704 1720 pc, 1705 1721 ptb.pptb1->usFlags, 1706 1722 popnd2, popnd1); 1707 1723 popndTmp = popnd3; 1708 1724 aoptyTmp = aoptyTable3; 1709 1725 uSizemaskTmp = uSizemaskTable3; 1710 1726 } 1711 1727 else { 1712 1728 1713 1729 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1714 1730 amodTable1 == _normal && 1715 1731 (uRegmaskTable1 &_rplus_r))) 1716 1732 { 1717 if (asmstate.ucItype == ITfloat) 1718 pc->Irm += popnd1->base->val; 1719 else 1720 if (pc->Iop == 0x0f) 1721 pc->Iop2 += popnd1->base->val; 1722 else 1723 pc->Iop += popnd1->base->val; 1733 unsigned reg = popnd1->base->val; 1734 if (reg & 8) 1735 { reg &= 7; 1736 pc->Irex |= 
REX_B; 1737 assert(I64); 1738 } 1739 if (asmstate.ucItype == ITfloat) 1740 pc->Irm += reg; 1741 else if (pc->Iop == 0x0f) 1742 pc->Iop2 += reg; 1743 else 1744 pc->Iop += reg; 1724 1745 #ifdef DEBUG 1725 auchOpcode[usIdx-1] += popnd1->base->val;1746 auchOpcode[usIdx-1] += reg; 1726 1747 #endif 1727 1748 } 1728 1749 else 1729 1750 if (((aoptyTable2 == _reg || aoptyTable2 == _float) && 1730 1751 amodTable2 == _normal && 1731 1752 (uRegmaskTable2 &_rplus_r))) 1732 1753 { 1733 if (asmstate.ucItype == ITfloat) 1734 pc->Irm += popnd1->base->val; 1735 else 1736 if (pc->Iop == 0x0f) 1737 pc->Iop2 += popnd1->base->val; 1738 else 1739 pc->Iop += popnd2->base->val; 1754 unsigned reg = popnd1->base->val; 1755 if (reg & 8) 1756 { reg &= 7; 1757 pc->Irex |= REX_B; 1758 assert(I64); 1759 } 1760 if (asmstate.ucItype == ITfloat) 1761 pc->Irm += reg; 1762 else if (pc->Iop == 0x0f) 1763 pc->Iop2 += reg; 1764 else 1765 pc->Iop += reg; 1740 1766 #ifdef DEBUG 1741 auchOpcode[usIdx-1] += popnd2->base->val;1767 auchOpcode[usIdx-1] += reg; 1742 1768 #endif 1743 1769 } 1744 1770 else 1745 1771 asm_make_modrm_byte( 1746 1772 #ifdef DEBUG 1747 1773 auchOpcode, &usIdx, 1748 1774 #endif 1749 1775 pc, 1750 1776 ptb.pptb1->usFlags, 1751 1777 popnd1, popnd2); 1752 1778 1753 1779 popndTmp = popnd3; 1754 1780 aoptyTmp = aoptyTable3; 1755 1781 uSizemaskTmp = uSizemaskTable3; 1756 1782 1757 1783 } 1758 1784 goto L1; 1759 1785 } 1760 1786 L2: 1761 1787 … … 2654 2680 case _modsidi: 2655 2681 usRet |= (mSI | mDI); 2656 2682 break; 2657 2683 case _modcx: 2658 2684 usRet |= mCX; 2659 2685 break; 2660 2686 case _modes: 2661 2687 /*usRet |= mES;*/ 2662 2688 break; 2663 2689 case _modall: 2664 2690 asmstate.bReturnax = TRUE; 2665 2691 return /*mES |*/ ALLREGS; 2666 2692 case _modsiax: 2667 2693 usRet |= (mSI | mAX); 2668 2694 break; 2669 2695 case _modsinot1: 2670 2696 usRet |= mSI; 2671 2697 popnd1 = NULL; 2672 2698 break; 2673 2699 } 2674 if (popnd1 && ASM_GET_aopty(popnd1->usFlags) == _reg) { 2675 switch 
(ASM_GET_amod(popnd1->usFlags)) { 2700 if (popnd1 && ASM_GET_aopty(popnd1->usFlags) == _reg) 2701 { 2702 switch (ASM_GET_amod(popnd1->usFlags)) 2703 { 2676 2704 default: 2677 if (ASM_GET_uSizemask(popnd1->usFlags) == _8) { 2678 switch(popnd1->base->val) { 2679 case _AL: 2680 case _AH: 2681 usRet |= mAX; 2682 break; 2683 case _BL: 2684 case _BH: 2685 usRet |= mBX; 2686 break; 2687 case _CL: 2688 case _CH: 2689 usRet |= mCX; 2690 break; 2691 case _DL: 2692 case _DH: 2693 usRet |= mDX; 2694 break; 2695 default: 2696 assert(0); 2697 } 2698 } 2699 else { 2700 switch (popnd1->base->val) { 2701 case _AX: 2702 usRet |= mAX; 2703 break; 2704 case _BX: 2705 usRet |= mBX; 2706 break; 2707 case _CX: 2708 usRet |= mCX; 2709 break; 2710 case _DX: 2711 usRet |= mDX; 2712 break; 2713 case _SI: 2714 usRet |= mSI; 2715 break; 2716 case _DI: 2717 usRet |= mDI; 2718 break; 2719 } 2720 } 2705 usRet |= 1 << popnd1->base->val; 2706 usRet &= ~(mBP | mSP); // ignore changing these 2721 2707 break; 2722 2708 case _rseg: 2723 2709 //if (popnd1->base->val == _ES) 2724 2710 //usRet |= mES; 2725 2711 break; 2726 2712 2727 2713 case _rspecial: 2728 2714 break; 2729 2715 } 2730 2716 } 2731 2717 if (usRet & mAX) 2732 2718 asmstate.bReturnax = TRUE; 2733 2719 2734 2720 return usRet; 2735 2721 } 2736 2722 2737 2723 /******************************* 2738 2724 * Match flags in operand against flags in opcode table. 2739 2725 * Returns: 2740 2726 * !=0 if match trunk/src/linux.mak
r569 r577 9 9 10 10 #OPT=-g -g3 11 11 #OPT=-O2 12 12 13 13 #COV=-fprofile-arcs -ftest-coverage 14 14 15 15 WARNINGS=-Wno-deprecated -Wstrict-aliasing 16 16 17 17 #GFLAGS = $(WARNINGS) -D__near= -D__pascal= -fno-exceptions -g -DDEBUG=1 -DUNITTEST $(COV) 18 18 GFLAGS = $(WARNINGS) -D__near= -D__pascal= -fno-exceptions -O2 19 19 20 20 CFLAGS = $(GFLAGS) -I$(ROOT) -D__I86__=1 -DMARS=1 -DTARGET_LINUX=1 -D_DH 21 21 MFLAGS = $(GFLAGS) -I$C -I$(TK) -D__I86__=1 -DMARS=1 -DTARGET_LINUX=1 -D_DH 22 22 23 23 CH= $C/cc.h $C/global.h $C/parser.h $C/oper.h $C/code.h $C/type.h \ 24 24 $C/dt.h $C/cgcv.h $C/el.h $C/iasm.h 25 25 TOTALH= 26 26 27 27 DMD_OBJS = \ 28 28 access.o array.o attrib.o bcomplex.o bit.o blockopt.o \ 29 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cg cv.o cgelem.o cgen.o \29 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cgelem.o cgen.o \ 30 30 cgreg.o cgsched.o class.o cod1.o cod2.o cod3.o cod4.o cod5.o \ 31 31 constfold.o irstate.o dchar.o cond.o debug.o \ 32 32 declaration.o dsymbol.o dt.o dump.o e2ir.o ee.o eh.o el.o \ 33 33 dwarf.o enum.o evalu8.o expression.o func.o gdag.o gflow.o \ 34 34 glocal.o gloop.o glue.o gnuc.o go.o gother.o html.o iasm.o id.o \ 35 35 identifier.o impcnvtab.o import.o inifile.o init.o inline.o \ 36 36 lexer.o link.o lstring.o mangle.o mars.o rmem.o module.o msc.o mtype.o \ 37 37 nteh.o cppmangle.o opover.o optimize.o os.o out.o outbuf.o \ 38 38 parse.o ph.o ptrntab.o root.o rtlsym.o s2ir.o scope.o statement.o \ 39 39 stringtable.o struct.o csymbol.o template.o tk.o tocsym.o todt.o \ 40 40 type.o typinf.o util.o var.o version.o strtold.o utf.o staticassert.o \ 41 41 unialpha.o toobj.o toctype.o toelfdebug.o entity.o doc.o macro.o \ 42 42 hdrgen.o delegatize.o aa.o ti_achar.o toir.o interpret.o traits.o \ 43 43 builtin.o clone.o aliasthis.o \ 44 44 man.o arrayop.o port.o response.o async.o json.o speller.o aav.o unittests.o \ 45 45 imphint.o \ 46 46 libelf.o elfobj.o 47 47 48 48 SRC = win32.mak linux.mak osx.mak freebsd.mak solaris.mak \ 49 49 
mars.c enum.c struct.c dsymbol.c import.c idgen.c impcnvgen.c \ trunk/src/osx.mak
r532 r577 15 15 16 16 #OPT=-g -g3 17 17 #OPT=-O2 18 18 19 19 #COV=-fprofile-arcs -ftest-coverage 20 20 21 21 WARNINGS=-Wno-deprecated -Wstrict-aliasing 22 22 23 23 #GFLAGS = $(WARNINGS) -D__near= -D__pascal= -fno-exceptions -g -DDEBUG=1 -DUNITTEST $(COV) 24 24 GFLAGS = $(WARNINGS) -D__near= -D__pascal= -fno-exceptions -O2 25 25 26 26 CFLAGS = $(GFLAGS) -I$(ROOT) -D__I86__=1 -DMARS=1 -DTARGET_OSX=1 -D_DH 27 27 MFLAGS = $(GFLAGS) -I$C -I$(TK) -D__I86__=1 -DMARS=1 -DTARGET_OSX=1 -D_DH 28 28 29 29 CH= $C/cc.h $C/global.h $C/parser.h $C/oper.h $C/code.h $C/type.h \ 30 30 $C/dt.h $C/cgcv.h $C/el.h $C/iasm.h 31 31 TOTALH= 32 32 33 33 DMD_OBJS = \ 34 34 access.o array.o attrib.o bcomplex.o bit.o blockopt.o \ 35 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cg cv.o cgelem.o cgen.o \35 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cgelem.o cgen.o \ 36 36 cgreg.o cgsched.o class.o cod1.o cod2.o cod3.o cod4.o cod5.o \ 37 37 constfold.o irstate.o dchar.o cond.o debug.o \ 38 38 declaration.o dsymbol.o dt.o dump.o e2ir.o ee.o eh.o el.o \ 39 39 dwarf.o enum.o evalu8.o expression.o func.o gdag.o gflow.o \ 40 40 glocal.o gloop.o glue.o gnuc.o go.o gother.o html.o iasm.o id.o \ 41 41 identifier.o impcnvtab.o import.o inifile.o init.o inline.o \ 42 42 lexer.o link.o lstring.o mangle.o mars.o rmem.o module.o msc.o mtype.o \ 43 43 nteh.o cppmangle.o opover.o optimize.o os.o out.o outbuf.o \ 44 44 parse.o ph.o ptrntab.o root.o rtlsym.o s2ir.o scope.o statement.o \ 45 45 stringtable.o struct.o csymbol.o template.o tk.o tocsym.o todt.o \ 46 46 type.o typinf.o util.o var.o version.o strtold.o utf.o staticassert.o \ 47 47 unialpha.o toobj.o toctype.o toelfdebug.o entity.o doc.o macro.o \ 48 48 hdrgen.o delegatize.o aa.o ti_achar.o toir.o interpret.o traits.o \ 49 49 builtin.o clone.o aliasthis.o \ 50 50 man.o arrayop.o port.o response.o async.o json.o speller.o aav.o unittests.o \ 51 51 imphint.o \ 52 52 libmach.o machobj.o 53 53 54 54 SRC = win32.mak linux.mak osx.mak freebsd.mak solaris.mak \ 55 55 
mars.c enum.c struct.c dsymbol.c import.c idgen.c impcnvgen.c \ trunk/src/solaris.mak
r473 r577 5 5 6 6 CC=g++ -m32 7 7 8 8 #OPT=-g -g3 9 9 #OPT=-O2 10 10 11 11 #COV=-fprofile-arcs -ftest-coverage 12 12 13 13 #GFLAGS = -Wno-deprecated -D__near= -D__pascal= -fno-exceptions -g -DDEBUG=1 $(COV) 14 14 GFLAGS = -Wno-deprecated -D__near= -D__pascal= -fno-exceptions -O2 15 15 16 16 CFLAGS = $(GFLAGS) -I$(ROOT) -D__I86__=1 -DMARS=1 -DTARGET_SOLARIS=1 -D_DH 17 17 MFLAGS = $(GFLAGS) -I$C -I$(TK) -D__I86__=1 -DMARS=1 -DTARGET_SOLARIS=1 -D_DH 18 18 19 19 CH= $C/cc.h $C/global.h $C/parser.h $C/oper.h $C/code.h $C/type.h \ 20 20 $C/dt.h $C/cgcv.h $C/el.h $C/iasm.h 21 21 TOTALH= 22 22 23 23 DMD_OBJS = \ 24 24 access.o array.o attrib.o bcomplex.o bit.o blockopt.o \ 25 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cg cv.o cgelem.o cgen.o \25 cast.o code.o cg.o cg87.o cgcod.o cgcs.o cgelem.o cgen.o \ 26 26 cgreg.o cgsched.o class.o cod1.o cod2.o cod3.o cod4.o cod5.o \ 27 27 constfold.o irstate.o dchar.o cond.o debug.o \ 28 28 declaration.o dsymbol.o dt.o dump.o e2ir.o ee.o eh.o el.o \ 29 29 dwarf.o enum.o evalu8.o expression.o func.o gdag.o gflow.o \ 30 30 glocal.o gloop.o glue.o gnuc.o go.o gother.o html.o iasm.o id.o \ 31 31 identifier.o impcnvtab.o import.o inifile.o init.o inline.o \ 32 32 lexer.o link.o lstring.o mangle.o mars.o rmem.o module.o msc.o mtype.o \ 33 33 nteh.o cppmangle.o opover.o optimize.o os.o out.o outbuf.o \ 34 34 parse.o ph.o ptrntab.o root.o rtlsym.o s2ir.o scope.o statement.o \ 35 35 stringtable.o struct.o csymbol.o template.o tk.o tocsym.o todt.o \ 36 36 type.o typinf.o util.o var.o version.o strtold.o utf.o staticassert.o \ 37 37 unialpha.o toobj.o toctype.o toelfdebug.o entity.o doc.o macro.o \ 38 38 hdrgen.o delegatize.o aa.o ti_achar.o toir.o interpret.o traits.o \ 39 39 builtin.o clone.o aliasthis.o \ 40 40 man.o arrayop.o port.o response.o async.o json.o speller.o aav.o unittests.o \ 41 41 imphint.o \ 42 42 libelf.o elfobj.o 43 43 44 44 SRC = win32.mak linux.mak osx.mak freebsd.mak solaris.mak \ 45 45 mars.c enum.c struct.c dsymbol.c 
import.c idgen.c impcnvgen.c \
