Changeset 596
- Timestamp:
- 08/04/10 08:19:55 (14 years ago)
- Files:
-
- branches/dmd-1.x/src/backend/cgcod.c (modified) (1 diff)
- branches/dmd-1.x/src/backend/cgen.c (modified) (5 diffs)
- branches/dmd-1.x/src/backend/cgsched.c (modified) (11 diffs)
- branches/dmd-1.x/src/backend/cod1.c (modified) (5 diffs)
- branches/dmd-1.x/src/backend/cod2.c (modified) (3 diffs)
- branches/dmd-1.x/src/backend/cod3.c (modified) (20 diffs)
- branches/dmd-1.x/src/backend/cod4.c (modified) (5 diffs)
- branches/dmd-1.x/src/backend/code.h (modified) (2 diffs)
- branches/dmd-1.x/src/iasm.c (modified) (3 diffs)
- trunk/src/backend/cgcod.c (modified) (1 diff)
- trunk/src/backend/cgen.c (modified) (5 diffs)
- trunk/src/backend/cgsched.c (modified) (11 diffs)
- trunk/src/backend/cod1.c (modified) (5 diffs)
- trunk/src/backend/cod2.c (modified) (3 diffs)
- trunk/src/backend/cod3.c (modified) (20 diffs)
- trunk/src/backend/cod4.c (modified) (5 diffs)
- trunk/src/backend/code.h (modified) (2 diffs)
- trunk/src/iasm.c (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/dmd-1.x/src/backend/cgcod.c
r595 r596 1343 1343 dbg_printf(" block (%p) Btry=%p Bindex=%d\n",b,b->Btry,b->Bindex); 1344 1344 } 1345 1345 #endif 1346 1346 except_index_set(b->Bindex); 1347 1347 if (btry != b->Btry) // exited previous try block 1348 1348 { 1349 1349 except_pop(b,NULL,btry); 1350 1350 btry = b->Btry; 1351 1351 } 1352 1352 if (b->BC == BCtry) 1353 1353 { 1354 1354 except_push(b,NULL,b); 1355 1355 btry = b; 1356 1356 tryidx = except_index_get(); 1357 1357 b->Bcode = cat(nteh_gensindex(tryidx - 1),b->Bcode); 1358 1358 } 1359 1359 1360 1360 stack = NULL; 1361 1361 for (c = b->Bcode; c; c = code_next(c)) 1362 1362 { 1363 if ( c->Iop== ESCAPE)1363 if ((c->Iop & 0xFF) == ESCAPE) 1364 1364 { 1365 1365 c1 = NULL; 1366 switch (c->Iop 2)1366 switch (c->Iop & 0xFF00) 1367 1367 { 1368 1368 case ESCctor: 1369 1369 //printf("ESCctor\n"); 1370 1370 except_push(c,c->IEV1.Vtor,NULL); 1371 1371 goto L1; 1372 1372 1373 1373 case ESCdtor: 1374 1374 //printf("ESCdtor\n"); 1375 1375 except_pop(c,c->IEV1.Vtor,NULL); 1376 1376 L1: if (config.flags2 & CFG2seh) 1377 1377 { 1378 1378 c1 = nteh_gensindex(except_index_get() - 1); 1379 1379 code_next(c1) = code_next(c); 1380 1380 code_next(c) = c1; 1381 1381 } 1382 1382 break; 1383 1383 case ESCmark: 1384 1384 //printf("ESCmark\n"); 1385 1385 idx = except_index_get(); 1386 1386 list_prependdata(&stack,idx); 1387 1387 except_mark(); 1388 1388 break; 1389 1389 case ESCrelease: 1390 1390 //printf("ESCrelease\n"); 1391 1391 idx = list_data(stack); 1392 1392 list_pop(&stack); 1393 1393 if (idx != except_index_get()) 1394 1394 { 1395 1395 if (config.flags2 & CFG2seh) 1396 1396 { c1 = nteh_gensindex(idx - 1); 1397 1397 code_next(c1) = code_next(c); 1398 1398 code_next(c) = c1; 1399 1399 } 1400 1400 else 1401 1401 { except_pair_append(c,idx - 1); 1402 c->Iop 2 =ESCoffset;1402 c->Iop = ESCAPE | ESCoffset; 1403 1403 } 1404 1404 } 1405 1405 except_release(); 1406 1406 break; 1407 1407 case ESCmark2: 1408 1408 //printf("ESCmark2\n"); 1409 1409 except_mark(); 1410 1410 
break; 1411 1411 case ESCrelease2: 1412 1412 //printf("ESCrelease2\n"); 1413 1413 except_release(); 1414 1414 break; 1415 1415 } 1416 1416 } 1417 1417 } 1418 1418 assert(stack == NULL); 1419 1419 b->Bendindex = except_index_get(); 1420 1420 1421 1421 if (b->BC != BCret && b->BC != BCretexp) 1422 1422 lastidx = b->Bendindex; branches/dmd-1.x/src/backend/cgen.c
r595 r596 49 49 c = c->next; 50 50 c->Iflags |= flag; 51 51 } 52 52 } 53 53 54 54 /***************************** 55 55 * Set rex bits on last code in list. 56 56 */ 57 57 58 58 void code_orrex(code *c,unsigned rex) 59 59 { 60 60 if (rex && c) 61 61 { while (c->next) 62 62 c = c->next; 63 63 c->Irex |= rex; 64 64 } 65 65 } 66 66 67 67 /************************************** 68 68 * Set the opcode fields in cs. 69 * This is ridiculously complex, cs.Iop should70 * just be an unsigned.71 69 */ 72 70 code *setOpcode(code *c, code *cs, unsigned op) 73 71 { 74 cs->Iflags = 0; 75 if (op > 0xFF) 76 { 77 switch (op & 0xFF0000) 78 { 79 case 0: 80 break; 81 case 0x660000: 82 cs->Iflags = CFopsize; 83 break; 84 case 0xF20000: // REPNE 85 case 0xF30000: // REP/REPE 86 c = gen1(c, op >> 16); 87 break; 88 } 89 cs->Iop = op >> 8; 90 cs->Iop2 = op & 0xFF; 91 } 92 else 93 cs->Iop = op; 72 cs->Iop = op; 94 73 return c; 95 74 } 96 75 97 76 /***************************** 98 77 * Concatenate two code lists together. Return pointer to result. 
99 78 */ 100 79 101 80 #if TX86 && __INTSIZE == 4 && __SC__ 102 81 __declspec(naked) code * __pascal cat(code *c1,code *c2) 103 82 { 104 83 _asm 105 84 { 106 85 mov EAX,c1-4[ESP] 107 86 mov ECX,c2-4[ESP] 108 87 test EAX,EAX 109 88 jne L6D 110 89 mov EAX,ECX 111 90 ret 8 112 91 113 92 L6D: mov EDX,EAX … … 208 187 209 188 code *gen1(code *c,unsigned op) 210 189 { code *ce,*cstart; 211 190 212 191 ce = code_calloc(); 213 192 ce->Iop = op; 214 193 if (c) 215 194 { cstart = c; 216 195 while (code_next(c)) c = code_next(c); /* find end of list */ 217 196 code_next(c) = ce; /* link into list */ 218 197 return cstart; 219 198 } 220 199 return ce; 221 200 } 222 201 223 202 code *gen2(code *c,unsigned op,unsigned rm) 224 203 { code *ce,*cstart; 225 204 226 205 cstart = ce = code_calloc(); 227 206 /*cxcalloc++;*/ 228 if (op > 0xFF) 229 { ce->Iop = op >> 8; 230 ce->Iop2 = op & 0xFF; 231 } 232 else 233 ce->Iop = op; 207 ce->Iop = op; 234 208 ce->Iea = rm; 235 209 if (c) 236 210 { cstart = c; 237 211 while (code_next(c)) c = code_next(c); /* find end of list */ 238 212 code_next(c) = ce; /* link into list */ 239 213 } 240 214 return cstart; 241 215 } 242 216 243 217 code *gen2sib(code *c,unsigned op,unsigned rm,unsigned sib) 244 218 { code *ce,*cstart; 245 219 246 220 cstart = ce = code_calloc(); 247 221 /*cxcalloc++;*/ 248 222 ce->Iop = op; 249 223 ce->Irm = rm; 250 224 ce->Isib = sib; 251 225 ce->Irex = (rm | (sib & (REX_B << 16))) >> 16; 252 226 if (sib & (REX_R << 16)) 253 227 ce->Irex |= REX_X; … … 318 292 { 319 293 if (to == ES) 320 294 c = genregs(c,0x8E,0,from); 321 295 else if (from == ES) 322 296 c = genregs(c,0x8C,0,to); 323 297 else 324 298 c = genregs(c,0x89,from,to); 325 299 } 326 300 return c; 327 301 } 328 302 329 303 /************************** 330 304 * Generate a jump instruction. 
331 305 */ 332 306 333 307 code *genjmp(code *c,unsigned op,unsigned fltarg,block *targ) 334 308 { code cs; 335 309 code *cj; 336 310 code *cnop; 337 311 338 cs.Iop = op ;312 cs.Iop = op & 0xFF; 339 313 cs.Iflags = 0; 340 314 cs.Irex = 0; 341 315 if (op != JMP) /* if not already long branch */ 342 316 cs.Iflags = CFjmp16; /* assume long branch for op = 0x7x */ 343 317 cs.IFL2 = fltarg; /* FLblock (or FLcode) */ 344 318 cs.IEV2.Vblock = targ; /* target block (or code) */ 345 319 if (fltarg == FLcode) 346 320 ((code *)targ)->Iflags |= CFtarg; 347 321 348 322 if (config.flags4 & CFG4fastfloat) // if fast floating point 349 323 return gen(c,&cs); 350 324 351 325 cj = gen(CNIL,&cs); 352 326 switch (op & 0xFF00) /* look at second jump opcode */ 353 327 { 354 328 /* The JP and JNP come from floating point comparisons */ 355 329 case JP << 8: 356 330 cs.Iop = JP; 357 331 gen(cj,&cs); 358 332 break; … … 373 347 } 374 348 return cat(c,cj); 375 349 } 376 350 377 351 code *gencs(code *c,unsigned op,unsigned ea,unsigned FL2,symbol *s) 378 352 { code cs; 379 353 380 354 cs.Iop = op; 381 355 cs.Iea = ea; 382 356 cs.Iflags = 0; 383 357 cs.IFL2 = FL2; 384 358 cs.IEVsym2 = s; 385 359 cs.IEVoffset2 = 0; 386 360 387 361 return gen(c,&cs); 388 362 } 389 363 390 364 code *genc2(code *c,unsigned op,unsigned ea,targ_size_t EV2) 391 365 { code cs; 392 366 393 if (op > 0xFF) 394 { cs.Iop = op >> 8; 395 cs.Iop2 = op & 0xFF; 396 } 397 else 398 cs.Iop = op; 367 cs.Iop = op; 399 368 cs.Iea = ea; 400 369 cs.Iflags = CFoff; 401 370 cs.IFL2 = FLconst; 402 371 cs.IEV2.Vsize_t = EV2; 403 372 return gen(c,&cs); 404 373 } 405 374 406 375 /***************** 407 376 * Generate code. 
408 377 */ 409 378 410 379 code *genc1(code *c,unsigned op,unsigned ea,unsigned FL1,targ_size_t EV1) 411 380 { code cs; 412 381 413 382 assert(FL1 < FLMAX); 414 if (op > 0xFF) 415 { 416 c = setOpcode(c, &cs, op); 417 cs.Iflags |= CFoff; 418 } 419 else 420 { cs.Iop = op; 421 cs.Iflags = CFoff; 422 } 383 cs.Iop = op; 384 cs.Iflags = CFoff; 423 385 cs.Iea = ea; 424 386 cs.IFL1 = FL1; 425 387 cs.IEV1.Vsize_t = EV1; 426 388 return gen(c,&cs); 427 389 } 428 390 429 391 /***************** 430 392 * Generate code. 431 393 */ 432 394 433 395 code *genc(code *c,unsigned op,unsigned ea,unsigned FL1,targ_size_t EV1,unsigned FL2,targ_size_t EV2) 434 396 { code cs; 435 397 436 398 assert(FL1 < FLMAX); 437 assert(op < 256);438 399 cs.Iop = op; 439 400 cs.Iea = ea; 440 401 cs.Iflags = CFoff; 441 402 cs.IFL1 = FL1; 442 403 cs.IEV1.Vsize_t = EV1; 443 404 assert(FL2 < FLMAX); 444 405 cs.IFL2 = FL2; 445 406 cs.IEV2.Vsize_t = EV2; 446 407 return gen(c,&cs); 447 408 } 448 409 449 410 /*************************************** 450 411 * Generate immediate multiply instruction for r1=r2*imm. 451 412 * Optimize it into LEA's if we can. 452 413 */ 453 414 454 415 code *genmulimm(code *c,unsigned r1,unsigned r2,targ_int imm) 455 416 { code cs; 456 417 457 418 // These optimizations should probably be put into pinholeopt() … … 471 432 c = genc2(c,0x69,modregxrmx(3,r1,r2),imm); // IMUL r1,r2,imm 472 433 break; 473 434 } 474 435 return c; 475 436 } 476 437 477 438 /******************************** 478 439 * Generate 'instruction' which is actually a line number. 
479 440 */ 480 441 481 442 code *genlinnum(code *c,Srcpos srcpos) 482 443 { code cs; 483 444 484 445 #if 0 485 446 #if MARS 486 447 printf("genlinnum(Sfilename = %p, Slinnum = %u)\n", srcpos.Sfilename, srcpos.Slinnum); 487 448 #else 488 449 printf("genlinnum(Sfilptr = %p, Slinnum = %u)\n", srcpos.Sfilptr, srcpos.Slinnum); 489 450 #endif 490 451 #endif 491 cs.Iop = ESCAPE; 492 cs.Iop2 = ESClinnum; 452 cs.Iop = ESCAPE | ESClinnum; 493 453 cs.Iflags = 0; 494 454 cs.Irex = 0; 495 455 cs.IFL1 = 0; 496 456 cs.IFL2 = 0; 497 457 cs.IEV2.Vsrcpos = srcpos; 498 458 return gen(c,&cs); 499 459 } 500 460 501 461 /****************************** 502 462 * Append line number to existing code. 503 463 */ 504 464 505 465 void cgen_linnum(code **pc,Srcpos srcpos) 506 466 { 507 467 *pc = genlinnum(*pc,srcpos); 508 468 } 509 469 510 470 /***************************** 511 471 * Prepend line number to existing code. 512 472 */ 513 473 514 474 void cgen_prelinnum(code **pc,Srcpos srcpos) 515 475 { 516 476 *pc = cat(genlinnum(NULL,srcpos),*pc); 517 477 } 518 478 519 479 /******************************** 520 480 * Generate 'instruction' which tells the address resolver that the stack has 521 481 * changed. 522 482 */ 523 483 524 484 code *genadjesp(code *c, int offset) 525 485 { code cs; 526 486 527 487 if (!I16 && offset) 528 488 { 529 cs.Iop = ESCAPE; 530 cs.Iop2 = ESCadjesp; 489 cs.Iop = ESCAPE | ESCadjesp; 531 490 cs.Iflags = 0; 532 491 cs.Irex = 0; 533 492 cs.IEV2.Vint = offset; 534 493 return gen(c,&cs); 535 494 } 536 495 else 537 496 return c; 538 497 } 539 498 540 499 /******************************** 541 500 * Generate 'nop' 542 501 */ 543 502 544 503 code *gennop(code *c) 545 504 { 546 505 return gen1(c,NOP); 547 506 } 548 507 549 508 /****************************** 550 509 * Load CX with the value of _AHSHIFT. branches/dmd-1.x/src/backend/cgsched.c
r569 r596 761 761 1,1,5,5, 4,4,0,0, /* F8 */ 762 762 }; 763 763 764 764 static unsigned char uopsx[8] = { 1,1,2,5,1,1,1,5 }; 765 765 766 766 /************************************************ 767 767 * Determine number of micro-ops for Pentium Pro and Pentium II processors. 768 768 * 5 means 'complex'. 769 769 * Doesn't currently handle: 770 770 * floating point 771 771 * MMX 772 772 * 0F opcodes 773 773 * prefix bytes 774 774 */ 775 775 776 776 STATIC int uops(code *c) 777 777 { int n; 778 778 int op; 779 779 int op2; 780 780 781 op = c->Iop; 781 op = c->Iop & 0xFF; 782 if ((c->Iop & 0xFF00) == 0x0F00) 783 op = 0x0F; 782 784 n = insuops[op]; 783 785 if (!n) // if special case 784 786 { unsigned char irm,mod,reg,rm; 785 787 786 788 irm = c->Irm; 787 789 mod = (irm >> 6) & 3; 788 790 reg = (irm >> 3) & 7; 789 791 rm = irm & 7; 790 792 791 793 switch (op) 792 794 { 793 795 case 0x10: 794 796 case 0x11: // ADC rm,r 795 797 case 0x18: 796 798 case 0x19: // SBB rm,r 797 799 n = (mod == 3) ? 2 : 4; 798 800 break; 799 801 800 802 case 0x12: 801 803 case 0x13: // ADC r,rm … … 966 968 n = 4; 967 969 else if (reg == 4 || reg == 5) // MUL/IMUL rm 968 970 n = (mod == 3) ? 3 : 4; 969 971 else if (reg == 2 || reg == 3) // NOT/NEG rm 970 972 n = (mod == 3) ? 1 : 4; 971 973 break; 972 974 973 975 case 0xFF: 974 976 if (reg == 2 || reg == 3 || // CALL rm, CALL m,rm 975 977 reg == 5) // JMP seg:offset 976 978 n = 5; 977 979 else if (reg == 4) 978 980 n = (mod == 3) ? 1 : 2; 979 981 else if (reg == 0 || reg == 1) // INC/DEC rm 980 982 n = (mod == 3) ? 1 : 4; 981 983 else if (reg == 6) // PUSH rm 982 984 n = (mod == 3) ? 3 : 4; 983 985 break; 984 986 985 987 case 0x0F: 986 op2 = c->Iop 2;988 op2 = c->Iop & 0xFF; 987 989 if ((op2 & 0xF0) == 0x80) // Jcc 988 990 { n = 1; 989 991 break; 990 992 } 991 993 if ((op2 & 0xF0) == 0x90) // SETcc 992 994 { n = (mod == 3) ? 
1 : 3; 993 995 break; 994 996 } 995 997 if (op2 == 0xB6 || op2 == 0xB7 || // MOVZX 996 998 op2 == 0xBE || op2 == 0xBF) // MOVSX 997 999 { n = 1; 998 1000 break; 999 1001 } 1000 1002 if (op2 == 0xAF) // IMUL r,m 1001 1003 { n = (mod == 3) ? 1 : 2; 1002 1004 break; 1003 1005 } 1004 1006 break; 1005 1007 } 1006 1008 } … … 1008 1010 n = 5; // copout for now 1009 1011 return n; 1010 1012 } 1011 1013 1012 1014 /****************************************** 1013 1015 * Determine pairing classification. 1014 1016 * Don't deal with floating point, just assume they are all NP (Not Pairable). 1015 1017 * Returns: 1016 1018 * NP,UV,PU,PV optionally OR'd with PE 1017 1019 */ 1018 1020 1019 1021 STATIC int pair_class(code *c) 1020 1022 { unsigned char op; 1021 1023 unsigned char irm,mod,reg,rm; 1022 1024 unsigned a32; 1023 1025 int pc; 1024 1026 1025 1027 // Of course, with Intel this is *never* simple, and Intel's 1026 1028 // documentation is vague about the specifics. 1027 1029 1028 op = c->Iop; 1030 op = c->Iop & 0xFF; 1031 if ((c->Iop & 0xFF00) == 0x0F00) 1032 op = 0x0F; 1029 1033 pc = pentcycl[op]; 1030 1034 a32 = I32; 1031 1035 if (c->Iflags & CFaddrsize) 1032 1036 a32 ^= 1; 1033 1037 irm = c->Irm; 1034 1038 mod = (irm >> 6) & 3; 1035 1039 reg = (irm >> 3) & 7; 1036 1040 rm = irm & 7; 1037 1041 switch (op) 1038 1042 { 1039 1043 case 0x0F: // 2 byte opcode 1040 if ((c->Iop 2 & 0xF0) == 0x80)// if Jcc1044 if ((c->Iop & 0xF0) == 0x80) // if Jcc 1041 1045 pc = PV | PF; 1042 1046 break; 1043 1047 1044 1048 case 0x80: 1045 1049 case 0x81: 1046 1050 case 0x83: 1047 1051 if (reg == 2 || // ADC EA,immed 1048 1052 reg == 3) // SBB EA,immed 1049 1053 { pc = PU; 1050 1054 goto L2; 1051 1055 } 1052 1056 goto L1; // AND/OR/XOR/ADD/SUB/CMP EA,immed 1053 1057 1054 1058 case 0x84: 1055 1059 case 0x85: // TEST EA,reg 1056 1060 if (mod == 3) // TEST reg,reg 1057 1061 pc = UV; 1058 1062 break; 1059 1063 1060 1064 case 0xC0: … … 1167 1171 1168 1172 if (PRO) 1169 1173 { 1170 1174 ci->uops = 
uops(c); 1171 1175 ci->isz = calccodsize(c); 1172 1176 } 1173 1177 else 1174 1178 ci->pair = pair_class(c); 1175 1179 1176 1180 unsigned char op; 1177 1181 unsigned char op2; 1178 1182 unsigned char irm,mod,reg,rm; 1179 1183 unsigned a32; 1180 1184 int pc; 1181 1185 unsigned r,w; 1182 1186 int sz = I32 ? 4 : 2; 1183 1187 1184 1188 ci->r = 0; 1185 1189 ci->w = 0; 1186 1190 ci->a = 0; 1187 op = c->Iop; 1191 op = c->Iop & 0xFF; 1192 if ((c->Iop & 0xFF00) == 0x0F00) 1193 op = 0x0F; 1188 1194 //printf("\tgetinfo %x, op %x \n",c,op); 1189 1195 pc = pentcycl[op]; 1190 1196 a32 = I32; 1191 1197 if (c->Iflags & CFaddrsize) 1192 1198 a32 ^= 1; 1193 1199 if (c->Iflags & CFopsize) 1194 1200 sz ^= 2 | 4; 1195 1201 irm = c->Irm; 1196 1202 mod = (irm >> 6) & 3; 1197 1203 reg = (irm >> 3) & 7; 1198 1204 rm = irm & 7; 1199 1205 1200 1206 r = oprw[op][0]; 1201 1207 w = oprw[op][1]; 1202 1208 1203 1209 switch (op) 1204 1210 { 1205 1211 case 0x50: 1206 1212 case 0x51: 1207 1213 case 0x52: … … 1295 1301 w = F; 1296 1302 #if CLASSINIT2 1297 1303 ci->pair = UV; // it is patched to CMP EAX,0 1298 1304 #else 1299 1305 ci->pair = NP; 1300 1306 #endif 1301 1307 } 1302 1308 break; 1303 1309 1304 1310 case 0xF6: 1305 1311 r = grprw[3][reg][0]; // Grp 3, byte version 1306 1312 w = grprw[3][reg][1]; 1307 1313 break; 1308 1314 1309 1315 case 0xF7: 1310 1316 r = grprw[1][reg][0]; // Grp 3 1311 1317 w = grprw[1][reg][1]; 1312 1318 break; 1313 1319 1314 1320 case 0x0F: 1315 op2 = c->Iop 2;1321 op2 = c->Iop & 0xFF; 1316 1322 if ((op2 & 0xF0) == 0x80) // if Jxx instructions 1317 1323 { 1318 1324 ci->r = F | N; 1319 1325 ci->w = N; 1320 1326 goto Lret; 1321 1327 } 1322 1328 ci->r = N; 1323 1329 ci->w = N; // copout for now 1324 1330 goto Lret; 1325 1331 1326 1332 case 0xD7: // XLAT 1327 1333 ci->a = mAX | mBX; 1328 1334 break; 1329 1335 1330 1336 case 0xFF: 1331 1337 r = grprw[2][reg][0]; // Grp 5 1332 1338 w = grprw[2][reg][1]; 1333 1339 if (reg == 6) // PUSH rm 1334 1340 goto Lpush; 1335 1341 break; 
… … 1744 1750 Lnopair: 1745 1751 return 0; 1746 1752 } 1747 1753 1748 1754 /******************************************** 1749 1755 * Get next instruction worth looking at for scheduling. 1750 1756 * Returns: 1751 1757 * NULL no more instructions 1752 1758 */ 1753 1759 1754 1760 STATIC code * cnext(code *c) 1755 1761 { 1756 1762 while (1) 1757 1763 { 1758 1764 c = code_next(c); 1759 1765 if (!c) 1760 1766 break; 1761 1767 if (c->Iflags & (CFtarg | CFtarg2)) 1762 1768 break; 1763 1769 if (!(c->Iop == NOP || 1764 (c->Iop == ESCAPE && c->Iop2 ==ESClinnum)))1770 c->Iop == (ESCAPE | ESClinnum))) 1765 1771 break; 1766 1772 } 1767 1773 return c; 1768 1774 } 1769 1775 1770 1776 /****************************************** 1771 1777 * Instruction scheduler. 1772 1778 * Input: 1773 1779 * c list of instructions to schedule 1774 1780 * scratch scratch registers we can use 1775 1781 * Returns: 1776 1782 * revised list of scheduled instructions 1777 1783 */ 1778 1784 1779 1785 /////////////////////////////////// 1780 1786 // Determine if c1 and c2 are swappable. 1781 1787 // c1 comes before c2. 
1782 1788 // If they do not conflict 1783 1789 // return 0 1784 1790 // If they do conflict … … 1867 1873 w2 &= ~F; // remove conflict 1868 1874 goto L1; // and try again 1869 1875 } 1870 1876 #endif 1871 1877 // If other than the memory reference is a conflict 1872 1878 if (w1 & r2 & ~mMEM || (r1 | w1) & w2 & ~mMEM) 1873 1879 { if (i) printf("\t1\n"); 1874 1880 if (i) printf("r1=%x, w1=%x, a1=%x, sz1=%d, r2=%x, w2=%x, a2=%x, sz2=%d\n",r1,w1,a1,sz1,r2,w2,a2,sz2); 1875 1881 goto Lconflict; 1876 1882 } 1877 1883 1878 1884 // If referring to distinct types, then no dependency 1879 1885 if (c1->Irex && c2->Irex && c1->Irex != c2->Irex) 1880 1886 goto Lswap; 1881 1887 1882 1888 ifl1 = c1->IFL1; 1883 1889 ifl2 = c2->IFL1; 1884 1890 1885 1891 // Special case: Allow indexed references using registers other than 1886 1892 // ESP and EBP to be swapped with PUSH instructions 1887 if (((c1->Iop & 0xF8) == 0x50 ||// PUSH reg1893 if (((c1->Iop & ~7) == 0x50 || // PUSH reg 1888 1894 c1->Iop == 0x6A || // PUSH imm8 1889 1895 c1->Iop == 0x68 || // PUSH imm16/imm32 1890 1896 (c1->Iop == 0xFF && ci1->reg == 6) // PUSH EA 1891 1897 ) && 1892 1898 ci2->flags & CIFLea && !(a2 & mSP) && 1893 1899 !(a2 & mBP && (long)c2->IEVpointer1 < 0) 1894 1900 ) 1895 1901 { 1896 1902 if (c1->Iop == 0xFF) 1897 1903 { 1898 1904 if (!(w2 & mMEM)) 1899 1905 goto Lswap; 1900 1906 } 1901 1907 else 1902 1908 goto Lswap; 1903 1909 } 1904 1910 1905 1911 // Special case: Allow indexed references using registers other than 1906 1912 // ESP and EBP to be swapped with PUSH instructions 1907 if (((c2->Iop & 0xF8) == 0x50 ||// PUSH reg1913 if (((c2->Iop & ~7) == 0x50 || // PUSH reg 1908 1914 c2->Iop == 0x6A || // PUSH imm8 1909 1915 c2->Iop == 0x68 || // PUSH imm16/imm32 1910 1916 (c2->Iop == 0xFF && ci2->reg == 6) // PUSH EA 1911 1917 ) && 1912 1918 ci1->flags & CIFLea && !(a1 & mSP) && 1913 1919 !(a2 & mBP && (long)c2->IEVpointer1 < 0) 1914 1920 ) 1915 1921 { 1916 1922 if (c2->Iop == 0xFF) 1917 1923 { 1918 1924 if 
(!(w1 & mMEM)) 1919 1925 goto Lswap; 1920 1926 } 1921 1927 else 1922 1928 goto Lswap; 1923 1929 } 1924 1930 1925 1931 // If not both an EA addressing mode, conflict 1926 1932 if (!(ci1->flags & ci2->flags & CIFLea)) 1927 1933 { if (i) printf("\t2\n"); … … 2282 2288 ) 2283 2289 { 2284 2290 movesp = 1; // this is a MOV reg2,offset[ESP] 2285 2291 offset = (signed char)c->IEVpointer1; 2286 2292 reg2 = (c->Irm >> 3) & 7; 2287 2293 } 2288 2294 2289 2295 2290 2296 // Start at tblmax, and back up until we get a conflict 2291 2297 ic = -1; 2292 2298 imin = 0; 2293 2299 for (i = tblmax; i >= 0; i--) 2294 2300 { Cinfo *cit; 2295 2301 2296 2302 cit = tbl[i]; 2297 2303 if (!cit) 2298 2304 continue; 2299 2305 2300 2306 // Look for special case swap 2301 2307 if (movesp && 2302 (cit->c->Iop & 0xF8) == 0x50 &&// if PUSH reg12308 (cit->c->Iop & ~7) == 0x50 && // if PUSH reg1 2303 2309 (cit->c->Iop & 7) != reg2 && // if reg1 != reg2 2304 2310 ((signed char)c->IEVpointer1) >= -cit->spadjust 2305 2311 ) 2306 2312 { 2307 2313 c->IEVpointer1 += cit->spadjust; 2308 2314 //printf("\t1, spadjust = %d, ptr = x%x\n",cit->spadjust,c->IEVpointer1); 2309 2315 continue; 2310 2316 } 2311 2317 2312 2318 if (movesp && 2313 2319 cit->c->Iop == 0x83 && 2314 2320 cit->c->Irm == modregrm(3,5,SP) && // if SUB ESP,offset 2315 2321 cit->c->IFL2 == FLconst && 2316 2322 ((signed char)c->IEVpointer1) >= -cit->spadjust 2317 2323 ) 2318 2324 { 2319 2325 //printf("\t2, spadjust = %d\n",cit->spadjust); 2320 2326 c->IEVpointer1 += cit->spadjust; 2321 2327 continue; 2322 2328 } … … 2505 2511 c->IEVpointer1 -= cit->spadjust; 2506 2512 //printf("\t3, spadjust = %d, ptr = x%x\n",cit->spadjust,c->IEVpointer1); 2507 2513 } 2508 2514 } 2509 2515 } 2510 2516 if (i >= tblmax) 2511 2517 tblmax = i + 1; 2512 2518 2513 2519 // Now do a hack. Look back at immediately preceding instructions, 2514 2520 // and see if we can swap with a push. 
2515 2521 if (0 && movesp) 2516 2522 { int j; 2517 2523 2518 2524 while (1) 2519 2525 { 2520 2526 for (j = 1; i > j; j++) 2521 2527 if (tbl[i - j]) 2522 2528 break; 2523 2529 2524 2530 if (i >= j && tbl[i - j] && 2525 (tbl[i - j]->c->Iop & 0xF8) == 0x50 && // if PUSH reg12531 (tbl[i - j]->c->Iop & ~7) == 0x50 && // if PUSH reg1 2526 2532 (tbl[i - j]->c->Iop & 7) != reg2 && // if reg1 != reg2 2527 2533 (signed char)c->IEVpointer1 >= REGSIZE) 2528 2534 { 2529 2535 //printf("\t-4 prec, i-j=%d, i=%d\n",i-j,i); 2530 2536 assert((unsigned)i < TBLMAX); 2531 2537 assert((unsigned)(i - j) < TBLMAX); 2532 2538 tbl[i] = tbl[i - j]; 2533 2539 tbl[i - j] = ci; 2534 2540 i -= j; 2535 2541 c->IEVpointer1 -= REGSIZE; 2536 2542 } 2537 2543 else 2538 2544 break; 2539 2545 } 2540 2546 } 2541 2547 2542 2548 //printf("\tinsert\n"); 2543 2549 return 1; 2544 2550 } 2545 2551 … … 2647 2653 * Returns: 2648 2654 * next instruction (the tail) or 2649 2655 * NULL for no more instructions 2650 2656 */ 2651 2657 2652 2658 STATIC code * csnip(code *c) 2653 2659 { code **pc; 2654 2660 unsigned iflags; 2655 2661 2656 2662 if (c) 2657 2663 { iflags = c->Iflags & CFclassinit; 2658 2664 while (1) 2659 2665 { 2660 2666 pc = &code_next(c); 2661 2667 c = *pc; 2662 2668 if (!c) 2663 2669 break; 2664 2670 if (c->Iflags & (CFtarg | CFtarg2)) 2665 2671 break; 2666 2672 if (!(c->Iop == NOP || 2667 (c->Iop == ESCAPE && c->Iop2 ==ESClinnum) ||2673 c->Iop == (ESCAPE | ESClinnum) || 2668 2674 c->Iflags & iflags)) 2669 2675 break; 2670 2676 } 2671 2677 *pc = NULL; 2672 2678 } 2673 2679 return c; 2674 2680 } 2675 2681 2676 2682 2677 2683 /****************************** 2678 2684 * Schedule Pentium instructions, 2679 2685 * based on Steve Russell's algorithm. 
2680 2686 */ 2681 2687 2682 2688 code *schedule(code *c,regm_t scratch) 2683 2689 { 2684 2690 code *cresult = NULL; 2685 2691 code **pctail = &cresult; 2686 2692 Schedule sch; 2687 2693 2688 2694 sch.initialize(0); // initialize scheduling table 2689 2695 while (c) 2690 2696 { 2691 if ((c->Iop == NOP || c->Iop== ESCAPE || c->Iflags & CFclassinit) &&2697 if ((c->Iop == NOP || (c->Iop & 0xFF) == ESCAPE || c->Iflags & CFclassinit) && 2692 2698 !(c->Iflags & (CFtarg | CFtarg2))) 2693 2699 { code *cn; 2694 2700 2695 2701 // Just append this instruction to pctail and go to the next one 2696 2702 *pctail = c; 2697 2703 cn = code_next(c); 2698 2704 code_next(c) = NULL; 2699 2705 pctail = &code_next(c); 2700 2706 c = cn; 2701 2707 continue; 2702 2708 } 2703 2709 2704 2710 //printf("init\n"); 2705 2711 sch.initialize(sch.fpustackused); // initialize scheduling table 2706 2712 2707 2713 while (c) 2708 2714 { 2709 2715 //printf("insert %p\n",c); 2710 2716 if (!sch.stage(c)) // store c in scheduling table 2711 2717 break; … … 2834 2840 code *c; 2835 2841 code *c1,*c2,*c3; 2836 2842 unsigned r1,r2; 2837 2843 unsigned mod,reg,rm; 2838 2844 2839 2845 //printf("peephole\n"); 2840 2846 for (c = cstart; c; c = c1) 2841 2847 { unsigned char rmi; 2842 2848 unsigned char rmn; 2843 2849 2844 2850 //c->print(); 2845 2851 c1 = cnext(c); 2846 2852 Ln: 2847 2853 if (!c1) 2848 2854 break; 2849 2855 if (c1->Iflags & (CFtarg | CFtarg2)) 2850 2856 continue; 2851 2857 2852 2858 // Do: 2853 2859 // PUSH reg 2854 if (I32 && (c->Iop & 0xF8) == 0x50)2860 if (I32 && (c->Iop & ~7) == 0x50) 2855 2861 { unsigned reg = c->Iop & 7; 2856 2862 2857 2863 // MOV [ESP],reg => NOP 2858 2864 if (c1->Iop == 0x8B && 2859 2865 c1->Irm == modregrm(0,reg,4) && 2860 2866 c1->Isib == modregrm(0,4,SP)) 2861 2867 { c1->Iop = NOP; 2862 2868 continue; 2863 2869 } 2864 2870 2865 2871 // PUSH [ESP] => PUSH reg 2866 2872 if (c1->Iop == 0xFF && 2867 2873 c1->Irm == modregrm(0,6,4) && 2868 2874 c1->Isib == modregrm(0,4,SP)) 2869 
2875 { c1->Iop = 0x50 + reg; 2870 2876 continue; 2871 2877 } 2872 2878 2873 2879 // CMP [ESP],imm => CMP reg,i,, 2874 2880 if (c1->Iop == 0x83 && branches/dmd-1.x/src/backend/cod1.c
r595 r596 538 538 539 539 /****************************** 540 540 * Routine to aid in setting things up for gen(). 541 541 * Look for common subexpression. 542 542 * Can handle indirection operators, but not if they're common subs. 543 543 * Input: 544 544 * e -> elem where we get some of the data from 545 545 * cs -> partially filled code to add 546 546 * op = opcode 547 547 * reg = reg field of (mod reg r/m) 548 548 * offset = data to be added to Voffset field 549 549 * keepmsk = mask of registers we must not destroy 550 550 * desmsk = mask of registers destroyed by executing the instruction 551 551 * Returns: 552 552 * pointer to code generated 553 553 */ 554 554 555 555 code *loadea(elem *e,code *cs,unsigned op,unsigned reg,targ_size_t offset, 556 556 regm_t keepmsk,regm_t desmsk) 557 557 { 558 code *c,*cg,*cd ,*cprefix;558 code *c,*cg,*cd; 559 559 560 560 #ifdef DEBUG 561 561 if (debugw) 562 562 printf("loadea: e=%p cs=%p op=x%x reg=%d offset=%lld keepmsk=x%x desmsk=x%x\n", 563 563 e,cs,op,reg,(unsigned long long)offset,keepmsk,desmsk); 564 564 #endif 565 565 566 566 assert(e); 567 567 cs->Iflags = 0; 568 568 cs->Irex = 0; 569 cprefix = NULL; 570 if (op > 0xFF) // if 2 byte opcode 571 cprefix = setOpcode(NULL, cs, op); 572 else 573 cs->Iop = op; 569 cs->Iop = op; 574 570 tym_t tym = e->Ety; 575 571 int sz = tysize(tym); 576 572 577 573 /* Determine if location we want to get is in a register. If so, */ 578 574 /* substitute the register for the EA. */ 579 575 /* Note that operators don't go through this. CSE'd operators are */ 580 576 /* picked up by comsub(). 
*/ 581 577 if (e->Ecount && /* if cse */ 582 578 e->Ecount != e->Ecomsub && /* and cse was generated */ 583 579 op != 0x8D && op != 0xC4 && /* and not an LEA or LES */ 584 580 (op != 0xFF || reg != 3) && /* and not CALLF MEM16 */ 585 581 (op & 0xFFF8) != 0xD8) // and not 8087 opcode 586 582 { 587 583 assert(!EOP(e)); /* can't handle this */ 588 584 regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs 589 585 if (sz > REGSIZE) // value is in 2 or 4 registers 590 586 { 591 587 if (I16 && sz == 8) // value is in 4 registers 592 588 { static regm_t rmask[4] = { mDX,mCX,mBX,mAX }; 593 589 rm &= rmask[offset >> 1]; … … 648 644 cs->Iflags |= CFwait; 649 645 L2: 650 646 cg = getregs(desmsk); /* save any regs we destroy */ 651 647 652 648 /* KLUDGE! fix up DX for divide instructions */ 653 649 cd = CNIL; 654 650 if (op == 0xF7 && desmsk == (mAX|mDX)) /* if we need to fix DX */ 655 651 { if (reg == 7) /* if IDIV */ 656 652 { cd = gen1(cd,0x99); // CWD 657 653 if (I64 && sz == 8) 658 654 code_orrex(cd, REX_W); 659 655 } 660 656 else if (reg == 6) // if DIV 661 657 { cd = genregs(cd,0x33,DX,DX); // XOR DX,DX 662 658 if (I64 && sz == 8) 663 659 code_orrex(cd, REX_W); 664 660 } 665 661 } 666 662 667 663 // Eliminate MOV reg,reg 668 if ((cs->Iop & 0xFC) == 0x88 &&664 if ((cs->Iop & ~3) == 0x88 && 669 665 (cs->Irm & 0xC7) == modregrm(3,0,reg & 7)) 670 666 { 671 667 unsigned r = cs->Irm & 7; 672 668 if (cs->Irex & REX_B) 673 669 r |= 8; 674 670 if (r == reg) 675 671 cs->Iop = NOP; 676 672 } 677 673 678 return cat4(c,cg,cd,gen( cprefix,cs));674 return cat4(c,cg,cd,gen(NULL,cs)); 679 675 } 680 676 681 677 /************************** 682 678 * Get addressing mode. 683 679 */ 684 680 685 681 unsigned getaddrmode(regm_t idxregs) 686 682 { 687 683 unsigned mode; 688 684 689 685 if (I16) 690 686 { 691 687 mode = (idxregs & mBX) ? modregrm(2,0,7) : /* [BX] */ 692 688 (idxregs & mDI) ? modregrm(2,0,5): /* [DI] */ 693 689 (idxregs & mSI) ? 
modregrm(2,0,4): /* [SI] */ 694 690 (assert(0),1); 695 691 } 696 692 else 697 693 { unsigned reg = findreg(idxregs & (ALLREGS | mBP)); 698 694 mode = modregrmx(2,0,reg); … … 791 787 if (tyfloating(ty)) 792 788 obj_fltused(); 793 789 else if (I64 && sz == 8) 794 790 pcs->Irex |= REX_W; 795 791 if (!I16 && sz == SHORTSIZE) 796 792 pcs->Iflags |= CFopsize; 797 793 if (ty & mTYvolatile) 798 794 pcs->Iflags |= CFvolatile; 799 795 c = CNIL; 800 796 switch (fl) 801 797 { 802 798 #if 0 && TARGET_LINUX 803 799 case FLgot: 804 800 case FLgotoff: 805 801 gotref = 1; 806 802 pcs->IEVsym1 = s; 807 803 pcs->IEVoffset1 = e->EV.sp.Voffset; 808 804 if (e->Eoper == OPvar && fl == FLgot) 809 805 { 810 806 code *c1; 811 int saveop = pcs->Iop; 807 unsigned saveop = pcs->Iop; 812 808 idxregs = allregs & ~keepmsk; // get a scratch register 813 809 c = allocreg(&idxregs,&reg,TYptr); 814 810 pcs->Irm = modregrm(2,reg,BX); // BX has GOT 815 811 pcs->Isib = 0; 816 812 //pcs->Iflags |= CFvolatile; 817 813 pcs->Iop = 0x8B; 818 814 c = gen(c,pcs); // MOV reg,disp[EBX] 819 815 pcs->Irm = modregrm(0,0,reg); 820 816 pcs->IEVoffset1 = 0; 821 817 pcs->Iop = saveop; 822 818 } 823 819 else 824 820 { 825 821 pcs->Irm = modregrm(2,0,BX); // disp[EBX] is addr 826 822 pcs->Isib = 0; 827 823 } 828 824 break; 829 825 #endif 830 826 case FLoper: 831 827 #ifdef DEBUG … … 1716 1712 else if (sz == 8) 1717 1713 code_orrex(ce, REX_W); 1718 1714 } 1719 1715 else 1720 1716 ce = gentstreg(ce,reg); // TEST reg,reg 1721 1717 return ce; 1722 1718 } 1723 1719 if (saveflag || tyfv(tym)) 1724 1720 { 1725 1721 scrregm = ALLREGS & ~regm; /* possible scratch regs */ 1726 1722 ce = allocreg(&scrregm,&scrreg,TYoffset); /* allocate scratch reg */ 1727 1723 if (I32 || sz == REGSIZE * 2) 1728 1724 { code *c; 1729 1725 1730 1726 assert(regm & mMSW && regm & mLSW); 1731 1727 1732 1728 reg = findregmsw(regm); 1733 1729 if (I32) 1734 1730 { 1735 1731 if (tyfv(tym)) 1736 { c = genregs(CNIL,0x0F,scrreg,reg); 1737 c->Iop2 = 0xB7; /* MOVZX
scrreg,msreg */ 1732 { c = genregs(CNIL,0x0FB7,scrreg,reg); // MOVZX scrreg,msreg 1738 1733 ce = cat(ce,c); 1739 1734 } 1740 1735 else 1741 1736 { ce = genmovreg(ce,scrreg,reg); /* MOV scrreg,msreg */ 1742 1737 if (tym == TYdouble || tym == TYdouble_alias) 1743 1738 gen2(ce,0xD1,modregrm(3,4,scrreg)); /* SHL scrreg,1 */ 1744 1739 } 1745 1740 } 1746 1741 else 1747 1742 { 1748 1743 ce = genmovreg(ce,scrreg,reg); /* MOV scrreg,msreg */ 1749 1744 if (tym == TYfloat) 1750 1745 gen2(ce,0xD1,modregrm(3,4,scrreg)); /* SHL scrreg,1 */ 1751 1746 } 1752 1747 reg = findreglsw(regm); 1753 1748 genorreg(ce,scrreg,reg); /* OR scrreg,lsreg */ 1754 1749 } 1755 1750 else if (sz == 8) 1756 1751 { /* !I32 */ 1757 1752 ce = genmovreg(ce,scrreg,AX); /* MOV scrreg,AX */ … … 3575 3570 /* FALSE: CMP SP,SP (always equal) */ 3576 3571 c = genregs(CNIL,(boolres(e)) ? 0x09 : 0x39,SP,SP); 3577 3572 } 3578 3573 else if (sz <= REGSIZE) 3579 3574 { 3580 3575 if (!I16 && (tym == TYfloat || tym == TYifloat)) 3581 3576 { c = allocreg(&regm,&reg,TYoffset); /* get a register */ 3582 3577 ce = loadea(e,&cs,0x8B,reg,0,0,0); // MOV reg,data 3583 3578 c = cat(c,ce); 3584 3579 ce = gen2(CNIL,0xD1,modregrmx(3,4,reg)); /* SHL reg,1 */ 3585 3580 c = cat(c,ce); 3586 3581 } 3587 3582 else 3588 3583 { cs.IFL2 = FLconst; 3589 3584 cs.IEV2.Vint = 0; 3590 3585 op = (sz == 1) ?
0x80 : 0x81; 3591 3586 c = loadea(e,&cs,op,7,0,0,0); /* CMP EA,0 */ 3592 3587 3593 3588 // Convert to TEST instruction if EA is a register 3594 3589 // (to avoid register contention on Pentium) 3595 if ((c->Iop & 0xFE) == 0x38 && 3590 if ((c->Iop & ~1) == 0x38 && 3596 3591 (c->Irm & modregrm(3,0,0)) == modregrm(3,0,0) 3597 3592 ) 3598 3593 { c->Iop = (c->Iop & 1) | 0x84; 3599 3594 code_newreg(c, c->Irm & 7); 3600 3595 if (c->Irex & REX_B) 3601 3596 c->Irex = (c->Irex & ~REX_B) | REX_R; 3602 3597 } 3603 3598 } 3604 3599 } 3605 3600 else if (sz < 8) 3606 3601 { 3607 3602 c = allocreg(&regm,&reg,TYoffset); /* get a register */ 3608 3603 if (I32) // it's a 48 bit pointer 3609 3604 ce = loadea(e,&cs,0x0FB7,reg,REGSIZE,0,0); /* MOVZX reg,data+4 */ 3610 3605 else 3611 3606 { ce = loadea(e,&cs,0x8B,reg,REGSIZE,0,0); /* MOV reg,data+2 */ 3612 3607 if (tym == TYfloat || tym == TYifloat) // dump sign bit 3613 3608 gen2(ce,0xD1,modregrm(3,4,reg)); /* SHL reg,1 */ 3614 3609 } 3615 3610 c = cat(c,ce); branches/dmd-1.x/src/backend/cod2.c
r589 r596 1697 1697 freenode(e2); 1698 1698 c = cat6(cc,c,c2,cnop1,fixresult(e,retregs,pretregs),NULL); 1699 1699 goto Lret; 1700 1700 } 1701 1701 1702 1702 cnop1 = gennop(CNIL); 1703 1703 cnop2 = gennop(CNIL); /* dummy target addresses */ 1704 1704 c = logexp(e1,FALSE,FLcode,cnop1); /* evaluate condition */ 1705 1705 regconold = regcon; 1706 1706 stackusedold = stackused; 1707 1707 stackpushold = stackpush; 1708 1708 memcpy(_8087old,_8087elems,sizeof(_8087elems)); 1709 1709 c1 = codelem(e21,pretregs,FALSE); 1710 1710 1711 1711 #if SCPP 1712 1712 if (CPP && e2->Eoper == OPcolon2) 1713 1713 { code cs; 1714 1714 1715 1715 // This is necessary so that any cleanup code on one branch 1716 1716 // is redone on the other branch. 1717 cs.Iop = ESCAPE; 1718 cs.Iop2 = ESCmark2; 1717 cs.Iop = ESCAPE | ESCmark2; 1719 1718 cs.Iflags = 0; 1720 1719 cs.Irex = 0; 1721 1720 c1 = cat(gen(CNIL,&cs),c1); 1722 cs.Iop 2 =ESCrelease2;1721 cs.Iop = ESCAPE | ESCrelease2; 1723 1722 c1 = gen(c1,&cs); 1724 1723 } 1725 1724 #endif 1726 1725 1727 1726 regconsave = regcon; 1728 1727 regcon = regconold; 1729 1728 1730 1729 stackpushsave = stackpush; 1731 1730 stackpush = stackpushold; 1732 1731 1733 1732 stackusedsave = stackused; 1734 1733 stackused = stackusedold; 1735 1734 1736 1735 memcpy(_8087save,_8087elems,sizeof(_8087elems)); 1737 1736 memcpy(_8087elems,_8087old,sizeof(_8087elems)); 1738 1737 1739 1738 *pretregs |= psw; /* PSW bit may have been trashed */ 1740 1739 c2 = codelem(e22,pretregs,FALSE); /* use same regs as E1 */ 1741 1740 andregcon(®conold); 1742 1741 andregcon(®consave); … … 2412 2411 { 2413 2412 if (!I16 && tym == TYfloat) 2414 2413 { retregs = ALLREGS & ~idxregs; 2415 2414 c = cat(c,allocreg(&retregs,®,TYfloat)); 2416 2415 cs.Iop = 0x8B; 2417 2416 code_newreg(&cs,reg); 2418 2417 ce = gen(CNIL,&cs); // MOV reg,lsw 2419 2418 gen2(ce,0xD1,modregrmx(3,4,reg)); // SHL reg,1 2420 2419 } 2421 2420 else if (sz <= REGSIZE) 2422 2421 { 2423 2422 cs.Iop = 0x81 ^ byte; 2424 2423 cs.Irm 
|= modregrm(0,7,0); 2425 2424 cs.IFL2 = FLconst; 2426 2425 cs.IEV2.Vint = 0; 2427 2426 ce = gen(CNIL,&cs); /* CMP [idx],0 */ 2428 2427 } 2429 2428 else if (!I16 && sz == REGSIZE + 2) // if far pointer 2430 2429 { retregs = ALLREGS & ~idxregs; 2431 2430 c = cat(c,allocreg(&retregs,®,TYint)); 2432 cs.Iop = 0x0F; 2433 cs.Iop2 = 0xB7; 2431 cs.Iop = 0x0FB7; 2434 2432 cs.Irm |= modregrm(0,reg,0); 2435 2433 getlvalue_msw(&cs); 2436 2434 ce = gen(CNIL,&cs); /* MOVZX reg,msw */ 2437 2435 goto L4; 2438 2436 } 2439 2437 else if (sz <= 2 * REGSIZE) 2440 2438 { retregs = ALLREGS & ~idxregs; 2441 2439 c = cat(c,allocreg(&retregs,®,TYint)); 2442 2440 cs.Iop = 0x8B; 2443 2441 cs.Irm |= modregrm(0,reg,0); 2444 2442 getlvalue_msw(&cs); 2445 2443 ce = gen(CNIL,&cs); /* MOV reg,msw */ 2446 2444 if (I32) 2447 2445 { if (tym == TYdouble || tym == TYdouble_alias) 2448 2446 gen2(ce,0xD1,modregrm(3,4,reg)); // SHL reg,1 2449 2447 } 2450 2448 else if (tym == TYfloat) 2451 2449 gen2(ce,0xD1,modregrm(3,4,reg)); /* SHL reg,1 */ 2452 2450 L4: cs.Iop = 0x0B; 2453 2451 getlvalue_lsw(&cs); … … 4514 4512 if (0 && config.exe == EX_NT) 4515 4513 { unsigned idx; 4516 4514 4517 4515 idx = except_index_get(); 4518 4516 except_mark(); 4519 4517 c = codelem(e->E2,pretregs,FALSE); 4520 4518 if (config.exe == EX_NT && idx != except_index_get()) 4521 4519 { usednteh |= NTEHcleanup; 4522 4520 c = cat(c,nteh_gensindex(idx - 1)); 4523 4521 } 4524 4522 except_release(); 4525 4523 assert(idx == except_index_get()); 4526 4524 } 4527 4525 else 4528 4526 { 4529 4527 #if 0 4530 4528 usednteh |= EHcleanup; 4531 4529 if (config.exe == EX_NT) 4532 4530 usednteh |= NTEHcleanup; 4533 4531 #endif 4534 cs.Iop = ESCAPE; 4535 cs.Iop2 = ESCmark; 4532 cs.Iop = ESCAPE | ESCmark; 4536 4533 cs.Iflags = 0; 4537 4534 cs.Irex = 0; 4538 4535 c = gen(CNIL,&cs); 4539 4536 c = cat(c,codelem(e->E2,pretregs,FALSE)); 4540 cs.Iop 2 =ESCrelease;4537 cs.Iop = ESCAPE | ESCrelease; 4541 4538 gen(c,&cs); 4542 4539 } 4543 4540 freenode(e->E1); 
4544 4541 break; 4545 4542 default: 4546 4543 assert(0); 4547 4544 } 4548 4545 return c; 4549 4546 #else 4550 4547 return NULL; 4551 4548 #endif 4552 4549 } 4553 4550 4554 4551 code *cdctor(elem *e,regm_t *pretregs) 4555 4552 { 4556 4553 #if SCPP 4557 4554 code cs; 4558 4555 code *c; 4559 4556 4560 4557 #if 0 4561 4558 if (config.exe == EX_NT) 4562 4559 { usednteh |= NTEHcleanup; 4563 4560 except_push(NULL,e,NULL); 4564 4561 return nteh_gensindex(except_index_get() - 1); 4565 4562 } 4566 4563 #else 4567 4564 usednteh |= EHcleanup; 4568 4565 if (config.exe == EX_NT) 4569 4566 usednteh |= NTEHcleanup; 4570 4567 #endif 4571 4568 assert(*pretregs == 0); 4572 cs.Iop = ESCAPE; 4573 cs.Iop2 = ESCctor; 4569 cs.Iop = ESCAPE | ESCctor; 4574 4570 cs.Iflags = 0; 4575 4571 cs.Irex = 0; 4576 4572 cs.IFL1 = FLctor; 4577 4573 cs.IEV1.Vtor = e; 4578 4574 c = gen(CNIL,&cs); 4579 4575 //except_push(c,e,NULL); 4580 4576 return c; 4581 4577 #else 4582 4578 return NULL; 4583 4579 #endif 4584 4580 } 4585 4581 4586 4582 code *cddtor(elem *e,regm_t *pretregs) 4587 4583 { 4588 4584 #if SCPP 4589 4585 code cs; 4590 4586 code *c; 4591 4587 4592 4588 #if 0 4593 4589 if (config.exe == EX_NT) 4594 4590 { usednteh |= NTEHcleanup; 4595 4591 except_pop(NULL,e,NULL); 4596 4592 return nteh_gensindex(except_index_get() - 1); 4597 4593 } 4598 4594 #else 4599 4595 usednteh |= EHcleanup; 4600 4596 if (config.exe == EX_NT) 4601 4597 usednteh |= NTEHcleanup; 4602 4598 #endif 4603 4599 assert(*pretregs == 0); 4604 cs.Iop = ESCAPE; 4605 cs.Iop2 = ESCdtor; 4600 cs.Iop = ESCAPE | ESCdtor; 4606 4601 cs.Iflags = 0; 4607 4602 cs.Irex = 0; 4608 4603 cs.IFL1 = FLdtor; 4609 4604 cs.IEV1.Vtor = e; 4610 4605 c = gen(CNIL,&cs); 4611 4606 //except_pop(c,e,NULL); 4612 4607 return c; 4613 4608 #else 4614 4609 return NULL; 4615 4610 #endif 4616 4611 } 4617 4612 4618 4613 code *cdmark(elem *e,regm_t *pretregs) 4619 4614 { 4620 4615 return NULL; 4621 4616 } 4622 4617 4623 4618 #if !NTEXCEPTIONS 4624 4619 code *cdsetjmp(elem 
*e,regm_t *pretregs) 4625 4620 { branches/dmd-1.x/src/backend/cod3.c
r595 r596 201 201 2,2,2,M|3, M|T|E|4,M|3,M|3,M|3, // A8 202 202 M|E|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // B0 203 203 2,2,M|T|E|4,M|3, M|3,M|3,M|3,M|3, // B8 204 204 M|3,M|3,M|T|E|4,M|3, M|T|E|4,M|T|E|4,M|T|E|4,M|3, // C0 205 205 2,2,2,2, 2,2,2,2, // C8 206 206 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D0 207 207 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D8 208 208 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E0 209 209 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E8 210 210 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // F0 211 211 M|3,M|3,M|3,M|3, M|3,M|3,M|3,2 // F8 212 212 }; 213 213 214 214 /************************************ 215 215 * Determine if there is a modregrm byte for code. 216 216 */ 217 217 218 218 int cod3_EA(code *c) 219 219 { unsigned ins; 220 220 221 switch (c->Iop) 222 { case ESCAPE: 223 ins = 0; 224 break; 225 case 0x0F: 226 ins = inssize2[c->Iop2]; 227 break; 228 default: 229 ins = inssize[c->Iop]; 230 break; 231 } 221 unsigned op1 = c->Iop & 0xFF; 222 if (op1 == ESCAPE) 223 ins = 0; 224 else if ((c->Iop & 0xFF00) == 0x0F00) 225 ins = inssize2[op1]; 226 else 227 ins = inssize[op1]; 232 228 return ins & M; 233 229 } 234 230 235 231 /******************************** 236 232 * Fix global variables for 386. 
237 233 */ 238 234 239 235 void cod3_set386() 240 236 { 241 237 // if (I32) 242 238 { unsigned i; 243 239 244 240 inssize[0xA0] = T|5; 245 241 inssize[0xA1] = T|5; 246 242 inssize[0xA2] = T|5; 247 243 inssize[0xA3] = T|5; 248 244 BPRM = 5; /* [EBP] addressing mode */ 249 245 fregsaved = mBP | mBX | mSI | mDI; // saved across function calls 250 246 FLOATREGS = FLOATREGS_32; 251 247 FLOATREGS2 = FLOATREGS2_32; … … 1054 1050 return mBP; 1055 1051 1056 1052 Lcant: 1057 1053 return 0; 1058 1054 } 1059 1055 1060 1056 /*************************************** 1061 1057 * Gen code for OPframeptr 1062 1058 */ 1063 1059 1064 1060 code *cdframeptr(elem *e, regm_t *pretregs) 1065 1061 { 1066 1062 unsigned reg; 1067 1063 code cs; 1068 1064 1069 1065 regm_t retregs = *pretregs & allregs; 1070 1066 if (!retregs) 1071 1067 retregs = allregs; 1072 1068 code *cg = allocreg(&retregs, &reg, TYint); 1073 1069 1074 cs.Iop = ESCAPE; 1075 cs.Iop2 = ESCframeptr; 1070 cs.Iop = ESCAPE | ESCframeptr; 1076 1071 cs.Iflags = 0; 1077 1072 cs.Irex = 0; 1078 1073 cs.Irm = reg; 1079 1074 cg = gen(cg,&cs); 1080 1075 1081 1076 return cat(cg,fixresult(e,retregs,pretregs)); 1082 1077 } 1083 1078 1084 1079 /*************************************** 1085 1080 * Gen code for load of _GLOBAL_OFFSET_TABLE_. 1086 1081 * This value gets cached in the local variable 'localgot'. 
1087 1082 */ 1088 1083 1089 1084 code *cdgot(elem *e, regm_t *pretregs) 1090 1085 { 1091 1086 #if TARGET_OSX 1092 1087 regm_t retregs; 1093 1088 unsigned reg; 1094 1089 code *c; 1095 1090 … … 2303 2298 2304 2299 int branch(block *bl,int flag) 2305 2300 { int bytesaved; 2306 2301 code *c,*cn,*ct; 2307 2302 targ_size_t offset,disp; 2308 2303 targ_size_t csize; 2309 2304 2310 2305 if (!flag) 2311 2306 bl->Bflags |= BFLjmpoptdone; // assume this will be all 2312 2307 c = bl->Bcode; 2313 2308 if (!c) 2314 2309 return 0; 2315 2310 bytesaved = 0; 2316 2311 offset = bl->Boffset; /* offset of start of block */ 2317 2312 while (1) 2318 2313 { unsigned char op; 2319 2314 2320 2315 csize = calccodsize(c); 2321 2316 cn = code_next(c); 2322 2317 op = c->Iop; 2323 if ((op & 0xF0) == 0x70 && c->Iflags & CFjmp16 ||2318 if ((op & ~0x0F) == 0x70 && c->Iflags & CFjmp16 || 2324 2319 op == JMP) 2325 2320 { 2326 2321 L1: 2327 2322 switch (c->IFL2) 2328 2323 { 2329 2324 case FLblock: 2330 2325 if (flag) // no offsets yet, don't optimize 2331 2326 goto L3; 2332 2327 disp = c->IEV2.Vblock->Boffset - offset - csize; 2333 2328 2334 2329 /* If this is a forward branch, and there is an aligned 2335 2330 * block intervening, it is possible that shrinking 2336 2331 * the jump instruction will cause it to be out of 2337 2332 * range of the target. This happens if the alignment 2338 2333 * prevents the target block from moving correspondingly 2339 2334 * closer. 2340 2335 */ 2341 2336 if (disp >= 0x7F-4 && c->IEV2.Vblock->Boffset > offset) 2342 2337 { /* Look for intervening alignment 2343 2338 */ … … 2371 2366 2372 2367 if (!cr) 2373 2368 { // Didn't find it in forward search. 
Try backwards jump 2374 2369 int s = 0; 2375 2370 disp = 0; 2376 2371 for (cr = bl->Bcode; cr != cn; cr = code_next(cr)) 2377 2372 { 2378 2373 assert(cr != NULL); // must have found it 2379 2374 if (cr == ct) 2380 2375 s = 1; 2381 2376 if (s) 2382 2377 disp += calccodsize(cr); 2383 2378 } 2384 2379 } 2385 2380 2386 2381 if (config.flags4 & CFG4optimized && !flag) 2387 2382 { 2388 2383 /* Propagate branch forward past junk */ 2389 2384 while (1) 2390 2385 { if (ct->Iop == NOP || 2391 (ct->Iop == ESCAPE && ct->Iop2 ==ESClinnum))2386 ct->Iop == (ESCAPE | ESClinnum)) 2392 2387 { ct = code_next(ct); 2393 2388 if (!ct) 2394 2389 goto L2; 2395 2390 } 2396 2391 else 2397 2392 { c->IEV2.Vcode = ct; 2398 2393 ct->Iflags |= CFtarg; 2399 2394 break; 2400 2395 } 2401 2396 } 2402 2397 2403 2398 /* And eliminate jmps to jmps */ 2404 2399 if ((op == ct->Iop || ct->Iop == JMP) && 2405 2400 (op == JMP || c->Iflags & CFjmp16)) 2406 2401 { c->IFL2 = ct->IFL2; 2407 2402 c->IEV2.Vcode = ct->IEV2.Vcode; 2408 2403 /*printf("eliminating branch\n");*/ 2409 2404 goto L1; 2410 2405 } 2411 2406 L2: ; … … 2432 2427 if (op == JMP) 2433 2428 { c->Iop = JMPS; // JMP SHORT 2434 2429 bytesaved += I16 ? 1 : 3; 2435 2430 } 2436 2431 else // else Jcond 2437 2432 { c->Iflags &= ~CFjmp16; // a branch is ok 2438 2433 bytesaved += I16 ? 3 : 4; 2439 2434 2440 2435 // Replace a cond jump around a call to a function that 2441 2436 // never returns with a cond jump to that function. 2442 2437 if (config.flags4 & CFG4optimized && 2443 2438 config.target_cpu >= TARGET_80386 && 2444 2439 disp == (I16 ? 
3 : 5) && 2445 2440 cn && 2446 2441 cn->Iop == 0xE8 && 2447 2442 cn->IFL2 == FLfunc && 2448 2443 cn->IEVsym2->Sflags & SFLexit && 2449 2444 !(cn->Iflags & (CFtarg | CFtarg2)) 2450 2445 ) 2451 2446 { 2452 cn->Iop = 0x0F; 2453 cn->Iop2 = (c->Iop & 0x0F) ^ 0x81; 2447 cn->Iop = 0x0F00 | ((c->Iop & 0x0F) ^ 0x81); 2454 2448 c->Iop = NOP; 2455 2449 c->IEV2.Vcode = NULL; 2456 2450 bytesaved++; 2457 2451 2458 2452 // If nobody else points to ct, we can remove the CFtarg 2459 2453 if (flag && ct) 2460 2454 { code *cx; 2461 2455 2462 2456 for (cx = bl->Bcode; 1; cx = code_next(cx)) 2463 2457 { 2464 2458 if (!cx) 2465 2459 { ct->Iflags &= ~CFtarg; 2466 2460 break; 2467 2461 } 2468 2462 if (cx->IEV2.Vcode == ct) 2469 2463 break; 2470 2464 } 2471 2465 } 2472 2466 } 2473 2467 } … … 2560 2554 2561 2555 void assignaddrc(code *c) 2562 2556 { 2563 2557 int sn; 2564 2558 symbol *s; 2565 2559 unsigned char ins,rm; 2566 2560 targ_size_t soff; 2567 2561 targ_size_t base; 2568 2562 2569 2563 base = EBPtoESP; 2570 2564 for (; c; c = code_next(c)) 2571 2565 { 2572 2566 #ifdef DEBUG 2573 2567 if (0) 2574 2568 { printf("assignaddrc()\n"); 2575 2569 c->print(); 2576 2570 } 2577 2571 if (code_next(c) && code_next(code_next(c)) == c) 2578 2572 assert(0); 2579 2573 #endif 2580 if ( c->Iop == 0x0F)2581 ins = inssize2[c->Iop 2];2582 else if ( c->Iop== ESCAPE)2583 { 2584 if (c->Iop 2 == ESCadjesp)2574 if ((c->Iop & 0xFF00) == 0x0F00) 2575 ins = inssize2[c->Iop & 0xFF]; 2576 else if ((c->Iop & 0xFF) == ESCAPE) 2577 { 2578 if (c->Iop == (ESCAPE | ESCadjesp)) 2585 2579 { 2586 2580 //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,c->IEV2.Vint); 2587 2581 EBPtoESP += c->IEV2.Vint; 2588 2582 c->Iop = NOP; 2589 2583 } 2590 if (c->Iop 2 == ESCframeptr)2584 if (c->Iop == (ESCAPE | ESCframeptr)) 2591 2585 { // Convert to load of frame pointer 2592 2586 // c->Irm is the register to use 2593 2587 if (hasframe) 2594 2588 { // MOV reg,EBP 2595 2589 c->Iop = 0x89; 2596 2590 if (c->Irm & 8) 2597 2591 c->Irex |= 
REX_B; 2598 2592 c->Irm = modregrm(3,BP,c->Irm & 7); 2599 2593 } 2600 2594 else 2601 2595 { // LEA reg,EBPtoESP[ESP] 2602 2596 c->Iop = 0x8D; 2603 2597 if (c->Irm & 8) 2604 2598 c->Irex |= REX_R; 2605 2599 c->Irm = modregrm(2,c->Irm & 7,4); 2606 2600 c->Isib = modregrm(0,4,SP); 2607 2601 c->Iflags = CFoff; 2608 2602 c->IFL1 = FLconst; 2609 2603 c->IEV1.Vuns = EBPtoESP; 2610 2604 } 2611 2605 } 2612 2606 if (I64) 2613 2607 c->Irex |= REX_W; 2614 2608 continue; 2615 2609 } 2616 2610 else 2617 ins = inssize[c->Iop ];2611 ins = inssize[c->Iop & 0xFF]; 2618 2612 if (!(ins & M) || 2619 2613 ((rm = c->Irm) & 0xC0) == 0xC0) 2620 2614 goto do2; /* if no first operand */ 2621 2615 if (is32bitaddr(I32,c->Iflags)) 2622 2616 { 2623 2617 2624 2618 if ( 2625 2619 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5)) 2626 2620 ) 2627 2621 goto do2; /* if no first operand */ 2628 2622 } 2629 2623 else 2630 2624 { 2631 2625 if ( 2632 2626 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 2633 2627 ) 2634 2628 goto do2; /* if no first operand */ 2635 2629 } 2636 2630 s = c->IEVsym1; 2637 2631 switch (c->IFL1) … … 2943 2937 if (debugc) 2944 2938 { 2945 2939 printf("+pinholeopt(%p)\n",c); 2946 2940 } 2947 2941 #endif 2948 2942 2949 2943 if (b) 2950 2944 { bn = b->Bnext; 2951 2945 usespace = (config.flags4 & CFG4space && b->BC != BCasm); 2952 2946 useopsize = (I16 || (config.flags4 & CFG4space && b->BC != BCasm)); 2953 2947 } 2954 2948 else 2955 2949 { bn = NULL; 2956 2950 usespace = (config.flags4 & CFG4space); 2957 2951 useopsize = (I16 || config.flags4 & CFG4space); 2958 2952 } 2959 2953 for (; c; c = code_next(c)) 2960 2954 { 2961 2955 L1: 2962 2956 op = c->Iop; 2963 if ( op == 0x0F)2964 ins = inssize2[ c->Iop2];2957 if ((op & 0xFF00) == 0x0F00) 2958 ins = inssize2[op & 0xFF]; 2965 2959 else 2966 ins = inssize[ c->Iop];2960 ins = inssize[op & 0xFF]; 2967 2961 if (ins & M) // if modregrm byte 2968 2962 { int shortop = (c->Iflags & CFopsize) ? 
!I16 : I16; 2969 2963 int local_BPRM = BPRM; 2970 2964 2971 2965 if (c->Iflags & CFaddrsize) 2972 2966 local_BPRM ^= 5 ^ 6; // toggle between 5 and 6 2973 2967 2974 2968 unsigned rm = c->Irm; 2975 2969 unsigned reg = rm & modregrm(0,7,0); // isolate reg field 2976 2970 unsigned ereg = rm & 7; 2977 2971 //printf("c = %p, op = %02x rm = %02x\n", c, op, rm); 2978 2972 2979 2973 /* If immediate second operand */ 2980 2974 if ((ins & T || op == 0xF6 || op == 0xF7) && 2981 2975 c->IFL2 == FLconst) 2982 2976 { 2983 2977 int flags = c->Iflags & CFpsw; /* if want result in flags */ 2984 2978 targ_long u = c->IEV2.Vuns; 2985 2979 if (ins & E) 2986 2980 u = (signed char) u; … … 3037 3031 if ((u & 0xFF) == 0xFF) 3038 3032 goto L3; 3039 3033 } 3040 3034 } 3041 3035 } 3042 3036 if (!shortop && useopsize) 3043 3037 { 3044 3038 if ((u & 0xFFFF0000) == 0xFFFF0000) 3045 3039 { c->Iflags ^= CFopsize; 3046 3040 goto L1; 3047 3041 } 3048 3042 if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX)) 3049 3043 { c->IEVoffset1 += 2; /* address MSW */ 3050 3044 c->IEV2.Vuns >>= 16; 3051 3045 c->Iflags ^= CFopsize; 3052 3046 goto L1; 3053 3047 } 3054 3048 if (rm >= modregrm(3,4,AX)) 3055 3049 { 3056 3050 if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64)) 3057 { c->Iop2 = 0xB6; /* MOVZX */ 3058 c->Iop = 0x0F; 3051 { c->Iop = 0x0FB6; // MOVZX 3059 3052 c->Irm = modregrm(3,ereg,ereg); 3060 3053 if (c->Irex & REX_B) 3061 3054 c->Irex |= REX_R; 3062 3055 goto L1; 3063 3056 } 3064 3057 if (u == 0xFFFF) 3065 { c->Iop2 = 0xB7; /* MOVZX */ 3066 c->Iop = 0x0F; 3058 { c->Iop = 0x0FB7; // MOVZX 3067 3059 c->Irm = modregrm(3,ereg,ereg); 3068 3060 if (c->Irex & REX_B) 3069 3061 c->Irex |= REX_R; 3070 3062 goto L1; 3071 3063 } 3072 3064 } 3073 3065 } 3074 3066 } 3075 3067 } 3076 3068 3077 3069 /* Look for ADD,OR,SUB,XOR with u that we can eliminate */ 3078 3070 if (!flags && 3079 3071 (op == 0x81 || op == 0x80) && 3080 3072 (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || // ADD,OR 3081 3073 reg == 
modregrm(0,5,0) || reg == modregrm(0,6,0)) // SUB, XOR 3082 3074 ) 3083 3075 { 3084 3076 if (u == 0) 3085 3077 { 3086 3078 c->Iop = NOP; … … 3246 3238 if ((ins & R) && (rm & 0xC0) == 0xC0) 3247 3239 { switch (op) 3248 3240 { case 0xC6: op = 0xB0 + ereg; break; 3249 3241 case 0xC7: op = 0xB8 + ereg; break; 3250 3242 case 0xFF: 3251 3243 switch (reg) 3252 3244 { case 6<<3: op = 0x50+ereg; break;/* PUSH*/ 3253 3245 case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/ 3254 3246 case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/ 3255 3247 } 3256 3248 break; 3257 3249 case 0x8F: op = 0x58 + ereg; break; 3258 3250 case 0x87: 3259 3251 if (reg == 0) op = 0x90 + ereg; 3260 3252 break; 3261 3253 } 3262 3254 c->Iop = op; 3263 3255 } 3264 3256 3265 3257 // Look to replace SHL reg,1 with ADD reg,reg 3266 if ((op & 0xFE) == 0xD0 &&3258 if ((op & ~1) == 0xD0 && 3267 3259 (rm & modregrm(3,7,0)) == modregrm(3,4,0) && 3268 3260 config.target_cpu >= TARGET_80486) 3269 3261 { 3270 3262 c->Iop &= 1; 3271 3263 c->Irm = (rm & modregrm(3,0,7)) | (ereg << 3); 3272 3264 if (c->Irex & REX_B) 3273 3265 c->Irex |= REX_R; 3274 3266 if (!(c->Iflags & CFpsw) && !I16) 3275 3267 c->Iflags &= ~CFopsize; 3276 3268 goto L1; 3277 3269 } 3278 3270 3279 3271 /* Look for sign extended modregrm displacement, or 0 3280 3272 * displacement. 
3281 3273 */ 3282 3274 3283 3275 if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp 3284 3276 c->IFL1 == FLconst) // and it's a constant 3285 3277 { 3286 3278 a = c->IEVpointer1; … … 3334 3326 else if (mod == modregrm(1,0,0) && rm == local_BPRM && 3335 3327 c->IFL1 == FLconst && c->IEVpointer1 == 0) 3336 3328 { c->Iop = 0x8B; /* MOV reg,BP */ 3337 3329 c->Irm = modregrm(3,0,BP) + reg; 3338 3330 } 3339 3331 } 3340 3332 3341 3333 // Replace [R13] with 0[R13] 3342 3334 if (c->Irex & REX_B && (c->Irm & modregrm(3,0,5)) == modregrm(0,0,5)) 3343 3335 { 3344 3336 c->Irm |= modregrm(1,0,0); 3345 3337 c->IFL1 = FLconst; 3346 3338 c->IEVpointer1 = 0; 3347 3339 } 3348 3340 } 3349 3341 else 3350 3342 { 3351 3343 switch (op) 3352 3344 { 3353 3345 default: 3354 if ((op & 0xF0) != 0x70)3346 if ((op & ~0x0F) != 0x70) 3355 3347 break; 3356 3348 case JMP: 3357 3349 switch (c->IFL2) 3358 3350 { case FLcode: 3359 3351 if (c->IEV2.Vcode == code_next(c)) 3360 3352 { c->Iop = NOP; 3361 3353 continue; 3362 3354 } 3363 3355 break; 3364 3356 case FLblock: 3365 3357 if (!code_next(c) && c->IEV2.Vblock == bn) 3366 3358 { c->Iop = NOP; 3367 3359 continue; 3368 3360 } 3369 3361 break; 3370 3362 case FLconst: 3371 3363 case FLfunc: 3372 3364 case FLextern: 3373 3365 break; 3374 3366 default: … … 3479 3471 } 3480 3472 #endif 3481 3473 3482 3474 /************************** 3483 3475 * Compute jump addresses for FLcode. 3484 3476 * Note: only works for forward referenced code. 3485 3477 * only direct jumps and branches are detected. 3486 3478 * LOOP instructions only work for backward refs. 
3487 3479 */ 3488 3480 3489 3481 void jmpaddr(code *c) 3490 3482 { code *ci,*cn,*ctarg,*cstart; 3491 3483 targ_size_t ad; 3492 3484 unsigned char op; 3493 3485 3494 3486 //printf("jmpaddr()\n"); 3495 3487 cstart = c; /* remember start of code */ 3496 3488 while (c) 3497 3489 { 3498 3490 op = c->Iop; 3499 if (inssize[op ] & T && /* if second operand */3491 if (inssize[op & 0xFF] & T && // if second operand 3500 3492 c->IFL2 == FLcode && 3501 ((op & 0xF0) == 0x70 || op == JMP || op == JMPS || op == JCXZ))3493 ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ)) 3502 3494 { ci = code_next(c); 3503 3495 ctarg = c->IEV2.Vcode; /* target code */ 3504 3496 ad = 0; /* IP displacement */ 3505 3497 while (ci && ci != ctarg) 3506 3498 { 3507 3499 ad += calccodsize(ci); 3508 3500 ci = code_next(ci); 3509 3501 } 3510 3502 if (!ci) 3511 3503 goto Lbackjmp; // couldn't find it 3512 3504 if (I32 || op == JMP || op == JMPS || op == JCXZ) 3513 3505 c->IEVpointer2 = ad; 3514 3506 else /* else conditional */ 3515 3507 { if (!(c->Iflags & CFjmp16)) /* if branch */ 3516 3508 c->IEVpointer2 = ad; 3517 3509 else /* branch around a long jump */ 3518 3510 { cn = code_next(c); 3519 3511 code_next(c) = code_calloc(); 3520 3512 code_next(code_next(c)) = cn; 3521 3513 c->Iop = op ^ 1; /* converse jmp */ … … 3566 3558 /***************************** 3567 3559 * Calculate and return code size of a code. 3568 3560 * Note that NOPs are sometimes used as markers, but are 3569 3561 * never output. LINNUMs are never output. 3570 3562 * Note: This routine must be fast. Profiling shows it is significant. 
3571 3563 */ 3572 3564 3573 3565 unsigned calccodsize(code *c) 3574 3566 { unsigned size; 3575 3567 unsigned op; 3576 3568 unsigned char rm,mod,ins; 3577 3569 unsigned iflags; 3578 3570 unsigned i32 = I32 || I64; 3579 3571 unsigned a32 = i32; 3580 3572 3581 3573 #ifdef DEBUG 3582 3574 assert((a32 & ~1) == 0); 3583 3575 #endif 3584 3576 iflags = c->Iflags; 3585 3577 op = c->Iop; 3578 if ((op & 0xFF00) == 0x0F00) 3579 op = 0x0F; 3580 else 3581 op &= 0xFF; 3586 3582 switch (op) 3587 3583 { 3588 3584 case 0x0F: 3589 ins = inssize2[c->Iop 2];3585 ins = inssize2[c->Iop & 0xFF]; 3590 3586 size = ins & 7; 3587 if (c->Iop & 0xFF0000) 3588 size++; 3591 3589 break; 3592 3590 3593 3591 case NOP: 3594 3592 case ESCAPE: 3595 3593 size = 0; // since these won't be output 3596 3594 goto Lret2; 3597 3595 3598 3596 case ASM: 3599 3597 if (c->Iflags == CFaddrsize) // kludge for DA inline asm 3600 3598 size = NPTRSIZE; 3601 3599 else 3602 3600 size = c->IEV1.as.len; 3603 3601 goto Lret2; 3604 3602 3605 3603 case 0xA1: 3606 3604 case 0xA3: 3607 3605 if (c->Irex) 3608 3606 { 3609 3607 size = 9; // 64 bit immediate value for MOV to/from RAX 3610 3608 goto Lret; … … 3663 3661 size += 2; 3664 3662 } 3665 3663 } 3666 3664 else if (iflags & CFopsize) 3667 3665 { if (I32) 3668 3666 size -= 2; 3669 3667 else 3670 3668 size += 2; 3671 3669 } 3672 3670 } 3673 3671 if (iflags & CFaddrsize) 3674 3672 { if (!I64) 3675 3673 a32 ^= 1; 3676 3674 size++; 3677 3675 } 3678 3676 if (iflags & CFopsize) 3679 3677 size++; /* +1 for OPSIZE prefix */ 3680 3678 } 3681 3679 } 3682 3680 3683 if ((op & 0xF0) == 0x70)3681 if ((op & ~0x0F) == 0x70) 3684 3682 { if (iflags & CFjmp16) // if long branch 3685 3683 size += I16 ? 
3 : 4; // + 3(4) bytes for JMP 3686 3684 } 3687 3685 else if (ins & M) // if modregrm byte 3688 3686 { 3689 3687 rm = c->Irm; 3690 3688 mod = rm & 0xC0; 3691 3689 if (a32 || I64) 3692 3690 { // 32 bit addressing 3693 3691 if (issib(rm)) 3694 3692 size++; 3695 3693 switch (mod) 3696 3694 { case 0: 3697 3695 if (issib(rm) && (c->Isib & 7) == 5 || 3698 3696 (rm & 7) == 5) 3699 3697 size += 4; /* disp32 */ 3700 3698 if (c->Irex & REX_B && (rm & 7) == 5) 3701 3699 /* Instead of selecting R13, this mode is an [RIP] relative 3702 3700 * address. Although valid, it's redundant, and should not 3703 3701 * be generated. Instead, generate 0[R13] instead of [R13]. … … 3707 3705 case 0x40: 3708 3706 size++; /* disp8 */ 3709 3707 break; 3710 3708 case 0x80: 3711 3709 size += 4; /* disp32 */ 3712 3710 break; 3713 3711 } 3714 3712 } 3715 3713 else 3716 3714 { // 16 bit addressing 3717 3715 if (mod == 0x40) /* 01: 8 bit displacement */ 3718 3716 size++; 3719 3717 else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6)) 3720 3718 size += 2; 3721 3719 } 3722 3720 } 3723 3721 3724 3722 Lret: 3725 3723 if (c->Irex) 3726 3724 { size++; 3727 if (c->Irex & REX_W && (op & 0xF8) == 0xB8)3725 if (c->Irex & REX_W && (op & ~7) == 0xB8) 3728 3726 size += 4; 3729 3727 } 3730 3728 Lret2: 3731 3729 //printf("op = x%02x, size = %d\n",op,size); 3732 3730 return size; 3733 3731 } 3734 3732 3735 3733 /******************************** 3736 3734 * Return !=0 if codes match. 
3737 3735 */ 3738 3736 3739 3737 #if 0 3740 3738 3741 3739 int code_match(code *c1,code *c2) 3742 3740 { code cs1,cs2; 3743 3741 unsigned char ins; 3744 3742 3745 3743 if (c1 == c2) 3746 3744 goto match; 3747 3745 cs1 = *c1; 3748 3746 cs2 = *c2; 3749 3747 if (cs1.Iop != cs2.Iop) 3750 3748 goto nomatch; 3751 3749 switch (cs1.Iop) 3752 { case ESCAPE: 3753 switch (c->Iop2) 3754 { 3755 case ESCctor: 3756 goto nomatch; 3757 case ESCdtor: 3758 goto nomatch; 3759 } 3760 goto match; 3750 { 3751 case ESCAPE | ESCctor: 3752 case ESCAPE | ESCdtor: 3753 goto nomatch; 3754 3761 3755 case NOP: 3762 3756 goto match; 3763 3757 case ASM: 3764 3758 if (cs1.IEV1.as.len == cs2.IEV1.as.len && 3765 3759 memcmp(cs1.IEV1.as.bytes,cs2.IEV1.as.bytes,cs1.EV1.as.len) == 0) 3766 3760 goto match; 3767 3761 else 3768 3762 goto nomatch; 3764 3765 default: 3766 if ((cs1.Iop & 0xFF) == ESCAPE) 3767 goto match; 3768 break; 3769 3769 } 3770 3770 if (cs1.Iflags != cs2.Iflags) 3771 3771 goto nomatch; 3772 3772 3773 ins = inssize[cs1.Iop ];3774 if ( cs1.Iop == 0x0F)3773 ins = inssize[cs1.Iop & 0xFF]; 3774 if ((cs1.Iop & 0xFF00) == 0x0F00) 3775 3775 { 3776 if (cs1.Iop2 != cs2.Iop2) 3777 goto nomatch; 3778 if (cs1.Iop2 == 0x38 || cs1.Iop2 == 0x3A) 3779 { 3780 if (cs1.Iop3 != cs2.Iop3) 3781 goto nomatch; 3782 } 3783 ins = inssize2[cs1.Iop2]; 3776 ins = inssize2[cs1.Iop & 0xFF]; 3784 3777 } 3785 3778 3786 3779 if (ins & M) // if modregrm byte 3787 3780 { 3788 3781 if (cs1.Irm != cs2.Irm) 3789 3782 goto nomatch; 3790 3783 if ((cs1.Irm & 0xC0) == 0xC0) 3791 3784 goto do2; 3792 3785 if (is32bitaddr(I32,cs1.Iflags)) 3793 3786 { 3794 3787 if (issib(cs1.Irm) && cs1.Isib != cs2.Isib) 3795 3788 goto nomatch; 3796 3789 if ( 3797 3790 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5)) 3798 3791 ) 3799 3792 goto do2; /* if no first operand */ 3800 3793 } 3801 3794 else 3802 3795 { 3803 3796 if ( … … 3864 3857 } 3865 3858 3866 3859 unsigned codout(code *c) 3867 3860 { unsigned op; 3868 3861 
unsigned char rm,mod; 3869 3862 unsigned char ins; 3870 3863 code *cn; 3871 3864 unsigned flags; 3872 3865 symbol *s; 3873 3866 3874 3867 #ifdef DEBUG 3875 3868 if (debugc) printf("codout(%p), Coffset = x%llx\n",c,(unsigned long long)Coffset); 3876 3869 #endif 3877 3870 3878 3871 pgen = bytes; 3879 3872 offset = Coffset; 3880 3873 for (; c; c = code_next(c)) 3881 3874 { 3882 3875 #ifdef DEBUG 3883 3876 if (debugc) { printf("off=%02lx, sz=%ld, ",(long)OFFSET(),(long)calccodsize(c)); c->print(); } 3877 unsigned startoffset = OFFSET(); 3884 3878 #endif 3885 3879 op = c->Iop; 3886 ins = inssize[op ];3887 switch (op )3880 ins = inssize[op & 0xFF]; 3881 switch (op & 0xFF) 3888 3882 { case ESCAPE: 3889 switch ( c->Iop2)3883 switch (op & 0xFF00) 3890 3884 { case ESClinnum: 3891 3885 /* put out line number stuff */ 3892 3886 objlinnum(c->IEV2.Vsrcpos,OFFSET()); 3893 3887 break; 3894 3888 #if SCPP 3895 3889 #if 1 3896 3890 case ESCctor: 3897 3891 case ESCdtor: 3898 3892 case ESCoffset: 3899 3893 if (config.exe != EX_NT) 3900 3894 except_pair_setoffset(c,OFFSET() - funcoffset); 3901 3895 break; 3902 3896 case ESCmark: 3903 3897 case ESCrelease: 3904 3898 case ESCmark2: 3905 3899 case ESCrelease2: 3906 3900 break; 3907 3901 #else 3908 3902 case ESCctor: 3909 3903 except_push(OFFSET() - funcoffset,c->IEV1.Vtor,NULL); 3910 3904 break; 3911 3905 case ESCdtor: 3912 3906 except_pop(OFFSET() - funcoffset,c->IEV1.Vtor,NULL); 3913 3907 break; 3914 3908 case ESCmark: 3915 3909 except_mark(); 3916 3910 break; 3917 3911 case ESCrelease: 3918 3912 except_release(); 3919 3913 break; 3920 3914 #endif 3921 3915 #endif 3922 3916 } 3917 #ifdef DEBUG 3918 assert(calccodsize(c) == 0); 3919 #endif 3923 3920 continue; 3924 3921 case NOP: /* don't send them out */ 3922 if (op != NOP) 3923 break; 3924 #ifdef DEBUG 3925 assert(calccodsize(c) == 0); 3926 #endif 3925 3927 continue; 3926 3928 case ASM: 3929 if (op != ASM) 3930 break; 3927 3931 FLUSH(); 3928 3932 if (c->Iflags == CFaddrsize) // kludge 
for DA inline asm 3929 3933 { 3930 3934 do32bit(FLblockoff,&c->IEV1,0); 3931 3935 } 3932 3936 else 3933 3937 { 3934 3938 offset += obj_bytes(cseg,offset,c->IEV1.as.len,c->IEV1.as.bytes); 3935 3939 } 3940 #ifdef DEBUG 3941 assert(calccodsize(c) == c->IEV1.as.len); 3942 #endif 3936 3943 continue; 3937 3944 } 3938 3945 flags = c->Iflags; 3939 3946 3940 3947 // See if we need to flush (don't have room for largest code sequence) 3941 if (pgen - bytes > sizeof(bytes) - ( 4+4+4+4))3948 if (pgen - bytes > sizeof(bytes) - (1+4+4+8+8)) 3942 3949 FLUSH(); 3943 3950 3944 3951 // see if we need to put out prefix bytes 3945 3952 if (flags & (CFwait | CFPREFIX | CFjmp16)) 3946 3953 { int override; 3947 3954 3948 3955 if (flags & CFwait) 3949 3956 GEN(0x9B); // FWAIT 3950 3957 /* ? SEGES : SEGSS */ 3951 3958 switch (flags & CFSEG) 3952 3959 { case CFes: override = SEGES; goto segover; 3953 3960 case CFss: override = SEGSS; goto segover; 3954 3961 case CFcs: override = SEGCS; goto segover; 3955 3962 case CFds: override = SEGDS; goto segover; 3956 3963 case CFfs: override = SEGFS; goto segover; 3957 3964 case CFgs: override = SEGGS; goto segover; 3958 3965 segover: GEN(override); 3959 3966 break; 3960 3967 } 3961 3968 3962 3969 if (flags & CFaddrsize) 3963 3970 GEN(0x67); 3964 3971 3965 3972 // Do this last because of instructions like ADDPD 3966 3973 if (flags & CFopsize) 3967 3974 GEN(0x66); /* operand size */ 3968 3975 3969 if ((op & 0xF0) == 0x70 && flags & CFjmp16) /* long condit jmp */3976 if ((op & ~0x0F) == 0x70 && flags & CFjmp16) /* long condit jmp */ 3970 3977 { 3971 3978 if (!I16) 3972 3979 { // Put out 16 bit conditional jump 3973 c->Iop2 = 0x80 | (op & 0x0F); 3974 c->Iop = op = 0x0F; 3980 c->Iop = op = 0x0F00 | (0x80 | (op & 0x0F)); 3975 3981 } 3976 3982 else 3977 3983 { 3978 3984 cn = code_calloc(); 3979 3985 /*cxcalloc++;*/ 3980 3986 code_next(cn) = code_next(c); 3981 3987 code_next(c) = cn; // link into code 3982 3988 cn->Iop = JMP; // JMP block 3983 3989 cn->IFL2 = 
c->IFL2; 3984 3990 cn->IEV2.Vblock = c->IEV2.Vblock; 3985 3991 c->Iop = op ^= 1; // toggle condition 3986 3992 c->IFL2 = FLconst; 3987 3993 c->IEVpointer2 = I16 ? 3 : 5; // skip over JMP block 3988 3994 } 3989 3995 } 3990 3996 } 3991 3997 3992 3998 if (c->Irex) 3993 3999 GEN(c->Irex | REX); 3994 GEN(op); 3995 if (op == 0x0F) 3996 { 3997 ins = inssize2[c->Iop2]; 3998 GEN(c->Iop2); 3999 if (c->Iop2 == 0x38 || c->Iop2 == 0x3A) 4000 GEN(c->Iop3); 4001 } 4000 if (op > 0xFF) 4001 { 4002 if ((op & 0xFF00) == 0x0F00) 4003 ins = inssize2[op & 0xFF]; 4004 if (op & 0xFF000000) 4005 { GEN(op >> 24); 4006 GEN((op >> 8) & 0xFF); 4007 GEN(op & 0xFF); 4008 GEN((op >> 16) & 0xFF); // yes, this is out of order. For 0x660F3A41 & 40 4009 } 4010 else if (op & 0xFF0000) 4011 { 4012 GEN((op >> 16) & 0xFF); 4013 GEN((op >> 8) & 0xFF); 4014 GEN(op & 0xFF); 4015 } 4016 else 4017 { GEN((op >> 8) & 0xFF); 4018 GEN(op & 0xFF); 4019 } 4020 } 4021 else 4022 GEN(op); 4002 4023 if (ins & M) /* if modregrm byte */ 4003 4024 { 4004 4025 rm = c->Irm; 4005 4026 GEN(rm); 4006 4027 4007 4028 // Look for an address size override when working with the 4008 4029 // MOD R/M and SIB bytes 4009 4030 4010 4031 if (is32bitaddr( I32, flags)) 4011 4032 { 4012 4033 if (issib(rm)) 4013 4034 GEN(c->Isib); 4014 4035 switch (rm & 0xC0) 4015 4036 { case 0x40: 4016 4037 do8bit((enum FL) c->IFL1,&c->IEV1); // 8 bit 4017 4038 break; 4018 4039 case 0: 4019 4040 if (!(issib(rm) && (c->Isib & 7) == 5 || 4020 4041 (rm & 7) == 5)) 4021 4042 break; … … 4078 4099 if (c->Iflags & CFopsize) 4079 4100 goto ptr1616; 4080 4101 else 4081 4102 goto ptr1632; 4082 4103 4083 4104 case 0x68: // PUSH immed32 4084 4105 if ((enum FL)c->IFL2 == FLblock) 4085 4106 { 4086 4107 c->IFL2 = FLblockoff; 4087 4108 goto do32; 4088 4109 } 4089 4110 else 4090 4111 goto case_default; 4091 4112 4092 4113 case 0xE8: // CALL rel 4093 4114 case 0xE9: // JMP rel 4094 4115 flags |= CFselfrel; 4095 4116 goto case_default; 4096 4117 4097 4118 default: 4098 if 
(I64 && (op & 0xF8) == 0xB8 && c->Irex & REX_W)4119 if (I64 && (op & ~7) == 0xB8 && c->Irex & REX_W) 4099 4120 goto do64; 4100 4121 case_default: 4101 4122 if (c->Iflags & CFopsize) 4102 4123 goto do16; 4103 4124 else 4104 4125 goto do32; 4105 4126 break; 4106 4127 } 4107 4128 } 4108 4129 else 4109 4130 { 4110 4131 switch (op) { 4111 4132 case 0xC2: 4112 4133 case 0xCA: 4113 4134 goto do16; 4114 4135 case 0xA0: 4115 4136 case 0xA1: 4116 4137 case 0xA2: 4117 4138 case 0xA3: 4118 4139 if (c->Iflags & CFaddrsize) … … 4161 4182 goto do32; 4162 4183 else 4163 4184 goto do16; 4164 4185 break; 4165 4186 } 4166 4187 } 4167 4188 } 4168 4189 else if (op == 0xF6) /* TEST mem8,immed8 */ 4169 4190 { if ((rm & (7<<3)) == 0) 4170 4191 do8bit((enum FL)c->IFL2,&c->IEV2); 4171 4192 } 4172 4193 else if (op == 0xF7) 4173 4194 { if ((rm & (7<<3)) == 0) /* TEST mem16/32,immed16/32 */ 4174 4195 { 4175 4196 if ((I32 || I64) ^ ((c->Iflags & CFopsize) != 0)) 4176 4197 do32bit((enum FL)c->IFL2,&c->IEV2,flags); 4177 4198 else 4178 4199 do16bit((enum FL)c->IFL2,&c->IEV2,flags); 4179 4200 } 4180 4201 } 4202 #ifdef DEBUG 4203 if (OFFSET() - startoffset != calccodsize(c)) 4204 { 4205 printf("actual: %d, calc: %d\n", (int)(OFFSET() - startoffset), (int)calccodsize(c)); 4206 c->print(); 4207 assert(0); 4208 } 4209 #endif 4181 4210 } 4182 4211 FLUSH(); 4183 4212 Coffset = offset; 4184 4213 //printf("-codout(), Coffset = x%x\n", Coffset); 4185 4214 return offset; /* ending address */ 4186 4215 } 4187 4216 4188 4217 4189 4218 STATIC void do64bit(enum FL fl,union evc *uev,int flags) 4190 4219 { char *p; 4191 4220 symbol *s; 4192 4221 targ_size_t ad; 4193 4222 long tmp; 4194 4223 4195 4224 assert(I64); 4196 4225 switch (fl) 4197 4226 { 4198 4227 case FLconst: 4199 4228 ad = * (targ_size_t *) uev; 4200 4229 L1: … … 4645 4674 #endif 4646 4675 } 4647 4676 } 4648 4677 } 4649 4678 4650 4679 4651 4680 /********************************** 4652 4681 */ 4653 4682 4654 4683 #if HYDRATE 4655 4684 void 
code_hydrate(code **pc) 4656 4685 { 4657 4686 code *c; 4658 4687 unsigned char ins,rm; 4659 4688 enum FL fl; 4660 4689 4661 4690 assert(pc); 4662 4691 while (*pc) 4663 4692 { 4664 4693 c = (code *) ph_hydrate(pc); 4694 if ((c->Iop & 0xFF00) == 0x0F00) 4695 ins = inssize2[c->Iop & 0xFF]; 4696 else 4697 ins = inssize[c->Iop & 0xFF]; 4665 4698 switch (c->Iop) 4666 { case 0x0F: 4667 ins = inssize2[c->Iop2]; 4668 break; 4699 { 4669 4700 default: 4670 ins = inssize[c->Iop]; 4671 break; 4672 case ESCAPE: 4673 switch (c->Iop2) 4674 { case ESClinnum: 4675 srcpos_hydrate(&c->IEV2.Vsrcpos); 4676 break; 4677 case ESCctor: 4678 case ESCdtor: 4679 el_hydrate(&c->IEV1.Vtor); 4680 break; 4681 } 4701 break; 4702 4703 case ESCAPE | ESClinnum: 4704 srcpos_hydrate(&c->IEV2.Vsrcpos); 4682 4705 goto done; 4706 4707 case ESCAPE | ESCctor: 4708 case ESCAPE | ESCdtor: 4709 el_hydrate(&c->IEV1.Vtor); 4710 goto done; 4711 4683 4712 case ASM: 4684 4713 ph_hydrate(&c->IEV1.as.bytes); 4685 4714 goto done; 4686 4715 } 4687 4716 if (!(ins & M) || 4688 4717 ((rm = c->Irm) & 0xC0) == 0xC0) 4689 4718 goto do2; /* if no first operand */ 4690 4719 if (is32bitaddr(I32,c->Iflags)) 4691 4720 { 4692 4721 4693 4722 if ( 4694 4723 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5)) 4695 4724 ) 4696 4725 goto do2; /* if no first operand */ 4697 4726 } 4698 4727 else 4699 4728 { 4700 4729 if ( 4701 4730 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 4702 4731 ) … … 4809 4838 4810 4839 pc = &code_next(c); 4811 4840 } 4812 4841 } 4813 4842 #endif 4814 4843 4815 4844 /********************************** 4816 4845 */ 4817 4846 4818 4847 #if DEHYDRATE 4819 4848 void code_dehydrate(code **pc) 4820 4849 { 4821 4850 code *c; 4822 4851 unsigned char ins,rm; 4823 4852 enum FL fl; 4824 4853 4825 4854 while ((c = *pc) != NULL) 4826 4855 { 4827 4856 ph_dehydrate(pc); 4828 4857 4858 if ((c->Iop & 0xFF00) == 0x0F00) 4859 ins = inssize2[c->Iop & 0xFF]; 4860 else 4861 ins = inssize[c->Iop & 0xFF]; 4829 4862 
switch (c->Iop) 4830 { case 0x0F: 4831 ins = inssize2[c->Iop2]; 4832 break; 4863 { 4833 4864 default: 4834 ins = inssize[c->Iop]; 4835 break; 4836 case ESCAPE: 4837 switch (c->Iop2) 4838 { case ESClinnum: 4839 srcpos_dehydrate(&c->IEV2.Vsrcpos); 4840 break; 4841 case ESCctor: 4842 case ESCdtor: 4843 el_dehydrate(&c->IEV1.Vtor); 4844 break; 4845 } 4865 break; 4866 4867 case ESCAPE | ESClinnum: 4868 srcpos_dehydrate(&c->IEV2.Vsrcpos); 4846 4869 goto done; 4870 4871 case ESCAPE | ESCctor: 4872 case ESCAPE | ESCdtor: 4873 el_dehydrate(&c->IEV1.Vtor); 4874 goto done; 4875 4847 4876 case ASM: 4848 4877 ph_dehydrate(&c->IEV1.as.bytes); 4849 4878 goto done; 4850 4879 } 4851 4880 4852 4881 if (!(ins & M) || 4853 4882 ((rm = c->Irm) & 0xC0) == 0xC0) 4854 4883 goto do2; /* if no first operand */ 4855 4884 if (is32bitaddr(I32,c->Iflags)) 4856 4885 { 4857 4886 4858 4887 if ( 4859 4888 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5)) 4860 4889 ) 4861 4890 goto do2; /* if no first operand */ 4862 4891 } 4863 4892 else 4864 4893 { 4865 4894 if ( 4866 4895 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) … … 4982 5011 */ 4983 5012 4984 5013 #if DEBUG 4985 5014 4986 5015 void WRcodlst(code *c) 4987 5016 { for (; c; c = code_next(c)) 4988 5017 c->print(); 4989 5018 } 4990 5019 4991 5020 void code::print() 4992 5021 { 4993 5022 unsigned op,rm; 4994 5023 unsigned char ins; 4995 5024 code *c = this; 4996 5025 4997 5026 if (c == CNIL) 4998 5027 { printf("code 0\n"); 4999 5028 return; 5000 5029 } 5001 5030 op = c->Iop; 5002 ins = inssize[op]; 5003 if (op == 0x0F) 5004 { op = 0x0F00 + c->Iop2; 5005 if (op == 0x0F38 || op == 0x0F3A) 5006 op = (op << 8) | c->Iop3; 5007 ins = inssize2[c->Iop2]; 5008 } 5031 5032 if ((c->Iop & 0xFF00) == 0x0F00) 5033 ins = inssize2[op & 0xFF]; 5034 else 5035 ins = inssize[op & 0xFF]; 5036 5009 5037 printf("code %p: nxt=%p op=%02x",c,code_next(c),op); 5010 if ( op== ESCAPE)5011 { if ( c->Iop2== ESClinnum)5038 if ((op & 0xFF) == ESCAPE) 5039 { 
if ((op & 0xFF00) == ESClinnum) 5012 5040 { printf(" linnum = %d\n",c->IEV2.Vsrcpos.Slinnum); 5013 5041 return; 5014 5042 } 5015 printf(" ESCAPE %d",c->Iop 2);5043 printf(" ESCAPE %d",c->Iop >> 8); 5016 5044 } 5017 5045 if (c->Iflags) 5018 5046 printf(" flg=%x",c->Iflags); 5019 5047 if (ins & M) 5020 5048 { rm = c->Irm; 5021 5049 printf(" rm=%02x=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7); 5022 5050 if (I32 && issib(rm)) 5023 5051 { unsigned char sib = c->Isib; 5024 5052 printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7); 5025 5053 } 5026 5054 if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40) 5027 5055 { 5028 5056 switch (c->IFL1) 5029 5057 { 5030 5058 case FLconst: 5031 5059 case FLoffset: 5032 5060 printf(" int = %4d",c->IEV1.Vuns); 5033 5061 break; 5034 5062 case FLblock: 5035 5063 printf(" block = %p",c->IEV1.Vblock); branches/dmd-1.x/src/backend/cod4.c
r577 r596 1223 1223 cl = getlvalue(&cs,e1,0); /* get EA */ 1224 1224 regm_t idxregs = idxregm(&cs); 1225 1225 retregs = *pretregs & (ALLREGS | mBP) & ~idxregs; 1226 1226 if (!retregs) 1227 1227 retregs = ALLREGS & ~idxregs; 1228 1228 cg = allocreg(&retregs,&resreg,tyml); 1229 1229 cs.Iop = 0x69; /* IMUL reg,EA,e2value */ 1230 1230 cs.IFL2 = FLconst; 1231 1231 cs.IEV2.Vint = e2factor; 1232 1232 opr = resreg; 1233 1233 } 1234 1234 else if (!I16 && !byte) 1235 1235 { 1236 1236 L1: 1237 1237 retregs = *pretregs & (ALLREGS | mBP); 1238 1238 if (!retregs) 1239 1239 retregs = ALLREGS; 1240 1240 cr = codelem(e2,&retregs,FALSE); /* load rvalue in reg */ 1241 1241 cl = getlvalue(&cs,e1,retregs); /* get EA */ 1242 1242 cg = getregs(retregs); /* destroy these regs */ 1243 cs.Iop = 0x0F; /* IMUL resreg,EA */ 1244 cs.Iop2 = 0xAF; 1243 cs.Iop = 0x0FAF; // IMUL resreg,EA 1245 1244 resreg = findreg(retregs); 1246 1245 opr = resreg; 1247 1246 } 1248 1247 else 1249 1248 { 1250 1249 retregs = mAX; 1251 1250 cr = codelem(e2,&retregs,FALSE); // load rvalue in AX 1252 1251 cl = getlvalue(&cs,e1,mAX); // get EA 1253 1252 cg = getregs(byte ? 
mAX : mAX | mDX); // destroy these regs 1254 1253 cs.Iop = 0xF7 ^ byte; // [I]MUL EA 1255 1254 } 1256 1255 cs.Irm |= modregrm(0,opr,0); 1257 1256 c = gen(CNIL,&cs); 1258 1257 } 1259 1258 else // /= or %= 1260 1259 { targ_size_t e2factor; 1261 1260 int pow2; 1262 1261 targ_ulong m; 1263 1262 1264 1263 assert(!byte); // should never happen … … 2570 2569 if (cx->Iop == 0x81 && (cx->Irm & modregrm(3,7,0)) == modregrm(3,4,0)) 2571 2570 { 2572 2571 // Convert AND of a word to AND of a dword, zeroing upper word 2573 2572 retregs = mask[cx->Irm & 7]; 2574 2573 cx->Iflags &= ~CFopsize; 2575 2574 cx->IEV2.Vint &= 0xFFFF; 2576 2575 goto L1; 2577 2576 } 2578 2577 } 2579 2578 if (op == OPs16_32 && retregs == mAX) 2580 2579 c2 = gen1(c2,0x98); /* CWDE */ 2581 2580 else 2582 2581 { 2583 2582 reg = findreg(retregs); 2584 2583 if (config.flags4 & CFG4speed && op == OPu16_32) 2585 2584 { // AND reg,0xFFFF 2586 2585 c3 = genc2(NULL,0x81,modregrm(3,4,reg),0xFFFFu); 2587 2586 } 2588 2587 else 2589 2588 { 2590 c3 = genregs(CNIL,0x0F,reg,reg);2591 c3 ->Iop2 = (op == OPu16_32) ? 0xB7 : 0xBF; /* MOVZX/MOVSX reg,reg */2589 unsigned iop = (op == OPu16_32) ? 0x0FB7 : 0x0FBF; /* MOVZX/MOVSX reg,reg */ 2590 c3 = genregs(CNIL,iop,reg,reg); 2592 2591 } 2593 2592 c2 = cat(c2,c3); 2594 2593 } 2595 2594 L1: 2596 2595 c3 = e1comsub ? getregs(retregs) : CNIL; 2597 2596 } 2598 2597 c4 = fixresult(e,retregs,pretregs); 2599 2598 c = cat4(c1,c2,c3,c4); 2600 2599 } 2601 2600 else if (*pretregs & mPSW || config.target_cpu < TARGET_80286) 2602 2601 { 2603 2602 // OPs16_32, OPs32_64 2604 2603 // CWD doesn't affect flags, so we can depend on the integer 2605 2604 // math to provide the flags. 2606 2605 retregs = mAX | mPSW; // want integer result in AX 2607 2606 *pretregs &= ~mPSW; // flags are already set 2608 2607 c1 = codelem(e->E1,&retregs,FALSE); 2609 2608 c2 = getregs(mDX); // sign extend into DX 2610 2609 c2 = gen1(c2,0x99); // CWD/CDQ 2611 2610 c3 = e1comsub ? 
getregs(retregs) : CNIL; … … 2724 2723 } 2725 2724 else 2726 2725 { 2727 2726 L1: 2728 2727 if (!I16) 2729 2728 { 2730 2729 if (op == OPs8int && reg == AX && size == 2) 2731 2730 { c3 = gen1(c3,0x98); /* CBW */ 2732 2731 c3->Iflags |= CFopsize; /* don't do a CWDE */ 2733 2732 } 2734 2733 else 2735 2734 { 2736 2735 /* We could do better by not forcing the src and dst */ 2737 2736 /* registers to be the same. */ 2738 2737 2739 2738 if (config.flags4 & CFG4speed && op == OPu8_16) 2740 2739 { // AND reg,0xFF 2741 2740 c3 = genc2(c3,0x81,modregrm(3,4,reg),0xFF); 2742 2741 } 2743 2742 else 2744 { c3 = genregs(c3,0x0F,reg,reg); 2745 c3->Iop2 = (op == OPu8int) ? 0xB6 : 0xBE; /* MOVZX/MOVSX reg,reg */ 2743 { 2744 unsigned iop = (op == OPu8int) ? 0x0FB6 : 0x0FBE; // MOVZX/MOVSX reg,reg 2745 c3 = genregs(c3,iop,reg,reg); 2746 2746 } 2747 2747 } 2748 2748 } 2749 2749 else 2750 2750 { 2751 2751 if (op == OPu8int) 2752 2752 c3 = genregs(c3,0x30,reg+4,reg+4); // XOR regH,regH 2753 2753 else 2754 2754 { 2755 2755 c3 = gen1(c3,0x98); /* CBW */ 2756 2756 *pretregs &= ~mPSW; /* flags already set */ 2757 2757 } 2758 2758 } 2759 2759 } 2760 2760 c2 = getregs(retregs); 2761 2761 L2: 2762 2762 c4 = fixresult(e,retregs,pretregs); 2763 2763 return cat6(c0,c1,c2,c3,c4,NULL); 2764 2764 } 2765 2765 … … 3033 3033 case OPbts: op = 0xAB; mode = 5; break; 3034 3034 3035 3035 default: 3036 3036 assert(0); 3037 3037 } 3038 3038 3039 3039 e1 = e->E1; 3040 3040 e2 = e->E2; 3041 3041 cs.Iflags = 0; 3042 3042 c = getlvalue(&cs, e, RMload); // get addressing mode 3043 3043 if (e->Eoper == OPbt && *pretregs == 0) 3044 3044 return cat(c, codelem(e2,pretregs,FALSE)); 3045 3045 3046 3046 ty1 = tybasic(e1->Ety); 3047 3047 word = (!I16 && tysize[ty1] == SHORTSIZE) ? CFopsize : 0; 3048 3048 idxregs = idxregm(&cs); // mask if index regs used 3049 3049 3050 3050 // if (e2->Eoper == OPconst && e2->EV.Vuns < 0x100) // should do this instead? 
3051 3051 if (e2->Eoper == OPconst) 3052 3052 { 3053 cs.Iop = 0x0F; 3054 cs.Iop2 = 0xBA; // BT rm,imm8 3053 cs.Iop = 0x0FBA; // BT rm,imm8 3055 3054 cs.Irm |= modregrm(0,mode,0); 3056 3055 cs.Iflags |= CFpsw | word; 3057 3056 cs.IFL2 = FLconst; 3058 3057 if (tysize[ty1] == SHORTSIZE) 3059 3058 { 3060 3059 cs.IEVoffset1 += (e2->EV.Vuns & ~15) >> 3; 3061 3060 cs.IEV2.Vint = e2->EV.Vint & 15; 3062 3061 } 3063 3062 else if (tysize[ty1] == 4) 3064 3063 { 3065 3064 cs.IEVoffset1 += (e2->EV.Vuns & ~31) >> 3; 3066 3065 cs.IEV2.Vint = e2->EV.Vint & 31; 3067 3066 } 3068 3067 else 3069 3068 { 3070 3069 cs.IEVoffset1 += (e2->EV.Vuns & ~63) >> 3; 3071 3070 cs.IEV2.Vint = e2->EV.Vint & 63; 3072 3071 } 3073 3072 c2 = gen(CNIL,&cs); 3074 3073 } 3075 3074 else 3076 3075 { 3077 3076 retregs = ALLREGS & ~idxregs; 3078 3077 c2 = scodelem(e2,&retregs,idxregs,TRUE); 3079 3078 reg = findreg(retregs); 3080 3079 3081 cs.Iop = 0x0F; 3082 cs.Iop2 = op; // BT rm,reg 3080 cs.Iop = 0x0F00 | op; // BT rm,reg 3083 3081 code_newreg(&cs,reg); 3084 3082 cs.Iflags |= CFpsw | word; 3085 3083 c2 = gen(c2,&cs); 3086 3084 } 3087 3085 3088 3086 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register 3089 3087 { 3090 3088 code *nop = CNIL; 3091 3089 regm_t save = regcon.immed.mval; 3092 3090 code *cg = allocreg(&retregs,&reg,TYint); 3093 3091 regcon.immed.mval = save; 3094 3092 if ((*pretregs & mPSW) == 0) 3095 3093 { 3096 3094 cg = cat(cg,getregs(retregs)); 3097 3095 cg = genregs(cg,0x19,reg,reg); // SBB reg,reg 3098 3096 } 3099 3097 else 3100 3098 { 3101 3099 cg = movregconst(cg,reg,1,8); // MOV reg,1 3102 3100 nop = gennop(nop); … … 3137 3135 { 3138 3136 cl = getlvalue(&cs, e->E1, RMload); // get addressing mode 3139 3137 } 3140 3138 else 3141 3139 { 3142 3140 retregs = allregs; 3143 3141 cl = codelem(e->E1, &retregs, FALSE); 3144 3142 reg = findreg(retregs); 3145 3143 cs.Irm = modregrm(3,0,reg & 7); 3146 3144 cs.Iflags = 0; 3147 3145 cs.Irex = 0; 3148 3146 if (reg & 8) 3149
3147 cs.Irex |= REX_B; 3150 3148 } 3151 3149 3152 3150 retregs = *pretregs & allregs; 3153 3151 if (!retregs) 3154 3152 retregs = allregs; 3155 3153 cg = allocreg(&retregs, &reg, e->Ety); 3156 3154 3157 cs.Iop = 0x0F; 3158 cs.Iop2 = (e->Eoper == OPbsf) ? 0xBC : 0xBD; // BSF/BSR reg,EA 3155 cs.Iop = (e->Eoper == OPbsf) ? 0x0FBC : 0x0FBD; // BSF/BSR reg,EA 3159 3156 code_newreg(&cs, reg); 3160 3157 if (!I16 && sz == SHORTSIZE) 3161 3158 cs.Iflags |= CFopsize; 3162 3159 cg = gen(cg,&cs); 3163 3160 3164 3161 return cat3(cl,cg,fixresult(e,retregs,pretregs)); 3165 3162 } 3166 3163 3167 3164 /******************************************* 3168 3165 * Generate code for OPpair, OPrpair. 3169 3166 */ 3170 3167 3171 3168 code *cdpair(elem *e, regm_t *pretregs) 3172 3169 { 3173 3170 regm_t retregs; 3174 3171 regm_t regs1; 3175 3172 regm_t regs2; 3176 3173 unsigned reg; 3177 3174 code *cg; 3178 3175 code *c1; branches/dmd-1.x/src/backend/code.h
r595 r596 214 214 #define JBE 0x76 215 215 #define JA 0x77 216 216 #define JS 0x78 217 217 #define JNS 0x79 218 218 #define JP 0x7A 219 219 #define JNP 0x7B 220 220 #define JL 0x7C 221 221 #define JGE 0x7D 222 222 #define JLE 0x7E 223 223 #define JG 0x7F 224 224 225 225 /* NOP is used as a placeholder in the linked list of instructions, no */ 226 226 /* actual code will be generated for it. */ 227 227 #define NOP 0x2E /* actually CS: (we don't use 0x90 because the */ 228 228 /* silly Windows stuff wants to output 0x90's) */ 229 229 230 230 #define ESCAPE 0x3E // marker that special information is here 231 231 // (Iop2 is the type of special information) 232 232 // (Same as DS:, but we will never generate 233 233 // a separate DS: opcode anyway) 234 #define ESClinnum 1// line number information235 #define ESCctor 2// object is constructed236 #define ESCdtor 3// object is destructed237 #define ESCmark 4// mark eh stack238 #define ESCrelease 5// release eh stack239 #define ESCoffset 6// set code offset for eh240 #define ESCadjesp 7// adjust ESP by IEV2.Vint241 #define ESCmark2 8// mark eh stack242 #define ESCrelease2 9// release eh stack243 #define ESCframeptr 10// replace with load of frame pointer234 #define ESClinnum (1 << 8) // line number information 235 #define ESCctor (2 << 8) // object is constructed 236 #define ESCdtor (3 << 8) // object is destructed 237 #define ESCmark (4 << 8) // mark eh stack 238 #define ESCrelease (5 << 8) // release eh stack 239 #define ESCoffset (6 << 8) // set code offset for eh 240 #define ESCadjesp (7 << 8) // adjust ESP by IEV2.Vint 241 #define ESCmark2 (8 << 8) // mark eh stack 242 #define ESCrelease2 (9 << 8) // release eh stack 243 #define ESCframeptr (10 << 8) // replace with load of frame pointer 244 244 245 245 #define ASM 0x36 // string of asm bytes, actually an SS: opcode 246 246 247 247 /********************************* 248 248 * Macros to ease generating code 249 249 * modregrm: generate mod reg r/m field 250 250 * 
modregxrm: reg could be R8..R15 251 251 * modregrmx: rm could be R8..R15 252 252 * modregxrmx: reg or rm could be R8..R15 253 253 * NEWREG: change reg field of x to r 254 254 * genorreg: OR t,f 255 255 */ 256 256 257 257 #define modregrm(m,r,rm) (((m)<<6)|((r)<<3)|(rm)) 258 258 #define modregxrm(m,r,rm) ((((r)&8)<<15)|modregrm((m),(r)&7,rm)) 259 259 #define modregrmx(m,r,rm) ((((rm)&8)<<13)|modregrm((m),r,(rm)&7)) 260 260 #define modregxrmx(m,r,rm) ((((r)&8)<<15)|(((rm)&8)<<13)|modregrm((m),(r)&7,(rm)&7)) 261 261 262 262 #define NEWREXR(x,r) ((x)=((x)&~REX_R)|(((r)&8)>>1)) 263 263 #define NEWREG(x,r) ((x)=((x)&~(7<<3))|((r)<<3)) … … 380 380 #define CFopsize 0x80 // prefix with operand size 381 381 #define CFaddrsize 0x100 // prefix with address size 382 382 #define CFds 0x200 // need DS override (not with es, ss, or cs ) 383 383 #define CFcs 0x400 // need CS override 384 384 #define CFfs 0x800 // need FS override 385 385 #define CFgs (CFcs | CFfs) // need GS override 386 386 #define CFwait 0x1000 // If I32 it indicates when to output a WAIT 387 387 #define CFselfrel 0x2000 // if self-relative 388 388 #define CFunambig 0x4000 // indicates cannot be accessed by other addressing 389 389 // modes 390 390 #define CFtarg2 0x8000 // like CFtarg, but we can't optimize this away 391 391 #define CFvolatile 0x10000 // volatile reference, do not schedule 392 392 #define CFclassinit 0x20000 // class init code 393 393 #define CFoffset64 0x40000 // offset is 64 bits 394 394 #define CFpc32 0x80000 // I64: PC relative 32 bit fixup 395 395 396 396 #define CFPREFIX (CFSEG | CFopsize | CFaddrsize) 397 397 #define CFSEG (CFes | CFss | CFds | CFcs | CFfs | CFgs) 398 398 399 399 400 unsigned char Iop; 401 unsigned char Iop2; // second opcode byte 402 unsigned char Iop3; // third opcode byte 400 unsigned Iop; 403 401 404 402 union 405 403 { unsigned _Iea; 406 404 struct 407 405 { 408 406 unsigned char _Irm; // reg/mode 409 407 unsigned char _Isib; // SIB byte 410 408 unsigned char _Irex; 
// REX prefix 411 409 } _ea; 412 410 } _EA; 413 411 414 412 #define Iea _EA._Iea 415 413 #define Irm _EA._ea._Irm 416 414 #define Isib _EA._ea._Isib 417 415 #define Irex _EA._ea._Irex 418 416 419 417 unsigned char IFL1,IFL2; // FLavors of 1st, 2nd operands 420 418 union evc IEV1; // 1st operand, if any 421 419 #define IEVpointer1 IEV1._EP.Vpointer 422 420 #define IEVseg1 IEV1._EP.Vseg branches/dmd-1.x/src/iasm.c
r577 r596 1353 1353 case _ES: 1354 1354 emit(0x26); 1355 1355 pc->Iflags |= CFes; 1356 1356 break; 1357 1357 case _FS: 1358 1358 emit(0x64); 1359 1359 pc->Iflags |= CFfs; 1360 1360 break; 1361 1361 case _GS: 1362 1362 emit(0x65); 1363 1363 pc->Iflags |= CFgs; 1364 1364 break; 1365 1365 default: 1366 1366 assert(0); 1367 1367 } 1368 1368 } 1369 1369 break; 1370 1370 } 1371 1371 unsigned usOpcode = ptb.pptb0->usOpcode; 1372 1372 1373 pc->Iop = usOpcode; 1373 1374 if ((usOpcode & 0xFFFFFF00) == 0x660F3A00 || // SSE4 1374 1375 (usOpcode & 0xFFFFFF00) == 0x660F3800) // SSE4 1375 1376 { 1376 pc->Iflags |= CFopsize; 1377 pc->Iop = 0x0F; 1378 pc->Iop2 = (usOpcode >> 8) & 0xFF; 1379 pc->Iop3 = usOpcode & 0xFF; 1377 pc->Iop = 0x66000F00 | ((usOpcode >> 8) & 0xFF) | ((usOpcode & 0xFF) << 16); 1380 1378 goto L3; 1381 1379 } 1382 1380 switch (usOpcode & 0xFF0000) 1383 1381 { 1384 1382 case 0: 1385 1383 break; 1386 1384 1387 1385 case 0x660000: 1388 pc->Iflags |= CFopsize;1389 1386 usOpcode &= 0xFFFF; 1390 break;1387 goto L3; 1391 1388 1392 1389 case 0xF20000: // REPNE 1393 1390 case 0xF30000: // REP/REPE 1394 1391 // BUG: What if there's an address size prefix or segment 1395 1392 // override prefix? Must the REP be adjacent to the rest 1396 1393 // of the opcode? 
1397 pcPrefix = code_calloc();1398 pcPrefix->Iop = usOpcode >> 16;1399 1394 usOpcode &= 0xFFFF; 1400 break;1395 goto L3; 1401 1396 1402 1397 case 0x0F0000: // an AMD instruction 1403 1398 puc = ((unsigned char *) &usOpcode); 1404 1399 if (puc[1] != 0x0F) // if not AMD instruction 0x0F0F 1405 1400 goto L4; 1406 1401 emit(puc[2]); 1407 1402 emit(puc[1]); 1408 1403 emit(puc[0]); 1409 pc->Iop = puc[2]; 1410 pc->Iop2 = puc[1]; 1404 pc->Iop >>= 8; 1411 1405 pc->IEVint2 = puc[0]; 1412 1406 pc->IFL2 = FLconst; 1413 1407 goto L3; 1414 1408 1415 1409 default: 1416 1410 puc = ((unsigned char *) &usOpcode); 1417 1411 L4: 1418 1412 emit(puc[2]); 1419 1413 emit(puc[1]); 1420 1414 emit(puc[0]); 1421 pc->Iop = puc[2]; 1422 pc->Iop2 = puc[1]; 1415 pc->Iop >>= 8; 1423 1416 pc->Irm = puc[0]; 1424 1417 goto L3; 1425 1418 } 1426 1419 if (usOpcode & 0xff00) 1427 1420 { 1428 1421 puc = ((unsigned char *) &(usOpcode)); 1429 1422 emit(puc[1]); 1430 1423 emit(puc[0]); 1431 1424 pc->Iop = puc[1]; 1432 1425 if (pc->Iop == 0x0f) 1433 pc->Iop 2 =puc[0];1426 pc->Iop = 0x0F00 | puc[0]; 1434 1427 else 1435 1428 { 1436 1429 if (usOpcode == 0xDFE0) // FSTSW AX 1437 1430 { pc->Irm = puc[0]; 1438 1431 goto L2; 1439 1432 } 1440 1433 if (asmstate.ucItype == ITfloat) 1441 1434 pc->Irm = puc[0]; 1442 1435 else 1443 1436 { pc->IEVint2 = puc[0]; 1444 1437 pc->IFL2 = FLconst; 1445 1438 } 1446 1439 } 1447 1440 } 1448 1441 else 1449 1442 { 1450 1443 emit(usOpcode); 1451 pc->Iop = usOpcode;1452 1444 } 1453 1445 L3: ; 1454 1446 1455 1447 // If CALL, Jxx or LOOPx to a symbolic location 1456 1448 if (/*asmstate.ucItype == ITjump &&*/ 1457 1449 popnd1 && popnd1->s && popnd1->s->isLabel()) 1458 1450 { Dsymbol *s; 1459 1451 1460 1452 s = popnd1->s; 1461 1453 if (s == asmstate.psDollar) 1462 1454 { 1463 1455 pc->IFL2 = FLconst; 1464 1456 if (uSizemaskTable1 & (_8 | _16)) 1465 1457 pc->IEVint2 = popnd1->disp; 1466 1458 else if (uSizemaskTable1 & _32) 1467 1459 pc->IEVpointer2 = (targ_size_t) popnd1->disp; 1468 1460 } 
1469 1461 else 1470 1462 { LabelDsymbol *label; 1471 1463 1472 1464 label = s->isLabel(); 1473 1465 if (label) 1474 { if ((pc->Iop & 0xF0) == 0x70)1466 { if ((pc->Iop & ~0x0F) == 0x70) 1475 1467 pc->Iflags |= CFjmp16; 1476 1468 if (usNumops == 1) 1477 1469 { pc->IFL2 = FLblock; 1478 1470 pc->IEVlsym2 = label; 1479 1471 } 1480 1472 else 1481 1473 { pc->IFL1 = FLblock; 1482 1474 pc->IEVlsym1 = label; 1483 1475 } 1484 1476 } 1485 1477 } 1486 1478 } 1487 1479 1488 1480 switch (usNumops) 1489 1481 { 1490 1482 case 0: 1491 1483 break; 1492 1484 case 1: 1493 1485 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1494 1486 amodTable1 == _normal && (uRegmaskTable1 & _rplus_r))) 1495 1487 { 1496 1488 unsigned reg = popnd1->base->val; 1497 1489 if (reg & 8) 1498 1490 { reg &= 7; 1499 1491 pc->Irex |= REX_B; 1500 1492 assert(I64); 1501 1493 } 1502 1494 if (asmstate.ucItype == ITfloat) 1503 1495 pc->Irm += reg; 1504 else if (pc->Iop == 0x0f)1505 pc->Iop2 += reg;1506 1496 else 1507 1497 pc->Iop += reg; 1508 1498 #ifdef DEBUG 1509 1499 auchOpcode[usIdx-1] += reg; 1510 1500 #endif 1511 1501 } 1512 1502 else 1513 1503 { asm_make_modrm_byte( 1514 1504 #ifdef DEBUG 1515 1505 auchOpcode, &usIdx, 1516 1506 #endif 1517 1507 pc, 1518 1508 ptb.pptb1->usFlags, 1519 1509 popnd1, NULL); 1520 1510 } 1521 1511 popndTmp = popnd1; 1522 1512 aoptyTmp = aoptyTable1; 1523 1513 uSizemaskTmp = uSizemaskTable1; 1524 1514 L1: 1525 1515 if (aoptyTmp == _imm) … … 1621 1611 popnd2, popnd1); 1622 1612 } 1623 1613 popndTmp = popnd1; 1624 1614 aoptyTmp = aoptyTable1; 1625 1615 uSizemaskTmp = uSizemaskTable1; 1626 1616 } 1627 1617 else 1628 1618 { 1629 1619 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1630 1620 amodTable1 == _normal && 1631 1621 (uRegmaskTable1 & _rplus_r))) 1632 1622 { 1633 1623 unsigned reg = popnd1->base->val; 1634 1624 if (reg & 8) 1635 1625 { reg &= 7; 1636 1626 pc->Irex |= REX_B; 1637 1627 assert(I64); 1638 1628 } 1639 1629 if (asmstate.ucItype == ITfloat) 1640 1630 
pc->Irm += reg; 1641 else if (pc->Iop == 0x0f)1642 pc->Iop2 += reg;1643 1631 else 1644 1632 pc->Iop += reg; 1645 1633 #ifdef DEBUG 1646 1634 auchOpcode[usIdx-1] += reg; 1647 1635 #endif 1648 1636 } 1649 1637 else 1650 1638 if (((aoptyTable2 == _reg || aoptyTable2 == _float) && 1651 1639 amodTable2 == _normal && 1652 1640 (uRegmaskTable2 & _rplus_r))) 1653 1641 { 1654 1642 unsigned reg = popnd2->base->val; 1655 1643 if (reg & 8) 1656 1644 { reg &= 7; 1657 1645 pc->Irex |= REX_B; 1658 1646 assert(I64); 1659 1647 } 1660 1648 if (asmstate.ucItype == ITfloat) 1661 1649 pc->Irm += reg; 1662 else if (pc->Iop == 0x0f)1663 pc->Iop2 += reg;1664 1650 else 1665 1651 pc->Iop += reg; 1666 1652 #ifdef DEBUG 1667 1653 auchOpcode[usIdx-1] += reg; 1668 1654 #endif 1669 1655 } 1670 1656 else if (ptb.pptb0->usOpcode == 0xF30FD6 || 1671 1657 ptb.pptb0->usOpcode == 0x0F12 || 1672 1658 ptb.pptb0->usOpcode == 0x0F16 || 1673 1659 ptb.pptb0->usOpcode == 0x660F50 || 1674 1660 ptb.pptb0->usOpcode == 0x0F50 || 1675 1661 ptb.pptb0->usOpcode == 0x660FD7 || 1676 1662 ptb.pptb0->usOpcode == 0x0FD7) 1677 1663 { 1678 1664 asm_make_modrm_byte( 1679 1665 #ifdef DEBUG 1680 1666 auchOpcode, &usIdx, 1681 1667 #endif 1682 1668 pc, 1683 1669 ptb.pptb1->usFlags, … … 1721 1707 ptb.pptb1->usFlags, 1722 1708 popnd2, popnd1); 1723 1709 popndTmp = popnd3; 1724 1710 aoptyTmp = aoptyTable3; 1725 1711 uSizemaskTmp = uSizemaskTable3; 1726 1712 } 1727 1713 else { 1728 1714 1729 1715 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1730 1716 amodTable1 == _normal && 1731 1717 (uRegmaskTable1 &_rplus_r))) 1732 1718 { 1733 1719 unsigned reg = popnd1->base->val; 1734 1720 if (reg & 8) 1735 1721 { reg &= 7; 1736 1722 pc->Irex |= REX_B; 1737 1723 assert(I64); 1738 1724 } 1739 1725 if (asmstate.ucItype == ITfloat) 1740 1726 pc->Irm += reg; 1741 else if (pc->Iop == 0x0f)1742 pc->Iop2 += reg;1743 1727 else 1744 1728 pc->Iop += reg; 1745 1729 #ifdef DEBUG 1746 1730 auchOpcode[usIdx-1] += reg; 1747 1731 #endif 1748 1732 } 
1749 1733 else 1750 1734 if (((aoptyTable2 == _reg || aoptyTable2 == _float) && 1751 1735 amodTable2 == _normal && 1752 1736 (uRegmaskTable2 &_rplus_r))) 1753 1737 { 1754 1738 unsigned reg = popnd1->base->val; 1755 1739 if (reg & 8) 1756 1740 { reg &= 7; 1757 1741 pc->Irex |= REX_B; 1758 1742 assert(I64); 1759 1743 } 1760 1744 if (asmstate.ucItype == ITfloat) 1761 1745 pc->Irm += reg; 1762 else if (pc->Iop == 0x0f)1763 pc->Iop2 += reg;1764 1746 else 1765 1747 pc->Iop += reg; 1766 1748 #ifdef DEBUG 1767 1749 auchOpcode[usIdx-1] += reg; 1768 1750 #endif 1769 1751 } 1770 1752 else 1771 1753 asm_make_modrm_byte( 1772 1754 #ifdef DEBUG 1773 1755 auchOpcode, &usIdx, 1774 1756 #endif 1775 1757 pc, 1776 1758 ptb.pptb1->usFlags, 1777 1759 popnd1, popnd2); 1778 1760 1779 1761 popndTmp = popnd3; 1780 1762 aoptyTmp = aoptyTable3; 1781 1763 uSizemaskTmp = uSizemaskTable3; 1782 1764 1783 1765 } 1784 1766 goto L1; 1785 1767 } 1786 1768 L2: 1787 1769 1788 if ((pc->Iop & 0xF8) == 0xD8 &&1770 if ((pc->Iop & ~7) == 0xD8 && 1789 1771 ADDFWAIT() && 1790 1772 !(ptb.pptb0->usFlags & _nfwait)) 1791 1773 pc->Iflags |= CFwait; 1792 1774 else if ((ptb.pptb0->usFlags & _fwait) && 1793 1775 config.target_cpu >= TARGET_80386) 1794 1776 pc->Iflags |= CFwait; 1795 1777 1796 1778 #ifdef DEBUG 1797 1779 if (debuga) 1798 1780 { unsigned u; 1799 1781 1800 1782 for (u = 0; u < usIdx; u++) 1801 1783 printf(" %02X", auchOpcode[u]); 1802 1784 1803 1785 printf("\t%s\t", asm_opstr(pop)); 1804 1786 if (popnd1) 1805 1787 asm_output_popnd(popnd1); 1806 1788 if (popnd2) { 1807 1789 printf(","); 1808 1790 asm_output_popnd(popnd2); trunk/src/backend/cgcod.c
r595 r596 1343 1343 dbg_printf(" block (%p) Btry=%p Bindex=%d\n",b,b->Btry,b->Bindex); 1344 1344 } 1345 1345 #endif 1346 1346 except_index_set(b->Bindex); 1347 1347 if (btry != b->Btry) // exited previous try block 1348 1348 { 1349 1349 except_pop(b,NULL,btry); 1350 1350 btry = b->Btry; 1351 1351 } 1352 1352 if (b->BC == BCtry) 1353 1353 { 1354 1354 except_push(b,NULL,b); 1355 1355 btry = b; 1356 1356 tryidx = except_index_get(); 1357 1357 b->Bcode = cat(nteh_gensindex(tryidx - 1),b->Bcode); 1358 1358 } 1359 1359 1360 1360 stack = NULL; 1361 1361 for (c = b->Bcode; c; c = code_next(c)) 1362 1362 { 1363 if ( c->Iop== ESCAPE)1363 if ((c->Iop & 0xFF) == ESCAPE) 1364 1364 { 1365 1365 c1 = NULL; 1366 switch (c->Iop 2)1366 switch (c->Iop & 0xFF00) 1367 1367 { 1368 1368 case ESCctor: 1369 1369 //printf("ESCctor\n"); 1370 1370 except_push(c,c->IEV1.Vtor,NULL); 1371 1371 goto L1; 1372 1372 1373 1373 case ESCdtor: 1374 1374 //printf("ESCdtor\n"); 1375 1375 except_pop(c,c->IEV1.Vtor,NULL); 1376 1376 L1: if (config.flags2 & CFG2seh) 1377 1377 { 1378 1378 c1 = nteh_gensindex(except_index_get() - 1); 1379 1379 code_next(c1) = code_next(c); 1380 1380 code_next(c) = c1; 1381 1381 } 1382 1382 break; 1383 1383 case ESCmark: 1384 1384 //printf("ESCmark\n"); 1385 1385 idx = except_index_get(); 1386 1386 list_prependdata(&stack,idx); 1387 1387 except_mark(); 1388 1388 break; 1389 1389 case ESCrelease: 1390 1390 //printf("ESCrelease\n"); 1391 1391 idx = list_data(stack); 1392 1392 list_pop(&stack); 1393 1393 if (idx != except_index_get()) 1394 1394 { 1395 1395 if (config.flags2 & CFG2seh) 1396 1396 { c1 = nteh_gensindex(idx - 1); 1397 1397 code_next(c1) = code_next(c); 1398 1398 code_next(c) = c1; 1399 1399 } 1400 1400 else 1401 1401 { except_pair_append(c,idx - 1); 1402 c->Iop 2 =ESCoffset;1402 c->Iop = ESCAPE | ESCoffset; 1403 1403 } 1404 1404 } 1405 1405 except_release(); 1406 1406 break; 1407 1407 case ESCmark2: 1408 1408 //printf("ESCmark2\n"); 1409 1409 except_mark(); 1410 1410 
break; 1411 1411 case ESCrelease2: 1412 1412 //printf("ESCrelease2\n"); 1413 1413 except_release(); 1414 1414 break; 1415 1415 } 1416 1416 } 1417 1417 } 1418 1418 assert(stack == NULL); 1419 1419 b->Bendindex = except_index_get(); 1420 1420 1421 1421 if (b->BC != BCret && b->BC != BCretexp) 1422 1422 lastidx = b->Bendindex; trunk/src/backend/cgen.c
r595 r596 49 49 c = c->next; 50 50 c->Iflags |= flag; 51 51 } 52 52 } 53 53 54 54 /***************************** 55 55 * Set rex bits on last code in list. 56 56 */ 57 57 58 58 void code_orrex(code *c,unsigned rex) 59 59 { 60 60 if (rex && c) 61 61 { while (c->next) 62 62 c = c->next; 63 63 c->Irex |= rex; 64 64 } 65 65 } 66 66 67 67 /************************************** 68 68 * Set the opcode fields in cs. 69 * This is ridiculously complex, cs.Iop should70 * just be an unsigned.71 69 */ 72 70 code *setOpcode(code *c, code *cs, unsigned op) 73 71 { 74 cs->Iflags = 0; 75 if (op > 0xFF) 76 { 77 switch (op & 0xFF0000) 78 { 79 case 0: 80 break; 81 case 0x660000: 82 cs->Iflags = CFopsize; 83 break; 84 case 0xF20000: // REPNE 85 case 0xF30000: // REP/REPE 86 c = gen1(c, op >> 16); 87 break; 88 } 89 cs->Iop = op >> 8; 90 cs->Iop2 = op & 0xFF; 91 } 92 else 93 cs->Iop = op; 72 cs->Iop = op; 94 73 return c; 95 74 } 96 75 97 76 /***************************** 98 77 * Concatenate two code lists together. Return pointer to result. 
99 78 */ 100 79 101 80 #if TX86 && __INTSIZE == 4 && __SC__ 102 81 __declspec(naked) code * __pascal cat(code *c1,code *c2) 103 82 { 104 83 _asm 105 84 { 106 85 mov EAX,c1-4[ESP] 107 86 mov ECX,c2-4[ESP] 108 87 test EAX,EAX 109 88 jne L6D 110 89 mov EAX,ECX 111 90 ret 8 112 91 113 92 L6D: mov EDX,EAX … … 208 187 209 188 code *gen1(code *c,unsigned op) 210 189 { code *ce,*cstart; 211 190 212 191 ce = code_calloc(); 213 192 ce->Iop = op; 214 193 if (c) 215 194 { cstart = c; 216 195 while (code_next(c)) c = code_next(c); /* find end of list */ 217 196 code_next(c) = ce; /* link into list */ 218 197 return cstart; 219 198 } 220 199 return ce; 221 200 } 222 201 223 202 code *gen2(code *c,unsigned op,unsigned rm) 224 203 { code *ce,*cstart; 225 204 226 205 cstart = ce = code_calloc(); 227 206 /*cxcalloc++;*/ 228 if (op > 0xFF) 229 { ce->Iop = op >> 8; 230 ce->Iop2 = op & 0xFF; 231 } 232 else 233 ce->Iop = op; 207 ce->Iop = op; 234 208 ce->Iea = rm; 235 209 if (c) 236 210 { cstart = c; 237 211 while (code_next(c)) c = code_next(c); /* find end of list */ 238 212 code_next(c) = ce; /* link into list */ 239 213 } 240 214 return cstart; 241 215 } 242 216 243 217 code *gen2sib(code *c,unsigned op,unsigned rm,unsigned sib) 244 218 { code *ce,*cstart; 245 219 246 220 cstart = ce = code_calloc(); 247 221 /*cxcalloc++;*/ 248 222 ce->Iop = op; 249 223 ce->Irm = rm; 250 224 ce->Isib = sib; 251 225 ce->Irex = (rm | (sib & (REX_B << 16))) >> 16; 252 226 if (sib & (REX_R << 16)) 253 227 ce->Irex |= REX_X; … … 318 292 { 319 293 if (to == ES) 320 294 c = genregs(c,0x8E,0,from); 321 295 else if (from == ES) 322 296 c = genregs(c,0x8C,0,to); 323 297 else 324 298 c = genregs(c,0x89,from,to); 325 299 } 326 300 return c; 327 301 } 328 302 329 303 /************************** 330 304 * Generate a jump instruction. 
331 305 */ 332 306 333 307 code *genjmp(code *c,unsigned op,unsigned fltarg,block *targ) 334 308 { code cs; 335 309 code *cj; 336 310 code *cnop; 337 311 338 cs.Iop = op ;312 cs.Iop = op & 0xFF; 339 313 cs.Iflags = 0; 340 314 cs.Irex = 0; 341 315 if (op != JMP) /* if not already long branch */ 342 316 cs.Iflags = CFjmp16; /* assume long branch for op = 0x7x */ 343 317 cs.IFL2 = fltarg; /* FLblock (or FLcode) */ 344 318 cs.IEV2.Vblock = targ; /* target block (or code) */ 345 319 if (fltarg == FLcode) 346 320 ((code *)targ)->Iflags |= CFtarg; 347 321 348 322 if (config.flags4 & CFG4fastfloat) // if fast floating point 349 323 return gen(c,&cs); 350 324 351 325 cj = gen(CNIL,&cs); 352 326 switch (op & 0xFF00) /* look at second jump opcode */ 353 327 { 354 328 /* The JP and JNP come from floating point comparisons */ 355 329 case JP << 8: 356 330 cs.Iop = JP; 357 331 gen(cj,&cs); 358 332 break; … … 373 347 } 374 348 return cat(c,cj); 375 349 } 376 350 377 351 code *gencs(code *c,unsigned op,unsigned ea,unsigned FL2,symbol *s) 378 352 { code cs; 379 353 380 354 cs.Iop = op; 381 355 cs.Iea = ea; 382 356 cs.Iflags = 0; 383 357 cs.IFL2 = FL2; 384 358 cs.IEVsym2 = s; 385 359 cs.IEVoffset2 = 0; 386 360 387 361 return gen(c,&cs); 388 362 } 389 363 390 364 code *genc2(code *c,unsigned op,unsigned ea,targ_size_t EV2) 391 365 { code cs; 392 366 393 if (op > 0xFF) 394 { cs.Iop = op >> 8; 395 cs.Iop2 = op & 0xFF; 396 } 397 else 398 cs.Iop = op; 367 cs.Iop = op; 399 368 cs.Iea = ea; 400 369 cs.Iflags = CFoff; 401 370 cs.IFL2 = FLconst; 402 371 cs.IEV2.Vsize_t = EV2; 403 372 return gen(c,&cs); 404 373 } 405 374 406 375 /***************** 407 376 * Generate code. 
408 377 */ 409 378 410 379 code *genc1(code *c,unsigned op,unsigned ea,unsigned FL1,targ_size_t EV1) 411 380 { code cs; 412 381 413 382 assert(FL1 < FLMAX); 414 if (op > 0xFF) 415 { 416 c = setOpcode(c, &cs, op); 417 cs.Iflags |= CFoff; 418 } 419 else 420 { cs.Iop = op; 421 cs.Iflags = CFoff; 422 } 383 cs.Iop = op; 384 cs.Iflags = CFoff; 423 385 cs.Iea = ea; 424 386 cs.IFL1 = FL1; 425 387 cs.IEV1.Vsize_t = EV1; 426 388 return gen(c,&cs); 427 389 } 428 390 429 391 /***************** 430 392 * Generate code. 431 393 */ 432 394 433 395 code *genc(code *c,unsigned op,unsigned ea,unsigned FL1,targ_size_t EV1,unsigned FL2,targ_size_t EV2) 434 396 { code cs; 435 397 436 398 assert(FL1 < FLMAX); 437 assert(op < 256);438 399 cs.Iop = op; 439 400 cs.Iea = ea; 440 401 cs.Iflags = CFoff; 441 402 cs.IFL1 = FL1; 442 403 cs.IEV1.Vsize_t = EV1; 443 404 assert(FL2 < FLMAX); 444 405 cs.IFL2 = FL2; 445 406 cs.IEV2.Vsize_t = EV2; 446 407 return gen(c,&cs); 447 408 } 448 409 449 410 /*************************************** 450 411 * Generate immediate multiply instruction for r1=r2*imm. 451 412 * Optimize it into LEA's if we can. 452 413 */ 453 414 454 415 code *genmulimm(code *c,unsigned r1,unsigned r2,targ_int imm) 455 416 { code cs; 456 417 457 418 // These optimizations should probably be put into pinholeopt() … … 471 432 c = genc2(c,0x69,modregxrmx(3,r1,r2),imm); // IMUL r1,r2,imm 472 433 break; 473 434 } 474 435 return c; 475 436 } 476 437 477 438 /******************************** 478 439 * Generate 'instruction' which is actually a line number. 
479 440 */ 480 441 481 442 code *genlinnum(code *c,Srcpos srcpos) 482 443 { code cs; 483 444 484 445 #if 0 485 446 #if MARS 486 447 printf("genlinnum(Sfilename = %p, Slinnum = %u)\n", srcpos.Sfilename, srcpos.Slinnum); 487 448 #else 488 449 printf("genlinnum(Sfilptr = %p, Slinnum = %u)\n", srcpos.Sfilptr, srcpos.Slinnum); 489 450 #endif 490 451 #endif 491 cs.Iop = ESCAPE; 492 cs.Iop2 = ESClinnum; 452 cs.Iop = ESCAPE | ESClinnum; 493 453 cs.Iflags = 0; 494 454 cs.Irex = 0; 495 455 cs.IFL1 = 0; 496 456 cs.IFL2 = 0; 497 457 cs.IEV2.Vsrcpos = srcpos; 498 458 return gen(c,&cs); 499 459 } 500 460 501 461 /****************************** 502 462 * Append line number to existing code. 503 463 */ 504 464 505 465 void cgen_linnum(code **pc,Srcpos srcpos) 506 466 { 507 467 *pc = genlinnum(*pc,srcpos); 508 468 } 509 469 510 470 /***************************** 511 471 * Prepend line number to existing code. 512 472 */ 513 473 514 474 void cgen_prelinnum(code **pc,Srcpos srcpos) 515 475 { 516 476 *pc = cat(genlinnum(NULL,srcpos),*pc); 517 477 } 518 478 519 479 /******************************** 520 480 * Generate 'instruction' which tells the address resolver that the stack has 521 481 * changed. 522 482 */ 523 483 524 484 code *genadjesp(code *c, int offset) 525 485 { code cs; 526 486 527 487 if (!I16 && offset) 528 488 { 529 cs.Iop = ESCAPE; 530 cs.Iop2 = ESCadjesp; 489 cs.Iop = ESCAPE | ESCadjesp; 531 490 cs.Iflags = 0; 532 491 cs.Irex = 0; 533 492 cs.IEV2.Vint = offset; 534 493 return gen(c,&cs); 535 494 } 536 495 else 537 496 return c; 538 497 } 539 498 540 499 /******************************** 541 500 * Generate 'nop' 542 501 */ 543 502 544 503 code *gennop(code *c) 545 504 { 546 505 return gen1(c,NOP); 547 506 } 548 507 549 508 /****************************** 550 509 * Load CX with the value of _AHSHIFT. trunk/src/backend/cgsched.c
r569 r596 761 761 1,1,5,5, 4,4,0,0, /* F8 */ 762 762 }; 763 763 764 764 static unsigned char uopsx[8] = { 1,1,2,5,1,1,1,5 }; 765 765 766 766 /************************************************ 767 767 * Determine number of micro-ops for Pentium Pro and Pentium II processors. 768 768 * 5 means 'complex'. 769 769 * Doesn't currently handle: 770 770 * floating point 771 771 * MMX 772 772 * 0F opcodes 773 773 * prefix bytes 774 774 */ 775 775 776 776 STATIC int uops(code *c) 777 777 { int n; 778 778 int op; 779 779 int op2; 780 780 781 op = c->Iop; 781 op = c->Iop & 0xFF; 782 if ((c->Iop & 0xFF00) == 0x0F00) 783 op = 0x0F; 782 784 n = insuops[op]; 783 785 if (!n) // if special case 784 786 { unsigned char irm,mod,reg,rm; 785 787 786 788 irm = c->Irm; 787 789 mod = (irm >> 6) & 3; 788 790 reg = (irm >> 3) & 7; 789 791 rm = irm & 7; 790 792 791 793 switch (op) 792 794 { 793 795 case 0x10: 794 796 case 0x11: // ADC rm,r 795 797 case 0x18: 796 798 case 0x19: // SBB rm,r 797 799 n = (mod == 3) ? 2 : 4; 798 800 break; 799 801 800 802 case 0x12: 801 803 case 0x13: // ADC r,rm … … 966 968 n = 4; 967 969 else if (reg == 4 || reg == 5) // MUL/IMUL rm 968 970 n = (mod == 3) ? 3 : 4; 969 971 else if (reg == 2 || reg == 3) // NOT/NEG rm 970 972 n = (mod == 3) ? 1 : 4; 971 973 break; 972 974 973 975 case 0xFF: 974 976 if (reg == 2 || reg == 3 || // CALL rm, CALL m,rm 975 977 reg == 5) // JMP seg:offset 976 978 n = 5; 977 979 else if (reg == 4) 978 980 n = (mod == 3) ? 1 : 2; 979 981 else if (reg == 0 || reg == 1) // INC/DEC rm 980 982 n = (mod == 3) ? 1 : 4; 981 983 else if (reg == 6) // PUSH rm 982 984 n = (mod == 3) ? 3 : 4; 983 985 break; 984 986 985 987 case 0x0F: 986 op2 = c->Iop 2;988 op2 = c->Iop & 0xFF; 987 989 if ((op2 & 0xF0) == 0x80) // Jcc 988 990 { n = 1; 989 991 break; 990 992 } 991 993 if ((op2 & 0xF0) == 0x90) // SETcc 992 994 { n = (mod == 3) ? 
1 : 3; 993 995 break; 994 996 } 995 997 if (op2 == 0xB6 || op2 == 0xB7 || // MOVZX 996 998 op2 == 0xBE || op2 == 0xBF) // MOVSX 997 999 { n = 1; 998 1000 break; 999 1001 } 1000 1002 if (op2 == 0xAF) // IMUL r,m 1001 1003 { n = (mod == 3) ? 1 : 2; 1002 1004 break; 1003 1005 } 1004 1006 break; 1005 1007 } 1006 1008 } … … 1008 1010 n = 5; // copout for now 1009 1011 return n; 1010 1012 } 1011 1013 1012 1014 /****************************************** 1013 1015 * Determine pairing classification. 1014 1016 * Don't deal with floating point, just assume they are all NP (Not Pairable). 1015 1017 * Returns: 1016 1018 * NP,UV,PU,PV optionally OR'd with PE 1017 1019 */ 1018 1020 1019 1021 STATIC int pair_class(code *c) 1020 1022 { unsigned char op; 1021 1023 unsigned char irm,mod,reg,rm; 1022 1024 unsigned a32; 1023 1025 int pc; 1024 1026 1025 1027 // Of course, with Intel this is *never* simple, and Intel's 1026 1028 // documentation is vague about the specifics. 1027 1029 1028 op = c->Iop; 1030 op = c->Iop & 0xFF; 1031 if ((c->Iop & 0xFF00) == 0x0F00) 1032 op = 0x0F; 1029 1033 pc = pentcycl[op]; 1030 1034 a32 = I32; 1031 1035 if (c->Iflags & CFaddrsize) 1032 1036 a32 ^= 1; 1033 1037 irm = c->Irm; 1034 1038 mod = (irm >> 6) & 3; 1035 1039 reg = (irm >> 3) & 7; 1036 1040 rm = irm & 7; 1037 1041 switch (op) 1038 1042 { 1039 1043 case 0x0F: // 2 byte opcode 1040 if ((c->Iop 2 & 0xF0) == 0x80)// if Jcc1044 if ((c->Iop & 0xF0) == 0x80) // if Jcc 1041 1045 pc = PV | PF; 1042 1046 break; 1043 1047 1044 1048 case 0x80: 1045 1049 case 0x81: 1046 1050 case 0x83: 1047 1051 if (reg == 2 || // ADC EA,immed 1048 1052 reg == 3) // SBB EA,immed 1049 1053 { pc = PU; 1050 1054 goto L2; 1051 1055 } 1052 1056 goto L1; // AND/OR/XOR/ADD/SUB/CMP EA,immed 1053 1057 1054 1058 case 0x84: 1055 1059 case 0x85: // TEST EA,reg 1056 1060 if (mod == 3) // TEST reg,reg 1057 1061 pc = UV; 1058 1062 break; 1059 1063 1060 1064 case 0xC0: … … 1167 1171 1168 1172 if (PRO) 1169 1173 { 1170 1174 ci->uops = 
uops(c); 1171 1175 ci->isz = calccodsize(c); 1172 1176 } 1173 1177 else 1174 1178 ci->pair = pair_class(c); 1175 1179 1176 1180 unsigned char op; 1177 1181 unsigned char op2; 1178 1182 unsigned char irm,mod,reg,rm; 1179 1183 unsigned a32; 1180 1184 int pc; 1181 1185 unsigned r,w; 1182 1186 int sz = I32 ? 4 : 2; 1183 1187 1184 1188 ci->r = 0; 1185 1189 ci->w = 0; 1186 1190 ci->a = 0; 1187 op = c->Iop; 1191 op = c->Iop & 0xFF; 1192 if ((c->Iop & 0xFF00) == 0x0F00) 1193 op = 0x0F; 1188 1194 //printf("\tgetinfo %x, op %x \n",c,op); 1189 1195 pc = pentcycl[op]; 1190 1196 a32 = I32; 1191 1197 if (c->Iflags & CFaddrsize) 1192 1198 a32 ^= 1; 1193 1199 if (c->Iflags & CFopsize) 1194 1200 sz ^= 2 | 4; 1195 1201 irm = c->Irm; 1196 1202 mod = (irm >> 6) & 3; 1197 1203 reg = (irm >> 3) & 7; 1198 1204 rm = irm & 7; 1199 1205 1200 1206 r = oprw[op][0]; 1201 1207 w = oprw[op][1]; 1202 1208 1203 1209 switch (op) 1204 1210 { 1205 1211 case 0x50: 1206 1212 case 0x51: 1207 1213 case 0x52: … … 1295 1301 w = F; 1296 1302 #if CLASSINIT2 1297 1303 ci->pair = UV; // it is patched to CMP EAX,0 1298 1304 #else 1299 1305 ci->pair = NP; 1300 1306 #endif 1301 1307 } 1302 1308 break; 1303 1309 1304 1310 case 0xF6: 1305 1311 r = grprw[3][reg][0]; // Grp 3, byte version 1306 1312 w = grprw[3][reg][1]; 1307 1313 break; 1308 1314 1309 1315 case 0xF7: 1310 1316 r = grprw[1][reg][0]; // Grp 3 1311 1317 w = grprw[1][reg][1]; 1312 1318 break; 1313 1319 1314 1320 case 0x0F: 1315 op2 = c->Iop 2;1321 op2 = c->Iop & 0xFF; 1316 1322 if ((op2 & 0xF0) == 0x80) // if Jxx instructions 1317 1323 { 1318 1324 ci->r = F | N; 1319 1325 ci->w = N; 1320 1326 goto Lret; 1321 1327 } 1322 1328 ci->r = N; 1323 1329 ci->w = N; // copout for now 1324 1330 goto Lret; 1325 1331 1326 1332 case 0xD7: // XLAT 1327 1333 ci->a = mAX | mBX; 1328 1334 break; 1329 1335 1330 1336 case 0xFF: 1331 1337 r = grprw[2][reg][0]; // Grp 5 1332 1338 w = grprw[2][reg][1]; 1333 1339 if (reg == 6) // PUSH rm 1334 1340 goto Lpush; 1335 1341 break; 
… … 1744 1750 Lnopair: 1745 1751 return 0; 1746 1752 } 1747 1753 1748 1754 /******************************************** 1749 1755 * Get next instruction worth looking at for scheduling. 1750 1756 * Returns: 1751 1757 * NULL no more instructions 1752 1758 */ 1753 1759 1754 1760 STATIC code * cnext(code *c) 1755 1761 { 1756 1762 while (1) 1757 1763 { 1758 1764 c = code_next(c); 1759 1765 if (!c) 1760 1766 break; 1761 1767 if (c->Iflags & (CFtarg | CFtarg2)) 1762 1768 break; 1763 1769 if (!(c->Iop == NOP || 1764 (c->Iop == ESCAPE && c->Iop2 ==ESClinnum)))1770 c->Iop == (ESCAPE | ESClinnum))) 1765 1771 break; 1766 1772 } 1767 1773 return c; 1768 1774 } 1769 1775 1770 1776 /****************************************** 1771 1777 * Instruction scheduler. 1772 1778 * Input: 1773 1779 * c list of instructions to schedule 1774 1780 * scratch scratch registers we can use 1775 1781 * Returns: 1776 1782 * revised list of scheduled instructions 1777 1783 */ 1778 1784 1779 1785 /////////////////////////////////// 1780 1786 // Determine if c1 and c2 are swappable. 1781 1787 // c1 comes before c2. 
1782 1788 // If they do not conflict 1783 1789 // return 0 1784 1790 // If they do conflict … … 1867 1873 w2 &= ~F; // remove conflict 1868 1874 goto L1; // and try again 1869 1875 } 1870 1876 #endif 1871 1877 // If other than the memory reference is a conflict 1872 1878 if (w1 & r2 & ~mMEM || (r1 | w1) & w2 & ~mMEM) 1873 1879 { if (i) printf("\t1\n"); 1874 1880 if (i) printf("r1=%x, w1=%x, a1=%x, sz1=%d, r2=%x, w2=%x, a2=%x, sz2=%d\n",r1,w1,a1,sz1,r2,w2,a2,sz2); 1875 1881 goto Lconflict; 1876 1882 } 1877 1883 1878 1884 // If referring to distinct types, then no dependency 1879 1885 if (c1->Irex && c2->Irex && c1->Irex != c2->Irex) 1880 1886 goto Lswap; 1881 1887 1882 1888 ifl1 = c1->IFL1; 1883 1889 ifl2 = c2->IFL1; 1884 1890 1885 1891 // Special case: Allow indexed references using registers other than 1886 1892 // ESP and EBP to be swapped with PUSH instructions 1887 if (((c1->Iop & 0xF8) == 0x50 ||// PUSH reg1893 if (((c1->Iop & ~7) == 0x50 || // PUSH reg 1888 1894 c1->Iop == 0x6A || // PUSH imm8 1889 1895 c1->Iop == 0x68 || // PUSH imm16/imm32 1890 1896 (c1->Iop == 0xFF && ci1->reg == 6) // PUSH EA 1891 1897 ) && 1892 1898 ci2->flags & CIFLea && !(a2 & mSP) && 1893 1899 !(a2 & mBP && (long)c2->IEVpointer1 < 0) 1894 1900 ) 1895 1901 { 1896 1902 if (c1->Iop == 0xFF) 1897 1903 { 1898 1904 if (!(w2 & mMEM)) 1899 1905 goto Lswap; 1900 1906 } 1901 1907 else 1902 1908 goto Lswap; 1903 1909 } 1904 1910 1905 1911 // Special case: Allow indexed references using registers other than 1906 1912 // ESP and EBP to be swapped with PUSH instructions 1907 if (((c2->Iop & 0xF8) == 0x50 ||// PUSH reg1913 if (((c2->Iop & ~7) == 0x50 || // PUSH reg 1908 1914 c2->Iop == 0x6A || // PUSH imm8 1909 1915 c2->Iop == 0x68 || // PUSH imm16/imm32 1910 1916 (c2->Iop == 0xFF && ci2->reg == 6) // PUSH EA 1911 1917 ) && 1912 1918 ci1->flags & CIFLea && !(a1 & mSP) && 1913 1919 !(a2 & mBP && (long)c2->IEVpointer1 < 0) 1914 1920 ) 1915 1921 { 1916 1922 if (c2->Iop == 0xFF) 1917 1923 { 1918 1924 if 
(!(w1 & mMEM)) 1919 1925 goto Lswap; 1920 1926 } 1921 1927 else 1922 1928 goto Lswap; 1923 1929 } 1924 1930 1925 1931 // If not both an EA addressing mode, conflict 1926 1932 if (!(ci1->flags & ci2->flags & CIFLea)) 1927 1933 { if (i) printf("\t2\n"); … … 2282 2288 ) 2283 2289 { 2284 2290 movesp = 1; // this is a MOV reg2,offset[ESP] 2285 2291 offset = (signed char)c->IEVpointer1; 2286 2292 reg2 = (c->Irm >> 3) & 7; 2287 2293 } 2288 2294 2289 2295 2290 2296 // Start at tblmax, and back up until we get a conflict 2291 2297 ic = -1; 2292 2298 imin = 0; 2293 2299 for (i = tblmax; i >= 0; i--) 2294 2300 { Cinfo *cit; 2295 2301 2296 2302 cit = tbl[i]; 2297 2303 if (!cit) 2298 2304 continue; 2299 2305 2300 2306 // Look for special case swap 2301 2307 if (movesp && 2302 (cit->c->Iop & 0xF8) == 0x50 &&// if PUSH reg12308 (cit->c->Iop & ~7) == 0x50 && // if PUSH reg1 2303 2309 (cit->c->Iop & 7) != reg2 && // if reg1 != reg2 2304 2310 ((signed char)c->IEVpointer1) >= -cit->spadjust 2305 2311 ) 2306 2312 { 2307 2313 c->IEVpointer1 += cit->spadjust; 2308 2314 //printf("\t1, spadjust = %d, ptr = x%x\n",cit->spadjust,c->IEVpointer1); 2309 2315 continue; 2310 2316 } 2311 2317 2312 2318 if (movesp && 2313 2319 cit->c->Iop == 0x83 && 2314 2320 cit->c->Irm == modregrm(3,5,SP) && // if SUB ESP,offset 2315 2321 cit->c->IFL2 == FLconst && 2316 2322 ((signed char)c->IEVpointer1) >= -cit->spadjust 2317 2323 ) 2318 2324 { 2319 2325 //printf("\t2, spadjust = %d\n",cit->spadjust); 2320 2326 c->IEVpointer1 += cit->spadjust; 2321 2327 continue; 2322 2328 } … … 2505 2511 c->IEVpointer1 -= cit->spadjust; 2506 2512 //printf("\t3, spadjust = %d, ptr = x%x\n",cit->spadjust,c->IEVpointer1); 2507 2513 } 2508 2514 } 2509 2515 } 2510 2516 if (i >= tblmax) 2511 2517 tblmax = i + 1; 2512 2518 2513 2519 // Now do a hack. Look back at immediately preceding instructions, 2514 2520 // and see if we can swap with a push. 
2515 2521 if (0 && movesp) 2516 2522 { int j; 2517 2523 2518 2524 while (1) 2519 2525 { 2520 2526 for (j = 1; i > j; j++) 2521 2527 if (tbl[i - j]) 2522 2528 break; 2523 2529 2524 2530 if (i >= j && tbl[i - j] && 2525 (tbl[i - j]->c->Iop & 0xF8) == 0x50 && // if PUSH reg12531 (tbl[i - j]->c->Iop & ~7) == 0x50 && // if PUSH reg1 2526 2532 (tbl[i - j]->c->Iop & 7) != reg2 && // if reg1 != reg2 2527 2533 (signed char)c->IEVpointer1 >= REGSIZE) 2528 2534 { 2529 2535 //printf("\t-4 prec, i-j=%d, i=%d\n",i-j,i); 2530 2536 assert((unsigned)i < TBLMAX); 2531 2537 assert((unsigned)(i - j) < TBLMAX); 2532 2538 tbl[i] = tbl[i - j]; 2533 2539 tbl[i - j] = ci; 2534 2540 i -= j; 2535 2541 c->IEVpointer1 -= REGSIZE; 2536 2542 } 2537 2543 else 2538 2544 break; 2539 2545 } 2540 2546 } 2541 2547 2542 2548 //printf("\tinsert\n"); 2543 2549 return 1; 2544 2550 } 2545 2551 … … 2647 2653 * Returns: 2648 2654 * next instruction (the tail) or 2649 2655 * NULL for no more instructions 2650 2656 */ 2651 2657 2652 2658 STATIC code * csnip(code *c) 2653 2659 { code **pc; 2654 2660 unsigned iflags; 2655 2661 2656 2662 if (c) 2657 2663 { iflags = c->Iflags & CFclassinit; 2658 2664 while (1) 2659 2665 { 2660 2666 pc = &code_next(c); 2661 2667 c = *pc; 2662 2668 if (!c) 2663 2669 break; 2664 2670 if (c->Iflags & (CFtarg | CFtarg2)) 2665 2671 break; 2666 2672 if (!(c->Iop == NOP || 2667 (c->Iop == ESCAPE && c->Iop2 ==ESClinnum) ||2673 c->Iop == (ESCAPE | ESClinnum) || 2668 2674 c->Iflags & iflags)) 2669 2675 break; 2670 2676 } 2671 2677 *pc = NULL; 2672 2678 } 2673 2679 return c; 2674 2680 } 2675 2681 2676 2682 2677 2683 /****************************** 2678 2684 * Schedule Pentium instructions, 2679 2685 * based on Steve Russell's algorithm. 
2680 2686 */ 2681 2687 2682 2688 code *schedule(code *c,regm_t scratch) 2683 2689 { 2684 2690 code *cresult = NULL; 2685 2691 code **pctail = &cresult; 2686 2692 Schedule sch; 2687 2693 2688 2694 sch.initialize(0); // initialize scheduling table 2689 2695 while (c) 2690 2696 { 2691 if ((c->Iop == NOP || c->Iop== ESCAPE || c->Iflags & CFclassinit) &&2697 if ((c->Iop == NOP || (c->Iop & 0xFF) == ESCAPE || c->Iflags & CFclassinit) && 2692 2698 !(c->Iflags & (CFtarg | CFtarg2))) 2693 2699 { code *cn; 2694 2700 2695 2701 // Just append this instruction to pctail and go to the next one 2696 2702 *pctail = c; 2697 2703 cn = code_next(c); 2698 2704 code_next(c) = NULL; 2699 2705 pctail = &code_next(c); 2700 2706 c = cn; 2701 2707 continue; 2702 2708 } 2703 2709 2704 2710 //printf("init\n"); 2705 2711 sch.initialize(sch.fpustackused); // initialize scheduling table 2706 2712 2707 2713 while (c) 2708 2714 { 2709 2715 //printf("insert %p\n",c); 2710 2716 if (!sch.stage(c)) // store c in scheduling table 2711 2717 break; … … 2834 2840 code *c; 2835 2841 code *c1,*c2,*c3; 2836 2842 unsigned r1,r2; 2837 2843 unsigned mod,reg,rm; 2838 2844 2839 2845 //printf("peephole\n"); 2840 2846 for (c = cstart; c; c = c1) 2841 2847 { unsigned char rmi; 2842 2848 unsigned char rmn; 2843 2849 2844 2850 //c->print(); 2845 2851 c1 = cnext(c); 2846 2852 Ln: 2847 2853 if (!c1) 2848 2854 break; 2849 2855 if (c1->Iflags & (CFtarg | CFtarg2)) 2850 2856 continue; 2851 2857 2852 2858 // Do: 2853 2859 // PUSH reg 2854 if (I32 && (c->Iop & 0xF8) == 0x50)2860 if (I32 && (c->Iop & ~7) == 0x50) 2855 2861 { unsigned reg = c->Iop & 7; 2856 2862 2857 2863 // MOV [ESP],reg => NOP 2858 2864 if (c1->Iop == 0x8B && 2859 2865 c1->Irm == modregrm(0,reg,4) && 2860 2866 c1->Isib == modregrm(0,4,SP)) 2861 2867 { c1->Iop = NOP; 2862 2868 continue; 2863 2869 } 2864 2870 2865 2871 // PUSH [ESP] => PUSH reg 2866 2872 if (c1->Iop == 0xFF && 2867 2873 c1->Irm == modregrm(0,6,4) && 2868 2874 c1->Isib == modregrm(0,4,SP)) 2869 
2875 { c1->Iop = 0x50 + reg; 2870 2876 continue; 2871 2877 } 2872 2878 2873 2879 // CMP [ESP],imm => CMP reg,i,, 2874 2880 if (c1->Iop == 0x83 && trunk/src/backend/cod1.c
r595 r596 538 538 539 539 /****************************** 540 540 * Routine to aid in setting things up for gen(). 541 541 * Look for common subexpression. 542 542 * Can handle indirection operators, but not if they're common subs. 543 543 * Input: 544 544 * e -> elem where we get some of the data from 545 545 * cs -> partially filled code to add 546 546 * op = opcode 547 547 * reg = reg field of (mod reg r/m) 548 548 * offset = data to be added to Voffset field 549 549 * keepmsk = mask of registers we must not destroy 550 550 * desmsk = mask of registers destroyed by executing the instruction 551 551 * Returns: 552 552 * pointer to code generated 553 553 */ 554 554 555 555 code *loadea(elem *e,code *cs,unsigned op,unsigned reg,targ_size_t offset, 556 556 regm_t keepmsk,regm_t desmsk) 557 557 { 558 code *c,*cg,*cd ,*cprefix;558 code *c,*cg,*cd; 559 559 560 560 #ifdef DEBUG 561 561 if (debugw) 562 562 printf("loadea: e=%p cs=%p op=x%x reg=%d offset=%lld keepmsk=x%x desmsk=x%x\n", 563 563 e,cs,op,reg,(unsigned long long)offset,keepmsk,desmsk); 564 564 #endif 565 565 566 566 assert(e); 567 567 cs->Iflags = 0; 568 568 cs->Irex = 0; 569 cprefix = NULL; 570 if (op > 0xFF) // if 2 byte opcode 571 cprefix = setOpcode(NULL, cs, op); 572 else 573 cs->Iop = op; 569 cs->Iop = op; 574 570 tym_t tym = e->Ety; 575 571 int sz = tysize(tym); 576 572 577 573 /* Determine if location we want to get is in a register. If so, */ 578 574 /* substitute the register for the EA. */ 579 575 /* Note that operators don't go through this. CSE'd operators are */ 580 576 /* picked up by comsub(). 
*/ 581 577 if (e->Ecount && /* if cse */ 582 578 e->Ecount != e->Ecomsub && /* and cse was generated */ 583 579 op != 0x8D && op != 0xC4 && /* and not an LEA or LES */ 584 580 (op != 0xFF || reg != 3) && /* and not CALLF MEM16 */ 585 581 (op & 0xFFF8) != 0xD8) // and not 8087 opcode 586 582 { 587 583 assert(!EOP(e)); /* can't handle this */ 588 584 regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs 589 585 if (sz > REGSIZE) // value is in 2 or 4 registers 590 586 { 591 587 if (I16 && sz == 8) // value is in 4 registers 592 588 { static regm_t rmask[4] = { mDX,mCX,mBX,mAX }; 593 589 rm &= rmask[offset >> 1]; … … 648 644 cs->Iflags |= CFwait; 649 645 L2: 650 646 cg = getregs(desmsk); /* save any regs we destroy */ 651 647 652 648 /* KLUDGE! fix up DX for divide instructions */ 653 649 cd = CNIL; 654 650 if (op == 0xF7 && desmsk == (mAX|mDX)) /* if we need to fix DX */ 655 651 { if (reg == 7) /* if IDIV */ 656 652 { cd = gen1(cd,0x99); // CWD 657 653 if (I64 && sz == 8) 658 654 code_orrex(cd, REX_W); 659 655 } 660 656 else if (reg == 6) // if DIV 661 657 { cd = genregs(cd,0x33,DX,DX); // XOR DX,DX 662 658 if (I64 && sz == 8) 663 659 code_orrex(cd, REX_W); 664 660 } 665 661 } 666 662 667 663 // Eliminate MOV reg,reg 668 if ((cs->Iop & 0xFC) == 0x88 &&664 if ((cs->Iop & ~3) == 0x88 && 669 665 (cs->Irm & 0xC7) == modregrm(3,0,reg & 7)) 670 666 { 671 667 unsigned r = cs->Irm & 7; 672 668 if (cs->Irex & REX_B) 673 669 r |= 8; 674 670 if (r == reg) 675 671 cs->Iop = NOP; 676 672 } 677 673 678 return cat4(c,cg,cd,gen( cprefix,cs));674 return cat4(c,cg,cd,gen(NULL,cs)); 679 675 } 680 676 681 677 /************************** 682 678 * Get addressing mode. 683 679 */ 684 680 685 681 unsigned getaddrmode(regm_t idxregs) 686 682 { 687 683 unsigned mode; 688 684 689 685 if (I16) 690 686 { 691 687 mode = (idxregs & mBX) ? modregrm(2,0,7) : /* [BX] */ 692 688 (idxregs & mDI) ? modregrm(2,0,5): /* [DI] */ 693 689 (idxregs & mSI) ? 
modregrm(2,0,4): /* [SI] */ 694 690 (assert(0),1); 695 691 } 696 692 else 697 693 { unsigned reg = findreg(idxregs & (ALLREGS | mBP)); 698 694 mode = modregrmx(2,0,reg); … … 791 787 if (tyfloating(ty)) 792 788 obj_fltused(); 793 789 else if (I64 && sz == 8) 794 790 pcs->Irex |= REX_W; 795 791 if (!I16 && sz == SHORTSIZE) 796 792 pcs->Iflags |= CFopsize; 797 793 if (ty & mTYvolatile) 798 794 pcs->Iflags |= CFvolatile; 799 795 c = CNIL; 800 796 switch (fl) 801 797 { 802 798 #if 0 && TARGET_LINUX 803 799 case FLgot: 804 800 case FLgotoff: 805 801 gotref = 1; 806 802 pcs->IEVsym1 = s; 807 803 pcs->IEVoffset1 = e->EV.sp.Voffset; 808 804 if (e->Eoper == OPvar && fl == FLgot) 809 805 { 810 806 code *c1; 811 intsaveop = pcs->Iop;807 unsigned saveop = pcs->Iop; 812 808 idxregs = allregs & ~keepmsk; // get a scratch register 813 809 c = allocreg(&idxregs,®,TYptr); 814 810 pcs->Irm = modregrm(2,reg,BX); // BX has GOT 815 811 pcs->Isib = 0; 816 812 //pcs->Iflags |= CFvolatile; 817 813 pcs->Iop = 0x8B; 818 814 c = gen(c,pcs); // MOV reg,disp[EBX] 819 815 pcs->Irm = modregrm(0,0,reg); 820 816 pcs->IEVoffset1 = 0; 821 817 pcs->Iop = saveop; 822 818 } 823 819 else 824 820 { 825 821 pcs->Irm = modregrm(2,0,BX); // disp[EBX] is addr 826 822 pcs->Isib = 0; 827 823 } 828 824 break; 829 825 #endif 830 826 case FLoper: 831 827 #ifdef DEBUG … … 1716 1712 else if (sz == 8) 1717 1713 code_orrex(ce, REX_W); 1718 1714 } 1719 1715 else 1720 1716 ce = gentstreg(ce,reg); // TEST reg,reg 1721 1717 return ce; 1722 1718 } 1723 1719 if (saveflag || tyfv(tym)) 1724 1720 { 1725 1721 scrregm = ALLREGS & ~regm; /* possible scratch regs */ 1726 1722 ce = allocreg(&scrregm,&scrreg,TYoffset); /* allocate scratch reg */ 1727 1723 if (I32 || sz == REGSIZE * 2) 1728 1724 { code *c; 1729 1725 1730 1726 assert(regm & mMSW && regm & mLSW); 1731 1727 1732 1728 reg = findregmsw(regm); 1733 1729 if (I32) 1734 1730 { 1735 1731 if (tyfv(tym)) 1736 { c = genregs(CNIL,0x0F,scrreg,reg); 1737 c->Iop2 = 0xB7; /* MOVZX 
scrreg,msreg */ 1732 { c = genregs(CNIL,0x0FB7,scrreg,reg); // MOVZX scrreg,msreg 1738 1733 ce = cat(ce,c); 1739 1734 } 1740 1735 else 1741 1736 { ce = genmovreg(ce,scrreg,reg); /* MOV scrreg,msreg */ 1742 1737 if (tym == TYdouble || tym == TYdouble_alias) 1743 1738 gen2(ce,0xD1,modregrm(3,4,scrreg)); /* SHL scrreg,1 */ 1744 1739 } 1745 1740 } 1746 1741 else 1747 1742 { 1748 1743 ce = genmovreg(ce,scrreg,reg); /* MOV scrreg,msreg */ 1749 1744 if (tym == TYfloat) 1750 1745 gen2(ce,0xD1,modregrm(3,4,scrreg)); /* SHL scrreg,1 */ 1751 1746 } 1752 1747 reg = findreglsw(regm); 1753 1748 genorreg(ce,scrreg,reg); /* OR scrreg,lsreg */ 1754 1749 } 1755 1750 else if (sz == 8) 1756 1751 { /* !I32 */ 1757 1752 ce = genmovreg(ce,scrreg,AX); /* MOV scrreg,AX */ … … 3575 3570 /* FALSE: CMP SP,SP (always equal) */ 3576 3571 c = genregs(CNIL,(boolres(e)) ? 0x09 : 0x39,SP,SP); 3577 3572 } 3578 3573 else if (sz <= REGSIZE) 3579 3574 { 3580 3575 if (!I16 && (tym == TYfloat || tym == TYifloat)) 3581 3576 { c = allocreg(&regm,&reg,TYoffset); /* get a register */ 3582 3577 ce = loadea(e,&cs,0x8B,reg,0,0,0); // MOV reg,data 3583 3578 c = cat(c,ce); 3584 3579 ce = gen2(CNIL,0xD1,modregrmx(3,4,reg)); /* SHL reg,1 */ 3585 3580 c = cat(c,ce); 3586 3581 } 3587 3582 else 3588 3583 { cs.IFL2 = FLconst; 3589 3584 cs.IEV2.Vint = 0; 3590 3585 op = (sz == 1) ? 
0x80 : 0x81; 3591 3586 c = loadea(e,&cs,op,7,0,0,0); /* CMP EA,0 */ 3592 3587 3593 3588 // Convert to TEST instruction if EA is a register 3594 3589 // (to avoid register contention on Pentium) 3595 if ((c->Iop & 0xFE) == 0x38 &&3590 if ((c->Iop & ~1) == 0x38 && 3596 3591 (c->Irm & modregrm(3,0,0)) == modregrm(3,0,0) 3597 3592 ) 3598 3593 { c->Iop = (c->Iop & 1) | 0x84; 3599 3594 code_newreg(c, c->Irm & 7); 3600 3595 if (c->Irex & REX_B) 3601 3596 c->Irex = (c->Irex & ~REX_B) | REX_R; 3602 3597 } 3603 3598 } 3604 3599 } 3605 3600 else if (sz < 8) 3606 3601 { 3607 3602 c = allocreg(&regm,&reg,TYoffset); /* get a register */ 3608 3603 if (I32) // it's a 48 bit pointer 3609 3604 ce = loadea(e,&cs,0x0FB7,reg,REGSIZE,0,0); /* MOVZX reg,data+4 */ 3610 3605 else 3611 3606 { ce = loadea(e,&cs,0x8B,reg,REGSIZE,0,0); /* MOV reg,data+2 */ 3612 3607 if (tym == TYfloat || tym == TYifloat) // dump sign bit 3613 3608 gen2(ce,0xD1,modregrm(3,4,reg)); /* SHL reg,1 */ 3614 3609 } 3615 3610 c = cat(c,ce); trunk/src/backend/cod2.c
r589 r596 1697 1697 freenode(e2); 1698 1698 c = cat6(cc,c,c2,cnop1,fixresult(e,retregs,pretregs),NULL); 1699 1699 goto Lret; 1700 1700 } 1701 1701 1702 1702 cnop1 = gennop(CNIL); 1703 1703 cnop2 = gennop(CNIL); /* dummy target addresses */ 1704 1704 c = logexp(e1,FALSE,FLcode,cnop1); /* evaluate condition */ 1705 1705 regconold = regcon; 1706 1706 stackusedold = stackused; 1707 1707 stackpushold = stackpush; 1708 1708 memcpy(_8087old,_8087elems,sizeof(_8087elems)); 1709 1709 c1 = codelem(e21,pretregs,FALSE); 1710 1710 1711 1711 #if SCPP 1712 1712 if (CPP && e2->Eoper == OPcolon2) 1713 1713 { code cs; 1714 1714 1715 1715 // This is necessary so that any cleanup code on one branch 1716 1716 // is redone on the other branch. 1717 cs.Iop = ESCAPE; 1718 cs.Iop2 = ESCmark2; 1717 cs.Iop = ESCAPE | ESCmark2; 1719 1718 cs.Iflags = 0; 1720 1719 cs.Irex = 0; 1721 1720 c1 = cat(gen(CNIL,&cs),c1); 1722 cs.Iop 2 =ESCrelease2;1721 cs.Iop = ESCAPE | ESCrelease2; 1723 1722 c1 = gen(c1,&cs); 1724 1723 } 1725 1724 #endif 1726 1725 1727 1726 regconsave = regcon; 1728 1727 regcon = regconold; 1729 1728 1730 1729 stackpushsave = stackpush; 1731 1730 stackpush = stackpushold; 1732 1731 1733 1732 stackusedsave = stackused; 1734 1733 stackused = stackusedold; 1735 1734 1736 1735 memcpy(_8087save,_8087elems,sizeof(_8087elems)); 1737 1736 memcpy(_8087elems,_8087old,sizeof(_8087elems)); 1738 1737 1739 1738 *pretregs |= psw; /* PSW bit may have been trashed */ 1740 1739 c2 = codelem(e22,pretregs,FALSE); /* use same regs as E1 */ 1741 1740 andregcon(&regconold); 1742 1741 andregcon(&regconsave); … … 2412 2411 { 2413 2412 if (!I16 && tym == TYfloat) 2414 2413 { retregs = ALLREGS & ~idxregs; 2415 2414 c = cat(c,allocreg(&retregs,&reg,TYfloat)); 2416 2415 cs.Iop = 0x8B; 2417 2416 code_newreg(&cs,reg); 2418 2417 ce = gen(CNIL,&cs); // MOV reg,lsw 2419 2418 gen2(ce,0xD1,modregrmx(3,4,reg)); // SHL reg,1 2420 2419 } 2421 2420 else if (sz <= REGSIZE) 2422 2421 { 2423 2422 cs.Iop = 0x81 ^ byte; 2424 2423 cs.Irm 
|= modregrm(0,7,0); 2425 2424 cs.IFL2 = FLconst; 2426 2425 cs.IEV2.Vint = 0; 2427 2426 ce = gen(CNIL,&cs); /* CMP [idx],0 */ 2428 2427 } 2429 2428 else if (!I16 && sz == REGSIZE + 2) // if far pointer 2430 2429 { retregs = ALLREGS & ~idxregs; 2431 2430 c = cat(c,allocreg(&retregs,&reg,TYint)); 2432 cs.Iop = 0x0F; 2433 cs.Iop2 = 0xB7; 2431 cs.Iop = 0x0FB7; 2434 2432 cs.Irm |= modregrm(0,reg,0); 2435 2433 getlvalue_msw(&cs); 2436 2434 ce = gen(CNIL,&cs); /* MOVZX reg,msw */ 2437 2435 goto L4; 2438 2436 } 2439 2437 else if (sz <= 2 * REGSIZE) 2440 2438 { retregs = ALLREGS & ~idxregs; 2441 2439 c = cat(c,allocreg(&retregs,&reg,TYint)); 2442 2440 cs.Iop = 0x8B; 2443 2441 cs.Irm |= modregrm(0,reg,0); 2444 2442 getlvalue_msw(&cs); 2445 2443 ce = gen(CNIL,&cs); /* MOV reg,msw */ 2446 2444 if (I32) 2447 2445 { if (tym == TYdouble || tym == TYdouble_alias) 2448 2446 gen2(ce,0xD1,modregrm(3,4,reg)); // SHL reg,1 2449 2447 } 2450 2448 else if (tym == TYfloat) 2451 2449 gen2(ce,0xD1,modregrm(3,4,reg)); /* SHL reg,1 */ 2452 2450 L4: cs.Iop = 0x0B; 2453 2451 getlvalue_lsw(&cs); … … 4514 4512 if (0 && config.exe == EX_NT) 4515 4513 { unsigned idx; 4516 4514 4517 4515 idx = except_index_get(); 4518 4516 except_mark(); 4519 4517 c = codelem(e->E2,pretregs,FALSE); 4520 4518 if (config.exe == EX_NT && idx != except_index_get()) 4521 4519 { usednteh |= NTEHcleanup; 4522 4520 c = cat(c,nteh_gensindex(idx - 1)); 4523 4521 } 4524 4522 except_release(); 4525 4523 assert(idx == except_index_get()); 4526 4524 } 4527 4525 else 4528 4526 { 4529 4527 #if 0 4530 4528 usednteh |= EHcleanup; 4531 4529 if (config.exe == EX_NT) 4532 4530 usednteh |= NTEHcleanup; 4533 4531 #endif 4534 cs.Iop = ESCAPE; 4535 cs.Iop2 = ESCmark; 4532 cs.Iop = ESCAPE | ESCmark; 4536 4533 cs.Iflags = 0; 4537 4534 cs.Irex = 0; 4538 4535 c = gen(CNIL,&cs); 4539 4536 c = cat(c,codelem(e->E2,pretregs,FALSE)); 4540 cs.Iop 2 =ESCrelease;4537 cs.Iop = ESCAPE | ESCrelease; 4541 4538 gen(c,&cs); 4542 4539 } 4543 4540 freenode(e->E1); 
4544 4541 break; 4545 4542 default: 4546 4543 assert(0); 4547 4544 } 4548 4545 return c; 4549 4546 #else 4550 4547 return NULL; 4551 4548 #endif 4552 4549 } 4553 4550 4554 4551 code *cdctor(elem *e,regm_t *pretregs) 4555 4552 { 4556 4553 #if SCPP 4557 4554 code cs; 4558 4555 code *c; 4559 4556 4560 4557 #if 0 4561 4558 if (config.exe == EX_NT) 4562 4559 { usednteh |= NTEHcleanup; 4563 4560 except_push(NULL,e,NULL); 4564 4561 return nteh_gensindex(except_index_get() - 1); 4565 4562 } 4566 4563 #else 4567 4564 usednteh |= EHcleanup; 4568 4565 if (config.exe == EX_NT) 4569 4566 usednteh |= NTEHcleanup; 4570 4567 #endif 4571 4568 assert(*pretregs == 0); 4572 cs.Iop = ESCAPE; 4573 cs.Iop2 = ESCctor; 4569 cs.Iop = ESCAPE | ESCctor; 4574 4570 cs.Iflags = 0; 4575 4571 cs.Irex = 0; 4576 4572 cs.IFL1 = FLctor; 4577 4573 cs.IEV1.Vtor = e; 4578 4574 c = gen(CNIL,&cs); 4579 4575 //except_push(c,e,NULL); 4580 4576 return c; 4581 4577 #else 4582 4578 return NULL; 4583 4579 #endif 4584 4580 } 4585 4581 4586 4582 code *cddtor(elem *e,regm_t *pretregs) 4587 4583 { 4588 4584 #if SCPP 4589 4585 code cs; 4590 4586 code *c; 4591 4587 4592 4588 #if 0 4593 4589 if (config.exe == EX_NT) 4594 4590 { usednteh |= NTEHcleanup; 4595 4591 except_pop(NULL,e,NULL); 4596 4592 return nteh_gensindex(except_index_get() - 1); 4597 4593 } 4598 4594 #else 4599 4595 usednteh |= EHcleanup; 4600 4596 if (config.exe == EX_NT) 4601 4597 usednteh |= NTEHcleanup; 4602 4598 #endif 4603 4599 assert(*pretregs == 0); 4604 cs.Iop = ESCAPE; 4605 cs.Iop2 = ESCdtor; 4600 cs.Iop = ESCAPE | ESCdtor; 4606 4601 cs.Iflags = 0; 4607 4602 cs.Irex = 0; 4608 4603 cs.IFL1 = FLdtor; 4609 4604 cs.IEV1.Vtor = e; 4610 4605 c = gen(CNIL,&cs); 4611 4606 //except_pop(c,e,NULL); 4612 4607 return c; 4613 4608 #else 4614 4609 return NULL; 4615 4610 #endif 4616 4611 } 4617 4612 4618 4613 code *cdmark(elem *e,regm_t *pretregs) 4619 4614 { 4620 4615 return NULL; 4621 4616 } 4622 4617 4623 4618 #if !NTEXCEPTIONS 4624 4619 code *cdsetjmp(elem 
*e,regm_t *pretregs) 4625 4620 { trunk/src/backend/cod3.c
r595 r596 201 201 2,2,2,M|3, M|T|E|4,M|3,M|3,M|3, // A8 202 202 M|E|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // B0 203 203 2,2,M|T|E|4,M|3, M|3,M|3,M|3,M|3, // B8 204 204 M|3,M|3,M|T|E|4,M|3, M|T|E|4,M|T|E|4,M|T|E|4,M|3, // C0 205 205 2,2,2,2, 2,2,2,2, // C8 206 206 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D0 207 207 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D8 208 208 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E0 209 209 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E8 210 210 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // F0 211 211 M|3,M|3,M|3,M|3, M|3,M|3,M|3,2 // F8 212 212 }; 213 213 214 214 /************************************ 215 215 * Determine if there is a modregrm byte for code. 216 216 */ 217 217 218 218 int cod3_EA(code *c) 219 219 { unsigned ins; 220 220 221 switch (c->Iop) 222 { case ESCAPE: 223 ins = 0; 224 break; 225 case 0x0F: 226 ins = inssize2[c->Iop2]; 227 break; 228 default: 229 ins = inssize[c->Iop]; 230 break; 231 } 221 unsigned op1 = c->Iop & 0xFF; 222 if (op1 == ESCAPE) 223 ins = 0; 224 else if ((c->Iop & 0xFF00) == 0x0F00) 225 ins = inssize2[op1]; 226 else 227 ins = inssize[op1]; 232 228 return ins & M; 233 229 } 234 230 235 231 /******************************** 236 232 * Fix global variables for 386. 
237 233 */ 238 234 239 235 void cod3_set386() 240 236 { 241 237 // if (I32) 242 238 { unsigned i; 243 239 244 240 inssize[0xA0] = T|5; 245 241 inssize[0xA1] = T|5; 246 242 inssize[0xA2] = T|5; 247 243 inssize[0xA3] = T|5; 248 244 BPRM = 5; /* [EBP] addressing mode */ 249 245 fregsaved = mBP | mBX | mSI | mDI; // saved across function calls 250 246 FLOATREGS = FLOATREGS_32; 251 247 FLOATREGS2 = FLOATREGS2_32; … … 1054 1050 return mBP; 1055 1051 1056 1052 Lcant: 1057 1053 return 0; 1058 1054 } 1059 1055 1060 1056 /*************************************** 1061 1057 * Gen code for OPframeptr 1062 1058 */ 1063 1059 1064 1060 code *cdframeptr(elem *e, regm_t *pretregs) 1065 1061 { 1066 1062 unsigned reg; 1067 1063 code cs; 1068 1064 1069 1065 regm_t retregs = *pretregs & allregs; 1070 1066 if (!retregs) 1071 1067 retregs = allregs; 1072 1068 code *cg = allocreg(&retregs, &reg, TYint); 1073 1069 1074 cs.Iop = ESCAPE; 1075 cs.Iop2 = ESCframeptr; 1070 cs.Iop = ESCAPE | ESCframeptr; 1076 1071 cs.Iflags = 0; 1077 1072 cs.Irex = 0; 1078 1073 cs.Irm = reg; 1079 1074 cg = gen(cg,&cs); 1080 1075 1081 1076 return cat(cg,fixresult(e,retregs,pretregs)); 1082 1077 } 1083 1078 1084 1079 /*************************************** 1085 1080 * Gen code for load of _GLOBAL_OFFSET_TABLE_. 1086 1081 * This value gets cached in the local variable 'localgot'. 
1087 1082 */ 1088 1083 1089 1084 code *cdgot(elem *e, regm_t *pretregs) 1090 1085 { 1091 1086 #if TARGET_OSX 1092 1087 regm_t retregs; 1093 1088 unsigned reg; 1094 1089 code *c; 1095 1090 … … 2303 2298 2304 2299 int branch(block *bl,int flag) 2305 2300 { int bytesaved; 2306 2301 code *c,*cn,*ct; 2307 2302 targ_size_t offset,disp; 2308 2303 targ_size_t csize; 2309 2304 2310 2305 if (!flag) 2311 2306 bl->Bflags |= BFLjmpoptdone; // assume this will be all 2312 2307 c = bl->Bcode; 2313 2308 if (!c) 2314 2309 return 0; 2315 2310 bytesaved = 0; 2316 2311 offset = bl->Boffset; /* offset of start of block */ 2317 2312 while (1) 2318 2313 { unsigned char op; 2319 2314 2320 2315 csize = calccodsize(c); 2321 2316 cn = code_next(c); 2322 2317 op = c->Iop; 2323 if ((op & 0xF0) == 0x70 && c->Iflags & CFjmp16 ||2318 if ((op & ~0x0F) == 0x70 && c->Iflags & CFjmp16 || 2324 2319 op == JMP) 2325 2320 { 2326 2321 L1: 2327 2322 switch (c->IFL2) 2328 2323 { 2329 2324 case FLblock: 2330 2325 if (flag) // no offsets yet, don't optimize 2331 2326 goto L3; 2332 2327 disp = c->IEV2.Vblock->Boffset - offset - csize; 2333 2328 2334 2329 /* If this is a forward branch, and there is an aligned 2335 2330 * block intervening, it is possible that shrinking 2336 2331 * the jump instruction will cause it to be out of 2337 2332 * range of the target. This happens if the alignment 2338 2333 * prevents the target block from moving correspondingly 2339 2334 * closer. 2340 2335 */ 2341 2336 if (disp >= 0x7F-4 && c->IEV2.Vblock->Boffset > offset) 2342 2337 { /* Look for intervening alignment 2343 2338 */ … … 2371 2366 2372 2367 if (!cr) 2373 2368 { // Didn't find it in forward search. 
Try backwards jump 2374 2369 int s = 0; 2375 2370 disp = 0; 2376 2371 for (cr = bl->Bcode; cr != cn; cr = code_next(cr)) 2377 2372 { 2378 2373 assert(cr != NULL); // must have found it 2379 2374 if (cr == ct) 2380 2375 s = 1; 2381 2376 if (s) 2382 2377 disp += calccodsize(cr); 2383 2378 } 2384 2379 } 2385 2380 2386 2381 if (config.flags4 & CFG4optimized && !flag) 2387 2382 { 2388 2383 /* Propagate branch forward past junk */ 2389 2384 while (1) 2390 2385 { if (ct->Iop == NOP || 2391 (ct->Iop == ESCAPE && ct->Iop2 ==ESClinnum))2386 ct->Iop == (ESCAPE | ESClinnum)) 2392 2387 { ct = code_next(ct); 2393 2388 if (!ct) 2394 2389 goto L2; 2395 2390 } 2396 2391 else 2397 2392 { c->IEV2.Vcode = ct; 2398 2393 ct->Iflags |= CFtarg; 2399 2394 break; 2400 2395 } 2401 2396 } 2402 2397 2403 2398 /* And eliminate jmps to jmps */ 2404 2399 if ((op == ct->Iop || ct->Iop == JMP) && 2405 2400 (op == JMP || c->Iflags & CFjmp16)) 2406 2401 { c->IFL2 = ct->IFL2; 2407 2402 c->IEV2.Vcode = ct->IEV2.Vcode; 2408 2403 /*printf("eliminating branch\n");*/ 2409 2404 goto L1; 2410 2405 } 2411 2406 L2: ; … … 2432 2427 if (op == JMP) 2433 2428 { c->Iop = JMPS; // JMP SHORT 2434 2429 bytesaved += I16 ? 1 : 3; 2435 2430 } 2436 2431 else // else Jcond 2437 2432 { c->Iflags &= ~CFjmp16; // a branch is ok 2438 2433 bytesaved += I16 ? 3 : 4; 2439 2434 2440 2435 // Replace a cond jump around a call to a function that 2441 2436 // never returns with a cond jump to that function. 2442 2437 if (config.flags4 & CFG4optimized && 2443 2438 config.target_cpu >= TARGET_80386 && 2444 2439 disp == (I16 ? 
3 : 5) && 2445 2440 cn && 2446 2441 cn->Iop == 0xE8 && 2447 2442 cn->IFL2 == FLfunc && 2448 2443 cn->IEVsym2->Sflags & SFLexit && 2449 2444 !(cn->Iflags & (CFtarg | CFtarg2)) 2450 2445 ) 2451 2446 { 2452 cn->Iop = 0x0F; 2453 cn->Iop2 = (c->Iop & 0x0F) ^ 0x81; 2447 cn->Iop = 0x0F00 | ((c->Iop & 0x0F) ^ 0x81); 2454 2448 c->Iop = NOP; 2455 2449 c->IEV2.Vcode = NULL; 2456 2450 bytesaved++; 2457 2451 2458 2452 // If nobody else points to ct, we can remove the CFtarg 2459 2453 if (flag && ct) 2460 2454 { code *cx; 2461 2455 2462 2456 for (cx = bl->Bcode; 1; cx = code_next(cx)) 2463 2457 { 2464 2458 if (!cx) 2465 2459 { ct->Iflags &= ~CFtarg; 2466 2460 break; 2467 2461 } 2468 2462 if (cx->IEV2.Vcode == ct) 2469 2463 break; 2470 2464 } 2471 2465 } 2472 2466 } 2473 2467 } … … 2560 2554 2561 2555 void assignaddrc(code *c) 2562 2556 { 2563 2557 int sn; 2564 2558 symbol *s; 2565 2559 unsigned char ins,rm; 2566 2560 targ_size_t soff; 2567 2561 targ_size_t base; 2568 2562 2569 2563 base = EBPtoESP; 2570 2564 for (; c; c = code_next(c)) 2571 2565 { 2572 2566 #ifdef DEBUG 2573 2567 if (0) 2574 2568 { printf("assignaddrc()\n"); 2575 2569 c->print(); 2576 2570 } 2577 2571 if (code_next(c) && code_next(code_next(c)) == c) 2578 2572 assert(0); 2579 2573 #endif 2580 if ( c->Iop == 0x0F)2581 ins = inssize2[c->Iop 2];2582 else if ( c->Iop== ESCAPE)2583 { 2584 if (c->Iop 2 == ESCadjesp)2574 if ((c->Iop & 0xFF00) == 0x0F00) 2575 ins = inssize2[c->Iop & 0xFF]; 2576 else if ((c->Iop & 0xFF) == ESCAPE) 2577 { 2578 if (c->Iop == (ESCAPE | ESCadjesp)) 2585 2579 { 2586 2580 //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,c->IEV2.Vint); 2587 2581 EBPtoESP += c->IEV2.Vint; 2588 2582 c->Iop = NOP; 2589 2583 } 2590 if (c->Iop 2 == ESCframeptr)2584 if (c->Iop == (ESCAPE | ESCframeptr)) 2591 2585 { // Convert to load of frame pointer 2592 2586 // c->Irm is the register to use 2593 2587 if (hasframe) 2594 2588 { // MOV reg,EBP 2595 2589 c->Iop = 0x89; 2596 2590 if (c->Irm & 8) 2597 2591 c->Irex |= 
REX_B; 2598 2592 c->Irm = modregrm(3,BP,c->Irm & 7); 2599 2593 } 2600 2594 else 2601 2595 { // LEA reg,EBPtoESP[ESP] 2602 2596 c->Iop = 0x8D; 2603 2597 if (c->Irm & 8) 2604 2598 c->Irex |= REX_R; 2605 2599 c->Irm = modregrm(2,c->Irm & 7,4); 2606 2600 c->Isib = modregrm(0,4,SP); 2607 2601 c->Iflags = CFoff; 2608 2602 c->IFL1 = FLconst; 2609 2603 c->IEV1.Vuns = EBPtoESP; 2610 2604 } 2611 2605 } 2612 2606 if (I64) 2613 2607 c->Irex |= REX_W; 2614 2608 continue; 2615 2609 } 2616 2610 else 2617 ins = inssize[c->Iop ];2611 ins = inssize[c->Iop & 0xFF]; 2618 2612 if (!(ins & M) || 2619 2613 ((rm = c->Irm) & 0xC0) == 0xC0) 2620 2614 goto do2; /* if no first operand */ 2621 2615 if (is32bitaddr(I32,c->Iflags)) 2622 2616 { 2623 2617 2624 2618 if ( 2625 2619 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5)) 2626 2620 ) 2627 2621 goto do2; /* if no first operand */ 2628 2622 } 2629 2623 else 2630 2624 { 2631 2625 if ( 2632 2626 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 2633 2627 ) 2634 2628 goto do2; /* if no first operand */ 2635 2629 } 2636 2630 s = c->IEVsym1; 2637 2631 switch (c->IFL1) … … 2943 2937 if (debugc) 2944 2938 { 2945 2939 printf("+pinholeopt(%p)\n",c); 2946 2940 } 2947 2941 #endif 2948 2942 2949 2943 if (b) 2950 2944 { bn = b->Bnext; 2951 2945 usespace = (config.flags4 & CFG4space && b->BC != BCasm); 2952 2946 useopsize = (I16 || (config.flags4 & CFG4space && b->BC != BCasm)); 2953 2947 } 2954 2948 else 2955 2949 { bn = NULL; 2956 2950 usespace = (config.flags4 & CFG4space); 2957 2951 useopsize = (I16 || config.flags4 & CFG4space); 2958 2952 } 2959 2953 for (; c; c = code_next(c)) 2960 2954 { 2961 2955 L1: 2962 2956 op = c->Iop; 2963 if ( op == 0x0F)2964 ins = inssize2[ c->Iop2];2957 if ((op & 0xFF00) == 0x0F00) 2958 ins = inssize2[op & 0xFF]; 2965 2959 else 2966 ins = inssize[ c->Iop];2960 ins = inssize[op & 0xFF]; 2967 2961 if (ins & M) // if modregrm byte 2968 2962 { int shortop = (c->Iflags & CFopsize) ? 
!I16 : I16; 2969 2963 int local_BPRM = BPRM; 2970 2964 2971 2965 if (c->Iflags & CFaddrsize) 2972 2966 local_BPRM ^= 5 ^ 6; // toggle between 5 and 6 2973 2967 2974 2968 unsigned rm = c->Irm; 2975 2969 unsigned reg = rm & modregrm(0,7,0); // isolate reg field 2976 2970 unsigned ereg = rm & 7; 2977 2971 //printf("c = %p, op = %02x rm = %02x\n", c, op, rm); 2978 2972 2979 2973 /* If immediate second operand */ 2980 2974 if ((ins & T || op == 0xF6 || op == 0xF7) && 2981 2975 c->IFL2 == FLconst) 2982 2976 { 2983 2977 int flags = c->Iflags & CFpsw; /* if want result in flags */ 2984 2978 targ_long u = c->IEV2.Vuns; 2985 2979 if (ins & E) 2986 2980 u = (signed char) u; … … 3037 3031 if ((u & 0xFF) == 0xFF) 3038 3032 goto L3; 3039 3033 } 3040 3034 } 3041 3035 } 3042 3036 if (!shortop && useopsize) 3043 3037 { 3044 3038 if ((u & 0xFFFF0000) == 0xFFFF0000) 3045 3039 { c->Iflags ^= CFopsize; 3046 3040 goto L1; 3047 3041 } 3048 3042 if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX)) 3049 3043 { c->IEVoffset1 += 2; /* address MSW */ 3050 3044 c->IEV2.Vuns >>= 16; 3051 3045 c->Iflags ^= CFopsize; 3052 3046 goto L1; 3053 3047 } 3054 3048 if (rm >= modregrm(3,4,AX)) 3055 3049 { 3056 3050 if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64)) 3057 { c->Iop2 = 0xB6; /* MOVZX */ 3058 c->Iop = 0x0F; 3051 { c->Iop = 0x0FB6; // MOVZX 3059 3052 c->Irm = modregrm(3,ereg,ereg); 3060 3053 if (c->Irex & REX_B) 3061 3054 c->Irex |= REX_R; 3062 3055 goto L1; 3063 3056 } 3064 3057 if (u == 0xFFFF) 3065 { c->Iop2 = 0xB7; /* MOVZX */ 3066 c->Iop = 0x0F; 3058 { c->Iop = 0x0FB7; // MOVZX 3067 3059 c->Irm = modregrm(3,ereg,ereg); 3068 3060 if (c->Irex & REX_B) 3069 3061 c->Irex |= REX_R; 3070 3062 goto L1; 3071 3063 } 3072 3064 } 3073 3065 } 3074 3066 } 3075 3067 } 3076 3068 3077 3069 /* Look for ADD,OR,SUB,XOR with u that we can eliminate */ 3078 3070 if (!flags && 3079 3071 (op == 0x81 || op == 0x80) && 3080 3072 (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || // ADD,OR 3081 3073 reg == 
modregrm(0,5,0) || reg == modregrm(0,6,0)) // SUB, XOR 3082 3074 ) 3083 3075 { 3084 3076 if (u == 0) 3085 3077 { 3086 3078 c->Iop = NOP; … … 3246 3238 if ((ins & R) && (rm & 0xC0) == 0xC0) 3247 3239 { switch (op) 3248 3240 { case 0xC6: op = 0xB0 + ereg; break; 3249 3241 case 0xC7: op = 0xB8 + ereg; break; 3250 3242 case 0xFF: 3251 3243 switch (reg) 3252 3244 { case 6<<3: op = 0x50+ereg; break;/* PUSH*/ 3253 3245 case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/ 3254 3246 case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/ 3255 3247 } 3256 3248 break; 3257 3249 case 0x8F: op = 0x58 + ereg; break; 3258 3250 case 0x87: 3259 3251 if (reg == 0) op = 0x90 + ereg; 3260 3252 break; 3261 3253 } 3262 3254 c->Iop = op; 3263 3255 } 3264 3256 3265 3257 // Look to replace SHL reg,1 with ADD reg,reg 3266 if ((op & 0xFE) == 0xD0 &&3258 if ((op & ~1) == 0xD0 && 3267 3259 (rm & modregrm(3,7,0)) == modregrm(3,4,0) && 3268 3260 config.target_cpu >= TARGET_80486) 3269 3261 { 3270 3262 c->Iop &= 1; 3271 3263 c->Irm = (rm & modregrm(3,0,7)) | (ereg << 3); 3272 3264 if (c->Irex & REX_B) 3273 3265 c->Irex |= REX_R; 3274 3266 if (!(c->Iflags & CFpsw) && !I16) 3275 3267 c->Iflags &= ~CFopsize; 3276 3268 goto L1; 3277 3269 } 3278 3270 3279 3271 /* Look for sign extended modregrm displacement, or 0 3280 3272 * displacement. 
3281 3273 */ 3282 3274 3283 3275 if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp 3284 3276 c->IFL1 == FLconst) // and it's a constant 3285 3277 { 3286 3278 a = c->IEVpointer1; … … 3334 3326 else if (mod == modregrm(1,0,0) && rm == local_BPRM && 3335 3327 c->IFL1 == FLconst && c->IEVpointer1 == 0) 3336 3328 { c->Iop = 0x8B; /* MOV reg,BP */ 3337 3329 c->Irm = modregrm(3,0,BP) + reg; 3338 3330 } 3339 3331 } 3340 3332 3341 3333 // Replace [R13] with 0[R13] 3342 3334 if (c->Irex & REX_B && (c->Irm & modregrm(3,0,5)) == modregrm(0,0,5)) 3343 3335 { 3344 3336 c->Irm |= modregrm(1,0,0); 3345 3337 c->IFL1 = FLconst; 3346 3338 c->IEVpointer1 = 0; 3347 3339 } 3348 3340 } 3349 3341 else 3350 3342 { 3351 3343 switch (op) 3352 3344 { 3353 3345 default: 3354 if ((op & 0xF0) != 0x70)3346 if ((op & ~0x0F) != 0x70) 3355 3347 break; 3356 3348 case JMP: 3357 3349 switch (c->IFL2) 3358 3350 { case FLcode: 3359 3351 if (c->IEV2.Vcode == code_next(c)) 3360 3352 { c->Iop = NOP; 3361 3353 continue; 3362 3354 } 3363 3355 break; 3364 3356 case FLblock: 3365 3357 if (!code_next(c) && c->IEV2.Vblock == bn) 3366 3358 { c->Iop = NOP; 3367 3359 continue; 3368 3360 } 3369 3361 break; 3370 3362 case FLconst: 3371 3363 case FLfunc: 3372 3364 case FLextern: 3373 3365 break; 3374 3366 default: … … 3479 3471 } 3480 3472 #endif 3481 3473 3482 3474 /************************** 3483 3475 * Compute jump addresses for FLcode. 3484 3476 * Note: only works for forward referenced code. 3485 3477 * only direct jumps and branches are detected. 3486 3478 * LOOP instructions only work for backward refs. 
3487 3479 */ 3488 3480 3489 3481 void jmpaddr(code *c) 3490 3482 { code *ci,*cn,*ctarg,*cstart; 3491 3483 targ_size_t ad; 3492 3484 unsigned char op; 3493 3485 3494 3486 //printf("jmpaddr()\n"); 3495 3487 cstart = c; /* remember start of code */ 3496 3488 while (c) 3497 3489 { 3498 3490 op = c->Iop; 3499 if (inssize[op ] & T && /* if second operand */3491 if (inssize[op & 0xFF] & T && // if second operand 3500 3492 c->IFL2 == FLcode && 3501 ((op & 0xF0) == 0x70 || op == JMP || op == JMPS || op == JCXZ))3493 ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ)) 3502 3494 { ci = code_next(c); 3503 3495 ctarg = c->IEV2.Vcode; /* target code */ 3504 3496 ad = 0; /* IP displacement */ 3505 3497 while (ci && ci != ctarg) 3506 3498 { 3507 3499 ad += calccodsize(ci); 3508 3500 ci = code_next(ci); 3509 3501 } 3510 3502 if (!ci) 3511 3503 goto Lbackjmp; // couldn't find it 3512 3504 if (I32 || op == JMP || op == JMPS || op == JCXZ) 3513 3505 c->IEVpointer2 = ad; 3514 3506 else /* else conditional */ 3515 3507 { if (!(c->Iflags & CFjmp16)) /* if branch */ 3516 3508 c->IEVpointer2 = ad; 3517 3509 else /* branch around a long jump */ 3518 3510 { cn = code_next(c); 3519 3511 code_next(c) = code_calloc(); 3520 3512 code_next(code_next(c)) = cn; 3521 3513 c->Iop = op ^ 1; /* converse jmp */ … … 3566 3558 /***************************** 3567 3559 * Calculate and return code size of a code. 3568 3560 * Note that NOPs are sometimes used as markers, but are 3569 3561 * never output. LINNUMs are never output. 3570 3562 * Note: This routine must be fast. Profiling shows it is significant. 
3571 3563 */ 3572 3564 3573 3565 unsigned calccodsize(code *c) 3574 3566 { unsigned size; 3575 3567 unsigned op; 3576 3568 unsigned char rm,mod,ins; 3577 3569 unsigned iflags; 3578 3570 unsigned i32 = I32 || I64; 3579 3571 unsigned a32 = i32; 3580 3572 3581 3573 #ifdef DEBUG 3582 3574 assert((a32 & ~1) == 0); 3583 3575 #endif 3584 3576 iflags = c->Iflags; 3585 3577 op = c->Iop; 3578 if ((op & 0xFF00) == 0x0F00) 3579 op = 0x0F; 3580 else 3581 op &= 0xFF; 3586 3582 switch (op) 3587 3583 { 3588 3584 case 0x0F: 3589 ins = inssize2[c->Iop 2];3585 ins = inssize2[c->Iop & 0xFF]; 3590 3586 size = ins & 7; 3587 if (c->Iop & 0xFF0000) 3588 size++; 3591 3589 break; 3592 3590 3593 3591 case NOP: 3594 3592 case ESCAPE: 3595 3593 size = 0; // since these won't be output 3596 3594 goto Lret2; 3597 3595 3598 3596 case ASM: 3599 3597 if (c->Iflags == CFaddrsize) // kludge for DA inline asm 3600 3598 size = NPTRSIZE; 3601 3599 else 3602 3600 size = c->IEV1.as.len; 3603 3601 goto Lret2; 3604 3602 3605 3603 case 0xA1: 3606 3604 case 0xA3: 3607 3605 if (c->Irex) 3608 3606 { 3609 3607 size = 9; // 64 bit immediate value for MOV to/from RAX 3610 3608 goto Lret; … … 3663 3661 size += 2; 3664 3662 } 3665 3663 } 3666 3664 else if (iflags & CFopsize) 3667 3665 { if (I32) 3668 3666 size -= 2; 3669 3667 else 3670 3668 size += 2; 3671 3669 } 3672 3670 } 3673 3671 if (iflags & CFaddrsize) 3674 3672 { if (!I64) 3675 3673 a32 ^= 1; 3676 3674 size++; 3677 3675 } 3678 3676 if (iflags & CFopsize) 3679 3677 size++; /* +1 for OPSIZE prefix */ 3680 3678 } 3681 3679 } 3682 3680 3683 if ((op & 0xF0) == 0x70)3681 if ((op & ~0x0F) == 0x70) 3684 3682 { if (iflags & CFjmp16) // if long branch 3685 3683 size += I16 ? 
3 : 4; // + 3(4) bytes for JMP 3686 3684 } 3687 3685 else if (ins & M) // if modregrm byte 3688 3686 { 3689 3687 rm = c->Irm; 3690 3688 mod = rm & 0xC0; 3691 3689 if (a32 || I64) 3692 3690 { // 32 bit addressing 3693 3691 if (issib(rm)) 3694 3692 size++; 3695 3693 switch (mod) 3696 3694 { case 0: 3697 3695 if (issib(rm) && (c->Isib & 7) == 5 || 3698 3696 (rm & 7) == 5) 3699 3697 size += 4; /* disp32 */ 3700 3698 if (c->Irex & REX_B && (rm & 7) == 5) 3701 3699 /* Instead of selecting R13, this mode is an [RIP] relative 3702 3700 * address. Although valid, it's redundant, and should not 3703 3701 * be generated. Instead, generate 0[R13] instead of [R13]. … … 3707 3705 case 0x40: 3708 3706 size++; /* disp8 */ 3709 3707 break; 3710 3708 case 0x80: 3711 3709 size += 4; /* disp32 */ 3712 3710 break; 3713 3711 } 3714 3712 } 3715 3713 else 3716 3714 { // 16 bit addressing 3717 3715 if (mod == 0x40) /* 01: 8 bit displacement */ 3718 3716 size++; 3719 3717 else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6)) 3720 3718 size += 2; 3721 3719 } 3722 3720 } 3723 3721 3724 3722 Lret: 3725 3723 if (c->Irex) 3726 3724 { size++; 3727 if (c->Irex & REX_W && (op & 0xF8) == 0xB8)3725 if (c->Irex & REX_W && (op & ~7) == 0xB8) 3728 3726 size += 4; 3729 3727 } 3730 3728 Lret2: 3731 3729 //printf("op = x%02x, size = %d\n",op,size); 3732 3730 return size; 3733 3731 } 3734 3732 3735 3733 /******************************** 3736 3734 * Return !=0 if codes match. 
3737 3735 */ 3738 3736 3739 3737 #if 0 3740 3738 3741 3739 int code_match(code *c1,code *c2) 3742 3740 { code cs1,cs2; 3743 3741 unsigned char ins; 3744 3742 3745 3743 if (c1 == c2) 3746 3744 goto match; 3747 3745 cs1 = *c1; 3748 3746 cs2 = *c2; 3749 3747 if (cs1.Iop != cs2.Iop) 3750 3748 goto nomatch; 3751 3749 switch (cs1.Iop) 3752 { case ESCAPE: 3753 switch (c->Iop2) 3754 { 3755 case ESCctor: 3756 goto nomatch; 3757 case ESCdtor: 3758 goto nomatch; 3759 } 3760 goto match; 3750 { 3751 case ESCAPE | ESCctor: 3752 case ESCAPE | ESCdtor: 3753 goto nomatch; 3754 3761 3755 case NOP: 3762 3756 goto match; 3763 3757 case ASM: 3764 3758 if (cs1.IEV1.as.len == cs2.IEV1.as.len && 3765 3759 memcmp(cs1.IEV1.as.bytes,cs2.IEV1.as.bytes,cs1.EV1.as.len) == 0) 3766 3760 goto match; 3767 3761 else 3768 3762 goto nomatch; 3764 3765 default: 3766 if ((cs1.Iop & 0xFF) == ESCAPE) 3767 goto match; 3768 break; 3769 3769 } 3770 3770 if (cs1.Iflags != cs2.Iflags) 3771 3771 goto nomatch; 3772 3772 3773 ins = inssize[cs1.Iop ];3774 if ( cs1.Iop == 0x0F)3773 ins = inssize[cs1.Iop & 0xFF]; 3774 if ((cs1.Iop & 0xFF00) == 0x0F00) 3775 3775 { 3776 if (cs1.Iop2 != cs2.Iop2) 3777 goto nomatch; 3778 if (cs1.Iop2 == 0x38 || cs1.Iop2 == 0x3A) 3779 { 3780 if (cs1.Iop3 != cs2.Iop3) 3781 goto nomatch; 3782 } 3783 ins = inssize2[cs1.Iop2]; 3776 ins = inssize2[cs1.Iop & 0xFF]; 3784 3777 } 3785 3778 3786 3779 if (ins & M) // if modregrm byte 3787 3780 { 3788 3781 if (cs1.Irm != cs2.Irm) 3789 3782 goto nomatch; 3790 3783 if ((cs1.Irm & 0xC0) == 0xC0) 3791 3784 goto do2; 3792 3785 if (is32bitaddr(I32,cs1.Iflags)) 3793 3786 { 3794 3787 if (issib(cs1.Irm) && cs1.Isib != cs2.Isib) 3795 3788 goto nomatch; 3796 3789 if ( 3797 3790 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5)) 3798 3791 ) 3799 3792 goto do2; /* if no first operand */ 3800 3793 } 3801 3794 else 3802 3795 { 3803 3796 if ( … … 3864 3857 } 3865 3858 3866 3859 unsigned codout(code *c) 3867 3860 { unsigned op; 3868 3861 
unsigned char rm,mod; 3869 3862 unsigned char ins; 3870 3863 code *cn; 3871 3864 unsigned flags; 3872 3865 symbol *s; 3873 3866 3874 3867 #ifdef DEBUG 3875 3868 if (debugc) printf("codout(%p), Coffset = x%llx\n",c,(unsigned long long)Coffset); 3876 3869 #endif 3877 3870 3878 3871 pgen = bytes; 3879 3872 offset = Coffset; 3880 3873 for (; c; c = code_next(c)) 3881 3874 { 3882 3875 #ifdef DEBUG 3883 3876 if (debugc) { printf("off=%02lx, sz=%ld, ",(long)OFFSET(),(long)calccodsize(c)); c->print(); } 3877 unsigned startoffset = OFFSET(); 3884 3878 #endif 3885 3879 op = c->Iop; 3886 ins = inssize[op ];3887 switch (op )3880 ins = inssize[op & 0xFF]; 3881 switch (op & 0xFF) 3888 3882 { case ESCAPE: 3889 switch ( c->Iop2)3883 switch (op & 0xFF00) 3890 3884 { case ESClinnum: 3891 3885 /* put out line number stuff */ 3892 3886 objlinnum(c->IEV2.Vsrcpos,OFFSET()); 3893 3887 break; 3894 3888 #if SCPP 3895 3889 #if 1 3896 3890 case ESCctor: 3897 3891 case ESCdtor: 3898 3892 case ESCoffset: 3899 3893 if (config.exe != EX_NT) 3900 3894 except_pair_setoffset(c,OFFSET() - funcoffset); 3901 3895 break; 3902 3896 case ESCmark: 3903 3897 case ESCrelease: 3904 3898 case ESCmark2: 3905 3899 case ESCrelease2: 3906 3900 break; 3907 3901 #else 3908 3902 case ESCctor: 3909 3903 except_push(OFFSET() - funcoffset,c->IEV1.Vtor,NULL); 3910 3904 break; 3911 3905 case ESCdtor: 3912 3906 except_pop(OFFSET() - funcoffset,c->IEV1.Vtor,NULL); 3913 3907 break; 3914 3908 case ESCmark: 3915 3909 except_mark(); 3916 3910 break; 3917 3911 case ESCrelease: 3918 3912 except_release(); 3919 3913 break; 3920 3914 #endif 3921 3915 #endif 3922 3916 } 3917 #ifdef DEBUG 3918 assert(calccodsize(c) == 0); 3919 #endif 3923 3920 continue; 3924 3921 case NOP: /* don't send them out */ 3922 if (op != NOP) 3923 break; 3924 #ifdef DEBUG 3925 assert(calccodsize(c) == 0); 3926 #endif 3925 3927 continue; 3926 3928 case ASM: 3929 if (op != ASM) 3930 break; 3927 3931 FLUSH(); 3928 3932 if (c->Iflags == CFaddrsize) // kludge 
for DA inline asm 3929 3933 { 3930 3934 do32bit(FLblockoff,&c->IEV1,0); 3931 3935 } 3932 3936 else 3933 3937 { 3934 3938 offset += obj_bytes(cseg,offset,c->IEV1.as.len,c->IEV1.as.bytes); 3935 3939 } 3940 #ifdef DEBUG 3941 assert(calccodsize(c) == c->IEV1.as.len); 3942 #endif 3936 3943 continue; 3937 3944 } 3938 3945 flags = c->Iflags; 3939 3946 3940 3947 // See if we need to flush (don't have room for largest code sequence) 3941 if (pgen - bytes > sizeof(bytes) - ( 4+4+4+4))3948 if (pgen - bytes > sizeof(bytes) - (1+4+4+8+8)) 3942 3949 FLUSH(); 3943 3950 3944 3951 // see if we need to put out prefix bytes 3945 3952 if (flags & (CFwait | CFPREFIX | CFjmp16)) 3946 3953 { int override; 3947 3954 3948 3955 if (flags & CFwait) 3949 3956 GEN(0x9B); // FWAIT 3950 3957 /* ? SEGES : SEGSS */ 3951 3958 switch (flags & CFSEG) 3952 3959 { case CFes: override = SEGES; goto segover; 3953 3960 case CFss: override = SEGSS; goto segover; 3954 3961 case CFcs: override = SEGCS; goto segover; 3955 3962 case CFds: override = SEGDS; goto segover; 3956 3963 case CFfs: override = SEGFS; goto segover; 3957 3964 case CFgs: override = SEGGS; goto segover; 3958 3965 segover: GEN(override); 3959 3966 break; 3960 3967 } 3961 3968 3962 3969 if (flags & CFaddrsize) 3963 3970 GEN(0x67); 3964 3971 3965 3972 // Do this last because of instructions like ADDPD 3966 3973 if (flags & CFopsize) 3967 3974 GEN(0x66); /* operand size */ 3968 3975 3969 if ((op & 0xF0) == 0x70 && flags & CFjmp16) /* long condit jmp */3976 if ((op & ~0x0F) == 0x70 && flags & CFjmp16) /* long condit jmp */ 3970 3977 { 3971 3978 if (!I16) 3972 3979 { // Put out 16 bit conditional jump 3973 c->Iop2 = 0x80 | (op & 0x0F); 3974 c->Iop = op = 0x0F; 3980 c->Iop = op = 0x0F00 | (0x80 | (op & 0x0F)); 3975 3981 } 3976 3982 else 3977 3983 { 3978 3984 cn = code_calloc(); 3979 3985 /*cxcalloc++;*/ 3980 3986 code_next(cn) = code_next(c); 3981 3987 code_next(c) = cn; // link into code 3982 3988 cn->Iop = JMP; // JMP block 3983 3989 cn->IFL2 = 
c->IFL2; 3984 3990 cn->IEV2.Vblock = c->IEV2.Vblock; 3985 3991 c->Iop = op ^= 1; // toggle condition 3986 3992 c->IFL2 = FLconst; 3987 3993 c->IEVpointer2 = I16 ? 3 : 5; // skip over JMP block 3988 3994 } 3989 3995 } 3990 3996 } 3991 3997 3992 3998 if (c->Irex) 3993 3999 GEN(c->Irex | REX); 3994 GEN(op); 3995 if (op == 0x0F) 3996 { 3997 ins = inssize2[c->Iop2]; 3998 GEN(c->Iop2); 3999 if (c->Iop2 == 0x38 || c->Iop2 == 0x3A) 4000 GEN(c->Iop3); 4001 } 4000 if (op > 0xFF) 4001 { 4002 if ((op & 0xFF00) == 0x0F00) 4003 ins = inssize2[op & 0xFF]; 4004 if (op & 0xFF000000) 4005 { GEN(op >> 24); 4006 GEN((op >> 8) & 0xFF); 4007 GEN(op & 0xFF); 4008 GEN((op >> 16) & 0xFF); // yes, this is out of order. For 0x660F3A41 & 40 4009 } 4010 else if (op & 0xFF0000) 4011 { 4012 GEN((op >> 16) & 0xFF); 4013 GEN((op >> 8) & 0xFF); 4014 GEN(op & 0xFF); 4015 } 4016 else 4017 { GEN((op >> 8) & 0xFF); 4018 GEN(op & 0xFF); 4019 } 4020 } 4021 else 4022 GEN(op); 4002 4023 if (ins & M) /* if modregrm byte */ 4003 4024 { 4004 4025 rm = c->Irm; 4005 4026 GEN(rm); 4006 4027 4007 4028 // Look for an address size override when working with the 4008 4029 // MOD R/M and SIB bytes 4009 4030 4010 4031 if (is32bitaddr( I32, flags)) 4011 4032 { 4012 4033 if (issib(rm)) 4013 4034 GEN(c->Isib); 4014 4035 switch (rm & 0xC0) 4015 4036 { case 0x40: 4016 4037 do8bit((enum FL) c->IFL1,&c->IEV1); // 8 bit 4017 4038 break; 4018 4039 case 0: 4019 4040 if (!(issib(rm) && (c->Isib & 7) == 5 || 4020 4041 (rm & 7) == 5)) 4021 4042 break; … … 4078 4099 if (c->Iflags & CFopsize) 4079 4100 goto ptr1616; 4080 4101 else 4081 4102 goto ptr1632; 4082 4103 4083 4104 case 0x68: // PUSH immed32 4084 4105 if ((enum FL)c->IFL2 == FLblock) 4085 4106 { 4086 4107 c->IFL2 = FLblockoff; 4087 4108 goto do32; 4088 4109 } 4089 4110 else 4090 4111 goto case_default; 4091 4112 4092 4113 case 0xE8: // CALL rel 4093 4114 case 0xE9: // JMP rel 4094 4115 flags |= CFselfrel; 4095 4116 goto case_default; 4096 4117 4097 4118 default: 4098 if 
(I64 && (op & 0xF8) == 0xB8 && c->Irex & REX_W)4119 if (I64 && (op & ~7) == 0xB8 && c->Irex & REX_W) 4099 4120 goto do64; 4100 4121 case_default: 4101 4122 if (c->Iflags & CFopsize) 4102 4123 goto do16; 4103 4124 else 4104 4125 goto do32; 4105 4126 break; 4106 4127 } 4107 4128 } 4108 4129 else 4109 4130 { 4110 4131 switch (op) { 4111 4132 case 0xC2: 4112 4133 case 0xCA: 4113 4134 goto do16; 4114 4135 case 0xA0: 4115 4136 case 0xA1: 4116 4137 case 0xA2: 4117 4138 case 0xA3: 4118 4139 if (c->Iflags & CFaddrsize) … … 4161 4182 goto do32; 4162 4183 else 4163 4184 goto do16; 4164 4185 break; 4165 4186 } 4166 4187 } 4167 4188 } 4168 4189 else if (op == 0xF6) /* TEST mem8,immed8 */ 4169 4190 { if ((rm & (7<<3)) == 0) 4170 4191 do8bit((enum FL)c->IFL2,&c->IEV2); 4171 4192 } 4172 4193 else if (op == 0xF7) 4173 4194 { if ((rm & (7<<3)) == 0) /* TEST mem16/32,immed16/32 */ 4174 4195 { 4175 4196 if ((I32 || I64) ^ ((c->Iflags & CFopsize) != 0)) 4176 4197 do32bit((enum FL)c->IFL2,&c->IEV2,flags); 4177 4198 else 4178 4199 do16bit((enum FL)c->IFL2,&c->IEV2,flags); 4179 4200 } 4180 4201 } 4202 #ifdef DEBUG 4203 if (OFFSET() - startoffset != calccodsize(c)) 4204 { 4205 printf("actual: %d, calc: %d\n", (int)(OFFSET() - startoffset), (int)calccodsize(c)); 4206 c->print(); 4207 assert(0); 4208 } 4209 #endif 4181 4210 } 4182 4211 FLUSH(); 4183 4212 Coffset = offset; 4184 4213 //printf("-codout(), Coffset = x%x\n", Coffset); 4185 4214 return offset; /* ending address */ 4186 4215 } 4187 4216 4188 4217 4189 4218 STATIC void do64bit(enum FL fl,union evc *uev,int flags) 4190 4219 { char *p; 4191 4220 symbol *s; 4192 4221 targ_size_t ad; 4193 4222 long tmp; 4194 4223 4195 4224 assert(I64); 4196 4225 switch (fl) 4197 4226 { 4198 4227 case FLconst: 4199 4228 ad = * (targ_size_t *) uev; 4200 4229 L1: … … 4645 4674 #endif 4646 4675 } 4647 4676 } 4648 4677 } 4649 4678 4650 4679 4651 4680 /********************************** 4652 4681 */ 4653 4682 4654 4683 #if HYDRATE 4655 4684 void 
code_hydrate(code **pc) 4656 4685 { 4657 4686 code *c; 4658 4687 unsigned char ins,rm; 4659 4688 enum FL fl; 4660 4689 4661 4690 assert(pc); 4662 4691 while (*pc) 4663 4692 { 4664 4693 c = (code *) ph_hydrate(pc); 4694 if ((c->Iop & 0xFF00) == 0x0F00) 4695 ins = inssize2[c->Iop & 0xFF]; 4696 else 4697 ins = inssize[c->Iop & 0xFF]; 4665 4698 switch (c->Iop) 4666 { case 0x0F: 4667 ins = inssize2[c->Iop2]; 4668 break; 4699 { 4669 4700 default: 4670 ins = inssize[c->Iop]; 4671 break; 4672 case ESCAPE: 4673 switch (c->Iop2) 4674 { case ESClinnum: 4675 srcpos_hydrate(&c->IEV2.Vsrcpos); 4676 break; 4677 case ESCctor: 4678 case ESCdtor: 4679 el_hydrate(&c->IEV1.Vtor); 4680 break; 4681 } 4701 break; 4702 4703 case ESCAPE | ESClinnum: 4704 srcpos_hydrate(&c->IEV2.Vsrcpos); 4682 4705 goto done; 4706 4707 case ESCAPE | ESCctor: 4708 case ESCAPE | ESCdtor: 4709 el_hydrate(&c->IEV1.Vtor); 4710 goto done; 4711 4683 4712 case ASM: 4684 4713 ph_hydrate(&c->IEV1.as.bytes); 4685 4714 goto done; 4686 4715 } 4687 4716 if (!(ins & M) || 4688 4717 ((rm = c->Irm) & 0xC0) == 0xC0) 4689 4718 goto do2; /* if no first operand */ 4690 4719 if (is32bitaddr(I32,c->Iflags)) 4691 4720 { 4692 4721 4693 4722 if ( 4694 4723 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5)) 4695 4724 ) 4696 4725 goto do2; /* if no first operand */ 4697 4726 } 4698 4727 else 4699 4728 { 4700 4729 if ( 4701 4730 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 4702 4731 ) … … 4809 4838 4810 4839 pc = &code_next(c); 4811 4840 } 4812 4841 } 4813 4842 #endif 4814 4843 4815 4844 /********************************** 4816 4845 */ 4817 4846 4818 4847 #if DEHYDRATE 4819 4848 void code_dehydrate(code **pc) 4820 4849 { 4821 4850 code *c; 4822 4851 unsigned char ins,rm; 4823 4852 enum FL fl; 4824 4853 4825 4854 while ((c = *pc) != NULL) 4826 4855 { 4827 4856 ph_dehydrate(pc); 4828 4857 4858 if ((c->Iop & 0xFF00) == 0x0F00) 4859 ins = inssize2[c->Iop & 0xFF]; 4860 else 4861 ins = inssize[c->Iop & 0xFF]; 4829 4862 
switch (c->Iop) 4830 { case 0x0F: 4831 ins = inssize2[c->Iop2]; 4832 break; 4863 { 4833 4864 default: 4834 ins = inssize[c->Iop]; 4835 break; 4836 case ESCAPE: 4837 switch (c->Iop2) 4838 { case ESClinnum: 4839 srcpos_dehydrate(&c->IEV2.Vsrcpos); 4840 break; 4841 case ESCctor: 4842 case ESCdtor: 4843 el_dehydrate(&c->IEV1.Vtor); 4844 break; 4845 } 4865 break; 4866 4867 case ESCAPE | ESClinnum: 4868 srcpos_dehydrate(&c->IEV2.Vsrcpos); 4846 4869 goto done; 4870 4871 case ESCAPE | ESCctor: 4872 case ESCAPE | ESCdtor: 4873 el_dehydrate(&c->IEV1.Vtor); 4874 goto done; 4875 4847 4876 case ASM: 4848 4877 ph_dehydrate(&c->IEV1.as.bytes); 4849 4878 goto done; 4850 4879 } 4851 4880 4852 4881 if (!(ins & M) || 4853 4882 ((rm = c->Irm) & 0xC0) == 0xC0) 4854 4883 goto do2; /* if no first operand */ 4855 4884 if (is32bitaddr(I32,c->Iflags)) 4856 4885 { 4857 4886 4858 4887 if ( 4859 4888 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5)) 4860 4889 ) 4861 4890 goto do2; /* if no first operand */ 4862 4891 } 4863 4892 else 4864 4893 { 4865 4894 if ( 4866 4895 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) … … 4982 5011 */ 4983 5012 4984 5013 #if DEBUG 4985 5014 4986 5015 void WRcodlst(code *c) 4987 5016 { for (; c; c = code_next(c)) 4988 5017 c->print(); 4989 5018 } 4990 5019 4991 5020 void code::print() 4992 5021 { 4993 5022 unsigned op,rm; 4994 5023 unsigned char ins; 4995 5024 code *c = this; 4996 5025 4997 5026 if (c == CNIL) 4998 5027 { printf("code 0\n"); 4999 5028 return; 5000 5029 } 5001 5030 op = c->Iop; 5002 ins = inssize[op]; 5003 if (op == 0x0F) 5004 { op = 0x0F00 + c->Iop2; 5005 if (op == 0x0F38 || op == 0x0F3A) 5006 op = (op << 8) | c->Iop3; 5007 ins = inssize2[c->Iop2]; 5008 } 5031 5032 if ((c->Iop & 0xFF00) == 0x0F00) 5033 ins = inssize2[op & 0xFF]; 5034 else 5035 ins = inssize[op & 0xFF]; 5036 5009 5037 printf("code %p: nxt=%p op=%02x",c,code_next(c),op); 5010 if ( op== ESCAPE)5011 { if ( c->Iop2== ESClinnum)5038 if ((op & 0xFF) == ESCAPE) 5039 { 
if ((op & 0xFF00) == ESClinnum) 5012 5040 { printf(" linnum = %d\n",c->IEV2.Vsrcpos.Slinnum); 5013 5041 return; 5014 5042 } 5015 printf(" ESCAPE %d",c->Iop 2);5043 printf(" ESCAPE %d",c->Iop >> 8); 5016 5044 } 5017 5045 if (c->Iflags) 5018 5046 printf(" flg=%x",c->Iflags); 5019 5047 if (ins & M) 5020 5048 { rm = c->Irm; 5021 5049 printf(" rm=%02x=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7); 5022 5050 if (I32 && issib(rm)) 5023 5051 { unsigned char sib = c->Isib; 5024 5052 printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7); 5025 5053 } 5026 5054 if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40) 5027 5055 { 5028 5056 switch (c->IFL1) 5029 5057 { 5030 5058 case FLconst: 5031 5059 case FLoffset: 5032 5060 printf(" int = %4d",c->IEV1.Vuns); 5033 5061 break; 5034 5062 case FLblock: 5035 5063 printf(" block = %p",c->IEV1.Vblock); trunk/src/backend/cod4.c
r577 r596 1223 1223 cl = getlvalue(&cs,e1,0); /* get EA */ 1224 1224 regm_t idxregs = idxregm(&cs); 1225 1225 retregs = *pretregs & (ALLREGS | mBP) & ~idxregs; 1226 1226 if (!retregs) 1227 1227 retregs = ALLREGS & ~idxregs; 1228 1228 cg = allocreg(&retregs,&resreg,tyml); 1229 1229 cs.Iop = 0x69; /* IMUL reg,EA,e2value */ 1230 1230 cs.IFL2 = FLconst; 1231 1231 cs.IEV2.Vint = e2factor; 1232 1232 opr = resreg; 1233 1233 } 1234 1234 else if (!I16 && !byte) 1235 1235 { 1236 1236 L1: 1237 1237 retregs = *pretregs & (ALLREGS | mBP); 1238 1238 if (!retregs) 1239 1239 retregs = ALLREGS; 1240 1240 cr = codelem(e2,&retregs,FALSE); /* load rvalue in reg */ 1241 1241 cl = getlvalue(&cs,e1,retregs); /* get EA */ 1242 1242 cg = getregs(retregs); /* destroy these regs */ 1243 cs.Iop = 0x0F; /* IMUL resreg,EA */ 1244 cs.Iop2 = 0xAF; 1243 cs.Iop = 0x0FAF; // IMUL resreg,EA 1245 1244 resreg = findreg(retregs); 1246 1245 opr = resreg; 1247 1246 } 1248 1247 else 1249 1248 { 1250 1249 retregs = mAX; 1251 1250 cr = codelem(e2,&retregs,FALSE); // load rvalue in AX 1252 1251 cl = getlvalue(&cs,e1,mAX); // get EA 1253 1252 cg = getregs(byte ? 
mAX : mAX | mDX); // destroy these regs 1254 1253 cs.Iop = 0xF7 ^ byte; // [I]MUL EA 1255 1254 } 1256 1255 cs.Irm |= modregrm(0,opr,0); 1257 1256 c = gen(CNIL,&cs); 1258 1257 } 1259 1258 else // /= or %= 1260 1259 { targ_size_t e2factor; 1261 1260 int pow2; 1262 1261 targ_ulong m; 1263 1262 1264 1263 assert(!byte); // should never happen … … 2570 2569 if (cx->Iop == 0x81 && (cx->Irm & modregrm(3,7,0)) == modregrm(3,4,0)) 2571 2570 { 2572 2571 // Convert AND of a word to AND of a dword, zeroing upper word 2573 2572 retregs = mask[cx->Irm & 7]; 2574 2573 cx->Iflags &= ~CFopsize; 2575 2574 cx->IEV2.Vint &= 0xFFFF; 2576 2575 goto L1; 2577 2576 } 2578 2577 } 2579 2578 if (op == OPs16_32 && retregs == mAX) 2580 2579 c2 = gen1(c2,0x98); /* CWDE */ 2581 2580 else 2582 2581 { 2583 2582 reg = findreg(retregs); 2584 2583 if (config.flags4 & CFG4speed && op == OPu16_32) 2585 2584 { // AND reg,0xFFFF 2586 2585 c3 = genc2(NULL,0x81,modregrm(3,4,reg),0xFFFFu); 2587 2586 } 2588 2587 else 2589 2588 { 2590 c3 = genregs(CNIL,0x0F,reg,reg);2591 c3 ->Iop2 = (op == OPu16_32) ? 0xB7 : 0xBF; /* MOVZX/MOVSX reg,reg */2589 unsigned iop = (op == OPu16_32) ? 0x0FB7 : 0x0FBF; /* MOVZX/MOVSX reg,reg */ 2590 c3 = genregs(CNIL,iop,reg,reg); 2592 2591 } 2593 2592 c2 = cat(c2,c3); 2594 2593 } 2595 2594 L1: 2596 2595 c3 = e1comsub ? getregs(retregs) : CNIL; 2597 2596 } 2598 2597 c4 = fixresult(e,retregs,pretregs); 2599 2598 c = cat4(c1,c2,c3,c4); 2600 2599 } 2601 2600 else if (*pretregs & mPSW || config.target_cpu < TARGET_80286) 2602 2601 { 2603 2602 // OPs16_32, OPs32_64 2604 2603 // CWD doesn't affect flags, so we can depend on the integer 2605 2604 // math to provide the flags. 2606 2605 retregs = mAX | mPSW; // want integer result in AX 2607 2606 *pretregs &= ~mPSW; // flags are already set 2608 2607 c1 = codelem(e->E1,&retregs,FALSE); 2609 2608 c2 = getregs(mDX); // sign extend into DX 2610 2609 c2 = gen1(c2,0x99); // CWD/CDQ 2611 2610 c3 = e1comsub ? 
getregs(retregs) : CNIL; … … 2724 2723 } 2725 2724 else 2726 2725 { 2727 2726 L1: 2728 2727 if (!I16) 2729 2728 { 2730 2729 if (op == OPs8int && reg == AX && size == 2) 2731 2730 { c3 = gen1(c3,0x98); /* CBW */ 2732 2731 c3->Iflags |= CFopsize; /* don't do a CWDE */ 2733 2732 } 2734 2733 else 2735 2734 { 2736 2735 /* We could do better by not forcing the src and dst */ 2737 2736 /* registers to be the same. */ 2738 2737 2739 2738 if (config.flags4 & CFG4speed && op == OPu8_16) 2740 2739 { // AND reg,0xFF 2741 2740 c3 = genc2(c3,0x81,modregrm(3,4,reg),0xFF); 2742 2741 } 2743 2742 else 2744 { c3 = genregs(c3,0x0F,reg,reg); 2745 c3->Iop2 = (op == OPu8int) ? 0xB6 : 0xBE; /* MOVZX/MOVSX reg,reg */ 2743 { 2744 unsigned iop = (op == OPu8int) ? 0x0FB6 : 0x0FBE; // MOVZX/MOVSX reg,reg 2745 c3 = genregs(c3,iop,reg,reg); 2746 2746 } 2747 2747 } 2748 2748 } 2749 2749 else 2750 2750 { 2751 2751 if (op == OPu8int) 2752 2752 c3 = genregs(c3,0x30,reg+4,reg+4); // XOR regH,regH 2753 2753 else 2754 2754 { 2755 2755 c3 = gen1(c3,0x98); /* CBW */ 2756 2756 *pretregs &= ~mPSW; /* flags already set */ 2757 2757 } 2758 2758 } 2759 2759 } 2760 2760 c2 = getregs(retregs); 2761 2761 L2: 2762 2762 c4 = fixresult(e,retregs,pretregs); 2763 2763 return cat6(c0,c1,c2,c3,c4,NULL); 2764 2764 } 2765 2765 … … 3033 3033 case OPbts: op = 0xAB; mode = 5; break; 3034 3034 3035 3035 default: 3036 3036 assert(0); 3037 3037 } 3038 3038 3039 3039 e1 = e->E1; 3040 3040 e2 = e->E2; 3041 3041 cs.Iflags = 0; 3042 3042 c = getlvalue(&cs, e, RMload); // get addressing mode 3043 3043 if (e->Eoper == OPbt && *pretregs == 0) 3044 3044 return cat(c, codelem(e2,pretregs,FALSE)); 3045 3045 3046 3046 ty1 = tybasic(e1->Ety); 3047 3047 word = (!I16 && tysize[ty1] == SHORTSIZE) ? CFopsize : 0; 3048 3048 idxregs = idxregm(&cs); // mask if index regs used 3049 3049 3050 3050 // if (e2->Eoper == OPconst && e2->EV.Vuns < 0x100) // should do this instead? 
3051 3051 if (e2->Eoper == OPconst) 3052 3052 { 3053 cs.Iop = 0x0F; 3054 cs.Iop2 = 0xBA; // BT rm,imm8 3053 cs.Iop = 0x0FBA; // BT rm,imm8 3055 3054 cs.Irm |= modregrm(0,mode,0); 3056 3055 cs.Iflags |= CFpsw | word; 3057 3056 cs.IFL2 = FLconst; 3058 3057 if (tysize[ty1] == SHORTSIZE) 3059 3058 { 3060 3059 cs.IEVoffset1 += (e2->EV.Vuns & ~15) >> 3; 3061 3060 cs.IEV2.Vint = e2->EV.Vint & 15; 3062 3061 } 3063 3062 else if (tysize[ty1] == 4) 3064 3063 { 3065 3064 cs.IEVoffset1 += (e2->EV.Vuns & ~31) >> 3; 3066 3065 cs.IEV2.Vint = e2->EV.Vint & 31; 3067 3066 } 3068 3067 else 3069 3068 { 3070 3069 cs.IEVoffset1 += (e2->EV.Vuns & ~63) >> 3; 3071 3070 cs.IEV2.Vint = e2->EV.Vint & 63; 3072 3071 } 3073 3072 c2 = gen(CNIL,&cs); 3074 3073 } 3075 3074 else 3076 3075 { 3077 3076 retregs = ALLREGS & ~idxregs; 3078 3077 c2 = scodelem(e2,&retregs,idxregs,TRUE); 3079 3078 reg = findreg(retregs); 3080 3079 3081 cs.Iop = 0x0F; 3082 cs.Iop2 = op; // BT rm,reg 3080 cs.Iop = 0x0F00 | op; // BT rm,reg 3083 3081 code_newreg(&cs,reg); 3084 3082 cs.Iflags |= CFpsw | word; 3085 3083 c2 = gen(c2,&cs); 3086 3084 } 3087 3085 3088 3086 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register 3089 3087 { 3090 3088 code *nop = CNIL; 3091 3089 regm_t save = regcon.immed.mval; 3092 3090 code *cg = allocreg(&retregs,®,TYint); 3093 3091 regcon.immed.mval = save; 3094 3092 if ((*pretregs & mPSW) == 0) 3095 3093 { 3096 3094 cg = cat(cg,getregs(retregs)); 3097 3095 cg = genregs(cg,0x19,reg,reg); // SBB reg,reg 3098 3096 } 3099 3097 else 3100 3098 { 3101 3099 cg = movregconst(cg,reg,1,8); // MOV reg,1 3102 3100 nop = gennop(nop); … … 3137 3135 { 3138 3136 cl = getlvalue(&cs, e->E1, RMload); // get addressing mode 3139 3137 } 3140 3138 else 3141 3139 { 3142 3140 retregs = allregs; 3143 3141 cl = codelem(e->E1, &retregs, FALSE); 3144 3142 reg = findreg(retregs); 3145 3143 cs.Irm = modregrm(3,0,reg & 7); 3146 3144 cs.Iflags = 0; 3147 3145 cs.Irex = 0; 3148 3146 if (reg & 8) 3149 
3147 cs.Irex |= REX_B; 3150 3148 } 3151 3149 3152 3150 retregs = *pretregs & allregs; 3153 3151 if (!retregs) 3154 3152 retregs = allregs; 3155 3153 cg = allocreg(&retregs, ®, e->Ety); 3156 3154 3157 cs.Iop = 0x0F; 3158 cs.Iop2 = (e->Eoper == OPbsf) ? 0xBC : 0xBD; // BSF/BSR reg,EA 3155 cs.Iop = (e->Eoper == OPbsf) ? 0x0FBC : 0x0FBD; // BSF/BSR reg,EA 3159 3156 code_newreg(&cs, reg); 3160 3157 if (!I16 && sz == SHORTSIZE) 3161 3158 cs.Iflags |= CFopsize; 3162 3159 cg = gen(cg,&cs); 3163 3160 3164 3161 return cat3(cl,cg,fixresult(e,retregs,pretregs)); 3165 3162 } 3166 3163 3167 3164 /******************************************* 3168 3165 * Generate code for OPpair, OPrpair. 3169 3166 */ 3170 3167 3171 3168 code *cdpair(elem *e, regm_t *pretregs) 3172 3169 { 3173 3170 regm_t retregs; 3174 3171 regm_t regs1; 3175 3172 regm_t regs2; 3176 3173 unsigned reg; 3177 3174 code *cg; 3178 3175 code *c1; trunk/src/backend/code.h
r595 r596 214 214 #define JBE 0x76 215 215 #define JA 0x77 216 216 #define JS 0x78 217 217 #define JNS 0x79 218 218 #define JP 0x7A 219 219 #define JNP 0x7B 220 220 #define JL 0x7C 221 221 #define JGE 0x7D 222 222 #define JLE 0x7E 223 223 #define JG 0x7F 224 224 225 225 /* NOP is used as a placeholder in the linked list of instructions, no */ 226 226 /* actual code will be generated for it. */ 227 227 #define NOP 0x2E /* actually CS: (we don't use 0x90 because the */ 228 228 /* silly Windows stuff wants to output 0x90's) */ 229 229 230 230 #define ESCAPE 0x3E // marker that special information is here 231 231 // (Iop2 is the type of special information) 232 232 // (Same as DS:, but we will never generate 233 233 // a separate DS: opcode anyway) 234 #define ESClinnum 1// line number information235 #define ESCctor 2// object is constructed236 #define ESCdtor 3// object is destructed237 #define ESCmark 4// mark eh stack238 #define ESCrelease 5// release eh stack239 #define ESCoffset 6// set code offset for eh240 #define ESCadjesp 7// adjust ESP by IEV2.Vint241 #define ESCmark2 8// mark eh stack242 #define ESCrelease2 9// release eh stack243 #define ESCframeptr 10// replace with load of frame pointer234 #define ESClinnum (1 << 8) // line number information 235 #define ESCctor (2 << 8) // object is constructed 236 #define ESCdtor (3 << 8) // object is destructed 237 #define ESCmark (4 << 8) // mark eh stack 238 #define ESCrelease (5 << 8) // release eh stack 239 #define ESCoffset (6 << 8) // set code offset for eh 240 #define ESCadjesp (7 << 8) // adjust ESP by IEV2.Vint 241 #define ESCmark2 (8 << 8) // mark eh stack 242 #define ESCrelease2 (9 << 8) // release eh stack 243 #define ESCframeptr (10 << 8) // replace with load of frame pointer 244 244 245 245 #define ASM 0x36 // string of asm bytes, actually an SS: opcode 246 246 247 247 /********************************* 248 248 * Macros to ease generating code 249 249 * modregrm: generate mod reg r/m field 250 250 * 
modregxrm: reg could be R8..R15 251 251 * modregrmx: rm could be R8..R15 252 252 * modregxrmx: reg or rm could be R8..R15 253 253 * NEWREG: change reg field of x to r 254 254 * genorreg: OR t,f 255 255 */ 256 256 257 257 #define modregrm(m,r,rm) (((m)<<6)|((r)<<3)|(rm)) 258 258 #define modregxrm(m,r,rm) ((((r)&8)<<15)|modregrm((m),(r)&7,rm)) 259 259 #define modregrmx(m,r,rm) ((((rm)&8)<<13)|modregrm((m),r,(rm)&7)) 260 260 #define modregxrmx(m,r,rm) ((((r)&8)<<15)|(((rm)&8)<<13)|modregrm((m),(r)&7,(rm)&7)) 261 261 262 262 #define NEWREXR(x,r) ((x)=((x)&~REX_R)|(((r)&8)>>1)) 263 263 #define NEWREG(x,r) ((x)=((x)&~(7<<3))|((r)<<3)) … … 380 380 #define CFopsize 0x80 // prefix with operand size 381 381 #define CFaddrsize 0x100 // prefix with address size 382 382 #define CFds 0x200 // need DS override (not with es, ss, or cs ) 383 383 #define CFcs 0x400 // need CS override 384 384 #define CFfs 0x800 // need FS override 385 385 #define CFgs (CFcs | CFfs) // need GS override 386 386 #define CFwait 0x1000 // If I32 it indicates when to output a WAIT 387 387 #define CFselfrel 0x2000 // if self-relative 388 388 #define CFunambig 0x4000 // indicates cannot be accessed by other addressing 389 389 // modes 390 390 #define CFtarg2 0x8000 // like CFtarg, but we can't optimize this away 391 391 #define CFvolatile 0x10000 // volatile reference, do not schedule 392 392 #define CFclassinit 0x20000 // class init code 393 393 #define CFoffset64 0x40000 // offset is 64 bits 394 394 #define CFpc32 0x80000 // I64: PC relative 32 bit fixup 395 395 396 396 #define CFPREFIX (CFSEG | CFopsize | CFaddrsize) 397 397 #define CFSEG (CFes | CFss | CFds | CFcs | CFfs | CFgs) 398 398 399 399 400 unsigned char Iop; 401 unsigned char Iop2; // second opcode byte 402 unsigned char Iop3; // third opcode byte 400 unsigned Iop; 403 401 404 402 union 405 403 { unsigned _Iea; 406 404 struct 407 405 { 408 406 unsigned char _Irm; // reg/mode 409 407 unsigned char _Isib; // SIB byte 410 408 unsigned char _Irex; 
// REX prefix 411 409 } _ea; 412 410 } _EA; 413 411 414 412 #define Iea _EA._Iea 415 413 #define Irm _EA._ea._Irm 416 414 #define Isib _EA._ea._Isib 417 415 #define Irex _EA._ea._Irex 418 416 419 417 unsigned char IFL1,IFL2; // FLavors of 1st, 2nd operands 420 418 union evc IEV1; // 1st operand, if any 421 419 #define IEVpointer1 IEV1._EP.Vpointer 422 420 #define IEVseg1 IEV1._EP.Vseg trunk/src/iasm.c
r577 r596 1353 1353 case _ES: 1354 1354 emit(0x26); 1355 1355 pc->Iflags |= CFes; 1356 1356 break; 1357 1357 case _FS: 1358 1358 emit(0x64); 1359 1359 pc->Iflags |= CFfs; 1360 1360 break; 1361 1361 case _GS: 1362 1362 emit(0x65); 1363 1363 pc->Iflags |= CFgs; 1364 1364 break; 1365 1365 default: 1366 1366 assert(0); 1367 1367 } 1368 1368 } 1369 1369 break; 1370 1370 } 1371 1371 unsigned usOpcode = ptb.pptb0->usOpcode; 1372 1372 1373 pc->Iop = usOpcode; 1373 1374 if ((usOpcode & 0xFFFFFF00) == 0x660F3A00 || // SSE4 1374 1375 (usOpcode & 0xFFFFFF00) == 0x660F3800) // SSE4 1375 1376 { 1376 pc->Iflags |= CFopsize; 1377 pc->Iop = 0x0F; 1378 pc->Iop2 = (usOpcode >> 8) & 0xFF; 1379 pc->Iop3 = usOpcode & 0xFF; 1377 pc->Iop = 0x66000F00 | ((usOpcode >> 8) & 0xFF) | ((usOpcode & 0xFF) << 16); 1380 1378 goto L3; 1381 1379 } 1382 1380 switch (usOpcode & 0xFF0000) 1383 1381 { 1384 1382 case 0: 1385 1383 break; 1386 1384 1387 1385 case 0x660000: 1388 pc->Iflags |= CFopsize;1389 1386 usOpcode &= 0xFFFF; 1390 break;1387 goto L3; 1391 1388 1392 1389 case 0xF20000: // REPNE 1393 1390 case 0xF30000: // REP/REPE 1394 1391 // BUG: What if there's an address size prefix or segment 1395 1392 // override prefix? Must the REP be adjacent to the rest 1396 1393 // of the opcode? 
1397 pcPrefix = code_calloc();1398 pcPrefix->Iop = usOpcode >> 16;1399 1394 usOpcode &= 0xFFFF; 1400 break;1395 goto L3; 1401 1396 1402 1397 case 0x0F0000: // an AMD instruction 1403 1398 puc = ((unsigned char *) &usOpcode); 1404 1399 if (puc[1] != 0x0F) // if not AMD instruction 0x0F0F 1405 1400 goto L4; 1406 1401 emit(puc[2]); 1407 1402 emit(puc[1]); 1408 1403 emit(puc[0]); 1409 pc->Iop = puc[2]; 1410 pc->Iop2 = puc[1]; 1404 pc->Iop >>= 8; 1411 1405 pc->IEVint2 = puc[0]; 1412 1406 pc->IFL2 = FLconst; 1413 1407 goto L3; 1414 1408 1415 1409 default: 1416 1410 puc = ((unsigned char *) &usOpcode); 1417 1411 L4: 1418 1412 emit(puc[2]); 1419 1413 emit(puc[1]); 1420 1414 emit(puc[0]); 1421 pc->Iop = puc[2]; 1422 pc->Iop2 = puc[1]; 1415 pc->Iop >>= 8; 1423 1416 pc->Irm = puc[0]; 1424 1417 goto L3; 1425 1418 } 1426 1419 if (usOpcode & 0xff00) 1427 1420 { 1428 1421 puc = ((unsigned char *) &(usOpcode)); 1429 1422 emit(puc[1]); 1430 1423 emit(puc[0]); 1431 1424 pc->Iop = puc[1]; 1432 1425 if (pc->Iop == 0x0f) 1433 pc->Iop 2 =puc[0];1426 pc->Iop = 0x0F00 | puc[0]; 1434 1427 else 1435 1428 { 1436 1429 if (usOpcode == 0xDFE0) // FSTSW AX 1437 1430 { pc->Irm = puc[0]; 1438 1431 goto L2; 1439 1432 } 1440 1433 if (asmstate.ucItype == ITfloat) 1441 1434 pc->Irm = puc[0]; 1442 1435 else 1443 1436 { pc->IEVint2 = puc[0]; 1444 1437 pc->IFL2 = FLconst; 1445 1438 } 1446 1439 } 1447 1440 } 1448 1441 else 1449 1442 { 1450 1443 emit(usOpcode); 1451 pc->Iop = usOpcode;1452 1444 } 1453 1445 L3: ; 1454 1446 1455 1447 // If CALL, Jxx or LOOPx to a symbolic location 1456 1448 if (/*asmstate.ucItype == ITjump &&*/ 1457 1449 popnd1 && popnd1->s && popnd1->s->isLabel()) 1458 1450 { Dsymbol *s; 1459 1451 1460 1452 s = popnd1->s; 1461 1453 if (s == asmstate.psDollar) 1462 1454 { 1463 1455 pc->IFL2 = FLconst; 1464 1456 if (uSizemaskTable1 & (_8 | _16)) 1465 1457 pc->IEVint2 = popnd1->disp; 1466 1458 else if (uSizemaskTable1 & _32) 1467 1459 pc->IEVpointer2 = (targ_size_t) popnd1->disp; 1468 1460 } 
1469 1461 else 1470 1462 { LabelDsymbol *label; 1471 1463 1472 1464 label = s->isLabel(); 1473 1465 if (label) 1474 { if ((pc->Iop & 0xF0) == 0x70)1466 { if ((pc->Iop & ~0x0F) == 0x70) 1475 1467 pc->Iflags |= CFjmp16; 1476 1468 if (usNumops == 1) 1477 1469 { pc->IFL2 = FLblock; 1478 1470 pc->IEVlsym2 = label; 1479 1471 } 1480 1472 else 1481 1473 { pc->IFL1 = FLblock; 1482 1474 pc->IEVlsym1 = label; 1483 1475 } 1484 1476 } 1485 1477 } 1486 1478 } 1487 1479 1488 1480 switch (usNumops) 1489 1481 { 1490 1482 case 0: 1491 1483 break; 1492 1484 case 1: 1493 1485 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1494 1486 amodTable1 == _normal && (uRegmaskTable1 & _rplus_r))) 1495 1487 { 1496 1488 unsigned reg = popnd1->base->val; 1497 1489 if (reg & 8) 1498 1490 { reg &= 7; 1499 1491 pc->Irex |= REX_B; 1500 1492 assert(I64); 1501 1493 } 1502 1494 if (asmstate.ucItype == ITfloat) 1503 1495 pc->Irm += reg; 1504 else if (pc->Iop == 0x0f)1505 pc->Iop2 += reg;1506 1496 else 1507 1497 pc->Iop += reg; 1508 1498 #ifdef DEBUG 1509 1499 auchOpcode[usIdx-1] += reg; 1510 1500 #endif 1511 1501 } 1512 1502 else 1513 1503 { asm_make_modrm_byte( 1514 1504 #ifdef DEBUG 1515 1505 auchOpcode, &usIdx, 1516 1506 #endif 1517 1507 pc, 1518 1508 ptb.pptb1->usFlags, 1519 1509 popnd1, NULL); 1520 1510 } 1521 1511 popndTmp = popnd1; 1522 1512 aoptyTmp = aoptyTable1; 1523 1513 uSizemaskTmp = uSizemaskTable1; 1524 1514 L1: 1525 1515 if (aoptyTmp == _imm) … … 1621 1611 popnd2, popnd1); 1622 1612 } 1623 1613 popndTmp = popnd1; 1624 1614 aoptyTmp = aoptyTable1; 1625 1615 uSizemaskTmp = uSizemaskTable1; 1626 1616 } 1627 1617 else 1628 1618 { 1629 1619 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1630 1620 amodTable1 == _normal && 1631 1621 (uRegmaskTable1 & _rplus_r))) 1632 1622 { 1633 1623 unsigned reg = popnd1->base->val; 1634 1624 if (reg & 8) 1635 1625 { reg &= 7; 1636 1626 pc->Irex |= REX_B; 1637 1627 assert(I64); 1638 1628 } 1639 1629 if (asmstate.ucItype == ITfloat) 1640 1630 
pc->Irm += reg; 1641 else if (pc->Iop == 0x0f)1642 pc->Iop2 += reg;1643 1631 else 1644 1632 pc->Iop += reg; 1645 1633 #ifdef DEBUG 1646 1634 auchOpcode[usIdx-1] += reg; 1647 1635 #endif 1648 1636 } 1649 1637 else 1650 1638 if (((aoptyTable2 == _reg || aoptyTable2 == _float) && 1651 1639 amodTable2 == _normal && 1652 1640 (uRegmaskTable2 & _rplus_r))) 1653 1641 { 1654 1642 unsigned reg = popnd2->base->val; 1655 1643 if (reg & 8) 1656 1644 { reg &= 7; 1657 1645 pc->Irex |= REX_B; 1658 1646 assert(I64); 1659 1647 } 1660 1648 if (asmstate.ucItype == ITfloat) 1661 1649 pc->Irm += reg; 1662 else if (pc->Iop == 0x0f)1663 pc->Iop2 += reg;1664 1650 else 1665 1651 pc->Iop += reg; 1666 1652 #ifdef DEBUG 1667 1653 auchOpcode[usIdx-1] += reg; 1668 1654 #endif 1669 1655 } 1670 1656 else if (ptb.pptb0->usOpcode == 0xF30FD6 || 1671 1657 ptb.pptb0->usOpcode == 0x0F12 || 1672 1658 ptb.pptb0->usOpcode == 0x0F16 || 1673 1659 ptb.pptb0->usOpcode == 0x660F50 || 1674 1660 ptb.pptb0->usOpcode == 0x0F50 || 1675 1661 ptb.pptb0->usOpcode == 0x660FD7 || 1676 1662 ptb.pptb0->usOpcode == 0x0FD7) 1677 1663 { 1678 1664 asm_make_modrm_byte( 1679 1665 #ifdef DEBUG 1680 1666 auchOpcode, &usIdx, 1681 1667 #endif 1682 1668 pc, 1683 1669 ptb.pptb1->usFlags, … … 1721 1707 ptb.pptb1->usFlags, 1722 1708 popnd2, popnd1); 1723 1709 popndTmp = popnd3; 1724 1710 aoptyTmp = aoptyTable3; 1725 1711 uSizemaskTmp = uSizemaskTable3; 1726 1712 } 1727 1713 else { 1728 1714 1729 1715 if (((aoptyTable1 == _reg || aoptyTable1 == _float) && 1730 1716 amodTable1 == _normal && 1731 1717 (uRegmaskTable1 &_rplus_r))) 1732 1718 { 1733 1719 unsigned reg = popnd1->base->val; 1734 1720 if (reg & 8) 1735 1721 { reg &= 7; 1736 1722 pc->Irex |= REX_B; 1737 1723 assert(I64); 1738 1724 } 1739 1725 if (asmstate.ucItype == ITfloat) 1740 1726 pc->Irm += reg; 1741 else if (pc->Iop == 0x0f)1742 pc->Iop2 += reg;1743 1727 else 1744 1728 pc->Iop += reg; 1745 1729 #ifdef DEBUG 1746 1730 auchOpcode[usIdx-1] += reg; 1747 1731 #endif 1748 1732 } 
1749 1733 else 1750 1734 if (((aoptyTable2 == _reg || aoptyTable2 == _float) && 1751 1735 amodTable2 == _normal && 1752 1736 (uRegmaskTable2 &_rplus_r))) 1753 1737 { 1754 1738 unsigned reg = popnd1->base->val; 1755 1739 if (reg & 8) 1756 1740 { reg &= 7; 1757 1741 pc->Irex |= REX_B; 1758 1742 assert(I64); 1759 1743 } 1760 1744 if (asmstate.ucItype == ITfloat) 1761 1745 pc->Irm += reg; 1762 else if (pc->Iop == 0x0f)1763 pc->Iop2 += reg;1764 1746 else 1765 1747 pc->Iop += reg; 1766 1748 #ifdef DEBUG 1767 1749 auchOpcode[usIdx-1] += reg; 1768 1750 #endif 1769 1751 } 1770 1752 else 1771 1753 asm_make_modrm_byte( 1772 1754 #ifdef DEBUG 1773 1755 auchOpcode, &usIdx, 1774 1756 #endif 1775 1757 pc, 1776 1758 ptb.pptb1->usFlags, 1777 1759 popnd1, popnd2); 1778 1760 1779 1761 popndTmp = popnd3; 1780 1762 aoptyTmp = aoptyTable3; 1781 1763 uSizemaskTmp = uSizemaskTable3; 1782 1764 1783 1765 } 1784 1766 goto L1; 1785 1767 } 1786 1768 L2: 1787 1769 1788 if ((pc->Iop & 0xF8) == 0xD8 &&1770 if ((pc->Iop & ~7) == 0xD8 && 1789 1771 ADDFWAIT() && 1790 1772 !(ptb.pptb0->usFlags & _nfwait)) 1791 1773 pc->Iflags |= CFwait; 1792 1774 else if ((ptb.pptb0->usFlags & _fwait) && 1793 1775 config.target_cpu >= TARGET_80386) 1794 1776 pc->Iflags |= CFwait; 1795 1777 1796 1778 #ifdef DEBUG 1797 1779 if (debuga) 1798 1780 { unsigned u; 1799 1781 1800 1782 for (u = 0; u < usIdx; u++) 1801 1783 printf(" %02X", auchOpcode[u]); 1802 1784 1803 1785 printf("\t%s\t", asm_opstr(pop)); 1804 1786 if (popnd1) 1805 1787 asm_output_popnd(popnd1); 1806 1788 if (popnd2) { 1807 1789 printf(","); 1808 1790 asm_output_popnd(popnd2);
