Changeset 581
- Timestamp:
- 07/20/10 18:22:19 (14 years ago)
- Files:
-
- branches/dmd-1.x/src/backend/cod1.c (modified) (2 diffs)
- branches/dmd-1.x/src/backend/cod2.c (modified) (1 diff)
- branches/dmd-1.x/src/backend/cod3.c (modified) (5 diffs)
- branches/dmd-1.x/src/backend/code.h (modified) (1 diff)
- branches/dmd-1.x/src/backend/el.c (modified) (3 diffs)
- branches/dmd-1.x/src/backend/elfobj.c (modified) (4 diffs)
- trunk/src/backend/cod1.c (modified) (2 diffs)
- trunk/src/backend/cod2.c (modified) (1 diff)
- trunk/src/backend/cod3.c (modified) (5 diffs)
- trunk/src/backend/code.h (modified) (1 diff)
- trunk/src/backend/el.c (modified) (3 diffs)
- trunk/src/backend/elfobj.c (modified) (4 diffs)
- trunk/src/clone.c (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/dmd-1.x/src/backend/cod1.c
r580 r581 1327 1327 s->Sclass == SCparameter) 1328 1328 { refparam = TRUE; 1329 1329 reflocal = TRUE; // kludge to set up prolog 1330 1330 } 1331 1331 pcs->Irm = modregrm(3,0,s->Sreglsw & 7); 1332 1332 if (s->Sreglsw & 8) 1333 1333 pcs->Irex |= REX_B; 1334 1334 if (e->EV.sp.Voffset == 1 && sz == 1) 1335 1335 { assert(s->Sregm & BYTEREGS); 1336 1336 assert(s->Sreglsw < 4); 1337 1337 pcs->Irm |= 4; // use 2nd byte of register 1338 1338 } 1339 1339 else 1340 1340 assert(!e->EV.sp.Voffset); 1341 1341 } 1342 1342 else if (s->ty() & mTYcs && !(fl == FLextern && LARGECODE)) 1343 1343 { 1344 1344 pcs->Iflags |= CFcs | CFoff; 1345 1345 } 1346 1346 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1347 // if (fl == FLtlsdata || s->ty() & mTYthread) 1348 // pcs->Iflags |= CFgs; 1347 if (I64 && config.flags3 & CFG3pic && 1348 (fl == FLtlsdata || s->ty() & mTYthread)) 1349 { 1350 pcs->Iflags |= CFopsize; 1351 pcs->Irex = 0x48; 1352 } 1349 1353 #endif 1350 1354 pcs->IEVsym1 = s; 1351 1355 pcs->IEVoffset1 = e->EV.sp.Voffset; 1352 1356 if (sz == 1) 1353 1357 { /* Don't use SI or DI for this variable */ 1354 1358 s->Sflags |= GTbyte; 1355 1359 if (e->EV.sp.Voffset > 1) 1356 1360 s->Sflags &= ~GTregcand; 1357 1361 } 1358 1362 else if (e->EV.sp.Voffset) 1359 1363 s->Sflags &= ~GTregcand; 1360 1364 if (!(keepmsk & RMstore)) // if not store only 1361 1365 { s->Sflags |= SFLread; // assume we are doing a read 1362 1366 } 1363 1367 break; 1364 1368 case FLpseudo: 1365 1369 #if MARS 1366 1370 assert(0); 1367 1371 #else 1368 1372 { … … 2676 2680 goto LF1; 2677 2681 else 2678 2682 goto LF2; 2679 2683 } 2680 2684 else 2681 2685 { int fl; 2682 2686 2683 2687 fl = FLfunc; 2684 2688 if (!tyfunc(s->ty())) 2685 2689 fl = el_fl(e1); 2686 2690 if (tym1 == TYifunc) 2687 2691 c1 = gen1(c1,0x9C); // PUSHF 2688 2692 #if 0 && TARGET_LINUX 2689 2693 if (s->Sfl == FLgot || s->Sfl == FLgotoff) 2690 2694 fl = s->Sfl; 2691 2695 #endif 2692 2696 ce = gencs(CNIL,farfunc ? 
0x9A : 0xE8,0,fl,s); // CALL extern 2693 2697 ce->Iflags |= farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff); 2694 2698 #if TARGET_LINUX 2695 2699 if (s == tls_get_addr_sym) 2696 { /* Append a NOP so GNU linker has patch room 2697 */ 2698 ce = gen1(ce, 0x90); // NOP 2699 code_orflag(ce, CFvolatile); // don't schedule it 2700 { 2701 if (I32) 2702 { 2703 /* Append a NOP so GNU linker has patch room 2704 */ 2705 ce = gen1(ce, 0x90); // NOP 2706 code_orflag(ce, CFvolatile); // don't schedule it 2707 } 2708 else 2709 { /* Prepend 66 66 48 so GNU linker has patch room 2710 */ 2711 assert(I64); 2712 ce->Irex = REX | REX_W; 2713 ce = cat(gen1(CNIL, 0x66), ce); 2714 ce = cat(gen1(CNIL, 0x66), ce); 2715 } 2700 2716 } 2701 2717 #endif 2702 2718 } 2703 2719 ce = cat(c1,ce); 2704 2720 } 2705 2721 else 2706 2722 { /* Call function via pointer */ 2707 2723 elem *e11; 2708 2724 tym_t e11ty; 2709 2725 2710 2726 #ifdef DEBUG 2711 2727 if (e1->Eoper != OPind 2712 2728 ) { WRFL((enum FL)el_fl(e1)); WROP(e1->Eoper); } 2713 2729 #endif 2714 2730 c = save87(); // assume 8087 regs are all trashed 2715 2731 assert(e1->Eoper == OPind); 2716 2732 e11 = e1->E1; 2717 2733 e11ty = tybasic(e11->Ety); 2718 2734 assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr))); 2719 2735 branches/dmd-1.x/src/backend/cod2.c
r579 r581 3736 3736 { code cs; 3737 3737 code *c; 3738 3738 3739 3739 cs.Iflags = 0; 3740 3740 unsigned char rex = 0; 3741 3741 cs.Irex = rex; 3742 3742 assert(e->Eoper == OPvar || e->Eoper == OPrelconst); 3743 3743 enum FL fl = el_fl(e); 3744 3744 switch (fl) 3745 3745 { 3746 3746 case FLdatseg: 3747 3747 cs.IEV2._EP.Vpointer = e->EV.Vpointer; 3748 3748 goto L3; 3749 3749 3750 3750 case FLfardata: 3751 3751 assert(!TARGET_FLAT); 3752 3752 goto L4; 3753 3753 3754 3754 case FLtlsdata: 3755 3755 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 3756 { /* Generate: 3756 { 3757 L5: 3758 if (I64 && config.flags3 & CFG3pic) 3759 { 3760 /* Generate: 3761 * LEA DI,s@TLSGD[RIP] 3762 */ 3763 assert(reg == DI); 3764 code css; 3765 css.Irex = REX | REX_W; 3766 css.Iop = 0x8D; // LEA 3767 css.Irm = modregrm(0,DI,5); 3768 css.Iflags = CFopsize; 3769 css.IFL1 = fl; 3770 css.IEVsym1 = e->EV.sp.Vsym; 3771 css.IEVoffset1 = e->EV.sp.Voffset; 3772 c = gen(NULL, &css); 3773 return c; 3774 } 3775 /* Generate: 3757 3776 * MOV reg,GS:[00000000] 3758 3777 * ADD reg, offset s@TLS_LE 3759 3778 * for locals, and for globals: 3760 3779 * MOV reg,GS:[00000000] 3761 3780 * ADD reg, s@TLS_IE 3762 3781 * note different fixup 3763 3782 */ 3764 L5: 3765 3783 int stack = 0; 3766 3784 c = NULL; 3767 3785 if (reg == STACK) 3768 3786 { regm_t retregs = ALLREGS; 3769 3787 3770 3788 c = allocreg(&retregs,&reg,TYoffset); 3771 3789 reg = findreg(retregs); 3772 3790 stack = 1; 3773 3791 } 3774 3792 3775 3793 code css; 3776 3794 css.Irex = rex; 3777 3795 css.Iop = 0x8B; 3778 3796 css.Irm = modregrm(0, 0, BPRM); 3779 3797 code_newreg(&css, reg); 3780 3798 css.Iflags = CFgs; 3781 3799 css.IFL1 = FLconst; 3782 3800 css.IEV1.Vuns = 0; 3783 3801 c = gen(c, &css); // MOV reg,GS:[00000000] 3784 3802 branches/dmd-1.x/src/backend/cod3.c
r580 r581 19 19 #include "cc.h" 20 20 #include "el.h" 21 21 #include "code.h" 22 22 #include "oper.h" 23 23 #include "global.h" 24 24 #include "type.h" 25 25 #include "parser.h" 26 26 #if SCPP 27 27 #include "cpp.h" 28 28 #include "exh.h" 29 29 #endif 30 30 31 31 static char __file__[] = __FILE__; /* for tassert.h */ 32 32 #include "tassert.h" 33 33 34 34 #if MARS 35 35 #define tstrace NULL 36 36 #endif 37 37 38 38 extern targ_size_t retsize; 39 STATIC void pinholeopt_unittest(); 39 40 STATIC void do8bit (enum FL,union evc *); 40 41 STATIC void do16bit (enum FL,union evc *,int); 41 42 STATIC void do32bit (enum FL,union evc *,int); 42 43 STATIC void do64bit (enum FL,union evc *,int); 43 44 44 45 static int hasframe; /* !=0 if this function has a stack frame */ 45 46 static targ_size_t Foff; // BP offset of floating register 46 47 static targ_size_t CSoff; // offset of common sub expressions 47 48 static targ_size_t NDPoff; // offset of saved 8087 registers 48 49 int BPoff; // offset from BP 49 50 static int EBPtoESP; // add to EBP offset to get ESP offset 50 51 static int AAoff; // offset of alloca temporary 51 52 52 53 #if ELFOBJ || MACHOBJ 53 54 #define JMPSEG CDATA 54 55 #define JMPOFF CDoffset 55 56 #else 56 57 #define JMPSEG DATA 57 58 #define JMPOFF Doffset 58 59 #endif … … 2893 2894 return offset; 2894 2895 } 2895 2896 2896 2897 2897 2898 2898 2899 /******************************* 2899 2900 * Find shorter versions of the same instructions. 
2900 2901 * Does these optimizations: 2901 2902 * replaces jmps to the next instruction with NOPs 2902 2903 * sign extension of modregrm displacement 2903 2904 * sign extension of immediate data (can't do it for OR, AND, XOR 2904 2905 * as the opcodes are not defined) 2905 2906 * short versions for AX EA 2906 2907 * short versions for reg EA 2907 2908 * Input: 2908 2909 * b -> block for code (or NULL) 2909 2910 */ 2910 2911 2911 2912 void pinholeopt(code *c,block *b) 2912 2913 { targ_size_t a; 2913 unsigned op,mod ,rm,reg,ereg;2914 unsigned op,mod; 2914 2915 unsigned char ins; 2915 2916 int usespace; 2916 2917 int useopsize; 2917 2918 int space; 2918 2919 block *bn; 2920 2921 #ifdef DEBUG 2922 static int tested; if (!tested) { tested++; pinholeopt_unittest(); } 2923 #endif 2919 2924 2920 2925 #if 0 2921 2926 code *cstart = c; 2922 2927 if (debugc) 2923 2928 { 2924 2929 printf("+pinholeopt(%p)\n",c); 2925 2930 } 2926 2931 #endif 2927 2932 2928 2933 if (b) 2929 2934 { bn = b->Bnext; 2930 2935 usespace = (config.flags4 & CFG4space && b->BC != BCasm); 2931 2936 useopsize = (I16 || (config.flags4 & CFG4space && b->BC != BCasm)); 2932 2937 } 2933 2938 else 2934 2939 { bn = NULL; 2935 2940 usespace = (config.flags4 & CFG4space); 2936 2941 useopsize = (I16 || config.flags4 & CFG4space); 2937 2942 } 2938 2943 for (; c; c = code_next(c)) 2939 2944 { 2940 2945 L1: 2941 2946 op = c->Iop; 2942 2947 if (op == 0x0F) 2943 2948 ins = inssize2[c->Iop2]; 2944 2949 else 2945 2950 ins = inssize[c->Iop]; 2946 if ( !I64 &&ins & M) // if modregrm byte2947 { int longop = (c->Iflags & CFopsize) ? I16 : I32;2951 if (ins & M) // if modregrm byte 2952 { int shortop = (c->Iflags & CFopsize) ? 
!I16 : I16; 2948 2953 int local_BPRM = BPRM; 2949 2954 2950 2955 if (c->Iflags & CFaddrsize) 2951 2956 local_BPRM ^= 5 ^ 6; // toggle between 5 and 6 2952 2957 2953 rm = c->Irm; 2954 reg = rm & (7<<3); // isolate reg field 2955 ereg = rm & 7; 2958 unsigned rm = c->Irm; 2959 unsigned reg = rm & modregrm(0,7,0); // isolate reg field 2960 unsigned ereg = rm & 7; 2961 //printf("c = %p, op = %02x rm = %02x\n", c, op, rm); 2956 2962 2957 2963 /* If immediate second operand */ 2958 2964 if ((ins & T || op == 0xF6 || op == 0xF7) && 2959 2965 c->IFL2 == FLconst) 2960 { int flags; 2961 targ_long u; 2962 2963 flags = c->Iflags & CFpsw; /* if want result in flags */ 2964 u = c->IEV2.Vuns; 2966 { 2967 int flags = c->Iflags & CFpsw; /* if want result in flags */ 2968 targ_long u = c->IEV2.Vuns; 2965 2969 if (ins & E) 2966 2970 u = (signed char) u; 2967 else if ( !longop)2971 else if (shortop) 2968 2972 u = (short) u; 2969 2973 2970 2974 // Replace CMP reg,0 with TEST reg,reg 2971 if ((op & 0xFE) == 0x80 && 2975 if ((op & 0xFE) == 0x80 && // 80 is CMP R8,imm8; 81 is CMP reg,imm 2972 2976 rm >= modregrm(3,7,AX) && 2973 2977 u == 0) 2974 2978 { c->Iop = (op & 1) | 0x84; 2975 2979 c->Irm = modregrm(3,ereg,ereg); 2980 if (c->Irex & REX_B) 2981 c->Irex |= REX_R; 2976 2982 goto L1; 2977 2983 } 2978 2984 2979 2985 /* Optimize ANDs with an immediate constant */ 2980 2986 if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0)) 2981 2987 { 2982 if (rm >= modregrm(3,4,AX)) 2988 if (rm >= modregrm(3,4,AX)) // AND reg,imm 2983 2989 { 2984 2990 if (u == 0) 2985 2991 { /* Replace with XOR reg,reg */ 2986 2992 c->Iop = 0x30 | (op & 1); 2987 NEWREG(c->Irm,rm & 7); 2993 c->Irm = modregrm(3,ereg,ereg); 2994 if (c->Irex & REX_B) 2995 c->Irex |= REX_R; 2988 2996 goto L1; 2989 2997 } 2990 2998 if (u == 0xFFFFFFFF && !flags) 2991 2999 { c->Iop = NOP; 2992 3000 goto L1; 2993 3001 } 2994 3002 } 2995 3003 if (op == 0x81 && !flags) 2996 3004 { // If we can do the operation in one byte 2997 3005 2998 3006 
// If EA is not SI or DI 2999 if ( rm < modregrm(3,4,SP) &&3007 if ((rm < modregrm(3,4,SP) || I64) && 3000 3008 (config.flags4 & CFG4space || 3001 3009 config.target_cpu < TARGET_PentiumPro) 3002 3010 ) 3003 3011 { 3004 3012 if ((u & 0xFFFFFF00) == 0xFFFFFF00) 3005 3013 goto L2; 3006 else 3007 { if ( longop)3014 else if (rm < modregrm(3,0,0) || (!c->Irex && ereg < 4)) 3015 { if (!shortop) 3008 3016 { if ((u & 0xFFFF00FF) == 0xFFFF00FF) 3009 3017 goto L3; 3010 3018 } 3011 3019 else 3012 3020 { 3013 3021 if ((u & 0xFF) == 0xFF) 3014 3022 goto L3; 3015 3023 } 3016 3024 } 3017 3025 } 3018 if ( longop && useopsize)3026 if (!shortop && useopsize) 3019 3027 { 3020 3028 if ((u & 0xFFFF0000) == 0xFFFF0000) 3021 3029 { c->Iflags ^= CFopsize; 3022 3030 goto L1; 3023 3031 } 3024 3032 if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX)) 3025 3033 { c->IEVoffset1 += 2; /* address MSW */ 3026 3034 c->IEV2.Vuns >>= 16; 3027 3035 c->Iflags ^= CFopsize; 3028 3036 goto L1; 3029 3037 } 3030 3038 if (rm >= modregrm(3,4,AX)) 3031 3039 { 3032 if (u == 0xFF && rm <= modregrm(3,4,BX))3040 if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64)) 3033 3041 { c->Iop2 = 0xB6; /* MOVZX */ 3034 3042 c->Iop = 0x0F; 3035 NEWREG(c->Irm,rm & 7); 3043 c->Irm = modregrm(3,ereg,ereg); 3044 if (c->Irex & REX_B) 3045 c->Irex |= REX_R; 3036 3046 goto L1; 3037 3047 } 3038 3048 if (u == 0xFFFF) 3039 3049 { c->Iop2 = 0xB7; /* MOVZX */ 3040 3050 c->Iop = 0x0F; 3041 NEWREG(c->Irm,rm & 7); 3051 c->Irm = modregrm(3,ereg,ereg); 3052 if (c->Irex & REX_B) 3053 c->Irex |= REX_R; 3042 3054 goto L1; 3043 3055 } 3044 3056 } 3045 3057 } 3046 3058 } 3047 3059 } 3048 3060 3049 3061 /* Look for ADD,OR,SUB,XOR with u that we can eliminate */ 3050 3062 if (!flags && 3051 3063 (op == 0x81 || op == 0x80) && 3052 (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || 3053 reg == modregrm(0,5,0) || reg == modregrm(0,6,0)) 3064 (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || // ADD,OR 3065 reg == modregrm(0,5,0) || reg == 
modregrm(0,6,0)) // SUB, XOR 3054 3066 ) 3055 { if (u == 0) 3067 { 3068 if (u == 0) 3056 3069 { 3057 3070 c->Iop = NOP; 3058 3071 goto L1; 3059 3072 } 3060 3073 if (u == ~0 && reg == modregrm(0,6,0)) /* XOR */ 3061 3074 { 3062 3075 c->Iop = 0xF6 | (op & 1); /* NOT */ 3063 3076 c->Irm ^= modregrm(0,6^2,0); 3064 3077 goto L1; 3065 3078 } 3066 if ( longop &&3079 if (!shortop && 3067 3080 useopsize && 3068 3081 op == 0x81 && 3069 3082 (u & 0xFFFF0000) == 0 && 3070 3083 (reg == modregrm(0,6,0) || reg == modregrm(0,1,0))) 3071 3084 { c->Iflags ^= CFopsize; 3072 3085 goto L1; 3073 3086 } 3074 3087 } 3075 3088 3076 3089 /* Look for TEST or OR or XOR with an immediate constant */ 3077 3090 /* that we can replace with a byte operation */ 3078 3091 if (op == 0xF7 && reg == modregrm(0,0,0) || 3079 3092 op == 0x81 && reg == modregrm(0,6,0) && !flags || 3080 3093 op == 0x81 && reg == modregrm(0,1,0)) 3081 3094 { 3082 3095 // See if we can replace a dword with a word 3083 3096 // (avoid for 32 bit instructions, because CFopsize 3084 3097 // is too slow) 3085 if ( longop && useopsize)3098 if (!shortop && useopsize) 3086 3099 { if ((u & 0xFFFF0000) == 0) 3087 3100 { c->Iflags ^= CFopsize; 3088 3101 goto L1; 3089 3102 } 3090 3103 /* If memory (not register) addressing mode */ 3091 3104 if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX)) 3092 3105 { c->IEVoffset1 += 2; /* address MSW */ 3093 3106 c->IEV2.Vuns >>= 16; 3094 3107 c->Iflags ^= CFopsize; 3095 3108 goto L1; 3096 3109 } 3097 3110 } 3098 3111 3099 3112 // If EA is not SI or DI 3100 3113 if (rm < (modregrm(3,0,SP) | reg) && 3101 3114 (usespace || 3102 3115 config.target_cpu < TARGET_PentiumPro) 3103 3116 ) 3104 3117 { 3105 3118 if ((u & 0xFFFFFF00) == 0) 3106 3119 { 3107 3120 L2: c->Iop--; /* to byte instruction */ 3108 3121 c->Iflags &= ~CFopsize; 3109 3122 goto L1; 3110 3123 } 3111 if ((u & 0xFFFF00FF) == 0 || 3112 (!longop && (u & 0xFF) == 0)) 3124 if (((u & 0xFFFF00FF) == 0 || 3125 (shortop && (u & 0xFF) == 0)) && 3126 (rm < 
modregrm(3,0,0) || (!c->Irex && ereg < 4))) 3113 3127 { 3114 3128 L3: 3115 3129 c->IEV2.Vuns >>= 8; 3116 3130 if (rm >= (modregrm(3,0,AX) | reg)) 3117 3131 c->Irm |= 4; /* AX->AH, BX->BH, etc. */ 3118 3132 else 3119 3133 c->IEVoffset1 += 1; 3120 3134 goto L2; 3121 3135 } 3122 3136 } 3123 3137 #if 0 3124 3138 // BUG: which is right? 3125 3139 else if ((u & 0xFFFF0000) == 0) 3126 3140 #else 3127 3141 else if (0 && op == 0xF7 && 3128 3142 rm >= modregrm(3,0,SP) && 3129 3143 (u & 0xFFFF0000) == 0) 3130 3144 #endif 3131 3145 c->Iflags &= ~CFopsize; 3132 3146 } 3133 3147 3134 3148 // Try to replace TEST reg,-1 with TEST reg,reg 3135 if (op == 0xF6 && rm >= modregrm(3,0,AX) )3136 { if ( u == ~0)3149 if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8 3150 { if ((u & 0xFF) == 0xFF) 3137 3151 { 3138 3152 L4: c->Iop = 0x84; // TEST regL,regL 3139 c->Irm |= ereg << 3; 3153 c->Irm = modregrm(3,ereg,ereg); 3154 if (c->Irex & REX_B) 3155 c->Irex |= REX_R; 3140 3156 c->Iflags &= ~CFopsize; 3141 3157 goto L1; 3142 3158 } 3143 3159 } 3144 if (op == 0xF7 && rm >= modregrm(3,0,AX) && ereg < SP)3160 if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4)) 3145 3161 { if (u == 0xFF) 3146 3162 goto L4; 3147 if ( u == ~0xFF && !longop)3148 { rm|= 4; /* to regH */3163 if ((u & 0xFFFF) == 0xFF00 && shortop && !c->Irex && ereg < 4) 3164 { ereg |= 4; /* to regH */ 3149 3165 goto L4; 3150 3166 } 3151 3167 } 3152 3168 3153 3169 /* Look for sign extended immediate data */ 3154 3170 if ((signed char) u == u) 3155 3171 { 3156 3172 if (op == 0x81) 3157 3173 { if (reg != 0x08 && reg != 0x20 && reg != 0x30) 3158 3174 c->Iop = op = 0x83; /* 8 bit sgn ext */ 3159 3175 } 3160 3176 else if (op == 0x69) /* IMUL rw,ew,dw */ 3161 3177 c->Iop = op = 0x6B; /* IMUL rw,ew,db */ 3162 3178 } 3163 3179 3164 3180 // Look for SHIFT EA,imm8 we can replace with short form 3165 3181 if (u == 1 && ((op & 0xFE) == 0xC0)) 3166 3182 c->Iop |= 0xD0; 3167 
3183 3168 3184 } /* if immediate second operand */ 3169 3185 3170 3186 /* Look for AX short form */ 3171 3187 if (ins & A) 3172 { if (rm == modregrm(0,AX,local_BPRM) && (op & ~3) == 0x88) 3188 { if (rm == modregrm(0,AX,local_BPRM) && 3189 !(c->Irex & REX_R) && // and it's AX, not R8 3190 (op & ~3) == 0x88 && 3191 !I64) 3173 3192 { op = ((op & 3) + 0xA0) ^ 2; 3174 3193 /* 8A-> A0 */ 3175 3194 /* 8B-> A1 */ 3176 3195 /* 88-> A2 */ 3177 3196 /* 89-> A3 */ 3178 3197 c->Iop = op; 3179 3198 c->IFL2 = c->IFL1; 3180 3199 c->IEV2 = c->IEV1; 3181 3200 } 3182 3201 3183 3202 /* Replace MOV REG1,REG2 with MOV EREG1,EREG2 */ 3184 else if ( I32&&3203 else if (!I16 && 3185 3204 (op == 0x89 || op == 0x8B) && 3186 3205 (rm & 0xC0) == 0xC0 && 3187 3206 (!b || b->BC != BCasm) 3188 3207 ) 3189 3208 c->Iflags &= ~CFopsize; 3190 3209 3191 else if ((rm & 0xC7) == 0xC0) 3210 // If rm is AX 3211 else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c->Irex & (REX_R | REX_B))) 3192 3212 { switch (op) 3193 3213 { case 0x80: op = reg | 4; break; 3194 3214 case 0x81: op = reg | 5; break; 3195 case 0x87: op = 0x90 + (reg>>3); break; 3215 case 0x87: op = 0x90 + (reg>>3); break; // XCHG 3196 3216 case 0xF6: 3197 3217 if (reg == 0) 3198 3218 op = 0xA8; /* TEST AL,immed8 */ 3199 3219 break; 3200 3220 case 0xF7: 3201 3221 if (reg == 0) 3202 3222 op = 0xA9; /* TEST AX,immed16 */ 3203 3223 break; 3204 3224 } 3205 3225 c->Iop = op; 3206 3226 } 3207 3227 } 3208 3228 3209 3229 /* Look for reg short form */ 3210 3230 if ((ins & R) && (rm & 0xC0) == 0xC0) 3211 3231 { switch (op) 3212 3232 { case 0xC6: op = 0xB0 + ereg; break; 3213 3233 case 0xC7: op = 0xB8 + ereg; break; 3214 3234 case 0xFF: 3215 3235 switch (reg) 3216 3236 { case 6<<3: op = 0x50+ereg; break;/* PUSH*/ 3217 case 0<<3: op = 0x40+ereg; break; /* INC*/3218 case 1<<3: op = 0x48+ereg; break; /* DEC*/3237 case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/ 3238 case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/ 3219 3239 } 3220 3240 break; 
3221 3241 case 0x8F: op = 0x58 + ereg; break; 3222 3242 case 0x87: 3223 3243 if (reg == 0) op = 0x90 + ereg; 3224 3244 break; 3225 3245 } 3226 3246 c->Iop = op; 3227 3247 } 3228 3248 3229 3249 // Look to replace SHL reg,1 with ADD reg,reg 3230 3250 if ((op & 0xFE) == 0xD0 && 3231 3251 (rm & modregrm(3,7,0)) == modregrm(3,4,0) && 3232 3252 config.target_cpu >= TARGET_80486) 3233 3253 { 3234 3254 c->Iop &= 1; 3235 3255 c->Irm = (rm & modregrm(3,0,7)) | (ereg << 3); 3236 if (!(c->Iflags & CFpsw) && I32) 3256 if (c->Irex & REX_B) 3257 c->Irex |= REX_R; 3258 if (!(c->Iflags & CFpsw) && !I16) 3237 3259 c->Iflags &= ~CFopsize; 3238 3260 goto L1; 3239 3261 } 3240 3262 3241 3263 /* Look for sign extended modregrm displacement, or 0 3242 3264 * displacement. 3243 3265 */ 3244 3266 3245 3267 if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp 3246 3268 c->IFL1 == FLconst) // and it's a constant 3247 3269 { 3248 3270 a = c->IEVpointer1; 3249 if (a == 0 && (rm & 7) != local_BPRM && // if 0 disp3250 !(local_BPRM == 5 && (rm & 7) == 4 && (c->Isib & 7) == BP )3271 if (a == 0 && (rm & 7) != local_BPRM && // if 0[disp] 3272 !(local_BPRM == 5 && (rm & 7) == 4 && (c->Isib & 7) == BP && !(c->Irex & REX_B)) 3251 3273 ) 3252 3274 c->Irm &= 0x3F; 3253 else if ( I32)3275 else if (!I16) 3254 3276 { 3255 3277 if ((targ_size_t)(targ_schar)a == a) 3256 3278 c->Irm ^= 0xC0; /* do 8 sx */ 3257 3279 } 3258 3280 else if (((targ_size_t)(targ_schar)a & 0xFFFF) == (a & 0xFFFF)) 3259 3281 c->Irm ^= 0xC0; /* do 8 sx */ 3260 3282 } 3261 3283 3262 3284 /* Look for LEA reg,[ireg], replace with MOV reg,ireg */ 3263 3285 else if (op == 0x8D) 3264 3286 { rm = c->Irm & 7; 3265 3287 mod = c->Irm & modregrm(3,0,0); 3266 3288 if (mod == 0) 3267 3289 { 3268 if ( I32)3290 if (!I16) 3269 3291 { 3270 3292 switch (rm) 3271 3293 { 3272 3294 case 4: 3273 3295 case 5: 3274 3296 break; 3275 3297 default: 3276 3298 c->Irm |= modregrm(3,0,0); 3277 3299 c->Iop = 0x8B; 3278 3300 break; 3279 3301 } 3280 3302 } 3281 3303 
else 3282 3304 { 3283 3305 switch (rm) 3284 3306 { 3285 3307 case 4: rm = modregrm(3,0,SI); goto L6; 3286 3308 case 5: rm = modregrm(3,0,DI); goto L6; 3287 3309 case 7: rm = modregrm(3,0,BX); goto L6; 3288 3310 L6: c->Irm = rm + reg; 3289 3311 c->Iop = 0x8B; 3290 3312 break; 3291 3313 } 3292 3314 } 3293 3315 } 3294 3316 3295 3317 /* replace LEA reg,0[BP] with MOV reg,BP */ 3296 3318 else if (mod == modregrm(1,0,0) && rm == local_BPRM && 3297 3319 c->IFL1 == FLconst && c->IEVpointer1 == 0) 3298 3320 { c->Iop = 0x8B; /* MOV reg,BP */ 3299 3321 c->Irm = modregrm(3,0,BP) + reg; 3300 3322 } 3323 } 3324 3325 // Replace [R13] with 0[R13] 3326 if (c->Irex & REX_B && (c->Irm & modregrm(3,0,5)) == modregrm(0,0,5)) 3327 { 3328 c->Irm |= modregrm(1,0,0); 3329 c->IFL1 = FLconst; 3330 c->IEVpointer1 = 0; 3301 3331 } 3302 3332 } 3303 3333 else 3304 3334 { 3305 3335 switch (op) 3306 3336 { 3307 3337 default: 3308 3338 if ((op & 0xF0) != 0x70) 3309 3339 break; 3310 3340 case JMP: 3311 3341 switch (c->IFL2) 3312 3342 { case FLcode: 3313 3343 if (c->IEV2.Vcode == code_next(c)) 3314 3344 { c->Iop = NOP; 3315 3345 continue; 3316 3346 } 3317 3347 break; 3318 3348 case FLblock: 3319 3349 if (!code_next(c) && c->IEV2.Vblock == bn) 3320 3350 { c->Iop = NOP; 3321 3351 continue; 3322 3352 } 3323 3353 break; 3324 3354 case FLconst: 3325 3355 case FLfunc: 3326 3356 case FLextern: 3327 3357 break; 3328 3358 default: 3329 3359 #ifdef DEBUG 3330 3360 WRFL((enum FL)c->IFL2); 3331 3361 #endif 3332 3362 assert(0); 3333 3363 } 3334 3364 break; 3335 3365 3336 3366 case 0x68: // PUSH immed16 3337 3367 if (c->IFL2 == FLconst) 3338 { targ_long u; 3339 3340 u = c->IEV2.Vuns; 3341 if ((c->Iflags & CFopsize) ? !I32 : I32) 3342 { if (u == (signed char) u) 3368 { 3369 targ_long u = c->IEV2.Vuns; 3370 if (I64 || 3371 ((c->Iflags & CFopsize) ? 
I16 : I32)) 3372 { // PUSH 32/64 bit operand 3373 if (u == (signed char) u) 3343 3374 c->Iop = 0x6A; // PUSH immed8 3344 3375 } 3345 else 3376 else // PUSH 16 bit operand 3346 3377 { if ((short)u == (signed char) u) 3347 3378 c->Iop = 0x6A; // PUSH immed8 3348 3379 } 3349 3380 } 3350 3381 break; 3351 3382 } 3352 3383 } 3353 3384 } 3354 3385 #if 0 3355 3386 if (1 || debugc) { 3356 3387 printf("-pinholeopt(%p)\n",cstart); 3357 3388 for (c = cstart; c; c = code_next(c)) 3358 3389 c->print(); 3359 3390 } 3360 3391 #endif 3361 3392 } 3362 3393 3394 #ifdef DEBUG 3395 STATIC void pinholeopt_unittest() 3396 { 3397 //printf("pinholeopt_unittest()\n"); 3398 struct CS { unsigned model,op,ea,ev1,ev2,flags; } tests[][2] = 3399 { 3400 // XOR reg,immed NOT regL 3401 {{ 16,0x81,modregrm(3,6,BX),0,0xFF,0 }, { 0,0xF6,modregrm(3,2,BX),0,0xFF }}, 3402 3403 #if 0 // only if config.flags4 & CFG4space 3404 // TEST regL,immed8 3405 {{ 0,0xF6,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }}, 3406 {{ 0,0xF7,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }}, 3407 {{ 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }}, 3408 {{ 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }}, 3409 #endif 3410 3411 // PUSH immed => PUSH immed8 3412 {{ 0,0x68,0,0,0 }, { 0,0x6A,0,0,0 }}, 3413 {{ 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }}, 3414 {{ 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }}, 3415 {{ 16,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }}, 3416 {{ 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }}, 3417 {{ 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }}, 3418 {{ 16,0x68,0,0,0x10000,0 }, { 0,0x6A,0,0,0x10000,0 }}, 3419 {{ 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }}, 3420 {{ 32,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }}, 3421 {{ 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }}, 3422 {{ 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }}, 3423 {{ 
32,0x68,0,0,0x10000,CFopsize }, { 0,0x6A,0,0,0x10000,CFopsize }}, 3424 {{ 32,0x68,0,0,0x8000,CFopsize }, { 0,0x68,0,0,0x8000,CFopsize }}, 3425 }; 3426 3427 //config.flags4 |= CFG4space; 3428 for (int i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) 3429 { CS *pin = &tests[i][0]; 3430 CS *pout = &tests[i][1]; 3431 code cs; 3432 memset(&cs, 0, sizeof(cs)); 3433 if (pin->model) 3434 { 3435 if (I16 && pin->model != 16) 3436 continue; 3437 if (I32 && pin->model != 32) 3438 continue; 3439 if (I64 && pin->model != 64) 3440 continue; 3441 } 3442 //printf("[%d]\n", i); 3443 cs.Iop = pin->op; 3444 cs.Iea = pin->ea; 3445 cs.IFL1 = FLconst; 3446 cs.IFL2 = FLconst; 3447 cs.IEV1.Vuns = pin->ev1; 3448 cs.IEV2.Vuns = pin->ev2; 3449 cs.Iflags = pin->flags; 3450 pinholeopt(&cs, NULL); 3451 if (cs.Iop != pout->op) 3452 { printf("[%d] Iop = x%02x, pout = x%02x\n", i, cs.Iop, pout->op); 3453 assert(0); 3454 } 3455 assert(cs.Iea == pout->ea); 3456 assert(cs.IEV1.Vuns == pout->ev1); 3457 assert(cs.IEV2.Vuns == pout->ev2); 3458 assert(cs.Iflags == pout->flags); 3459 } 3460 } 3461 #endif 3363 3462 3364 3463 /************************** 3365 3464 * Compute jump addresses for FLcode. 3366 3465 * Note: only works for forward referenced code. 3367 3466 * only direct jumps and branches are detected. 3368 3467 * LOOP instructions only work for backward refs. 3369 3468 */ 3370 3469 3371 3470 void jmpaddr(code *c) 3372 3471 { code *ci,*cn,*ctarg,*cstart; 3373 3472 targ_size_t ad; 3374 3473 unsigned char op; 3375 3474 3376 3475 //printf("jmpaddr()\n"); 3377 3476 cstart = c; /* remember start of code */ 3378 3477 while (c) 3379 3478 { 3380 3479 op = c->Iop; 3381 3480 if (inssize[op] & T && /* if second operand */ 3382 3481 c->IFL2 == FLcode && … … 3559 3658 } 3560 3659 if (iflags & CFopsize) 3561 3660 size++; /* +1 for OPSIZE prefix */ 3562 3661 } 3563 3662 } 3564 3663 3565 3664 if ((op & 0xF0) == 0x70) 3566 3665 { if (iflags & CFjmp16) // if long branch 3567 3666 size += I16 ? 
3 : 4; // + 3(4) bytes for JMP 3568 3667 } 3569 3668 else if (ins & M) // if modregrm byte 3570 3669 { 3571 3670 rm = c->Irm; 3572 3671 mod = rm & 0xC0; 3573 3672 if (a32 || I64) 3574 3673 { // 32 bit addressing 3575 3674 if (issib(rm)) 3576 3675 size++; 3577 3676 switch (mod) 3578 3677 { case 0: 3579 if (issib(rm) && (c->Isib & 7) == 5 || (rm & 7) == 5) 3678 if (issib(rm) && (c->Isib & 7) == 5 || 3679 (rm & 7) == 5) 3580 3680 size += 4; /* disp32 */ 3681 if (c->Irex & REX_B && (rm & 7) == 5) 3682 /* Instead of selecting R13, this mode is an [RIP] relative 3683 * address. Although valid, it's redundant, and should not 3684 * be generated. Instead, generate 0[R13] instead of [R13]. 3685 */ 3686 assert(0); 3581 3687 break; 3582 3688 case 0x40: 3583 3689 size++; /* disp8 */ 3584 3690 break; 3585 3691 case 0x80: 3586 3692 size += 4; /* disp32 */ 3587 3693 break; 3588 3694 } 3589 3695 } 3590 3696 else 3591 3697 { // 16 bit addressing 3592 3698 if (mod == 0x40) /* 01: 8 bit displacement */ 3593 3699 size++; 3594 3700 else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6)) 3595 3701 size += 2; 3596 3702 } 3597 3703 } 3598 3704 3599 3705 Lret: 3600 3706 if (c->Irex) … … 3727 3833 3728 3834 STATIC void cod3_flush() 3729 3835 { 3730 3836 // Emit accumulated bytes to code segment 3731 3837 #ifdef DEBUG 3732 3838 assert(pgen - bytes < sizeof(bytes)); 3733 3839 #endif 3734 3840 offset += obj_bytes(cseg,offset,pgen - bytes,bytes); 3735 3841 pgen = bytes; 3736 3842 } 3737 3843 3738 3844 unsigned codout(code *c) 3739 3845 { unsigned op; 3740 3846 unsigned char rm,mod; 3741 3847 unsigned char ins; 3742 3848 code *cn; 3743 3849 unsigned flags; 3744 3850 symbol *s; 3745 3851 3746 3852 #ifdef DEBUG 3747 if (debugc) printf("codout(%p), Coffset = x%l x\n",c,Coffset);3853 if (debugc) printf("codout(%p), Coffset = x%llx\n",c,(unsigned long long)Coffset); 3748 3854 #endif 3749 3855 3750 3856 pgen = bytes; 3751 3857 offset = Coffset; 3752 3858 for (; c; c = code_next(c)) 3753 3859 { 3754 3860 
#ifdef DEBUG 3755 3861 if (debugc) { printf("off=%02lx, sz=%ld, ",(long)OFFSET(),(long)calccodsize(c)); c->print(); } 3756 3862 #endif 3757 3863 op = c->Iop; 3758 3864 ins = inssize[op]; 3759 3865 switch (op) 3760 3866 { case ESCAPE: 3761 3867 switch (c->Iop2) 3762 3868 { case ESClinnum: 3763 3869 /* put out line number stuff */ 3764 3870 objlinnum(c->IEV2.Vsrcpos,OFFSET()); 3765 3871 break; 3766 3872 #if SCPP 3767 3873 #if 1 … … 4891 4997 printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7); 4892 4998 } 4893 4999 if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40) 4894 5000 { 4895 5001 switch (c->IFL1) 4896 5002 { 4897 5003 case FLconst: 4898 5004 case FLoffset: 4899 5005 printf(" int = %4d",c->IEV1.Vuns); 4900 5006 break; 4901 5007 case FLblock: 4902 5008 printf(" block = %p",c->IEV1.Vblock); 4903 5009 break; 4904 5010 case FLswitch: 4905 5011 case FLblockoff: 4906 5012 case FLlocalsize: 4907 5013 case FLframehandler: 4908 5014 case 0: 4909 5015 break; 4910 5016 case FLdatseg: 4911 printf(" %d.%l x",c->IEVseg1,c->IEVpointer1);5017 printf(" %d.%llx",c->IEVseg1,(unsigned long long)c->IEVpointer1); 4912 5018 break; 4913 5019 case FLauto: 4914 5020 case FLreg: 4915 5021 case FLdata: 4916 5022 case FLudata: 4917 5023 case FLpara: 4918 5024 case FLtmp: 4919 5025 case FLbprel: 4920 5026 case FLtlsdata: 4921 5027 printf(" sym='%s'",c->IEVsym1->Sident); 4922 5028 break; 4923 5029 default: 4924 5030 WRFL((enum FL)c->IFL1); 4925 5031 break; 4926 5032 } 4927 5033 } 4928 5034 } 4929 5035 if (ins & T) 4930 5036 { printf(" "); WRFL((enum FL)c->IFL2); 4931 5037 switch (c->IFL2) 4932 5038 { 4933 5039 case FLconst: 4934 5040 printf(" int = %4d",c->IEV2.Vuns); 4935 5041 break; 4936 5042 case FLblock: 4937 5043 printf(" block = %p",c->IEV2.Vblock); 4938 5044 break; 4939 5045 case FLswitch: 4940 5046 case FLblockoff: 4941 5047 case 0: 4942 5048 case FLlocalsize: 4943 5049 case FLframehandler: 4944 5050 break; 4945 5051 case FLdatseg: 4946 printf(" %d.%l 
x",c->IEVseg2,c->IEVpointer2);5052 printf(" %d.%llx",c->IEVseg2,(unsigned long long)c->IEVpointer2); 4947 5053 break; 4948 5054 case FLauto: 4949 5055 case FLreg: 4950 5056 case FLpara: 4951 5057 case FLtmp: 4952 5058 case FLbprel: 4953 5059 case FLfunc: 4954 5060 case FLdata: 4955 5061 case FLudata: 4956 5062 case FLtlsdata: 4957 5063 printf(" sym='%s'",c->IEVsym2->Sident); 4958 5064 break; 4959 5065 case FLcode: 4960 5066 printf(" code = %p",c->IEV2.Vcode); 4961 5067 break; 4962 5068 default: 4963 5069 WRFL((enum FL)c->IFL2); 4964 5070 break; 4965 5071 } 4966 5072 } branches/dmd-1.x/src/backend/code.h
r580 r581 150 150 #define SEG_ES 0 151 151 #define SEG_CS 1 152 152 #define SEG_SS 2 153 153 #define SEG_DS 3 154 154 155 155 /********************* 156 156 * Masks for register pairs. 157 157 * Note that index registers are always LSWs. This is for the convenience 158 158 * of implementing far pointers. 159 159 */ 160 160 161 161 #if 0 162 162 // Give us an extra one so we can enregister a long 163 163 #define mMSW (mCX|mDX|mDI|mES) // most significant regs 164 164 #define mLSW (mAX|mBX|mSI) // least significant regs 165 165 #else 166 166 #define mMSW (mCX|mDX|mES) /* most significant regs */ 167 167 #define mLSW (mAX|mBX|mSI|mDI) /* least significant regs */ 168 168 #endif 169 169 170 /* Return !=0 if there is a nSIB byte */170 /* Return !=0 if there is a SIB byte */ 171 171 #define issib(rm) (((rm) & 7) == 4 && ((rm) & 0xC0) != 0xC0) 172 172 173 173 #if 0 174 174 // relocation field size is always 32bits 175 175 #define is32bitaddr(x,Iflags) (1) 176 176 #else 177 177 // 178 178 // is32bitaddr works correctly only when x is 0 or 1. This is 179 179 // true today for the current definition of I32, but if the definition 180 180 // of I32 changes, this macro will need to change as well 181 181 // 182 182 // Note: even for linux targets, CFaddrsize can be set by the inline 183 183 // assembler. 184 184 #define is32bitaddr(x,Iflags) (I64 || ((x) ^(((Iflags) & CFaddrsize) !=0))) 185 185 #endif 186 186 187 187 /******************* 188 188 * Some instructions. 189 189 */ 190 190 branches/dmd-1.x/src/backend/el.c
r580 r581 1326 1326 default: 1327 1327 break; 1328 1328 } 1329 1329 return e; 1330 1330 } 1331 1331 #endif 1332 1332 #if TARGET_LINUX || TARGET_FREEBSD || TARGET_SOLARIS 1333 1333 1334 1334 elem *el_picvar(symbol *s) 1335 1335 { elem *e; 1336 1336 int x; 1337 1337 1338 1338 //printf("el_picvar(s = '%s')\n", s->Sident); 1339 1339 symbol_debug(s); 1340 1340 type_debug(s->Stype); 1341 1341 e = el_calloc(); 1342 1342 e->Eoper = OPvar; 1343 1343 e->EV.sp.Vsym = s; 1344 1344 e->Ety = s->ty(); 1345 1345 1346 /* For 32 bit: 1347 * CALL __i686.get_pc_thunk.bx@PC32 1348 * ADD EBX,offset _GLOBAL_OFFSET_TABLE_@GOTPC[2] 1349 * Generate for var locals: 1350 * MOV reg,s@GOTOFF[014h][EBX] 1351 * For var globals: 1352 * MOV EAX,s@GOT32[EBX] 1353 * MOV reg,[EAX] 1354 * For TLS var locals and globals: 1355 * MOV EAX,s@TLS_GD[EBX] 1356 * CALL ___tls_get_addr@PLT32 1357 * MOV reg,[EAX] 1358 ***************************************** 1359 * Generate for var locals: 1360 * MOV reg,s@PC32[RIP] 1361 * For var globals: 1362 * MOV RAX,s@GOTPCREL[RIP] 1363 * MOV reg,[RAX] 1364 * For TLS var locals and globals: 1365 * 0x66 1366 * LEA DI,s@TLSGD[RIP] 1367 * 0x66 1368 * 0x66 1369 * 0x48 (REX | REX_W) 1370 * CALL __tls_get_addr@PLT32 1371 * MOV reg,[RAX] 1372 */ 1373 1374 if (I64) 1375 { 1376 elfobj_refGOTsym(); 1377 switch (s->Sclass) 1378 { 1379 case SCstatic: 1380 case SClocstat: 1381 x = 0; 1382 goto case_got64; 1383 1384 case SCcomdat: 1385 case SCcomdef: 1386 case SCglobal: 1387 case SCextern: 1388 x = 1; 1389 case_got64: 1390 { 1391 int op = e->Eoper; 1392 tym_t tym = e->Ety; 1393 e->Ety = TYnptr; 1394 1395 if (s->Stype->Tty & mTYthread) 1396 { 1397 /* Add "volatile" to prevent e from being common subexpressioned. 
1398 * This is so we can preserve the magic sequence of instructions 1399 * that the gnu linker patches: 1400 * lea EDI,x@tlsgd[RIP], call __tls_get_addr@plt 1401 * => 1402 * mov EAX,gs[0], sub EAX,x@tpoff 1403 */ 1404 e->Eoper = OPrelconst; 1405 e->Ety |= mTYvolatile; 1406 if (!tls_get_addr_sym) 1407 { 1408 /* void *__tls_get_addr(void *ptr); 1409 * Parameter ptr is passed in RDI, matching TYnfunc calling convention. 1410 */ 1411 tls_get_addr_sym = symbol_name("__tls_get_addr",SCglobal,type_fake(TYnfunc)); 1412 symbol_keep(tls_get_addr_sym); 1413 } 1414 e = el_bin(OPcall, TYnptr, el_var(tls_get_addr_sym), e); 1415 } 1416 1417 switch (op * 2 + x) 1418 { 1419 case OPvar * 2 + 1: 1420 e = el_una(OPind, TYnptr, e); 1421 break; 1422 case OPvar * 2 + 0: 1423 case OPrelconst * 2 + 1: 1424 break; 1425 case OPrelconst * 2 + 0: 1426 e = el_una(OPaddr, TYnptr, e); 1427 break; 1428 default: 1429 assert(0); 1430 break; 1431 } 1432 e->Ety = tym; 1433 break; 1434 } 1435 default: 1436 break; 1437 } 1438 } 1439 else 1346 1440 switch (s->Sclass) 1347 1441 { 1348 1442 /* local (and thread) symbols get only one level of indirection; 1349 1443 * all globally known symbols get two. 1350 1444 */ 1351 1445 case SCstatic: 1352 1446 case SClocstat: 1353 1447 x = 0; 1354 1448 goto case_got; 1355 1449 1356 1450 case SCcomdat: 1357 1451 case SCcomdef: 1358 1452 case SCglobal: 1359 1453 case SCextern: 1360 1454 if (s->Stype->Tty & mTYthread) 1361 1455 x = 0; 1362 1456 else 1363 1457 x = 1; 1364 1458 case_got: 1365 1459 { if (!localgot) … … 1424 1518 break; 1425 1519 } 1426 1520 return e; 1427 1521 } 1428 1522 #endif 1429 1523 1430 1524 /************************** 1431 1525 * Make an elem out of a symbol. 
1432 1526 */ 1433 1527 1434 1528 #if MARS 1435 1529 elem * el_var(symbol *s) 1436 1530 { elem *e; 1437 1531 1438 1532 //printf("el_var(s = '%s')\n", s->Sident); 1439 1533 //printf("%x\n", s->Stype->Tty); 1440 1534 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1441 1535 // OSX is currently always pic 1442 1536 if (config.flags3 & CFG3pic && 1443 1537 #if TARGET_LINUX || TARGET_FREEBSD || TARGET_SOLARIS 1444 !(s->Stype->Tty & mTYthread) &&1538 (!(s->Stype->Tty & mTYthread) || I64) && 1445 1539 #endif 1446 1540 !tyfunc(s->ty())) 1447 1541 // Position Independent Code 1448 1542 return el_picvar(s); 1449 1543 #endif 1450 1544 symbol_debug(s); 1451 1545 type_debug(s->Stype); 1452 1546 e = el_calloc(); 1453 1547 e->Eoper = OPvar; 1454 1548 e->EV.sp.Vsym = s; 1455 1549 type_debug(s->Stype); 1456 1550 e->Ety = s->ty(); 1457 1551 if (s->Stype->Tty & mTYthread) 1458 1552 { 1459 1553 //printf("thread local %s\n", s->Sident); 1460 1554 #if TARGET_OSX 1461 1555 ; 1462 1556 #elif TARGET_LINUX || TARGET_FREEBSD || TARGET_SOLARIS 1463 1557 /* For 32 bit: 1464 1558 * Generate for var locals: … … 1477 1571 * For var globals: 1478 1572 * MOV RAX,s@GOTTPOFF[RIP] 1479 1573 * MOV reg,FS:[RAX] 1480 1574 * 1481 1575 * For address of locals: 1482 1576 * MOV RAX,FS:[00] 1483 1577 * LEA reg,s@TPOFF32[RAX] 1484 1578 * e => &s + *(FS:0) 1485 1579 * For address of globals: 1486 1580 * MOV reg,FS:[00] 1487 1581 * MOV RAX,s@GOTTPOFF[RIP] 1488 1582 * ADD reg,RAX 1489 1583 * e => s + *(FS:0) 1490 1584 * This leaves us with a problem, as the 'var' version cannot simply have 1491 1585 * its address taken, as what is the address of FS:s ? The (not so efficient) 1492 1586 * solution is to just use the second address form, and * it. 1493 1587 * Turns out that is identical to the 32 bit version, except GS => FS and the 1494 1588 * fixups are different. 1495 1589 * In the future, we should figure out a way to optimize to the 'var' version. 
1496 1590 */ 1591 if (I64) 1592 elfobj_refGOTsym(); 1497 1593 elem *e1 = el_calloc(); 1498 1594 e1->EV.sp.Vsym = s; 1499 1595 if (s->Sclass == SCstatic || s->Sclass == SClocstat) 1500 1596 { e1->Eoper = OPrelconst; 1501 1597 e1->Ety = TYnptr; 1502 1598 } 1503 1599 else 1504 1600 { 1505 1601 e1->Eoper = OPvar; 1506 1602 e1->Ety = TYnptr; 1507 1603 } 1508 1604 1509 1605 /* Fake GS:[0000] as a load of _tls_array, and then in the back end recognize 1510 1606 * the fake and rewrite it as GS:[0000] (or FS:[0000] for I64), because there is 1511 1607 * no way to represent segment overrides in the elem nodes. 1512 1608 */ 1513 1609 elem *e2 = el_calloc(); 1514 1610 e2->Eoper = OPvar; 1515 1611 e2->EV.sp.Vsym = rtlsym[RTLSYM_TLS_ARRAY]; 1516 1612 e2->Ety = e2->EV.sp.Vsym->ty(); branches/dmd-1.x/src/backend/elfobj.c
r579 r581 67 67 * RI_TYPE_GOT32 R_X86_64_ R_X86_64_ 68 68 * RI_TYPE_TLS_GD R_X86_64_TLSGD R_X86_64_ 69 69 * RI_TYPE_TLS_IE R_X86_64_GOTTPOFF R_X86_64_ 70 70 * RI_TYPE_TLS_LE R_X86_64_TPOFF32 R_X86_64_ 71 71 * RI_TYPE_PLT32 R_X86_64_PLT32 R_X86_64_ 72 72 * RI_TYPE_PC32 R_X86_64_PC32 R_X86_64_ 73 73 */ 74 74 75 75 /****************************************** 76 76 */ 77 77 78 78 symbol *GOTsym; // global offset table reference 79 79 80 80 symbol *elfobj_getGOTsym() 81 81 { 82 82 if (!GOTsym) 83 83 { 84 84 GOTsym = symbol_name("_GLOBAL_OFFSET_TABLE_",SCglobal,tspvoid); 85 85 } 86 86 return GOTsym; 87 } 88 89 void elfobj_refGOTsym() 90 { 91 if (!GOTsym) 92 { 93 symbol *s = elfobj_getGOTsym(); 94 objextern(s); 95 } 87 96 } 88 97 89 98 static void objfile_write(FILE *fd, void *buffer, unsigned len); 90 99 91 100 STATIC char * objmodtoseg (const char *modname); 92 101 STATIC void obj_browse_flush(); 93 102 STATIC void objfixupp (struct FIXUP *); 94 103 STATIC void ledata_new (int seg,targ_size_t offset); 95 104 void obj_tlssections(); 96 105 97 106 static IDXSYM elf_addsym(IDXSTR sym, targ_size_t val, unsigned sz, 98 107 unsigned typ,unsigned bind,IDXSEC sec); 99 108 static long elf_align(FILE *fd, targ_size_t size, long offset); 100 109 101 110 // The object file is built is several separate pieces 102 111 103 112 // Non-repeatable section types have single output buffers 104 113 // Pre-allocated buffers are defined for: 105 114 // Section Names string table 106 115 // Section Headers table … … 2296 2305 * seg = where the address is going 2297 2306 * offset = offset within seg 2298 2307 * val = displacement from address 2299 2308 * targetdatum = DATA, CDATA or UDATA, depending where the address is 2300 2309 * flags = CFoff, CFseg 2301 2310 * Example: 2302 2311 * int *abc = &def[3]; 2303 2312 * to allocate storage: 2304 2313 * reftodatseg(DATA,offset,3 * sizeof(int *),UDATA); 2305 2314 */ 2306 2315 2307 2316 void reftodatseg(int seg,targ_size_t offset,targ_size_t val, 2308 
2317 unsigned targetdatum,int flags) 2309 2318 { 2310 2319 Outbuffer *buf; 2311 2320 int save; 2312 2321 2313 2322 buf = SegData[seg]->SDbuf; 2314 2323 save = buf->size(); 2315 2324 buf->setsize(offset); 2316 //dbg_printf("reftodatseg(seg=%d, offset=x%lx, val=x%lx,data %x, flags %x )\n", 2317 // seg,offset,val,targetdatum,flags); 2325 #if 0 2326 printf("reftodatseg(seg=%d, offset=x%llx, val=x%llx,data %x, flags %x)\n", 2327 seg,(unsigned long long)offset,(unsigned long long)val,targetdatum,flags); 2328 #endif 2318 2329 /*if (OPT_IS_SET(OPTfwritable_strings)) 2319 2330 { 2320 2331 elf_addrel(seg,offset,RI_TYPE_SYM32,STI_DATA,0); 2321 2332 } 2322 2333 else*/ 2323 2334 { 2324 2335 int relinfo; 2325 2336 2326 2337 if (I64) 2327 2338 { 2328 2339 if (MAP_SEG2TYP(seg) == CODE && config.flags3 & CFG3pic) 2329 2340 relinfo = R_X86_64_PC32; 2330 2341 else if (MAP_SEG2SEC(targetdatum)->sh_flags & SHF_TLS) 2331 2342 relinfo = config.flags3 & CFG3pic ? R_X86_64_TLSGD : R_X86_64_TPOFF32; 2332 2343 else 2333 2344 relinfo = R_X86_64_32; 2334 2345 } 2335 2346 else 2336 2347 { 2337 2348 if (MAP_SEG2TYP(seg) == CODE && config.flags3 & CFG3pic) … … 2402 2413 * flags = CFselfrel: self-relative 2403 2414 * CFseg: get segment 2404 2415 * CFoff: get offset 2405 2416 * CFoffset64: 64 bit fixup 2406 2417 * Returns: 2407 2418 * number of bytes in reference (4 or 8) 2408 2419 */ 2409 2420 2410 2421 int reftoident(int seg, targ_size_t offset, Symbol *s, targ_size_t val, 2411 2422 int flags) 2412 2423 { 2413 2424 tym_t ty; 2414 2425 bool external = TRUE; 2415 2426 Outbuffer *buf; 2416 2427 elf_u32_f32 relinfo,refseg; 2417 2428 int segtyp = MAP_SEG2TYP(seg); 2418 2429 //assert(val == 0); 2419 2430 int retsize = (flags & CFoffset64) ? 
8 : 4; 2420 2431 2421 2432 #if 0 2422 dbg_printf("\nreftoident('%s' seg %d, offset x%lx, val x%lx, flags x%x)\n",2433 printf("\nreftoident('%s' seg %d, offset x%llx, val x%llx, flags x%x)\n", 2423 2434 s->Sident,seg,offset,val,flags); 2424 dbg_printf("Sseg = %d, Sxtrnnum = %d \n",s->Sseg,s->Sxtrnnum);2435 dbg_printf("Sseg = %d, Sxtrnnum = %d, retsize = %d\n",s->Sseg,s->Sxtrnnum,retsize); 2425 2436 symbol_print(s); 2426 2437 #endif 2427 2438 2428 2439 ty = s->ty(); 2429 2440 if (s->Sxtrnnum) 2430 2441 { // identifier is defined somewhere else 2431 2442 if (I64) 2432 2443 { 2433 2444 if (SymbolTable64[s->Sxtrnnum].st_shndx != SHT_UNDEF) 2434 2445 external = FALSE; 2435 2446 } 2436 2447 else 2437 2448 { 2438 2449 if (SymbolTable[s->Sxtrnnum].st_shndx != SHT_UNDEF) 2439 2450 external = FALSE; 2440 2451 } 2441 2452 } 2442 2453 2443 2454 switch (s->Sclass) 2444 2455 { … … 2503 2514 if (!external && // local definition found 2504 2515 s->Sseg == seg && // within same code segment 2505 2516 (!(config.flags3 & CFG3pic) || // not position indp code 2506 2517 s->Sclass == SCstatic)) // or is pic, but declared static 2507 2518 { // Can use PC relative 2508 2519 //dbg_printf("\tdoing PC relative\n"); 2509 2520 val = (s->Soffset+val) - (offset+4); 2510 2521 } 2511 2522 else 2512 2523 { 2513 2524 val = (targ_size_t)-4; 2514 2525 //dbg_printf("\tadding relocation\n"); 2515 2526 if (I64) 2516 2527 relinfo = config.flags3 & CFG3pic ? R_X86_64_PLT32 : R_X86_64_PC32; 2517 2528 else 2518 2529 relinfo = config.flags3 & CFG3pic ? 
RI_TYPE_PLT32 : RI_TYPE_PC32; 2519 2530 elf_addrel(seg,offset, 2520 2531 relinfo, 2521 2532 s->Sxtrnnum,0); 2522 2533 } 2534 if (I64) 2535 val += 4; 2523 2536 } 2524 2537 else 2525 2538 { // code to code code to data, data to code, data to data refs 2526 2539 refseg = s->Sxtrnnum; // default to name symbol table entry 2527 2540 if (s->Sclass == SCstatic) 2528 2541 { // offset into .data or .bss seg 2529 2542 refseg = MAP_SEG2SYMIDX(s->Sseg); 2530 2543 // use segment symbol table entry 2531 2544 val += s->Soffset; 2532 2545 if (!(config.flags3 & CFG3pic) || // all static refs from normal code 2533 2546 segtyp == DATA) // or refs from data from posi indp 2534 2547 { 2535 2548 relinfo = I64 ? R_X86_64_32 : RI_TYPE_SYM32; 2536 2549 } 2537 2550 else 2538 2551 { 2539 2552 relinfo = I64 ? R_X86_64_PC32 : RI_TYPE_GOTOFF; 2540 2553 } 2541 2554 } 2542 2555 else if (config.flags3 & CFG3pic && s == GOTsym) 2543 2556 { // relocation for Gbl Offset Tab 2544 2557 relinfo = I64 ? R_X86_64_NONE : RI_TYPE_GOTPC; 2545 2558 } 2546 2559 else if (segtyp == DATA) 2547 2560 { // relocation from within DATA seg 2548 2561 relinfo = I64 ? R_X86_64_32 : RI_TYPE_SYM32; 2549 2562 } 2550 2563 else 2551 2564 { // relocation from within CODE seg 2552 2565 if (I64) 2553 relinfo = config.flags3 & CFG3pic ? R_X86_64_NONE : R_X86_64_32; 2566 { if (config.flags3 & CFG3pic) 2567 relinfo = R_X86_64_GOTPCREL; 2568 else 2569 relinfo = (flags & CFpc32) ? R_X86_64_PC32 : R_X86_64_32; 2570 } 2554 2571 else 2555 2572 relinfo = config.flags3 & CFG3pic ? RI_TYPE_GOT32 : RI_TYPE_SYM32; 2556 2573 } 2557 2574 if ((s->ty() & mTYLINK) & mTYthread) 2558 2575 { 2559 2576 if (I64) 2560 2577 { 2561 2578 if (config.flags3 & CFG3pic) 2562 2579 { 2563 if (s->Sclass == SCstatic )2564 relinfo = R_X86_64_T POFF32; // TLS_GD?2580 if (s->Sclass == SCstatic || s->Sclass == SClocstat) 2581 relinfo = R_X86_64_TLSGD; // TLS_GD? 
2565 2582 else 2566 relinfo = R_X86_64_ GOTTPOFF;2583 relinfo = R_X86_64_TLSGD; 2567 2584 } 2568 2585 else 2569 2586 { 2570 if (s->Sclass == SCstatic )2587 if (s->Sclass == SCstatic || s->Sclass == SClocstat) 2571 2588 relinfo = R_X86_64_TPOFF32; 2572 2589 else 2573 2590 relinfo = R_X86_64_GOTTPOFF; 2574 2591 } 2575 2592 } 2576 2593 else 2577 2594 { 2578 2595 if (config.flags3 & CFG3pic) 2579 2596 { 2580 2597 if (s->Sclass == SCstatic) 2581 2598 relinfo = RI_TYPE_TLS_LE; // TLS_GD? 2582 2599 else 2583 2600 relinfo = RI_TYPE_TLS_IE; 2584 2601 } 2585 2602 else 2586 2603 { 2587 2604 if (s->Sclass == SCstatic) 2588 2605 relinfo = RI_TYPE_TLS_LE; 2589 2606 else 2590 2607 relinfo = RI_TYPE_TLS_IE; trunk/src/backend/cod1.c
r580 r581 1327 1327 s->Sclass == SCparameter) 1328 1328 { refparam = TRUE; 1329 1329 reflocal = TRUE; // kludge to set up prolog 1330 1330 } 1331 1331 pcs->Irm = modregrm(3,0,s->Sreglsw & 7); 1332 1332 if (s->Sreglsw & 8) 1333 1333 pcs->Irex |= REX_B; 1334 1334 if (e->EV.sp.Voffset == 1 && sz == 1) 1335 1335 { assert(s->Sregm & BYTEREGS); 1336 1336 assert(s->Sreglsw < 4); 1337 1337 pcs->Irm |= 4; // use 2nd byte of register 1338 1338 } 1339 1339 else 1340 1340 assert(!e->EV.sp.Voffset); 1341 1341 } 1342 1342 else if (s->ty() & mTYcs && !(fl == FLextern && LARGECODE)) 1343 1343 { 1344 1344 pcs->Iflags |= CFcs | CFoff; 1345 1345 } 1346 1346 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1347 // if (fl == FLtlsdata || s->ty() & mTYthread) 1348 // pcs->Iflags |= CFgs; 1347 if (I64 && config.flags3 & CFG3pic && 1348 (fl == FLtlsdata || s->ty() & mTYthread)) 1349 { 1350 pcs->Iflags |= CFopsize; 1351 pcs->Irex = 0x48; 1352 } 1349 1353 #endif 1350 1354 pcs->IEVsym1 = s; 1351 1355 pcs->IEVoffset1 = e->EV.sp.Voffset; 1352 1356 if (sz == 1) 1353 1357 { /* Don't use SI or DI for this variable */ 1354 1358 s->Sflags |= GTbyte; 1355 1359 if (e->EV.sp.Voffset > 1) 1356 1360 s->Sflags &= ~GTregcand; 1357 1361 } 1358 1362 else if (e->EV.sp.Voffset) 1359 1363 s->Sflags &= ~GTregcand; 1360 1364 if (!(keepmsk & RMstore)) // if not store only 1361 1365 { s->Sflags |= SFLread; // assume we are doing a read 1362 1366 } 1363 1367 break; 1364 1368 case FLpseudo: 1365 1369 #if MARS 1366 1370 assert(0); 1367 1371 #else 1368 1372 { … … 2676 2680 goto LF1; 2677 2681 else 2678 2682 goto LF2; 2679 2683 } 2680 2684 else 2681 2685 { int fl; 2682 2686 2683 2687 fl = FLfunc; 2684 2688 if (!tyfunc(s->ty())) 2685 2689 fl = el_fl(e1); 2686 2690 if (tym1 == TYifunc) 2687 2691 c1 = gen1(c1,0x9C); // PUSHF 2688 2692 #if 0 && TARGET_LINUX 2689 2693 if (s->Sfl == FLgot || s->Sfl == FLgotoff) 2690 2694 fl = s->Sfl; 2691 2695 #endif 2692 2696 ce = gencs(CNIL,farfunc ? 
0x9A : 0xE8,0,fl,s); // CALL extern 2693 2697 ce->Iflags |= farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff); 2694 2698 #if TARGET_LINUX 2695 2699 if (s == tls_get_addr_sym) 2696 { /* Append a NOP so GNU linker has patch room 2697 */ 2698 ce = gen1(ce, 0x90); // NOP 2699 code_orflag(ce, CFvolatile); // don't schedule it 2700 { 2701 if (I32) 2702 { 2703 /* Append a NOP so GNU linker has patch room 2704 */ 2705 ce = gen1(ce, 0x90); // NOP 2706 code_orflag(ce, CFvolatile); // don't schedule it 2707 } 2708 else 2709 { /* Prepend 66 66 48 so GNU linker has patch room 2710 */ 2711 assert(I64); 2712 ce->Irex = REX | REX_W; 2713 ce = cat(gen1(CNIL, 0x66), ce); 2714 ce = cat(gen1(CNIL, 0x66), ce); 2715 } 2700 2716 } 2701 2717 #endif 2702 2718 } 2703 2719 ce = cat(c1,ce); 2704 2720 } 2705 2721 else 2706 2722 { /* Call function via pointer */ 2707 2723 elem *e11; 2708 2724 tym_t e11ty; 2709 2725 2710 2726 #ifdef DEBUG 2711 2727 if (e1->Eoper != OPind 2712 2728 ) { WRFL((enum FL)el_fl(e1)); WROP(e1->Eoper); } 2713 2729 #endif 2714 2730 c = save87(); // assume 8087 regs are all trashed 2715 2731 assert(e1->Eoper == OPind); 2716 2732 e11 = e1->E1; 2717 2733 e11ty = tybasic(e11->Ety); 2718 2734 assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr))); 2719 2735 trunk/src/backend/cod2.c
r579 r581 3736 3736 { code cs; 3737 3737 code *c; 3738 3738 3739 3739 cs.Iflags = 0; 3740 3740 unsigned char rex = 0; 3741 3741 cs.Irex = rex; 3742 3742 assert(e->Eoper == OPvar || e->Eoper == OPrelconst); 3743 3743 enum FL fl = el_fl(e); 3744 3744 switch (fl) 3745 3745 { 3746 3746 case FLdatseg: 3747 3747 cs.IEV2._EP.Vpointer = e->EV.Vpointer; 3748 3748 goto L3; 3749 3749 3750 3750 case FLfardata: 3751 3751 assert(!TARGET_FLAT); 3752 3752 goto L4; 3753 3753 3754 3754 case FLtlsdata: 3755 3755 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 3756 { /* Generate: 3756 { 3757 L5: 3758 if (I64 && config.flags3 & CFG3pic) 3759 { 3760 /* Generate: 3761 * LEA DI,s@TLSGD[RIP] 3762 */ 3763 assert(reg == DI); 3764 code css; 3765 css.Irex = REX | REX_W; 3766 css.Iop = 0x8D; // LEA 3767 css.Irm = modregrm(0,DI,5); 3768 css.Iflags = CFopsize; 3769 css.IFL1 = fl; 3770 css.IEVsym1 = e->EV.sp.Vsym; 3771 css.IEVoffset1 = e->EV.sp.Voffset; 3772 c = gen(NULL, &css); 3773 return c; 3774 } 3775 /* Generate: 3757 3776 * MOV reg,GS:[00000000] 3758 3777 * ADD reg, offset s@TLS_LE 3759 3778 * for locals, and for globals: 3760 3779 * MOV reg,GS:[00000000] 3761 3780 * ADD reg, s@TLS_IE 3762 3781 * note different fixup 3763 3782 */ 3764 L5:3765 3783 int stack = 0; 3766 3784 c = NULL; 3767 3785 if (reg == STACK) 3768 3786 { regm_t retregs = ALLREGS; 3769 3787 3770 3788 c = allocreg(&retregs,®,TYoffset); 3771 3789 reg = findreg(retregs); 3772 3790 stack = 1; 3773 3791 } 3774 3792 3775 3793 code css; 3776 3794 css.Irex = rex; 3777 3795 css.Iop = 0x8B; 3778 3796 css.Irm = modregrm(0, 0, BPRM); 3779 3797 code_newreg(&css, reg); 3780 3798 css.Iflags = CFgs; 3781 3799 css.IFL1 = FLconst; 3782 3800 css.IEV1.Vuns = 0; 3783 3801 c = gen(c, &css); // MOV reg,GS:[00000000] 3784 3802 trunk/src/backend/cod3.c
r580 r581 19 19 #include "cc.h" 20 20 #include "el.h" 21 21 #include "code.h" 22 22 #include "oper.h" 23 23 #include "global.h" 24 24 #include "type.h" 25 25 #include "parser.h" 26 26 #if SCPP 27 27 #include "cpp.h" 28 28 #include "exh.h" 29 29 #endif 30 30 31 31 static char __file__[] = __FILE__; /* for tassert.h */ 32 32 #include "tassert.h" 33 33 34 34 #if MARS 35 35 #define tstrace NULL 36 36 #endif 37 37 38 38 extern targ_size_t retsize; 39 STATIC void pinholeopt_unittest(); 39 40 STATIC void do8bit (enum FL,union evc *); 40 41 STATIC void do16bit (enum FL,union evc *,int); 41 42 STATIC void do32bit (enum FL,union evc *,int); 42 43 STATIC void do64bit (enum FL,union evc *,int); 43 44 44 45 static int hasframe; /* !=0 if this function has a stack frame */ 45 46 static targ_size_t Foff; // BP offset of floating register 46 47 static targ_size_t CSoff; // offset of common sub expressions 47 48 static targ_size_t NDPoff; // offset of saved 8087 registers 48 49 int BPoff; // offset from BP 49 50 static int EBPtoESP; // add to EBP offset to get ESP offset 50 51 static int AAoff; // offset of alloca temporary 51 52 52 53 #if ELFOBJ || MACHOBJ 53 54 #define JMPSEG CDATA 54 55 #define JMPOFF CDoffset 55 56 #else 56 57 #define JMPSEG DATA 57 58 #define JMPOFF Doffset 58 59 #endif … … 2893 2894 return offset; 2894 2895 } 2895 2896 2896 2897 2897 2898 2898 2899 /******************************* 2899 2900 * Find shorter versions of the same instructions. 
2900 2901 * Does these optimizations: 2901 2902 * replaces jmps to the next instruction with NOPs 2902 2903 * sign extension of modregrm displacement 2903 2904 * sign extension of immediate data (can't do it for OR, AND, XOR 2904 2905 * as the opcodes are not defined) 2905 2906 * short versions for AX EA 2906 2907 * short versions for reg EA 2907 2908 * Input: 2908 2909 * b -> block for code (or NULL) 2909 2910 */ 2910 2911 2911 2912 void pinholeopt(code *c,block *b) 2912 2913 { targ_size_t a; 2913 unsigned op,mod ,rm,reg,ereg;2914 unsigned op,mod; 2914 2915 unsigned char ins; 2915 2916 int usespace; 2916 2917 int useopsize; 2917 2918 int space; 2918 2919 block *bn; 2920 2921 #ifdef DEBUG 2922 static int tested; if (!tested) { tested++; pinholeopt_unittest(); } 2923 #endif 2919 2924 2920 2925 #if 0 2921 2926 code *cstart = c; 2922 2927 if (debugc) 2923 2928 { 2924 2929 printf("+pinholeopt(%p)\n",c); 2925 2930 } 2926 2931 #endif 2927 2932 2928 2933 if (b) 2929 2934 { bn = b->Bnext; 2930 2935 usespace = (config.flags4 & CFG4space && b->BC != BCasm); 2931 2936 useopsize = (I16 || (config.flags4 & CFG4space && b->BC != BCasm)); 2932 2937 } 2933 2938 else 2934 2939 { bn = NULL; 2935 2940 usespace = (config.flags4 & CFG4space); 2936 2941 useopsize = (I16 || config.flags4 & CFG4space); 2937 2942 } 2938 2943 for (; c; c = code_next(c)) 2939 2944 { 2940 2945 L1: 2941 2946 op = c->Iop; 2942 2947 if (op == 0x0F) 2943 2948 ins = inssize2[c->Iop2]; 2944 2949 else 2945 2950 ins = inssize[c->Iop]; 2946 if ( !I64 &&ins & M) // if modregrm byte2947 { int longop = (c->Iflags & CFopsize) ? I16 : I32;2951 if (ins & M) // if modregrm byte 2952 { int shortop = (c->Iflags & CFopsize) ? 
!I16 : I16; 2948 2953 int local_BPRM = BPRM; 2949 2954 2950 2955 if (c->Iflags & CFaddrsize) 2951 2956 local_BPRM ^= 5 ^ 6; // toggle between 5 and 6 2952 2957 2953 rm = c->Irm; 2954 reg = rm & (7<<3); // isolate reg field 2955 ereg = rm & 7; 2958 unsigned rm = c->Irm; 2959 unsigned reg = rm & modregrm(0,7,0); // isolate reg field 2960 unsigned ereg = rm & 7; 2961 //printf("c = %p, op = %02x rm = %02x\n", c, op, rm); 2956 2962 2957 2963 /* If immediate second operand */ 2958 2964 if ((ins & T || op == 0xF6 || op == 0xF7) && 2959 2965 c->IFL2 == FLconst) 2960 { int flags; 2961 targ_long u; 2962 2963 flags = c->Iflags & CFpsw; /* if want result in flags */ 2964 u = c->IEV2.Vuns; 2966 { 2967 int flags = c->Iflags & CFpsw; /* if want result in flags */ 2968 targ_long u = c->IEV2.Vuns; 2965 2969 if (ins & E) 2966 2970 u = (signed char) u; 2967 else if ( !longop)2971 else if (shortop) 2968 2972 u = (short) u; 2969 2973 2970 2974 // Replace CMP reg,0 with TEST reg,reg 2971 if ((op & 0xFE) == 0x80 && 2975 if ((op & 0xFE) == 0x80 && // 80 is CMP R8,imm8; 81 is CMP reg,imm 2972 2976 rm >= modregrm(3,7,AX) && 2973 2977 u == 0) 2974 2978 { c->Iop = (op & 1) | 0x84; 2975 2979 c->Irm = modregrm(3,ereg,ereg); 2980 if (c->Irex & REX_B) 2981 c->Irex |= REX_R; 2976 2982 goto L1; 2977 2983 } 2978 2984 2979 2985 /* Optimize ANDs with an immediate constant */ 2980 2986 if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0)) 2981 2987 { 2982 if (rm >= modregrm(3,4,AX)) 2988 if (rm >= modregrm(3,4,AX)) // AND reg,imm 2983 2989 { 2984 2990 if (u == 0) 2985 2991 { /* Replace with XOR reg,reg */ 2986 2992 c->Iop = 0x30 | (op & 1); 2987 NEWREG(c->Irm,rm & 7); 2993 c->Irm = modregrm(3,ereg,ereg); 2994 if (c->Irex & REX_B) 2995 c->Irex |= REX_R; 2988 2996 goto L1; 2989 2997 } 2990 2998 if (u == 0xFFFFFFFF && !flags) 2991 2999 { c->Iop = NOP; 2992 3000 goto L1; 2993 3001 } 2994 3002 } 2995 3003 if (op == 0x81 && !flags) 2996 3004 { // If we can do the operation in one byte 2997 3005 2998 3006 
// If EA is not SI or DI 2999 if ( rm < modregrm(3,4,SP) &&3007 if ((rm < modregrm(3,4,SP) || I64) && 3000 3008 (config.flags4 & CFG4space || 3001 3009 config.target_cpu < TARGET_PentiumPro) 3002 3010 ) 3003 3011 { 3004 3012 if ((u & 0xFFFFFF00) == 0xFFFFFF00) 3005 3013 goto L2; 3006 else 3007 { if ( longop)3014 else if (rm < modregrm(3,0,0) || (!c->Irex && ereg < 4)) 3015 { if (!shortop) 3008 3016 { if ((u & 0xFFFF00FF) == 0xFFFF00FF) 3009 3017 goto L3; 3010 3018 } 3011 3019 else 3012 3020 { 3013 3021 if ((u & 0xFF) == 0xFF) 3014 3022 goto L3; 3015 3023 } 3016 3024 } 3017 3025 } 3018 if ( longop && useopsize)3026 if (!shortop && useopsize) 3019 3027 { 3020 3028 if ((u & 0xFFFF0000) == 0xFFFF0000) 3021 3029 { c->Iflags ^= CFopsize; 3022 3030 goto L1; 3023 3031 } 3024 3032 if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX)) 3025 3033 { c->IEVoffset1 += 2; /* address MSW */ 3026 3034 c->IEV2.Vuns >>= 16; 3027 3035 c->Iflags ^= CFopsize; 3028 3036 goto L1; 3029 3037 } 3030 3038 if (rm >= modregrm(3,4,AX)) 3031 3039 { 3032 if (u == 0xFF && rm <= modregrm(3,4,BX))3040 if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64)) 3033 3041 { c->Iop2 = 0xB6; /* MOVZX */ 3034 3042 c->Iop = 0x0F; 3035 NEWREG(c->Irm,rm & 7); 3043 c->Irm = modregrm(3,ereg,ereg); 3044 if (c->Irex & REX_B) 3045 c->Irex |= REX_R; 3036 3046 goto L1; 3037 3047 } 3038 3048 if (u == 0xFFFF) 3039 3049 { c->Iop2 = 0xB7; /* MOVZX */ 3040 3050 c->Iop = 0x0F; 3041 NEWREG(c->Irm,rm & 7); 3051 c->Irm = modregrm(3,ereg,ereg); 3052 if (c->Irex & REX_B) 3053 c->Irex |= REX_R; 3042 3054 goto L1; 3043 3055 } 3044 3056 } 3045 3057 } 3046 3058 } 3047 3059 } 3048 3060 3049 3061 /* Look for ADD,OR,SUB,XOR with u that we can eliminate */ 3050 3062 if (!flags && 3051 3063 (op == 0x81 || op == 0x80) && 3052 (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || 3053 reg == modregrm(0,5,0) || reg == modregrm(0,6,0)) 3064 (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || // ADD,OR 3065 reg == modregrm(0,5,0) || reg == 
modregrm(0,6,0)) // SUB, XOR 3054 3066 ) 3055 { if (u == 0) 3067 { 3068 if (u == 0) 3056 3069 { 3057 3070 c->Iop = NOP; 3058 3071 goto L1; 3059 3072 } 3060 3073 if (u == ~0 && reg == modregrm(0,6,0)) /* XOR */ 3061 3074 { 3062 3075 c->Iop = 0xF6 | (op & 1); /* NOT */ 3063 3076 c->Irm ^= modregrm(0,6^2,0); 3064 3077 goto L1; 3065 3078 } 3066 if ( longop &&3079 if (!shortop && 3067 3080 useopsize && 3068 3081 op == 0x81 && 3069 3082 (u & 0xFFFF0000) == 0 && 3070 3083 (reg == modregrm(0,6,0) || reg == modregrm(0,1,0))) 3071 3084 { c->Iflags ^= CFopsize; 3072 3085 goto L1; 3073 3086 } 3074 3087 } 3075 3088 3076 3089 /* Look for TEST or OR or XOR with an immediate constant */ 3077 3090 /* that we can replace with a byte operation */ 3078 3091 if (op == 0xF7 && reg == modregrm(0,0,0) || 3079 3092 op == 0x81 && reg == modregrm(0,6,0) && !flags || 3080 3093 op == 0x81 && reg == modregrm(0,1,0)) 3081 3094 { 3082 3095 // See if we can replace a dword with a word 3083 3096 // (avoid for 32 bit instructions, because CFopsize 3084 3097 // is too slow) 3085 if ( longop && useopsize)3098 if (!shortop && useopsize) 3086 3099 { if ((u & 0xFFFF0000) == 0) 3087 3100 { c->Iflags ^= CFopsize; 3088 3101 goto L1; 3089 3102 } 3090 3103 /* If memory (not register) addressing mode */ 3091 3104 if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX)) 3092 3105 { c->IEVoffset1 += 2; /* address MSW */ 3093 3106 c->IEV2.Vuns >>= 16; 3094 3107 c->Iflags ^= CFopsize; 3095 3108 goto L1; 3096 3109 } 3097 3110 } 3098 3111 3099 3112 // If EA is not SI or DI 3100 3113 if (rm < (modregrm(3,0,SP) | reg) && 3101 3114 (usespace || 3102 3115 config.target_cpu < TARGET_PentiumPro) 3103 3116 ) 3104 3117 { 3105 3118 if ((u & 0xFFFFFF00) == 0) 3106 3119 { 3107 3120 L2: c->Iop--; /* to byte instruction */ 3108 3121 c->Iflags &= ~CFopsize; 3109 3122 goto L1; 3110 3123 } 3111 if ((u & 0xFFFF00FF) == 0 || 3112 (!longop && (u & 0xFF) == 0)) 3124 if (((u & 0xFFFF00FF) == 0 || 3125 (shortop && (u & 0xFF) == 0)) && 3126 (rm < 
modregrm(3,0,0) || (!c->Irex && ereg < 4))) 3113 3127 { 3114 3128 L3: 3115 3129 c->IEV2.Vuns >>= 8; 3116 3130 if (rm >= (modregrm(3,0,AX) | reg)) 3117 3131 c->Irm |= 4; /* AX->AH, BX->BH, etc. */ 3118 3132 else 3119 3133 c->IEVoffset1 += 1; 3120 3134 goto L2; 3121 3135 } 3122 3136 } 3123 3137 #if 0 3124 3138 // BUG: which is right? 3125 3139 else if ((u & 0xFFFF0000) == 0) 3126 3140 #else 3127 3141 else if (0 && op == 0xF7 && 3128 3142 rm >= modregrm(3,0,SP) && 3129 3143 (u & 0xFFFF0000) == 0) 3130 3144 #endif 3131 3145 c->Iflags &= ~CFopsize; 3132 3146 } 3133 3147 3134 3148 // Try to replace TEST reg,-1 with TEST reg,reg 3135 if (op == 0xF6 && rm >= modregrm(3,0,AX) )3136 { if ( u == ~0)3149 if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8 3150 { if ((u & 0xFF) == 0xFF) 3137 3151 { 3138 3152 L4: c->Iop = 0x84; // TEST regL,regL 3139 c->Irm |= ereg << 3; 3153 c->Irm = modregrm(3,ereg,ereg); 3154 if (c->Irex & REX_B) 3155 c->Irex |= REX_R; 3140 3156 c->Iflags &= ~CFopsize; 3141 3157 goto L1; 3142 3158 } 3143 3159 } 3144 if (op == 0xF7 && rm >= modregrm(3,0,AX) && ereg < SP)3160 if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4)) 3145 3161 { if (u == 0xFF) 3146 3162 goto L4; 3147 if ( u == ~0xFF && !longop)3148 { rm|= 4; /* to regH */3163 if ((u & 0xFFFF) == 0xFF00 && shortop && !c->Irex && ereg < 4) 3164 { ereg |= 4; /* to regH */ 3149 3165 goto L4; 3150 3166 } 3151 3167 } 3152 3168 3153 3169 /* Look for sign extended immediate data */ 3154 3170 if ((signed char) u == u) 3155 3171 { 3156 3172 if (op == 0x81) 3157 3173 { if (reg != 0x08 && reg != 0x20 && reg != 0x30) 3158 3174 c->Iop = op = 0x83; /* 8 bit sgn ext */ 3159 3175 } 3160 3176 else if (op == 0x69) /* IMUL rw,ew,dw */ 3161 3177 c->Iop = op = 0x6B; /* IMUL rw,ew,db */ 3162 3178 } 3163 3179 3164 3180 // Look for SHIFT EA,imm8 we can replace with short form 3165 3181 if (u == 1 && ((op & 0xFE) == 0xC0)) 3166 3182 c->Iop |= 0xD0; 3167 
3183 3168 3184 } /* if immediate second operand */ 3169 3185 3170 3186 /* Look for AX short form */ 3171 3187 if (ins & A) 3172 { if (rm == modregrm(0,AX,local_BPRM) && (op & ~3) == 0x88) 3188 { if (rm == modregrm(0,AX,local_BPRM) && 3189 !(c->Irex & REX_R) && // and it's AX, not R8 3190 (op & ~3) == 0x88 && 3191 !I64) 3173 3192 { op = ((op & 3) + 0xA0) ^ 2; 3174 3193 /* 8A-> A0 */ 3175 3194 /* 8B-> A1 */ 3176 3195 /* 88-> A2 */ 3177 3196 /* 89-> A3 */ 3178 3197 c->Iop = op; 3179 3198 c->IFL2 = c->IFL1; 3180 3199 c->IEV2 = c->IEV1; 3181 3200 } 3182 3201 3183 3202 /* Replace MOV REG1,REG2 with MOV EREG1,EREG2 */ 3184 else if ( I32&&3203 else if (!I16 && 3185 3204 (op == 0x89 || op == 0x8B) && 3186 3205 (rm & 0xC0) == 0xC0 && 3187 3206 (!b || b->BC != BCasm) 3188 3207 ) 3189 3208 c->Iflags &= ~CFopsize; 3190 3209 3191 else if ((rm & 0xC7) == 0xC0) 3210 // If rm is AX 3211 else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c->Irex & (REX_R | REX_B))) 3192 3212 { switch (op) 3193 3213 { case 0x80: op = reg | 4; break; 3194 3214 case 0x81: op = reg | 5; break; 3195 case 0x87: op = 0x90 + (reg>>3); break; 3215 case 0x87: op = 0x90 + (reg>>3); break; // XCHG 3196 3216 case 0xF6: 3197 3217 if (reg == 0) 3198 3218 op = 0xA8; /* TEST AL,immed8 */ 3199 3219 break; 3200 3220 case 0xF7: 3201 3221 if (reg == 0) 3202 3222 op = 0xA9; /* TEST AX,immed16 */ 3203 3223 break; 3204 3224 } 3205 3225 c->Iop = op; 3206 3226 } 3207 3227 } 3208 3228 3209 3229 /* Look for reg short form */ 3210 3230 if ((ins & R) && (rm & 0xC0) == 0xC0) 3211 3231 { switch (op) 3212 3232 { case 0xC6: op = 0xB0 + ereg; break; 3213 3233 case 0xC7: op = 0xB8 + ereg; break; 3214 3234 case 0xFF: 3215 3235 switch (reg) 3216 3236 { case 6<<3: op = 0x50+ereg; break;/* PUSH*/ 3217 case 0<<3: op = 0x40+ereg; break; /* INC*/3218 case 1<<3: op = 0x48+ereg; break; /* DEC*/3237 case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/ 3238 case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/ 3219 3239 } 3220 3240 break; 
3221 3241 case 0x8F: op = 0x58 + ereg; break; 3222 3242 case 0x87: 3223 3243 if (reg == 0) op = 0x90 + ereg; 3224 3244 break; 3225 3245 } 3226 3246 c->Iop = op; 3227 3247 } 3228 3248 3229 3249 // Look to replace SHL reg,1 with ADD reg,reg 3230 3250 if ((op & 0xFE) == 0xD0 && 3231 3251 (rm & modregrm(3,7,0)) == modregrm(3,4,0) && 3232 3252 config.target_cpu >= TARGET_80486) 3233 3253 { 3234 3254 c->Iop &= 1; 3235 3255 c->Irm = (rm & modregrm(3,0,7)) | (ereg << 3); 3236 if (!(c->Iflags & CFpsw) && I32) 3256 if (c->Irex & REX_B) 3257 c->Irex |= REX_R; 3258 if (!(c->Iflags & CFpsw) && !I16) 3237 3259 c->Iflags &= ~CFopsize; 3238 3260 goto L1; 3239 3261 } 3240 3262 3241 3263 /* Look for sign extended modregrm displacement, or 0 3242 3264 * displacement. 3243 3265 */ 3244 3266 3245 3267 if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp 3246 3268 c->IFL1 == FLconst) // and it's a constant 3247 3269 { 3248 3270 a = c->IEVpointer1; 3249 if (a == 0 && (rm & 7) != local_BPRM && // if 0 disp3250 !(local_BPRM == 5 && (rm & 7) == 4 && (c->Isib & 7) == BP )3271 if (a == 0 && (rm & 7) != local_BPRM && // if 0[disp] 3272 !(local_BPRM == 5 && (rm & 7) == 4 && (c->Isib & 7) == BP && !(c->Irex & REX_B)) 3251 3273 ) 3252 3274 c->Irm &= 0x3F; 3253 else if ( I32)3275 else if (!I16) 3254 3276 { 3255 3277 if ((targ_size_t)(targ_schar)a == a) 3256 3278 c->Irm ^= 0xC0; /* do 8 sx */ 3257 3279 } 3258 3280 else if (((targ_size_t)(targ_schar)a & 0xFFFF) == (a & 0xFFFF)) 3259 3281 c->Irm ^= 0xC0; /* do 8 sx */ 3260 3282 } 3261 3283 3262 3284 /* Look for LEA reg,[ireg], replace with MOV reg,ireg */ 3263 3285 else if (op == 0x8D) 3264 3286 { rm = c->Irm & 7; 3265 3287 mod = c->Irm & modregrm(3,0,0); 3266 3288 if (mod == 0) 3267 3289 { 3268 if ( I32)3290 if (!I16) 3269 3291 { 3270 3292 switch (rm) 3271 3293 { 3272 3294 case 4: 3273 3295 case 5: 3274 3296 break; 3275 3297 default: 3276 3298 c->Irm |= modregrm(3,0,0); 3277 3299 c->Iop = 0x8B; 3278 3300 break; 3279 3301 } 3280 3302 } 3281 3303 
else 3282 3304 { 3283 3305 switch (rm) 3284 3306 { 3285 3307 case 4: rm = modregrm(3,0,SI); goto L6; 3286 3308 case 5: rm = modregrm(3,0,DI); goto L6; 3287 3309 case 7: rm = modregrm(3,0,BX); goto L6; 3288 3310 L6: c->Irm = rm + reg; 3289 3311 c->Iop = 0x8B; 3290 3312 break; 3291 3313 } 3292 3314 } 3293 3315 } 3294 3316 3295 3317 /* replace LEA reg,0[BP] with MOV reg,BP */ 3296 3318 else if (mod == modregrm(1,0,0) && rm == local_BPRM && 3297 3319 c->IFL1 == FLconst && c->IEVpointer1 == 0) 3298 3320 { c->Iop = 0x8B; /* MOV reg,BP */ 3299 3321 c->Irm = modregrm(3,0,BP) + reg; 3300 3322 } 3323 } 3324 3325 // Replace [R13] with 0[R13] 3326 if (c->Irex & REX_B && (c->Irm & modregrm(3,0,5)) == modregrm(0,0,5)) 3327 { 3328 c->Irm |= modregrm(1,0,0); 3329 c->IFL1 = FLconst; 3330 c->IEVpointer1 = 0; 3301 3331 } 3302 3332 } 3303 3333 else 3304 3334 { 3305 3335 switch (op) 3306 3336 { 3307 3337 default: 3308 3338 if ((op & 0xF0) != 0x70) 3309 3339 break; 3310 3340 case JMP: 3311 3341 switch (c->IFL2) 3312 3342 { case FLcode: 3313 3343 if (c->IEV2.Vcode == code_next(c)) 3314 3344 { c->Iop = NOP; 3315 3345 continue; 3316 3346 } 3317 3347 break; 3318 3348 case FLblock: 3319 3349 if (!code_next(c) && c->IEV2.Vblock == bn) 3320 3350 { c->Iop = NOP; 3321 3351 continue; 3322 3352 } 3323 3353 break; 3324 3354 case FLconst: 3325 3355 case FLfunc: 3326 3356 case FLextern: 3327 3357 break; 3328 3358 default: 3329 3359 #ifdef DEBUG 3330 3360 WRFL((enum FL)c->IFL2); 3331 3361 #endif 3332 3362 assert(0); 3333 3363 } 3334 3364 break; 3335 3365 3336 3366 case 0x68: // PUSH immed16 3337 3367 if (c->IFL2 == FLconst) 3338 { targ_long u; 3339 3340 u = c->IEV2.Vuns; 3341 if ((c->Iflags & CFopsize) ? !I32 : I32) 3342 { if (u == (signed char) u) 3368 { 3369 targ_long u = c->IEV2.Vuns; 3370 if (I64 || 3371 ((c->Iflags & CFopsize) ? 
I16 : I32)) 3372 { // PUSH 32/64 bit operand 3373 if (u == (signed char) u) 3343 3374 c->Iop = 0x6A; // PUSH immed8 3344 3375 } 3345 else 3376 else // PUSH 16 bit operand 3346 3377 { if ((short)u == (signed char) u) 3347 3378 c->Iop = 0x6A; // PUSH immed8 3348 3379 } 3349 3380 } 3350 3381 break; 3351 3382 } 3352 3383 } 3353 3384 } 3354 3385 #if 0 3355 3386 if (1 || debugc) { 3356 3387 printf("-pinholeopt(%p)\n",cstart); 3357 3388 for (c = cstart; c; c = code_next(c)) 3358 3389 c->print(); 3359 3390 } 3360 3391 #endif 3361 3392 } 3362 3393 3394 #ifdef DEBUG 3395 STATIC void pinholeopt_unittest() 3396 { 3397 //printf("pinholeopt_unittest()\n"); 3398 struct CS { unsigned model,op,ea,ev1,ev2,flags; } tests[][2] = 3399 { 3400 // XOR reg,immed NOT regL 3401 {{ 16,0x81,modregrm(3,6,BX),0,0xFF,0 }, { 0,0xF6,modregrm(3,2,BX),0,0xFF }}, 3402 3403 #if 0 // only if config.flags4 & CFG4space 3404 // TEST regL,immed8 3405 {{ 0,0xF6,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }}, 3406 {{ 0,0xF7,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }}, 3407 {{ 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }}, 3408 {{ 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }}, 3409 #endif 3410 3411 // PUSH immed => PUSH immed8 3412 {{ 0,0x68,0,0,0 }, { 0,0x6A,0,0,0 }}, 3413 {{ 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }}, 3414 {{ 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }}, 3415 {{ 16,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }}, 3416 {{ 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }}, 3417 {{ 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }}, 3418 {{ 16,0x68,0,0,0x10000,0 }, { 0,0x6A,0,0,0x10000,0 }}, 3419 {{ 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }}, 3420 {{ 32,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }}, 3421 {{ 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }}, 3422 {{ 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }}, 3423 {{ 
32,0x68,0,0,0x10000,CFopsize }, { 0,0x6A,0,0,0x10000,CFopsize }}, 3424 {{ 32,0x68,0,0,0x8000,CFopsize }, { 0,0x68,0,0,0x8000,CFopsize }}, 3425 }; 3426 3427 //config.flags4 |= CFG4space; 3428 for (int i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) 3429 { CS *pin = &tests[i][0]; 3430 CS *pout = &tests[i][1]; 3431 code cs; 3432 memset(&cs, 0, sizeof(cs)); 3433 if (pin->model) 3434 { 3435 if (I16 && pin->model != 16) 3436 continue; 3437 if (I32 && pin->model != 32) 3438 continue; 3439 if (I64 && pin->model != 64) 3440 continue; 3441 } 3442 //printf("[%d]\n", i); 3443 cs.Iop = pin->op; 3444 cs.Iea = pin->ea; 3445 cs.IFL1 = FLconst; 3446 cs.IFL2 = FLconst; 3447 cs.IEV1.Vuns = pin->ev1; 3448 cs.IEV2.Vuns = pin->ev2; 3449 cs.Iflags = pin->flags; 3450 pinholeopt(&cs, NULL); 3451 if (cs.Iop != pout->op) 3452 { printf("[%d] Iop = x%02x, pout = x%02x\n", i, cs.Iop, pout->op); 3453 assert(0); 3454 } 3455 assert(cs.Iea == pout->ea); 3456 assert(cs.IEV1.Vuns == pout->ev1); 3457 assert(cs.IEV2.Vuns == pout->ev2); 3458 assert(cs.Iflags == pout->flags); 3459 } 3460 } 3461 #endif 3363 3462 3364 3463 /************************** 3365 3464 * Compute jump addresses for FLcode. 3366 3465 * Note: only works for forward referenced code. 3367 3466 * only direct jumps and branches are detected. 3368 3467 * LOOP instructions only work for backward refs. 3369 3468 */ 3370 3469 3371 3470 void jmpaddr(code *c) 3372 3471 { code *ci,*cn,*ctarg,*cstart; 3373 3472 targ_size_t ad; 3374 3473 unsigned char op; 3375 3474 3376 3475 //printf("jmpaddr()\n"); 3377 3476 cstart = c; /* remember start of code */ 3378 3477 while (c) 3379 3478 { 3380 3479 op = c->Iop; 3381 3480 if (inssize[op] & T && /* if second operand */ 3382 3481 c->IFL2 == FLcode && … … 3559 3658 } 3560 3659 if (iflags & CFopsize) 3561 3660 size++; /* +1 for OPSIZE prefix */ 3562 3661 } 3563 3662 } 3564 3663 3565 3664 if ((op & 0xF0) == 0x70) 3566 3665 { if (iflags & CFjmp16) // if long branch 3567 3666 size += I16 ? 
3 : 4; // + 3(4) bytes for JMP 3568 3667 } 3569 3668 else if (ins & M) // if modregrm byte 3570 3669 { 3571 3670 rm = c->Irm; 3572 3671 mod = rm & 0xC0; 3573 3672 if (a32 || I64) 3574 3673 { // 32 bit addressing 3575 3674 if (issib(rm)) 3576 3675 size++; 3577 3676 switch (mod) 3578 3677 { case 0: 3579 if (issib(rm) && (c->Isib & 7) == 5 || (rm & 7) == 5) 3678 if (issib(rm) && (c->Isib & 7) == 5 || 3679 (rm & 7) == 5) 3580 3680 size += 4; /* disp32 */ 3681 if (c->Irex & REX_B && (rm & 7) == 5) 3682 /* Instead of selecting R13, this mode is an [RIP] relative 3683 * address. Although valid, it's redundant, and should not 3684 * be generated. Instead, generate 0[R13] instead of [R13]. 3685 */ 3686 assert(0); 3581 3687 break; 3582 3688 case 0x40: 3583 3689 size++; /* disp8 */ 3584 3690 break; 3585 3691 case 0x80: 3586 3692 size += 4; /* disp32 */ 3587 3693 break; 3588 3694 } 3589 3695 } 3590 3696 else 3591 3697 { // 16 bit addressing 3592 3698 if (mod == 0x40) /* 01: 8 bit displacement */ 3593 3699 size++; 3594 3700 else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6)) 3595 3701 size += 2; 3596 3702 } 3597 3703 } 3598 3704 3599 3705 Lret: 3600 3706 if (c->Irex) … … 3727 3833 3728 3834 STATIC void cod3_flush() 3729 3835 { 3730 3836 // Emit accumulated bytes to code segment 3731 3837 #ifdef DEBUG 3732 3838 assert(pgen - bytes < sizeof(bytes)); 3733 3839 #endif 3734 3840 offset += obj_bytes(cseg,offset,pgen - bytes,bytes); 3735 3841 pgen = bytes; 3736 3842 } 3737 3843 3738 3844 unsigned codout(code *c) 3739 3845 { unsigned op; 3740 3846 unsigned char rm,mod; 3741 3847 unsigned char ins; 3742 3848 code *cn; 3743 3849 unsigned flags; 3744 3850 symbol *s; 3745 3851 3746 3852 #ifdef DEBUG 3747 if (debugc) printf("codout(%p), Coffset = x%l x\n",c,Coffset);3853 if (debugc) printf("codout(%p), Coffset = x%llx\n",c,(unsigned long long)Coffset); 3748 3854 #endif 3749 3855 3750 3856 pgen = bytes; 3751 3857 offset = Coffset; 3752 3858 for (; c; c = code_next(c)) 3753 3859 { 3754 3860 
#ifdef DEBUG 3755 3861 if (debugc) { printf("off=%02lx, sz=%ld, ",(long)OFFSET(),(long)calccodsize(c)); c->print(); } 3756 3862 #endif 3757 3863 op = c->Iop; 3758 3864 ins = inssize[op]; 3759 3865 switch (op) 3760 3866 { case ESCAPE: 3761 3867 switch (c->Iop2) 3762 3868 { case ESClinnum: 3763 3869 /* put out line number stuff */ 3764 3870 objlinnum(c->IEV2.Vsrcpos,OFFSET()); 3765 3871 break; 3766 3872 #if SCPP 3767 3873 #if 1 … … 4891 4997 printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7); 4892 4998 } 4893 4999 if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40) 4894 5000 { 4895 5001 switch (c->IFL1) 4896 5002 { 4897 5003 case FLconst: 4898 5004 case FLoffset: 4899 5005 printf(" int = %4d",c->IEV1.Vuns); 4900 5006 break; 4901 5007 case FLblock: 4902 5008 printf(" block = %p",c->IEV1.Vblock); 4903 5009 break; 4904 5010 case FLswitch: 4905 5011 case FLblockoff: 4906 5012 case FLlocalsize: 4907 5013 case FLframehandler: 4908 5014 case 0: 4909 5015 break; 4910 5016 case FLdatseg: 4911 printf(" %d.%l x",c->IEVseg1,c->IEVpointer1);5017 printf(" %d.%llx",c->IEVseg1,(unsigned long long)c->IEVpointer1); 4912 5018 break; 4913 5019 case FLauto: 4914 5020 case FLreg: 4915 5021 case FLdata: 4916 5022 case FLudata: 4917 5023 case FLpara: 4918 5024 case FLtmp: 4919 5025 case FLbprel: 4920 5026 case FLtlsdata: 4921 5027 printf(" sym='%s'",c->IEVsym1->Sident); 4922 5028 break; 4923 5029 default: 4924 5030 WRFL((enum FL)c->IFL1); 4925 5031 break; 4926 5032 } 4927 5033 } 4928 5034 } 4929 5035 if (ins & T) 4930 5036 { printf(" "); WRFL((enum FL)c->IFL2); 4931 5037 switch (c->IFL2) 4932 5038 { 4933 5039 case FLconst: 4934 5040 printf(" int = %4d",c->IEV2.Vuns); 4935 5041 break; 4936 5042 case FLblock: 4937 5043 printf(" block = %p",c->IEV2.Vblock); 4938 5044 break; 4939 5045 case FLswitch: 4940 5046 case FLblockoff: 4941 5047 case 0: 4942 5048 case FLlocalsize: 4943 5049 case FLframehandler: 4944 5050 break; 4945 5051 case FLdatseg: 4946 printf(" %d.%l 
x",c->IEVseg2,c->IEVpointer2);5052 printf(" %d.%llx",c->IEVseg2,(unsigned long long)c->IEVpointer2); 4947 5053 break; 4948 5054 case FLauto: 4949 5055 case FLreg: 4950 5056 case FLpara: 4951 5057 case FLtmp: 4952 5058 case FLbprel: 4953 5059 case FLfunc: 4954 5060 case FLdata: 4955 5061 case FLudata: 4956 5062 case FLtlsdata: 4957 5063 printf(" sym='%s'",c->IEVsym2->Sident); 4958 5064 break; 4959 5065 case FLcode: 4960 5066 printf(" code = %p",c->IEV2.Vcode); 4961 5067 break; 4962 5068 default: 4963 5069 WRFL((enum FL)c->IFL2); 4964 5070 break; 4965 5071 } 4966 5072 } trunk/src/backend/code.h
r580 r581 150 150 #define SEG_ES 0 151 151 #define SEG_CS 1 152 152 #define SEG_SS 2 153 153 #define SEG_DS 3 154 154 155 155 /********************* 156 156 * Masks for register pairs. 157 157 * Note that index registers are always LSWs. This is for the convenience 158 158 * of implementing far pointers. 159 159 */ 160 160 161 161 #if 0 162 162 // Give us an extra one so we can enregister a long 163 163 #define mMSW (mCX|mDX|mDI|mES) // most significant regs 164 164 #define mLSW (mAX|mBX|mSI) // least significant regs 165 165 #else 166 166 #define mMSW (mCX|mDX|mES) /* most significant regs */ 167 167 #define mLSW (mAX|mBX|mSI|mDI) /* least significant regs */ 168 168 #endif 169 169 170 /* Return !=0 if there is a nSIB byte */170 /* Return !=0 if there is a SIB byte */ 171 171 #define issib(rm) (((rm) & 7) == 4 && ((rm) & 0xC0) != 0xC0) 172 172 173 173 #if 0 174 174 // relocation field size is always 32bits 175 175 #define is32bitaddr(x,Iflags) (1) 176 176 #else 177 177 // 178 178 // is32bitaddr works correctly only when x is 0 or 1. This is 179 179 // true today for the current definition of I32, but if the definition 180 180 // of I32 changes, this macro will need to change as well 181 181 // 182 182 // Note: even for linux targets, CFaddrsize can be set by the inline 183 183 // assembler. 184 184 #define is32bitaddr(x,Iflags) (I64 || ((x) ^(((Iflags) & CFaddrsize) !=0))) 185 185 #endif 186 186 187 187 /******************* 188 188 * Some instructions. 189 189 */ 190 190 trunk/src/backend/el.c
r580 r581 1326 1326 default: 1327 1327 break; 1328 1328 } 1329 1329 return e; 1330 1330 } 1331 1331 #endif 1332 1332 #if TARGET_LINUX || TARGET_FREEBSD || TARGET_SOLARIS 1333 1333 1334 1334 elem *el_picvar(symbol *s) 1335 1335 { elem *e; 1336 1336 int x; 1337 1337 1338 1338 //printf("el_picvar(s = '%s')\n", s->Sident); 1339 1339 symbol_debug(s); 1340 1340 type_debug(s->Stype); 1341 1341 e = el_calloc(); 1342 1342 e->Eoper = OPvar; 1343 1343 e->EV.sp.Vsym = s; 1344 1344 e->Ety = s->ty(); 1345 1345 1346 /* For 32 bit: 1347 * CALL __i686.get_pc_thunk.bx@PC32 1348 * ADD EBX,offset _GLOBAL_OFFSET_TABLE_@GOTPC[2] 1349 * Generate for var locals: 1350 * MOV reg,s@GOTOFF[014h][EBX] 1351 * For var globals: 1352 * MOV EAX,s@GOT32[EBX] 1353 * MOV reg,[EAX] 1354 * For TLS var locals and globals: 1355 * MOV EAX,s@TLS_GD[EBX] 1356 * CALL ___tls_get_addr@PLT32 1357 * MOV reg,[EAX] 1358 ***************************************** 1359 * Generate for var locals: 1360 * MOV reg,s@PC32[RIP] 1361 * For var globals: 1362 * MOV RAX,s@GOTPCREL[RIP] 1363 * MOV reg,[RAX] 1364 * For TLS var locals and globals: 1365 * 0x66 1366 * LEA DI,s@TLSGD[RIP] 1367 * 0x66 1368 * 0x66 1369 * 0x48 (REX | REX_W) 1370 * CALL __tls_get_addr@PLT32 1371 * MOV reg,[RAX] 1372 */ 1373 1374 if (I64) 1375 { 1376 elfobj_refGOTsym(); 1377 switch (s->Sclass) 1378 { 1379 case SCstatic: 1380 case SClocstat: 1381 x = 0; 1382 goto case_got64; 1383 1384 case SCcomdat: 1385 case SCcomdef: 1386 case SCglobal: 1387 case SCextern: 1388 x = 1; 1389 case_got64: 1390 { 1391 int op = e->Eoper; 1392 tym_t tym = e->Ety; 1393 e->Ety = TYnptr; 1394 1395 if (s->Stype->Tty & mTYthread) 1396 { 1397 /* Add "volatile" to prevent e from being common subexpressioned. 
1398 * This is so we can preserve the magic sequence of instructions 1399 * that the gnu linker patches: 1400 * lea EDI,x@tlsgd[RIP], call __tls_get_addr@plt 1401 * => 1402 * mov EAX,gs[0], sub EAX,x@tpoff 1403 */ 1404 e->Eoper = OPrelconst; 1405 e->Ety |= mTYvolatile; 1406 if (!tls_get_addr_sym) 1407 { 1408 /* void *__tls_get_addr(void *ptr); 1409 * Parameter ptr is passed in RDI, matching TYnfunc calling convention. 1410 */ 1411 tls_get_addr_sym = symbol_name("__tls_get_addr",SCglobal,type_fake(TYnfunc)); 1412 symbol_keep(tls_get_addr_sym); 1413 } 1414 e = el_bin(OPcall, TYnptr, el_var(tls_get_addr_sym), e); 1415 } 1416 1417 switch (op * 2 + x) 1418 { 1419 case OPvar * 2 + 1: 1420 e = el_una(OPind, TYnptr, e); 1421 break; 1422 case OPvar * 2 + 0: 1423 case OPrelconst * 2 + 1: 1424 break; 1425 case OPrelconst * 2 + 0: 1426 e = el_una(OPaddr, TYnptr, e); 1427 break; 1428 default: 1429 assert(0); 1430 break; 1431 } 1432 e->Ety = tym; 1433 break; 1434 } 1435 default: 1436 break; 1437 } 1438 } 1439 else 1346 1440 switch (s->Sclass) 1347 1441 { 1348 1442 /* local (and thread) symbols get only one level of indirection; 1349 1443 * all globally known symbols get two. 1350 1444 */ 1351 1445 case SCstatic: 1352 1446 case SClocstat: 1353 1447 x = 0; 1354 1448 goto case_got; 1355 1449 1356 1450 case SCcomdat: 1357 1451 case SCcomdef: 1358 1452 case SCglobal: 1359 1453 case SCextern: 1360 1454 if (s->Stype->Tty & mTYthread) 1361 1455 x = 0; 1362 1456 else 1363 1457 x = 1; 1364 1458 case_got: 1365 1459 { if (!localgot) … … 1424 1518 break; 1425 1519 } 1426 1520 return e; 1427 1521 } 1428 1522 #endif 1429 1523 1430 1524 /************************** 1431 1525 * Make an elem out of a symbol. 
1432 1526 */ 1433 1527 1434 1528 #if MARS 1435 1529 elem * el_var(symbol *s) 1436 1530 { elem *e; 1437 1531 1438 1532 //printf("el_var(s = '%s')\n", s->Sident); 1439 1533 //printf("%x\n", s->Stype->Tty); 1440 1534 #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_SOLARIS 1441 1535 // OSX is currently always pic 1442 1536 if (config.flags3 & CFG3pic && 1443 1537 #if TARGET_LINUX || TARGET_FREEBSD || TARGET_SOLARIS 1444 !(s->Stype->Tty & mTYthread) &&1538 (!(s->Stype->Tty & mTYthread) || I64) && 1445 1539 #endif 1446 1540 !tyfunc(s->ty())) 1447 1541 // Position Independent Code 1448 1542 return el_picvar(s); 1449 1543 #endif 1450 1544 symbol_debug(s); 1451 1545 type_debug(s->Stype); 1452 1546 e = el_calloc(); 1453 1547 e->Eoper = OPvar; 1454 1548 e->EV.sp.Vsym = s; 1455 1549 type_debug(s->Stype); 1456 1550 e->Ety = s->ty(); 1457 1551 if (s->Stype->Tty & mTYthread) 1458 1552 { 1459 1553 //printf("thread local %s\n", s->Sident); 1460 1554 #if TARGET_OSX 1461 1555 ; 1462 1556 #elif TARGET_LINUX || TARGET_FREEBSD || TARGET_SOLARIS 1463 1557 /* For 32 bit: 1464 1558 * Generate for var locals: … … 1477 1571 * For var globals: 1478 1572 * MOV RAX,s@GOTTPOFF[RIP] 1479 1573 * MOV reg,FS:[RAX] 1480 1574 * 1481 1575 * For address of locals: 1482 1576 * MOV RAX,FS:[00] 1483 1577 * LEA reg,s@TPOFF32[RAX] 1484 1578 * e => &s + *(FS:0) 1485 1579 * For address of globals: 1486 1580 * MOV reg,FS:[00] 1487 1581 * MOV RAX,s@GOTTPOFF[RIP] 1488 1582 * ADD reg,RAX 1489 1583 * e => s + *(FS:0) 1490 1584 * This leaves us with a problem, as the 'var' version cannot simply have 1491 1585 * its address taken, as what is the address of FS:s ? The (not so efficient) 1492 1586 * solution is to just use the second address form, and * it. 1493 1587 * Turns out that is identical to the 32 bit version, except GS => FS and the 1494 1588 * fixups are different. 1495 1589 * In the future, we should figure out a way to optimize to the 'var' version. 
1496 1590 */ 1591 if (I64) 1592 elfobj_refGOTsym(); 1497 1593 elem *e1 = el_calloc(); 1498 1594 e1->EV.sp.Vsym = s; 1499 1595 if (s->Sclass == SCstatic || s->Sclass == SClocstat) 1500 1596 { e1->Eoper = OPrelconst; 1501 1597 e1->Ety = TYnptr; 1502 1598 } 1503 1599 else 1504 1600 { 1505 1601 e1->Eoper = OPvar; 1506 1602 e1->Ety = TYnptr; 1507 1603 } 1508 1604 1509 1605 /* Fake GS:[0000] as a load of _tls_array, and then in the back end recognize 1510 1606 * the fake and rewrite it as GS:[0000] (or FS:[0000] for I64), because there is 1511 1607 * no way to represent segment overrides in the elem nodes. 1512 1608 */ 1513 1609 elem *e2 = el_calloc(); 1514 1610 e2->Eoper = OPvar; 1515 1611 e2->EV.sp.Vsym = rtlsym[RTLSYM_TLS_ARRAY]; 1516 1612 e2->Ety = e2->EV.sp.Vsym->ty(); trunk/src/backend/elfobj.c
r579 r581 67 67 * RI_TYPE_GOT32 R_X86_64_ R_X86_64_ 68 68 * RI_TYPE_TLS_GD R_X86_64_TLSGD R_X86_64_ 69 69 * RI_TYPE_TLS_IE R_X86_64_GOTTPOFF R_X86_64_ 70 70 * RI_TYPE_TLS_LE R_X86_64_TPOFF32 R_X86_64_ 71 71 * RI_TYPE_PLT32 R_X86_64_PLT32 R_X86_64_ 72 72 * RI_TYPE_PC32 R_X86_64_PC32 R_X86_64_ 73 73 */ 74 74 75 75 /****************************************** 76 76 */ 77 77 78 78 symbol *GOTsym; // global offset table reference 79 79 80 80 symbol *elfobj_getGOTsym() 81 81 { 82 82 if (!GOTsym) 83 83 { 84 84 GOTsym = symbol_name("_GLOBAL_OFFSET_TABLE_",SCglobal,tspvoid); 85 85 } 86 86 return GOTsym; 87 } 88 89 void elfobj_refGOTsym() 90 { 91 if (!GOTsym) 92 { 93 symbol *s = elfobj_getGOTsym(); 94 objextern(s); 95 } 87 96 } 88 97 89 98 static void objfile_write(FILE *fd, void *buffer, unsigned len); 90 99 91 100 STATIC char * objmodtoseg (const char *modname); 92 101 STATIC void obj_browse_flush(); 93 102 STATIC void objfixupp (struct FIXUP *); 94 103 STATIC void ledata_new (int seg,targ_size_t offset); 95 104 void obj_tlssections(); 96 105 97 106 static IDXSYM elf_addsym(IDXSTR sym, targ_size_t val, unsigned sz, 98 107 unsigned typ,unsigned bind,IDXSEC sec); 99 108 static long elf_align(FILE *fd, targ_size_t size, long offset); 100 109 101 110 // The object file is built is several separate pieces 102 111 103 112 // Non-repeatable section types have single output buffers 104 113 // Pre-allocated buffers are defined for: 105 114 // Section Names string table 106 115 // Section Headers table … … 2296 2305 * seg = where the address is going 2297 2306 * offset = offset within seg 2298 2307 * val = displacement from address 2299 2308 * targetdatum = DATA, CDATA or UDATA, depending where the address is 2300 2309 * flags = CFoff, CFseg 2301 2310 * Example: 2302 2311 * int *abc = &def[3]; 2303 2312 * to allocate storage: 2304 2313 * reftodatseg(DATA,offset,3 * sizeof(int *),UDATA); 2305 2314 */ 2306 2315 2307 2316 void reftodatseg(int seg,targ_size_t offset,targ_size_t val, 2308 
2317 unsigned targetdatum,int flags) 2309 2318 { 2310 2319 Outbuffer *buf; 2311 2320 int save; 2312 2321 2313 2322 buf = SegData[seg]->SDbuf; 2314 2323 save = buf->size(); 2315 2324 buf->setsize(offset); 2316 //dbg_printf("reftodatseg(seg=%d, offset=x%lx, val=x%lx,data %x, flags %x )\n", 2317 // seg,offset,val,targetdatum,flags); 2325 #if 0 2326 printf("reftodatseg(seg=%d, offset=x%llx, val=x%llx,data %x, flags %x)\n", 2327 seg,(unsigned long long)offset,(unsigned long long)val,targetdatum,flags); 2328 #endif 2318 2329 /*if (OPT_IS_SET(OPTfwritable_strings)) 2319 2330 { 2320 2331 elf_addrel(seg,offset,RI_TYPE_SYM32,STI_DATA,0); 2321 2332 } 2322 2333 else*/ 2323 2334 { 2324 2335 int relinfo; 2325 2336 2326 2337 if (I64) 2327 2338 { 2328 2339 if (MAP_SEG2TYP(seg) == CODE && config.flags3 & CFG3pic) 2329 2340 relinfo = R_X86_64_PC32; 2330 2341 else if (MAP_SEG2SEC(targetdatum)->sh_flags & SHF_TLS) 2331 2342 relinfo = config.flags3 & CFG3pic ? R_X86_64_TLSGD : R_X86_64_TPOFF32; 2332 2343 else 2333 2344 relinfo = R_X86_64_32; 2334 2345 } 2335 2346 else 2336 2347 { 2337 2348 if (MAP_SEG2TYP(seg) == CODE && config.flags3 & CFG3pic) … … 2402 2413 * flags = CFselfrel: self-relative 2403 2414 * CFseg: get segment 2404 2415 * CFoff: get offset 2405 2416 * CFoffset64: 64 bit fixup 2406 2417 * Returns: 2407 2418 * number of bytes in reference (4 or 8) 2408 2419 */ 2409 2420 2410 2421 int reftoident(int seg, targ_size_t offset, Symbol *s, targ_size_t val, 2411 2422 int flags) 2412 2423 { 2413 2424 tym_t ty; 2414 2425 bool external = TRUE; 2415 2426 Outbuffer *buf; 2416 2427 elf_u32_f32 relinfo,refseg; 2417 2428 int segtyp = MAP_SEG2TYP(seg); 2418 2429 //assert(val == 0); 2419 2430 int retsize = (flags & CFoffset64) ? 
8 : 4; 2420 2431 2421 2432 #if 0 2422 dbg_printf("\nreftoident('%s' seg %d, offset x%lx, val x%lx, flags x%x)\n",2433 printf("\nreftoident('%s' seg %d, offset x%llx, val x%llx, flags x%x)\n", 2423 2434 s->Sident,seg,offset,val,flags); 2424 dbg_printf("Sseg = %d, Sxtrnnum = %d \n",s->Sseg,s->Sxtrnnum);2435 dbg_printf("Sseg = %d, Sxtrnnum = %d, retsize = %d\n",s->Sseg,s->Sxtrnnum,retsize); 2425 2436 symbol_print(s); 2426 2437 #endif 2427 2438 2428 2439 ty = s->ty(); 2429 2440 if (s->Sxtrnnum) 2430 2441 { // identifier is defined somewhere else 2431 2442 if (I64) 2432 2443 { 2433 2444 if (SymbolTable64[s->Sxtrnnum].st_shndx != SHT_UNDEF) 2434 2445 external = FALSE; 2435 2446 } 2436 2447 else 2437 2448 { 2438 2449 if (SymbolTable[s->Sxtrnnum].st_shndx != SHT_UNDEF) 2439 2450 external = FALSE; 2440 2451 } 2441 2452 } 2442 2453 2443 2454 switch (s->Sclass) 2444 2455 { … … 2503 2514 if (!external && // local definition found 2504 2515 s->Sseg == seg && // within same code segment 2505 2516 (!(config.flags3 & CFG3pic) || // not position indp code 2506 2517 s->Sclass == SCstatic)) // or is pic, but declared static 2507 2518 { // Can use PC relative 2508 2519 //dbg_printf("\tdoing PC relative\n"); 2509 2520 val = (s->Soffset+val) - (offset+4); 2510 2521 } 2511 2522 else 2512 2523 { 2513 2524 val = (targ_size_t)-4; 2514 2525 //dbg_printf("\tadding relocation\n"); 2515 2526 if (I64) 2516 2527 relinfo = config.flags3 & CFG3pic ? R_X86_64_PLT32 : R_X86_64_PC32; 2517 2528 else 2518 2529 relinfo = config.flags3 & CFG3pic ? 
RI_TYPE_PLT32 : RI_TYPE_PC32; 2519 2530 elf_addrel(seg,offset, 2520 2531 relinfo, 2521 2532 s->Sxtrnnum,0); 2522 2533 } 2534 if (I64) 2535 val += 4; 2523 2536 } 2524 2537 else 2525 2538 { // code to code code to data, data to code, data to data refs 2526 2539 refseg = s->Sxtrnnum; // default to name symbol table entry 2527 2540 if (s->Sclass == SCstatic) 2528 2541 { // offset into .data or .bss seg 2529 2542 refseg = MAP_SEG2SYMIDX(s->Sseg); 2530 2543 // use segment symbol table entry 2531 2544 val += s->Soffset; 2532 2545 if (!(config.flags3 & CFG3pic) || // all static refs from normal code 2533 2546 segtyp == DATA) // or refs from data from posi indp 2534 2547 { 2535 2548 relinfo = I64 ? R_X86_64_32 : RI_TYPE_SYM32; 2536 2549 } 2537 2550 else 2538 2551 { 2539 2552 relinfo = I64 ? R_X86_64_PC32 : RI_TYPE_GOTOFF; 2540 2553 } 2541 2554 } 2542 2555 else if (config.flags3 & CFG3pic && s == GOTsym) 2543 2556 { // relocation for Gbl Offset Tab 2544 2557 relinfo = I64 ? R_X86_64_NONE : RI_TYPE_GOTPC; 2545 2558 } 2546 2559 else if (segtyp == DATA) 2547 2560 { // relocation from within DATA seg 2548 2561 relinfo = I64 ? R_X86_64_32 : RI_TYPE_SYM32; 2549 2562 } 2550 2563 else 2551 2564 { // relocation from within CODE seg 2552 2565 if (I64) 2553 relinfo = config.flags3 & CFG3pic ? R_X86_64_NONE : R_X86_64_32; 2566 { if (config.flags3 & CFG3pic) 2567 relinfo = R_X86_64_GOTPCREL; 2568 else 2569 relinfo = (flags & CFpc32) ? R_X86_64_PC32 : R_X86_64_32; 2570 } 2554 2571 else 2555 2572 relinfo = config.flags3 & CFG3pic ? RI_TYPE_GOT32 : RI_TYPE_SYM32; 2556 2573 } 2557 2574 if ((s->ty() & mTYLINK) & mTYthread) 2558 2575 { 2559 2576 if (I64) 2560 2577 { 2561 2578 if (config.flags3 & CFG3pic) 2562 2579 { 2563 if (s->Sclass == SCstatic )2564 relinfo = R_X86_64_T POFF32; // TLS_GD?2580 if (s->Sclass == SCstatic || s->Sclass == SClocstat) 2581 relinfo = R_X86_64_TLSGD; // TLS_GD? 
2565 2582 else 2566 relinfo = R_X86_64_GOTTPOFF; 2583 relinfo = R_X86_64_TLSGD; 2567 2584 } 2568 2585 else 2569 2586 { 2570 if (s->Sclass == SCstatic) 2587 if (s->Sclass == SCstatic || s->Sclass == SClocstat) 2571 2588 relinfo = R_X86_64_TPOFF32; 2572 2589 else 2573 2590 relinfo = R_X86_64_GOTTPOFF; 2574 2591 } 2575 2592 } 2576 2593 else 2577 2594 { 2578 2595 if (config.flags3 & CFG3pic) 2579 2596 { 2580 2597 if (s->Sclass == SCstatic) 2581 2598 relinfo = RI_TYPE_TLS_LE; // TLS_GD? 2582 2599 else 2583 2600 relinfo = RI_TYPE_TLS_IE; 2584 2601 } 2585 2602 else 2586 2603 { 2587 2604 if (s->Sclass == SCstatic) 2588 2605 relinfo = RI_TYPE_TLS_LE; 2589 2606 else 2590 2607 relinfo = RI_TYPE_TLS_IE; trunk/src/clone.c
r428 r581 100 100 if (tv->ty == Tstruct) 101 101 { TypeStruct *ts = (TypeStruct *)tv; 102 102 StructDeclaration *sd = ts->sym; 103 103 if (sd->eq) 104 104 goto Lneed; 105 105 } 106 106 } 107 107 Ldontneed: 108 108 if (X) printf("\tdontneed\n"); 109 109 return 0; 110 110 111 111 Lneed: 112 112 if (X) printf("\tneed\n"); 113 113 return 1; 114 114 #undef X 115 115 } 116 116 117 117 /****************************************** 118 118 * Build opAssign for struct. 119 119 * S* opAssign(S s) { ... } 120 * 121 * Note that s will be constructed on the stack, probably copy-constructed. 122 * Then, the body is: 123 * S tmp = *this; // bit copy 124 * *this = s; // bit copy 125 * tmp.dtor(); 126 * That is, the destructor is run on tmp rather than on s. 120 127 */ 121 128 122 129 FuncDeclaration *StructDeclaration::buildOpAssign(Scope *sc) 123 130 { 124 131 if (!needOpAssign()) 125 132 return NULL; 126 133 127 134 //printf("StructDeclaration::buildOpAssign() %s\n", toChars()); 128 135 129 136 FuncDeclaration *fop = NULL; 130 137 131 138 Parameter *param = new Parameter(STCnodtor, type, Id::p, NULL); 132 139 Parameters *fparams = new Parameters; 133 140 fparams->push(param); 134 141 Type *ftype = new TypeFunction(fparams, handle, FALSE, LINKd); 135 142 #if STRUCTTHISREF 136 143 ((TypeFunction *)ftype)->isref = 1; 137 144 #endif 138 145 139 146 fop = new FuncDeclaration(0, 0, Id::assign, STCundefined, ftype);
