Merge remote-tracking branch 'remotes/rth/tcg-next' into staging

* remotes/rth/tcg-next:
  tcg/i386: Use SHLX/SHRX/SARX instructions
  tcg/i386: Use ANDN instruction
  tcg/i386: Add tcg_out_vex_modrm
  tcg/i386: Move TCG_CT_CONST_* to tcg-target.c
  disas/i386: Disassemble ANDN/SHLX/SHRX/SARX
  tcg/optimize: Add more identity simplifications
  tcg/optimize: Optimize ANDC X,Y,Y to MOV X,0
  tcg/optimize: Simplify some logical ops to NOT
  tcg/optimize: Handle known-zeros masks for ANDC
  tcg/optimize: add known-zero bits compute for load ops
  tcg/optimize: improve known-zero bits for 32-bit ops
  tcg/optimize: fix known-zero bits optimization
  tcg/optimize: fix known-zero bits for right shift ops
  tcg-arm: The shift count of op_rotl_i32 is in args[2] not args[1].
  TCG: Fix 32-bit host allocation typo

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2014-02-20 15:02:06 +00:00
commit 3d2bb5cc81
6 changed files with 415 additions and 67 deletions

View file

@ -171,6 +171,7 @@ static void print_operand_value (char *buf, size_t bufsize, int hex, bfd_vma dis
static void print_displacement (char *, bfd_vma); static void print_displacement (char *, bfd_vma);
static void OP_E (int, int); static void OP_E (int, int);
static void OP_G (int, int); static void OP_G (int, int);
static void OP_vvvv (int, int);
static bfd_vma get64 (void); static bfd_vma get64 (void);
static bfd_signed_vma get32 (void); static bfd_signed_vma get32 (void);
static bfd_signed_vma get32s (void); static bfd_signed_vma get32s (void);
@ -264,6 +265,9 @@ static int rex_used;
current instruction. */ current instruction. */
static int used_prefixes; static int used_prefixes;
/* The VEX.vvvv register, unencoded. */
static int vex_reg;
/* Flags stored in PREFIXES. */ /* Flags stored in PREFIXES. */
#define PREFIX_REPZ 1 #define PREFIX_REPZ 1
#define PREFIX_REPNZ 2 #define PREFIX_REPNZ 2
@ -278,6 +282,10 @@ static int used_prefixes;
#define PREFIX_ADDR 0x400 #define PREFIX_ADDR 0x400
#define PREFIX_FWAIT 0x800 #define PREFIX_FWAIT 0x800
#define PREFIX_VEX_0F 0x1000
#define PREFIX_VEX_0F38 0x2000
#define PREFIX_VEX_0F3A 0x4000
/* Make sure that bytes from INFO->PRIVATE_DATA->BUFFER (inclusive) /* Make sure that bytes from INFO->PRIVATE_DATA->BUFFER (inclusive)
to ADDR (exclusive) are valid. Returns 1 for success, longjmps to ADDR (exclusive) are valid. Returns 1 for success, longjmps
on error. */ on error. */
@ -323,6 +331,7 @@ fetch_data(struct disassemble_info *info, bfd_byte *addr)
#define XX { NULL, 0 } #define XX { NULL, 0 }
#define Bv { OP_vvvv, v_mode }
#define Eb { OP_E, b_mode } #define Eb { OP_E, b_mode }
#define Ev { OP_E, v_mode } #define Ev { OP_E, v_mode }
#define Ed { OP_E, d_mode } #define Ed { OP_E, d_mode }
@ -671,7 +680,8 @@ fetch_data(struct disassemble_info *info, bfd_byte *addr)
#define PREGRP102 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 102 } } #define PREGRP102 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 102 } }
#define PREGRP103 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 103 } } #define PREGRP103 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 103 } }
#define PREGRP104 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 104 } } #define PREGRP104 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 104 } }
#define PREGRP105 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 105 } }
#define PREGRP106 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 106 } }
#define X86_64_0 NULL, { { NULL, X86_64_SPECIAL }, { NULL, 0 } } #define X86_64_0 NULL, { { NULL, X86_64_SPECIAL }, { NULL, 0 } }
#define X86_64_1 NULL, { { NULL, X86_64_SPECIAL }, { NULL, 1 } } #define X86_64_1 NULL, { { NULL, X86_64_SPECIAL }, { NULL, 1 } }
@ -1449,7 +1459,7 @@ static const unsigned char threebyte_0x38_uses_DATA_prefix[256] = {
/* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */ /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
/* d0 */ 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, /* df */ /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, /* df */
/* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */ /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
/* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */ /* f0 */ 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
/* ------------------------------- */ /* ------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
}; };
@ -1473,7 +1483,7 @@ static const unsigned char threebyte_0x38_uses_REPNZ_prefix[256] = {
/* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */ /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
/* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */ /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
/* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */ /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
/* f0 */ 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */ /* f0 */ 1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
/* ------------------------------- */ /* ------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
}; };
@ -1497,7 +1507,7 @@ static const unsigned char threebyte_0x38_uses_REPZ_prefix[256] = {
/* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */ /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
/* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */ /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
/* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */ /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
/* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */ /* f0 */ 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
/* ------------------------------- */ /* ------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
}; };
@ -2774,6 +2784,22 @@ static const struct dis386 prefix_user_table[][4] = {
{ "(bad)", { XX } }, { "(bad)", { XX } },
}, },
/* PREGRP105 */
{
{ "andnS", { Gv, Bv, Ev } },
{ "(bad)", { XX } },
{ "(bad)", { XX } },
{ "(bad)", { XX } },
},
/* PREGRP106 */
{
{ "bextrS", { Gv, Ev, Bv } },
{ "sarxS", { Gv, Ev, Bv } },
{ "shlxS", { Gv, Ev, Bv } },
{ "shrxS", { Gv, Ev, Bv } },
},
}; };
static const struct dis386 x86_64_table[][2] = { static const struct dis386 x86_64_table[][2] = {
@ -3071,12 +3097,12 @@ static const struct dis386 three_byte_table[][256] = {
/* f0 */ /* f0 */
{ PREGRP87 }, { PREGRP87 },
{ PREGRP88 }, { PREGRP88 },
{ PREGRP105 },
{ "(bad)", { XX } }, { "(bad)", { XX } },
{ "(bad)", { XX } }, { "(bad)", { XX } },
{ "(bad)", { XX } }, { "(bad)", { XX } },
{ "(bad)", { XX } }, { "(bad)", { XX } },
{ "(bad)", { XX } }, { PREGRP106 },
{ "(bad)", { XX } },
/* f8 */ /* f8 */
{ "(bad)", { XX } }, { "(bad)", { XX } },
{ "(bad)", { XX } }, { "(bad)", { XX } },
@ -3477,6 +3503,74 @@ ckprefix (void)
} }
} }
/* Recognize and consume a VEX prefix at CODEP, if there is one.

   Nothing is changed unless the byte at CODEP is 0xc4 (three byte form)
   or 0xc5 (two byte form), the address mode is not 16-bit, and, in
   32-bit mode, the two top bits of the following byte are both set.
   On a match, CODEP is advanced past the prefix and the fields are
   translated into the state the rest of the decoder already understands:
     - the inverted VEX.R/X/B bits and VEX.W replace REX with the
       equivalent REX_* flags;
     - the opcode map is recorded in PREFIXES via PREFIX_VEX_0F*;
     - VEX.pp is recorded in PREFIXES as the legacy prefix it stands for;
     - VEX.vvvv is stored, un-inverted, in VEX_REG.  */
static void
ckvexprefix (void)
{
    int op, vex2, vex3, newrex = 0, newpfx = prefixes;

    if (address_mode == mode_16bit) {
        return;
    }

    fetch_data(the_info, codep + 1);
    op = *codep;

    if (op != 0xc4 && op != 0xc5) {
        return;
    }

    fetch_data(the_info, codep + 2);
    vex2 = codep[1];

    /* NOTE(review): presumably this keeps the legacy 32-bit meaning of
       0xc4/0xc5 (LES/LDS with a memory operand) decodable -- confirm.  */
    if (address_mode == mode_32bit && (vex2 & 0xc0) != 0xc0) {
        return;
    }

    if (op == 0xc4) {
        /* Three byte VEX prefix.  */
        fetch_data(the_info, codep + 3);
        vex3 = codep[2];

        /* R, X and B are stored inverted in the prefix.  */
        newrex |= (vex2 & 0x80 ? 0 : REX_R);
        newrex |= (vex2 & 0x40 ? 0 : REX_X);
        newrex |= (vex2 & 0x20 ? 0 : REX_B);
        newrex |= (vex3 & 0x80 ? REX_W : 0);
        switch (vex2 & 0x1f) {          /* VEX.m-mmmm */
        case 1:
            newpfx |= PREFIX_VEX_0F;
            break;
        case 2:
            newpfx |= PREFIX_VEX_0F | PREFIX_VEX_0F38;
            break;
        case 3:
            newpfx |= PREFIX_VEX_0F | PREFIX_VEX_0F3A;
            break;
        }
        /* The vvvv and pp fields live in the last prefix byte in both
           forms; let the common code below read them from VEX2.  */
        vex2 = vex3;
        codep += 3;
    } else {
        /* Two byte VEX prefix.  This form has no m-mmmm field; the 0F
           opcode map is implied, so it must be flagged here or the
           opcode byte would be looked up in the one byte table.  */
        newrex |= (vex2 & 0x80 ? 0 : REX_R);
        newpfx |= PREFIX_VEX_0F;
        codep += 2;
    }

    vex_reg = (~vex2 >> 3) & 15;        /* VEX.vvvv */
    switch (vex2 & 3) {                 /* VEX.pp */
    case 1:
        newpfx |= PREFIX_DATA;          /* 0x66 */
        break;
    case 2:
        newpfx |= PREFIX_REPZ;          /* 0xf3 */
        break;
    case 3:
        newpfx |= PREFIX_REPNZ;         /* 0xf2 */
        break;
    }

    rex = newrex;
    prefixes = newpfx;
}
/* Return the name of the prefix byte PREF, or NULL if PREF is not a /* Return the name of the prefix byte PREF, or NULL if PREF is not a
prefix byte. */ prefix byte. */
@ -3598,6 +3692,7 @@ print_insn (bfd_vma pc, disassemble_info *info)
const char *p; const char *p;
struct dis_private priv; struct dis_private priv;
unsigned char op; unsigned char op;
unsigned char threebyte;
if (info->mach == bfd_mach_x86_64_intel_syntax if (info->mach == bfd_mach_x86_64_intel_syntax
|| info->mach == bfd_mach_x86_64) || info->mach == bfd_mach_x86_64)
@ -3752,6 +3847,7 @@ print_insn (bfd_vma pc, disassemble_info *info)
obufp = obuf; obufp = obuf;
ckprefix (); ckprefix ();
ckvexprefix ();
insn_codep = codep; insn_codep = codep;
sizeflag = priv.orig_sizeflag; sizeflag = priv.orig_sizeflag;
@ -3775,18 +3871,29 @@ print_insn (bfd_vma pc, disassemble_info *info)
} }
op = 0; op = 0;
if (prefixes & PREFIX_VEX_0F)
{
used_prefixes |= PREFIX_VEX_0F | PREFIX_VEX_0F38 | PREFIX_VEX_0F3A;
if (prefixes & PREFIX_VEX_0F38)
threebyte = 0x38;
else if (prefixes & PREFIX_VEX_0F3A)
threebyte = 0x3a;
else
threebyte = *codep++;
goto vex_opcode;
}
if (*codep == 0x0f) if (*codep == 0x0f)
{ {
unsigned char threebyte;
fetch_data(info, codep + 2); fetch_data(info, codep + 2);
threebyte = *++codep; threebyte = codep[1];
codep += 2;
vex_opcode:
dp = &dis386_twobyte[threebyte]; dp = &dis386_twobyte[threebyte];
need_modrm = twobyte_has_modrm[*codep]; need_modrm = twobyte_has_modrm[threebyte];
uses_DATA_prefix = twobyte_uses_DATA_prefix[*codep]; uses_DATA_prefix = twobyte_uses_DATA_prefix[threebyte];
uses_REPNZ_prefix = twobyte_uses_REPNZ_prefix[*codep]; uses_REPNZ_prefix = twobyte_uses_REPNZ_prefix[threebyte];
uses_REPZ_prefix = twobyte_uses_REPZ_prefix[*codep]; uses_REPZ_prefix = twobyte_uses_REPZ_prefix[threebyte];
uses_LOCK_prefix = (*codep & ~0x02) == 0x20; uses_LOCK_prefix = (threebyte & ~0x02) == 0x20;
codep++;
if (dp->name == NULL && dp->op[0].bytemode == IS_3BYTE_OPCODE) if (dp->name == NULL && dp->op[0].bytemode == IS_3BYTE_OPCODE)
{ {
fetch_data(info, codep + 2); fetch_data(info, codep + 2);
@ -5291,6 +5398,17 @@ OP_G (int bytemode, int sizeflag)
} }
} }
/* Operand printer for the register selected by VEX.vvvv.  Appends the
   name of register VEX_REG to the output, taken from NAMES64 when REX_W
   is set in REX and from NAMES32 otherwise.  BYTEMODE and SIZEFLAGS are
   not consulted.  */
static void
OP_vvvv (int bytemode, int sizeflags)
{
    const char *regname;

    USED_REX (REX_W);
    regname = (rex & REX_W) ? names64[vex_reg] : names32[vex_reg];
    oappend(regname);
}
static bfd_vma static bfd_vma
get64 (void) get64 (void)
{ {

View file

@ -1866,7 +1866,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) : SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
SHIFT_IMM_LSL(0)); SHIFT_IMM_LSL(0));
} else { } else {
tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[1], 0x20); tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
SHIFT_REG_ROR(TCG_REG_TMP)); SHIFT_REG_ROR(TCG_REG_TMP));
} }

View file

@ -88,6 +88,11 @@ static const int tcg_target_call_oarg_regs[] = {
#endif #endif
}; };
/* Constants we accept. */
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400
/* Registers used with L constraint, which are the first argument /* Registers used with L constraint, which are the first argument
registers on x86_64, and two random call clobbered registers on registers on x86_64, and two random call clobbered registers on
i386. */ i386. */
@ -124,6 +129,16 @@ static bool have_movbe;
# define have_movbe 0 # define have_movbe 0
#endif #endif
/* We need this symbol in tcg-target.h, and we can't properly conditionalize
it there. Therefore we always define the variable. */
bool have_bmi1;
#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
static bool have_bmi2;
#else
# define have_bmi2 0
#endif
static uint8_t *tb_ret_addr; static uint8_t *tb_ret_addr;
static void patch_reloc(uint8_t *code_ptr, int type, static void patch_reloc(uint8_t *code_ptr, int type,
@ -166,6 +181,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX); tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
break; break;
case 'c': case 'c':
case_c:
ct->ct |= TCG_CT_REG; ct->ct |= TCG_CT_REG;
tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX); tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
break; break;
@ -194,6 +210,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set32(ct->u.regs, 0, 0xf); tcg_regset_set32(ct->u.regs, 0, 0xf);
break; break;
case 'r': case 'r':
case_r:
ct->ct |= TCG_CT_REG; ct->ct |= TCG_CT_REG;
if (TCG_TARGET_REG_BITS == 64) { if (TCG_TARGET_REG_BITS == 64) {
tcg_regset_set32(ct->u.regs, 0, 0xffff); tcg_regset_set32(ct->u.regs, 0, 0xffff);
@ -201,6 +218,13 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set32(ct->u.regs, 0, 0xff); tcg_regset_set32(ct->u.regs, 0, 0xff);
} }
break; break;
case 'C':
/* With SHRX et al, we need not use ECX as shift count register. */
if (have_bmi2) {
goto case_r;
} else {
goto case_c;
}
/* qemu_ld/st address constraint */ /* qemu_ld/st address constraint */
case 'L': case 'L':
@ -220,6 +244,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
case 'Z': case 'Z':
ct->ct |= TCG_CT_CONST_U32; ct->ct |= TCG_CT_CONST_U32;
break; break;
case 'I':
ct->ct |= TCG_CT_CONST_I32;
break;
default: default:
return -1; return -1;
@ -243,6 +270,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
return 1; return 1;
} }
if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
return 1;
}
return 0; return 0;
} }
@ -268,10 +298,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
# define P_REXB_RM 0 # define P_REXB_RM 0
# define P_GS 0 # define P_GS 0
#endif #endif
#define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
#define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
#define OPC_ARITH_EvIz (0x81) #define OPC_ARITH_EvIz (0x81)
#define OPC_ARITH_EvIb (0x83) #define OPC_ARITH_EvIb (0x83)
#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN (0xf2 | P_EXT38)
#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP (0xc8 | P_EXT) #define OPC_BSWAP (0xc8 | P_EXT)
#define OPC_CALL_Jz (0xe8) #define OPC_CALL_Jz (0xe8)
@ -309,6 +342,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define OPC_SHIFT_1 (0xd1) #define OPC_SHIFT_1 (0xd1)
#define OPC_SHIFT_Ib (0xc1) #define OPC_SHIFT_Ib (0xc1)
#define OPC_SHIFT_cl (0xd3) #define OPC_SHIFT_cl (0xd3)
#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL (0x85) #define OPC_TESTL (0x85)
#define OPC_XCHG_ax_r32 (0x90) #define OPC_XCHG_ax_r32 (0x90)
@ -398,9 +434,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
rex = 0; rex = 0;
rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */ rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
rex |= (r & 8) >> 1; /* REX.R */ rex |= (r & 8) >> 1; /* REX.R */
rex |= (x & 8) >> 2; /* REX.X */ rex |= (x & 8) >> 2; /* REX.X */
rex |= (rm & 8) >> 3; /* REX.B */ rex |= (rm & 8) >> 3; /* REX.B */
/* P_REXB_{R,RM} indicates that the given register is the low byte. /* P_REXB_{R,RM} indicates that the given register is the low byte.
For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do, For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
@ -449,6 +485,48 @@ static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
} }
/* Emit a VEX-encoded, register-to-register instruction.

   OPC is the opcode byte combined with P_* flags: P_EXT or P_EXT38
   selects the opcode map, P_REXW sets VEX.W, and P_DATA16, P_SIMDF3 or
   P_SIMDF2 selects the implied 0x66, 0xf3 or 0xf2 prefix.  R goes in
   the ModRM reg field, RM in the ModRM r/m field (mod = 3), and V in
   VEX.vvvv.  Aborts if OPC names neither the 0F nor the 0F38 map.  */
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    int tmp;

    /* The two byte form implies the 0F map and has no room for VEX.W,
       VEX.X or VEX.B, so it is usable only for a plain P_EXT opcode
       with a low RM register.  Everything else needs three bytes.  */
    if ((opc & (P_REXW | P_EXT | P_EXT38)) == P_EXT && (rm & 8) == 0) {
        /* Two byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);              /* VEX.R */
    } else {
        /* Three byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm */
        if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            tcg_abort();
        }
        /* R, X and B are stored inverted; no index register is used
           here, so VEX.X is always written as 1.  */
        tmp |= 0x40;                           /* VEX.X */
        tmp |= (r & 8 ? 0 : 0x80);             /* VEX.R */
        tmp |= (rm & 8 ? 0 : 0x20);            /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);       /* VEX.W */
    }

    /* VEX.pp */
    if (opc & P_DATA16) {
        tmp |= 1;                              /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                              /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                              /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;                     /* VEX.vvvv, stored inverted */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode. /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
We handle either RM and INDEX missing with a negative value. In 64-bit We handle either RM and INDEX missing with a negative value. In 64-bit
mode for absolute addresses, ~RM is the size of the immediate operand mode for absolute addresses, ~RM is the size of the immediate operand
@ -1638,7 +1716,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args) const TCGArg *args, const int *const_args)
{ {
int c, rexw = 0; int c, vexop, rexw = 0;
#if TCG_TARGET_REG_BITS == 64 #if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \ # define OP_32_64(x) \
@ -1774,6 +1852,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
} }
break; break;
OP_32_64(andc):
if (const_args[2]) {
tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
args[0], args[1]);
tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
} else {
tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
}
break;
OP_32_64(mul): OP_32_64(mul):
if (const_args[2]) { if (const_args[2]) {
int32_t val; int32_t val;
@ -1799,19 +1887,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
OP_32_64(shl): OP_32_64(shl):
c = SHIFT_SHL; c = SHIFT_SHL;
goto gen_shift; vexop = OPC_SHLX;
goto gen_shift_maybe_vex;
OP_32_64(shr): OP_32_64(shr):
c = SHIFT_SHR; c = SHIFT_SHR;
goto gen_shift; vexop = OPC_SHRX;
goto gen_shift_maybe_vex;
OP_32_64(sar): OP_32_64(sar):
c = SHIFT_SAR; c = SHIFT_SAR;
goto gen_shift; vexop = OPC_SARX;
goto gen_shift_maybe_vex;
OP_32_64(rotl): OP_32_64(rotl):
c = SHIFT_ROL; c = SHIFT_ROL;
goto gen_shift; goto gen_shift;
OP_32_64(rotr): OP_32_64(rotr):
c = SHIFT_ROR; c = SHIFT_ROR;
goto gen_shift; goto gen_shift;
gen_shift_maybe_vex:
if (have_bmi2 && !const_args[2]) {
tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
break;
}
/* FALLTHRU */
gen_shift: gen_shift:
if (const_args[2]) { if (const_args[2]) {
tcg_out_shifti(s, c + rexw, args[0], args[2]); tcg_out_shifti(s, c + rexw, args[0], args[2]);
@ -2002,10 +2099,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_and_i32, { "r", "0", "ri" } }, { INDEX_op_and_i32, { "r", "0", "ri" } },
{ INDEX_op_or_i32, { "r", "0", "ri" } }, { INDEX_op_or_i32, { "r", "0", "ri" } },
{ INDEX_op_xor_i32, { "r", "0", "ri" } }, { INDEX_op_xor_i32, { "r", "0", "ri" } },
{ INDEX_op_andc_i32, { "r", "r", "ri" } },
{ INDEX_op_shl_i32, { "r", "0", "ci" } }, { INDEX_op_shl_i32, { "r", "0", "Ci" } },
{ INDEX_op_shr_i32, { "r", "0", "ci" } }, { INDEX_op_shr_i32, { "r", "0", "Ci" } },
{ INDEX_op_sar_i32, { "r", "0", "ci" } }, { INDEX_op_sar_i32, { "r", "0", "Ci" } },
{ INDEX_op_rotl_i32, { "r", "0", "ci" } }, { INDEX_op_rotl_i32, { "r", "0", "ci" } },
{ INDEX_op_rotr_i32, { "r", "0", "ci" } }, { INDEX_op_rotr_i32, { "r", "0", "ci" } },
@ -2059,10 +2157,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_and_i64, { "r", "0", "reZ" } }, { INDEX_op_and_i64, { "r", "0", "reZ" } },
{ INDEX_op_or_i64, { "r", "0", "re" } }, { INDEX_op_or_i64, { "r", "0", "re" } },
{ INDEX_op_xor_i64, { "r", "0", "re" } }, { INDEX_op_xor_i64, { "r", "0", "re" } },
{ INDEX_op_andc_i64, { "r", "r", "rI" } },
{ INDEX_op_shl_i64, { "r", "0", "ci" } }, { INDEX_op_shl_i64, { "r", "0", "Ci" } },
{ INDEX_op_shr_i64, { "r", "0", "ci" } }, { INDEX_op_shr_i64, { "r", "0", "Ci" } },
{ INDEX_op_sar_i64, { "r", "0", "ci" } }, { INDEX_op_sar_i64, { "r", "0", "Ci" } },
{ INDEX_op_rotl_i64, { "r", "0", "ci" } }, { INDEX_op_rotl_i64, { "r", "0", "ci" } },
{ INDEX_op_rotr_i64, { "r", "0", "ci" } }, { INDEX_op_rotr_i64, { "r", "0", "ci" } },
@ -2196,25 +2295,34 @@ static void tcg_target_qemu_prologue(TCGContext *s)
static void tcg_target_init(TCGContext *s) static void tcg_target_init(TCGContext *s)
{ {
#if !(defined(have_cmov) && defined(have_movbe)) unsigned a, b, c, d;
{ int max = __get_cpuid_max(0, 0);
unsigned a, b, c, d;
int ret = __get_cpuid(1, &a, &b, &c, &d);
# ifndef have_cmov if (max >= 1) {
__cpuid(1, a, b, c, d);
#ifndef have_cmov
/* For 32-bit, 99% certainty that we're running on hardware that /* For 32-bit, 99% certainty that we're running on hardware that
supports cmov, but we still need to check. In case cmov is not supports cmov, but we still need to check. In case cmov is not
available, we'll use a small forward branch. */ available, we'll use a small forward branch. */
have_cmov = ret && (d & bit_CMOV); have_cmov = (d & bit_CMOV) != 0;
# endif #endif
#ifndef have_movbe
# ifndef have_movbe
/* MOVBE is only available on Intel Atom and Haswell CPUs, so we /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
need to probe for it. */ need to probe for it. */
have_movbe = ret && (c & bit_MOVBE); have_movbe = (c & bit_MOVBE) != 0;
# endif
}
#endif #endif
}
if (max >= 7) {
/* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
__cpuid_count(7, 0, a, b, c, d);
#ifdef bit_BMI
have_bmi1 = (b & bit_BMI) != 0;
#endif
#ifndef have_bmi2
have_bmi2 = (b & bit_BMI2) != 0;
#endif
}
if (TCG_TARGET_REG_BITS == 64) { if (TCG_TARGET_REG_BITS == 64) {
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);

View file

@ -64,9 +64,6 @@ typedef enum {
TCG_REG_RDI = TCG_REG_EDI, TCG_REG_RDI = TCG_REG_EDI,
} TCGReg; } TCGReg;
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
/* used for function call generation */ /* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_ESP #define TCG_REG_CALL_STACK TCG_REG_ESP
#define TCG_TARGET_STACK_ALIGN 16 #define TCG_TARGET_STACK_ALIGN 16
@ -76,6 +73,8 @@ typedef enum {
#define TCG_TARGET_CALL_STACK_OFFSET 0 #define TCG_TARGET_CALL_STACK_OFFSET 0
#endif #endif
extern bool have_bmi1;
/* optional instructions */ /* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1 #define TCG_TARGET_HAS_div2_i32 1
#define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_rot_i32 1
@ -87,7 +86,7 @@ typedef enum {
#define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1
#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_neg_i32 1
#define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_not_i32 1
#define TCG_TARGET_HAS_andc_i32 0 #define TCG_TARGET_HAS_andc_i32 have_bmi1
#define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_orc_i32 0
#define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nand_i32 0
@ -115,7 +114,7 @@ typedef enum {
#define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_neg_i64 1
#define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_not_i64 1
#define TCG_TARGET_HAS_andc_i64 0 #define TCG_TARGET_HAS_andc_i64 have_bmi1
#define TCG_TARGET_HAS_orc_i64 0 #define TCG_TARGET_HAS_orc_i64 0
#define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_eqv_i64 0
#define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nand_i64 0

View file

@ -655,11 +655,68 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
} }
} }
break; break;
CASE_OP_32_64(xor):
CASE_OP_32_64(nand):
if (temps[args[1]].state != TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == -1) {
i = 1;
goto try_not;
}
break;
CASE_OP_32_64(nor):
if (temps[args[1]].state != TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0) {
i = 1;
goto try_not;
}
break;
CASE_OP_32_64(andc):
if (temps[args[2]].state != TCG_TEMP_CONST
&& temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[1]].val == -1) {
i = 2;
goto try_not;
}
break;
CASE_OP_32_64(orc):
CASE_OP_32_64(eqv):
if (temps[args[2]].state != TCG_TEMP_CONST
&& temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[1]].val == 0) {
i = 2;
goto try_not;
}
break;
try_not:
{
TCGOpcode not_op;
bool have_not;
if (def->flags & TCG_OPF_64BIT) {
not_op = INDEX_op_not_i64;
have_not = TCG_TARGET_HAS_not_i64;
} else {
not_op = INDEX_op_not_i32;
have_not = TCG_TARGET_HAS_not_i32;
}
if (!have_not) {
break;
}
s->gen_opc_buf[op_index] = not_op;
reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[i];
args += 3;
gen_args += 2;
continue;
}
default: default:
break; break;
} }
/* Simplify expression for "op r, a, 0 => mov r, a" cases */ /* Simplify expression for "op r, a, const => mov r, a" cases */
switch (op) { switch (op) {
CASE_OP_32_64(add): CASE_OP_32_64(add):
CASE_OP_32_64(sub): CASE_OP_32_64(sub):
@ -670,28 +727,38 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(rotr): CASE_OP_32_64(rotr):
CASE_OP_32_64(or): CASE_OP_32_64(or):
CASE_OP_32_64(xor): CASE_OP_32_64(xor):
if (temps[args[1]].state == TCG_TEMP_CONST) { CASE_OP_32_64(andc):
/* Proceed with possible constant folding. */ if (temps[args[1]].state != TCG_TEMP_CONST
break; && temps[args[2]].state == TCG_TEMP_CONST
}
if (temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0) { && temps[args[2]].val == 0) {
if (temps_are_copies(args[0], args[1])) { goto do_mov3;
s->gen_opc_buf[op_index] = INDEX_op_nop;
} else {
s->gen_opc_buf[op_index] = op_to_mov(op);
tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
gen_args += 2;
}
args += 3;
continue;
} }
break; break;
CASE_OP_32_64(and):
CASE_OP_32_64(orc):
CASE_OP_32_64(eqv):
if (temps[args[1]].state != TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == -1) {
goto do_mov3;
}
break;
do_mov3:
if (temps_are_copies(args[0], args[1])) {
s->gen_opc_buf[op_index] = INDEX_op_nop;
} else {
s->gen_opc_buf[op_index] = op_to_mov(op);
tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
gen_args += 2;
}
args += 3;
continue;
default: default:
break; break;
} }
/* Simplify using known-zero bits */ /* Simplify using known-zero bits. Currently only ops with a single
output argument are supported. */
mask = -1; mask = -1;
affected = -1; affected = -1;
switch (op) { switch (op) {
@ -726,16 +793,36 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
mask = temps[args[1]].mask & mask; mask = temps[args[1]].mask & mask;
break; break;
CASE_OP_32_64(sar): CASE_OP_32_64(andc):
/* Known-zeros does not imply known-ones. Therefore unless
args[2] is constant, we can't infer anything from it. */
if (temps[args[2]].state == TCG_TEMP_CONST) { if (temps[args[2]].state == TCG_TEMP_CONST) {
mask = ((tcg_target_long)temps[args[1]].mask mask = ~temps[args[2]].mask;
>> temps[args[2]].val); goto and_const;
}
/* But we certainly know nothing outside args[1] may be set. */
mask = temps[args[1]].mask;
break;
case INDEX_op_sar_i32:
if (temps[args[2]].state == TCG_TEMP_CONST) {
mask = (int32_t)temps[args[1]].mask >> temps[args[2]].val;
}
break;
case INDEX_op_sar_i64:
if (temps[args[2]].state == TCG_TEMP_CONST) {
mask = (int64_t)temps[args[1]].mask >> temps[args[2]].val;
} }
break; break;
CASE_OP_32_64(shr): case INDEX_op_shr_i32:
if (temps[args[2]].state == TCG_TEMP_CONST) { if (temps[args[2]].state == TCG_TEMP_CONST) {
mask = temps[args[1]].mask >> temps[args[2]].val; mask = (uint32_t)temps[args[1]].mask >> temps[args[2]].val;
}
break;
case INDEX_op_shr_i64:
if (temps[args[2]].state == TCG_TEMP_CONST) {
mask = (uint64_t)temps[args[1]].mask >> temps[args[2]].val;
} }
break; break;
@ -769,10 +856,40 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
mask = temps[args[3]].mask | temps[args[4]].mask; mask = temps[args[3]].mask | temps[args[4]].mask;
break; break;
CASE_OP_32_64(ld8u):
case INDEX_op_qemu_ld8u:
mask = 0xff;
break;
CASE_OP_32_64(ld16u):
case INDEX_op_qemu_ld16u:
mask = 0xffff;
break;
case INDEX_op_ld32u_i64:
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_qemu_ld32u:
#endif
mask = 0xffffffffu;
break;
CASE_OP_32_64(qemu_ld):
{
TCGMemOp mop = args[def->nb_oargs + def->nb_iargs];
if (!(mop & MO_SIGN)) {
mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
}
}
break;
default: default:
break; break;
} }
/* 32-bit ops (non 64-bit ops and non load/store ops) generate 32-bit
results */
if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
mask &= 0xffffffffu;
}
if (mask == 0) { if (mask == 0) {
assert(def->nb_oargs == 1); assert(def->nb_oargs == 1);
s->gen_opc_buf[op_index] = op_to_movi(op); s->gen_opc_buf[op_index] = op_to_movi(op);
@ -839,6 +956,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Simplify expression for "op r, a, a => movi r, 0" cases */ /* Simplify expression for "op r, a, a => movi r, 0" cases */
switch (op) { switch (op) {
CASE_OP_32_64(andc):
CASE_OP_32_64(sub): CASE_OP_32_64(sub):
CASE_OP_32_64(xor): CASE_OP_32_64(xor):
if (temps_are_copies(args[1], args[2])) { if (temps_are_copies(args[1], args[2])) {
@ -1140,6 +1258,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
} else { } else {
for (i = 0; i < def->nb_oargs; i++) { for (i = 0; i < def->nb_oargs; i++) {
reset_temp(args[i]); reset_temp(args[i]);
/* Save the corresponding known-zero bits mask for the
first output argument (only one supported so far). */
if (i == 0) {
temps[args[i]].mask = mask;
}
} }
} }
for (i = 0; i < def->nb_args; i++) { for (i = 0; i < def->nb_args; i++) {

View file

@ -526,7 +526,7 @@ static inline int tcg_temp_new_internal(TCGType type, int temp_local)
ts->temp_local = temp_local; ts->temp_local = temp_local;
ts->name = NULL; ts->name = NULL;
ts++; ts++;
ts->base_type = TCG_TYPE_I32; ts->base_type = type;
ts->type = TCG_TYPE_I32; ts->type = TCG_TYPE_I32;
ts->temp_allocated = 1; ts->temp_allocated = 1;
ts->temp_local = temp_local; ts->temp_local = temp_local;