tcg-aarch64: Handle constant operands to and, or, xor

Handle a simplified set of logical immediates for the moment.

The way gcc and binutils do it, with 52k worth of tables and
a binary search depth of log2(5334) ≈ 13, seems slow for the
most common cases.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Claudio Fontana <claudio.fontana@huawei.com>
Tested-by: Claudio Fontana <claudio.fontana@huawei.com>
commit e029f29385
parent 90f1cd9138
Author: Richard Henderson <rth@twiddle.net>
Date:   2013-08-14 11:27:03 -07:00

--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -112,6 +112,7 @@ static inline void patch_reloc(uint8_t *code_ptr, int type,
 
 #define TCG_CT_CONST_IS32 0x100
 #define TCG_CT_CONST_AIMM 0x200
+#define TCG_CT_CONST_LIMM 0x400
 
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct,
@@ -142,6 +143,9 @@ static int target_parse_constraint(TCGArgConstraint *ct,
     case 'A': /* Valid for arithmetic immediate (positive or negative).  */
         ct->ct |= TCG_CT_CONST_AIMM;
         break;
+    case 'L': /* Valid for logical immediate.  */
+        ct->ct |= TCG_CT_CONST_LIMM;
+        break;
     default:
         return -1;
     }
@@ -156,6 +160,26 @@ static inline bool is_aimm(uint64_t val)
     return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
 }
 
+static inline bool is_limm(uint64_t val)
+{
+    /* Taking a simplified view of the logical immediates for now, ignoring
+       the replication that can happen across the field.  Match bit patterns
+       of the forms
+           0....01....1
+           0..01..10..0
+       and their inverses.  */
+
+    /* Make things easier below, by testing the form with msb clear. */
+    if ((int64_t)val < 0) {
+        val = ~val;
+    }
+    if (val == 0) {
+        return false;
+    }
+    val += val & -val;
+    return (val & (val - 1)) == 0;
+}
+
 static int tcg_target_const_match(tcg_target_long val,
                                   const TCGArgConstraint *arg_ct)
 {
@@ -170,6 +194,9 @@ static int tcg_target_const_match(tcg_target_long val,
     if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
         return 1;
     }
+    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
+        return 1;
+    }
 
     return 0;
 }
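The trick in is_limm() deserves a note: for a value whose set bits form one contiguous run, adding the lowest set bit (val & -val) carries through the run and clears it, leaving at most a single bit; the power-of-two test then accepts exactly the shapes drawn in the comment. A minimal standalone harness, not part of the patch (main() and the sample values are illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

/* Same predicate as in the patch. */
static bool is_limm(uint64_t val)
{
    if ((int64_t)val < 0) {
        val = ~val;             /* normalize so the msb is clear */
    }
    if (val == 0) {
        return false;          /* all-zeros (and all-ones) not encodable */
    }
    val += val & -val;          /* adding the lowest bit clears the low run */
    return (val & (val - 1)) == 0;  /* at most one bit may survive */
}

int main(void)
{
    printf("%d\n", is_limm(0x00000000000000ffull)); /* 1: 0....01....1 */
    printf("%d\n", is_limm(0x0000000000000ff0ull)); /* 1: 0..01..10..0 */
    printf("%d\n", is_limm(0xffffffffffffff00ull)); /* 1: inverted form */
    printf("%d\n", is_limm(0x0000000000000ff1ull)); /* 0: two runs */
    printf("%d\n", is_limm(0x5555555555555555ull)); /* 0: replicated pattern,
                                                       deliberately unhandled */
    return 0;
}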
@@ -235,6 +262,11 @@ typedef enum {
     I3401_SUBI      = 0x51000000,
     I3401_SUBSI     = 0x71000000,
 
+    /* Logical immediate instructions.  */
+    I3404_ANDI      = 0x12000000,
+    I3404_ORRI      = 0x32000000,
+    I3404_EORI      = 0x52000000,
+
     /* Add/subtract shifted register instructions (without a shift).  */
     I3502_ADD       = 0x0b000000,
     I3502_ADDS      = 0x2b000000,
@@ -351,6 +383,18 @@ static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
     tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
 }
 
+/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
+   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
+   that feed the DecodeBitMasks pseudo function.  */
+static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
+                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
+{
+    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
+              | rn << 5 | rd);
+}
+
+#define tcg_out_insn_3404  tcg_out_insn_3402
+
 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
    the rare occasion when we actually want to supply a shift amount.  */
 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
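A worked example of how these fields pack together. Only the opcode value 0x12000000 comes from the patch; the helper below and its use in main() are an illustrative standalone sketch mirroring tcg_out_insn_3402(). A 64-bit AND with immediate 0xff0 is a run of 8 ones rotated right by 60, so N=1, immr=60, imms=7, and the assembled word for AND x0, x1, #0xff0 is 0x927c1c20:

#include <stdint.h>
#include <stdio.h>

/* Pack the 3.4.4 fields exactly as tcg_out_insn_3402() does. */
static uint32_t insn_3404(uint32_t insn, unsigned ext, unsigned rd,
                          unsigned rn, unsigned n, unsigned immr,
                          unsigned imms)
{
    return insn | ext << 31 | n << 22 | immr << 16 | imms << 10
           | rn << 5 | rd;
}

int main(void)
{
    /* I3404_ANDI with sf=1: AND x0, x1, #0xff0 */
    printf("0x%08x\n", insn_3404(0x12000000u, 1, 0, 1, 1, 60, 7));
    return 0;
}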
@ -665,40 +709,6 @@ static inline void tcg_out_call(TCGContext *s, intptr_t target)
} }
} }
/* encode a logical immediate, mapping user parameter
M=set bits pattern length to S=M-1 */
static inline unsigned int
aarch64_limm(unsigned int m, unsigned int r)
{
assert(m > 0);
return r << 16 | (m - 1) << 10;
}
/* test a register against an immediate bit pattern made of
M set bits rotated right by R.
Examples:
to test a 32/64 reg against 0x00000007, pass M = 3, R = 0.
to test a 32/64 reg against 0x000000ff, pass M = 8, R = 0.
to test a 32bit reg against 0xff000000, pass M = 8, R = 8.
to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8.
*/
static inline void tcg_out_tst(TCGContext *s, TCGType ext, TCGReg rn,
unsigned int m, unsigned int r)
{
/* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */
unsigned int base = ext ? 0xf240001f : 0x7200001f;
tcg_out32(s, base | aarch64_limm(m, r) | rn << 5);
}
/* and a register with a bit pattern, similarly to TST, no flags change */
static inline void tcg_out_andi(TCGContext *s, TCGType ext, TCGReg rd,
TCGReg rn, unsigned int m, unsigned int r)
{
/* using AND 0x12000000 */
unsigned int base = ext ? 0x92400000 : 0x12000000;
tcg_out32(s, base | aarch64_limm(m, r) | rn << 5 | rd);
}
static inline void tcg_out_ret(TCGContext *s) static inline void tcg_out_ret(TCGContext *s)
{ {
/* emit RET { LR } */ /* emit RET { LR } */
@ -788,6 +798,37 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
} }
} }
/* This function is used for the Logical (immediate) instruction group.
The value of LIMM must satisfy IS_LIMM. See the comment above about
only supporting simplified logical immediates. */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
TCGReg rd, TCGReg rn, uint64_t limm)
{
unsigned h, l, r, c;
assert(is_limm(limm));
h = clz64(limm);
l = ctz64(limm);
if (l == 0) {
r = 0; /* form 0....01....1 */
c = ctz64(~limm) - 1;
if (h == 0) {
r = clz64(~limm); /* form 1..10..01..1 */
c += r;
}
} else {
r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
c = r - h - 1;
}
if (ext == TCG_TYPE_I32) {
r &= 31;
c &= 31;
}
tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
#ifdef CONFIG_SOFTMMU #ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
* int mmu_idx, uintptr_t ra) * int mmu_idx, uintptr_t ra)
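To see the immr/imms computation concretely, take limm = 0xff0 (form 0..01..10..0): h = clz64 = 52 and l = ctz64 = 4, so the else branch gives r = 64 - 4 = 60 and c = 60 - 52 - 1 = 7, i.e. eight ones rotated right by 60, matching the encoding example earlier. A standalone sketch of the same computation (clz64/ctz64 replaced by the GCC/Clang builtins; the rotate-back verification is an illustrative addition, not from the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t limm = 0x0000000000000ff0ull;
    unsigned h = __builtin_clzll(limm);     /* 52 leading zeros */
    unsigned l = __builtin_ctzll(limm);     /* 4 trailing zeros */
    unsigned r, c;

    if (l == 0) {
        r = 0;                              /* form 0....01....1 */
        c = __builtin_ctzll(~limm) - 1;
        if (h == 0) {
            r = __builtin_clzll(~limm);     /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;                         /* r = 60 */
        c = r - h - 1;                      /* c = 7, i.e. 8 ones */
    }

    /* Verify: (c + 1) ones rotated right by r must reproduce limm. */
    uint64_t ones = (c >= 63) ? ~0ull : (1ull << (c + 1)) - 1;
    unsigned s = r & 63;
    uint64_t mask = s ? (ones >> s) | (ones << (64 - s)) : ones;
    printf("immr=%u imms=%u mask=0x%llx\n", r, c, (unsigned long long)mask);
    return 0;
}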
@@ -879,9 +920,8 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
     /* Store the page mask part of the address and the low s_bits into X3.
        Later this allows checking for equality and alignment at the same time.
        X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
-    tcg_out_andi(s, (TARGET_LONG_BITS == 64), TCG_REG_X3, addr_reg,
-                 (TARGET_LONG_BITS - TARGET_PAGE_BITS) + s_bits,
-                 (TARGET_LONG_BITS - TARGET_PAGE_BITS));
+    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
+                     addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
     /* Add any "high bits" from the tlb offset to the env address into X2,
        to take advantage of the LSL12 form of the ADDI instruction.
        X2 = env + (tlb_offset & 0xfff000) */
@@ -1186,19 +1226,37 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
 
-    case INDEX_op_and_i64:
     case INDEX_op_and_i32:
-        tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
+        a2 = (int32_t)a2;
+        /* FALLTHRU */
+    case INDEX_op_and_i64:
+        if (c2) {
+            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
+        }
         break;
 
-    case INDEX_op_or_i64:
     case INDEX_op_or_i32:
-        tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
+        a2 = (int32_t)a2;
+        /* FALLTHRU */
+    case INDEX_op_or_i64:
+        if (c2) {
+            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
+        }
         break;
 
-    case INDEX_op_xor_i64:
     case INDEX_op_xor_i32:
-        tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
+        a2 = (int32_t)a2;
+        /* FALLTHRU */
+    case INDEX_op_xor_i64:
+        if (c2) {
+            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
+        }
        break;
 
     case INDEX_op_mul_i64:
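The a2 = (int32_t)a2 in the i32 cases is not cosmetic. The 'w' constraint (TCG_CT_CONST_IS32) lets a 32-bit constant match regardless of its high 32 bits, with the pre-existing constraint matcher sign-extending it before the immediate tests, and the cast applies the same canonicalization before encoding. For example, a wrap-around run of ones such as 0xff0000ff, a perfectly valid 32-bit logical immediate, passes the 64-bit is_limm() only once sign-extended. A standalone illustrative sketch (is_limm() copied from the patch):

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

static bool is_limm(uint64_t val)           /* as in the patch */
{
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}

int main(void)
{
    uint64_t zext = 0x00000000ff0000ffull;            /* high bits zero */
    uint64_t sext = (uint64_t)(int64_t)(int32_t)0xff0000ffu; /* sign-extended */
    printf("zero-extended: %d  sign-extended: %d\n",
           is_limm(zext), is_limm(sext));   /* prints 0 and 1 */
    return 0;
}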
@@ -1391,12 +1449,12 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
     { INDEX_op_sub_i64, { "r", "r", "rA" } },
     { INDEX_op_mul_i32, { "r", "r", "r" } },
     { INDEX_op_mul_i64, { "r", "r", "r" } },
-    { INDEX_op_and_i32, { "r", "r", "r" } },
-    { INDEX_op_and_i64, { "r", "r", "r" } },
-    { INDEX_op_or_i32, { "r", "r", "r" } },
-    { INDEX_op_or_i64, { "r", "r", "r" } },
-    { INDEX_op_xor_i32, { "r", "r", "r" } },
-    { INDEX_op_xor_i64, { "r", "r", "r" } },
+    { INDEX_op_and_i32, { "r", "r", "rwL" } },
+    { INDEX_op_and_i64, { "r", "r", "rL" } },
+    { INDEX_op_or_i32, { "r", "r", "rwL" } },
+    { INDEX_op_or_i64, { "r", "r", "rL" } },
+    { INDEX_op_xor_i32, { "r", "r", "rwL" } },
+    { INDEX_op_xor_i64, { "r", "r", "rL" } },
     { INDEX_op_shl_i32, { "r", "r", "ri" } },
     { INDEX_op_shr_i32, { "r", "r", "ri" } },