From 386bba2368842fc74388a3c1651c6c0c0c70adbd Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 11 Jun 2019 16:39:51 +0100 Subject: [PATCH] target/arm: Convert VFP comparison insns to decodetree Convert the VFP comparison instructions to decodetree. Note that comparison instructions should not honour the VFP short-vector length and stride information: they are scalar-only operations. This applies to all the 2-operand instructions except for VMOV, VABS, VNEG and VSQRT. (In the old decoder this is implemented via the "if (op == 15 && rn > 3) { veclen = 0; }" check.) Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- target/arm/translate-vfp.inc.c | 75 ++++++++++++++++++++++++++++++++++ target/arm/translate.c | 51 +---------------------- target/arm/vfp.decode | 5 +++ 3 files changed, 81 insertions(+), 50 deletions(-) diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c index a7e4ae3198..ebde86210a 100644 --- a/target/arm/translate-vfp.inc.c +++ b/target/arm/translate-vfp.inc.c @@ -1938,3 +1938,78 @@ static bool trans_VSQRT_dp(DisasContext *s, arg_VSQRT_dp *a) { return do_vfp_2op_dp(s, gen_VSQRT_dp, a->vd, a->vm); } + +static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a) +{ + TCGv_i32 vd, vm; + + /* Vm/M bits must be zero for the Z variant */ + if (a->z && a->vm != 0) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + vd = tcg_temp_new_i32(); + vm = tcg_temp_new_i32(); + + neon_load_reg32(vd, a->vd); + if (a->z) { + tcg_gen_movi_i32(vm, 0); + } else { + neon_load_reg32(vm, a->vm); + } + + if (a->e) { + gen_helper_vfp_cmpes(vd, vm, cpu_env); + } else { + gen_helper_vfp_cmps(vd, vm, cpu_env); + } + + tcg_temp_free_i32(vd); + tcg_temp_free_i32(vm); + + return true; +} + +static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a) +{ + TCGv_i64 vd, vm; + + /* Vm/M bits must be zero for the Z variant */ + if (a->z && a->vm != 0) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + vd = tcg_temp_new_i64(); + vm = tcg_temp_new_i64(); + + neon_load_reg64(vd, a->vd); + if (a->z) { + tcg_gen_movi_i64(vm, 0); + } else { + neon_load_reg64(vm, a->vm); + } + + if (a->e) { + gen_helper_vfp_cmped(vd, vm, cpu_env); + } else { + gen_helper_vfp_cmpd(vd, vm, cpu_env); + } + + tcg_temp_free_i64(vd); + tcg_temp_free_i64(vm); + + return true; +} diff --git a/target/arm/translate.c b/target/arm/translate.c index 79e9cca600..eb16269021 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -1390,30 +1390,6 @@ static inline void gen_vfp_neg(int dp) gen_helper_vfp_negs(cpu_F0s, cpu_F0s); } -static inline void gen_vfp_cmp(int dp) -{ - if (dp) - gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env); - else - gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env); -} - -static inline void gen_vfp_cmpe(int dp) -{ - if (dp) - gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env); - else - gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env); -} - -static inline void gen_vfp_F1_ld0(int dp) -{ - if (dp) - tcg_gen_movi_i64(cpu_F1d, 0); - else - tcg_gen_movi_i32(cpu_F1s, 0); -} - #define VFP_GEN_ITOF(name) \ static inline void gen_vfp_##name(int dp, int neon) \ { \ @@ -3091,6 +3067,7 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) case 15: switch (rn) { case 0 ... 3: + case 8 ... 11: /* Already handled by decodetree */ return 1; default: @@ -3135,11 +3112,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) rd_is_dp = false; break; - case 0x08: case 0x0a: /* vcmp, vcmpz */ - case 0x09: case 0x0b: /* vcmpe, vcmpez */ - no_output = true; - break; - case 0x0c: /* vrintr */ case 0x0d: /* vrintz */ case 0x0e: /* vrintx */ @@ -3240,14 +3212,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) /* Load the initial operands. */ if (op == 15) { switch (rn) { - case 0x08: case 0x09: /* Compare */ - gen_mov_F0_vreg(dp, rd); - gen_mov_F1_vreg(dp, rm); - break; - case 0x0a: case 0x0b: /* Compare with zero */ - gen_mov_F0_vreg(dp, rd); - gen_vfp_F1_ld0(dp); - break; case 0x14: /* vcvt fp <-> fixed */ case 0x15: case 0x16: @@ -3357,19 +3321,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) gen_vfp_msr(tmp); break; } - case 8: /* cmp */ - gen_vfp_cmp(dp); - break; - case 9: /* cmpe */ - gen_vfp_cmpe(dp); - break; - case 10: /* cmpz */ - gen_vfp_cmp(dp); - break; - case 11: /* cmpez */ - gen_vfp_F1_ld0(dp); - gen_vfp_cmpe(dp); - break; case 12: /* vrintr */ { TCGv_ptr fpst = get_fpstatus_ptr(0); diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode index b72ab8b806..9db7aa7021 100644 --- a/target/arm/vfp.decode +++ b/target/arm/vfp.decode @@ -176,3 +176,8 @@ VSQRT_sp ---- 1110 1.11 0001 .... 1010 11.0 .... \ vd=%vd_sp vm=%vm_sp VSQRT_dp ---- 1110 1.11 0001 .... 1011 11.0 .... \ vd=%vd_dp vm=%vm_dp + +VCMP_sp ---- 1110 1.11 010 z:1 .... 1010 e:1 1.0 .... \ + vd=%vd_sp vm=%vm_sp +VCMP_dp ---- 1110 1.11 010 z:1 .... 1011 e:1 1.0 .... \ + vd=%vd_dp vm=%vm_dp