target/arm: Convert VFP fused multiply-add insns to decodetree

Convert the VFP fused multiply-add instructions (VFNMA, VFNMS,
VFMA, VFMS) to decodetree.

Note that in the old decode structure we were implementing
these to honour the VFP vector stride/length. These instructions
were introduced in VFPv4, and in the v7A architecture they
are UNPREDICTABLE if the vector stride or length are non-zero.
In v8A they must UNDEF if stride or length are non-zero, like
all VFP instructions; we choose to UNDEF always.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Peter Maydell 2019-06-11 16:39:49 +01:00
parent 519ee7ae31
commit d4893b01d2
3 changed files with 131 additions and 52 deletions

View file

@ -1481,3 +1481,124 @@ static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_sp *a)
{
return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}
static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
{
/*
* VFNMA : fd = muladd(-fd, fn, fm)
* VFNMS : fd = muladd(-fd, -fn, fm)
* VFMA : fd = muladd( fd, fn, fm)
* VFMS : fd = muladd( fd, -fn, fm)
*
* These are fused multiply-add, and must be done as one floating
* point operation with no rounding between the multiplication and
* addition steps. NB that doing the negations here as separate
* steps is correct : an input NaN should come out with its sign
* bit flipped if it is a negated-input.
*/
TCGv_ptr fpst;
TCGv_i32 vn, vm, vd;
/*
* Present in VFPv4 only.
* In v7A, UNPREDICTABLE with non-zero vector length/stride; from
* v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
*/
if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
(s->vec_len != 0 || s->vec_stride != 0)) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
vn = tcg_temp_new_i32();
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i32();
neon_load_reg32(vn, a->vn);
neon_load_reg32(vm, a->vm);
if (a->o2) {
/* VFNMS, VFMS */
gen_helper_vfp_negs(vn, vn);
}
neon_load_reg32(vd, a->vd);
if (a->o1 & 1) {
/* VFNMA, VFNMS */
gen_helper_vfp_negs(vd, vd);
}
fpst = get_fpstatus_ptr(0);
gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
neon_store_reg32(vd, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(vn);
tcg_temp_free_i32(vm);
tcg_temp_free_i32(vd);
return true;
}
static bool trans_VFM_dp(DisasContext *s, arg_VFM_sp *a)
{
/*
* VFNMA : fd = muladd(-fd, fn, fm)
* VFNMS : fd = muladd(-fd, -fn, fm)
* VFMA : fd = muladd( fd, fn, fm)
* VFMS : fd = muladd( fd, -fn, fm)
*
* These are fused multiply-add, and must be done as one floating
* point operation with no rounding between the multiplication and
* addition steps. NB that doing the negations here as separate
* steps is correct : an input NaN should come out with its sign
* bit flipped if it is a negated-input.
*/
TCGv_ptr fpst;
TCGv_i64 vn, vm, vd;
/*
* Present in VFPv4 only.
* In v7A, UNPREDICTABLE with non-zero vector length/stride; from
* v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
*/
if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
(s->vec_len != 0 || s->vec_stride != 0)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vn | a->vm) & 0x10)) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
vn = tcg_temp_new_i64();
vm = tcg_temp_new_i64();
vd = tcg_temp_new_i64();
neon_load_reg64(vn, a->vn);
neon_load_reg64(vm, a->vm);
if (a->o2) {
/* VFNMS, VFMS */
gen_helper_vfp_negd(vn, vn);
}
neon_load_reg64(vd, a->vd);
if (a->o1 & 1) {
/* VFNMA, VFNMS */
gen_helper_vfp_negd(vd, vd);
}
fpst = get_fpstatus_ptr(0);
gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
neon_store_reg64(vd, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i64(vn);
tcg_temp_free_i64(vm);
tcg_temp_free_i64(vd);
return true;
}

View file

@ -3093,7 +3093,7 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
rn = VFP_SREG_N(insn);
switch (op) {
case 0 ... 8:
case 0 ... 13:
/* Already handled by decodetree */
return 1;
default:
@ -3279,57 +3279,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
for (;;) {
/* Perform the calculation. */
switch (op) {
case 10: /* VFNMA : fd = muladd(-fd, fn, fm) */
case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
case 12: /* VFMA : fd = muladd( fd, fn, fm) */
case 13: /* VFMS : fd = muladd( fd, -fn, fm) */
/* These are fused multiply-add, and must be done as one
* floating point operation with no rounding between the
* multiplication and addition steps.
* NB that doing the negations here as separate steps is
* correct : an input NaN should come out with its sign bit
* flipped if it is a negated-input.
*/
if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
return 1;
}
if (dp) {
TCGv_ptr fpst;
TCGv_i64 frd;
if (op & 1) {
/* VFNMS, VFMS */
gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
}
frd = tcg_temp_new_i64();
tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
if (op & 2) {
/* VFNMA, VFNMS */
gen_helper_vfp_negd(frd, frd);
}
fpst = get_fpstatus_ptr(0);
gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
cpu_F1d, frd, fpst);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i64(frd);
} else {
TCGv_ptr fpst;
TCGv_i32 frd;
if (op & 1) {
/* VFNMS, VFMS */
gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
}
frd = tcg_temp_new_i32();
tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
if (op & 2) {
gen_helper_vfp_negs(frd, frd);
}
fpst = get_fpstatus_ptr(0);
gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
cpu_F1s, frd, fpst);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(frd);
}
break;
case 14: /* fconst */
if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
return 1;

View file

@ -142,3 +142,12 @@ VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp
VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VFM_sp ---- 1110 1.01 .... .... 1010 . o2:1 . 0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp o1=1
VFM_dp ---- 1110 1.01 .... .... 1011 . o2:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp o1=1
VFM_sp ---- 1110 1.10 .... .... 1010 . o2:1 . 0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp o1=2
VFM_dp ---- 1110 1.10 .... .... 1011 . o2:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp o1=2