From 128123ea34e9e6afe4842aefcb9cf84b9642ac22 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 16 Jun 2020 18:08:40 +0100 Subject: [PATCH] target/arm: Convert Neon 2-reg-misc VRINT insns to decodetree Convert the Neon 2-reg-misc VRINT insns to decodetree. Giving these insns their own do_vrint() function allows us to change the rounding mode just once at the start and end rather than doing it for every element in the vector. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200616170844.13318-18-peter.maydell@linaro.org --- target/arm/neon-dp.decode | 8 +++++ target/arm/translate-neon.inc.c | 61 +++++++++++++++++++++++++++++++++ target/arm/translate.c | 31 +++-------------- 3 files changed, 74 insertions(+), 26 deletions(-) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index c9acd00f1e..e0717c7e4a 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -503,11 +503,19 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm SHA1SU1 1111 001 11 . 11 .. 10 .... 0 0111 0 . 0 .... @2misc_q1 SHA256SU0 1111 001 11 . 11 .. 10 .... 0 0111 1 . 0 .... @2misc_q1 + VRINTN 1111 001 11 . 11 .. 10 .... 0 1000 . . 0 .... @2misc VRINTX 1111 001 11 . 11 .. 10 .... 0 1001 . . 0 .... @2misc + VRINTA 1111 001 11 . 11 .. 10 .... 0 1010 . . 0 .... @2misc + VRINTZ 1111 001 11 . 11 .. 10 .... 0 1011 . . 0 .... @2misc VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0 + + VRINTM 1111 001 11 . 11 .. 10 .... 0 1101 . . 0 .... @2misc + VCVT_F32_F16 1111 001 11 . 11 .. 10 .... 0 1110 0 . 0 .... @2misc_q0 + VRINTP 1111 001 11 . 11 .. 10 .... 0 1111 . . 0 .... @2misc + VRECPE 1111 001 11 . 11 .. 11 .... 0 1000 . . 0 .... @2misc VRSQRTE 1111 001 11 . 11 .. 11 .... 0 1001 . . 0 .... @2misc VRECPE_F 1111 001 11 . 11 .. 11 .... 0 1010 . . 0 .... @2misc diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index a62da21b15..0e7f86ad15 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -3796,3 +3796,64 @@ DO_FP_CMP0(VCGE0_F, gen_helper_neon_cge_f32, FWD) DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD) DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV) DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV) + +static bool do_vrint(DisasContext *s, arg_2misc *a, int rmode) +{ + /* + * Handle a VRINT* operation by iterating 32 bits at a time, + * with a specified rounding mode in operation. + */ + int pass; + TCGv_ptr fpst; + TCGv_i32 tcg_rmode; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 2) { + /* TODO: FP16 will be the size == 1 case */ + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(1); + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(a->vm, pass); + gen_helper_rints(tmp, tmp, fpst); + neon_store_reg(a->vd, pass, tmp); + } + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + tcg_temp_free_ptr(fpst); + + return true; +} + +#define DO_VRINT(INSN, RMODE) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_vrint(s, a, RMODE); \ + } + +DO_VRINT(VRINTN, FPROUNDING_TIEEVEN) +DO_VRINT(VRINTA, FPROUNDING_TIEAWAY) +DO_VRINT(VRINTZ, FPROUNDING_ZERO) +DO_VRINT(VRINTM, FPROUNDING_NEGINF) +DO_VRINT(VRINTP, FPROUNDING_POSINF) diff --git a/target/arm/translate.c b/target/arm/translate.c index dc98928856..61dfc3ae7a 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -4959,6 +4959,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_2RM_VCEQ0_F: case NEON_2RM_VCLE0_F: case NEON_2RM_VCLT0_F: + case NEON_2RM_VRINTN: + case NEON_2RM_VRINTA: + case NEON_2RM_VRINTM: + case NEON_2RM_VRINTP: + case NEON_2RM_VRINTZ: /* handled by decodetree */ return 1; case NEON_2RM_VTRN: @@ -4993,32 +4998,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } neon_store_reg(rm, pass, tmp2); break; - case NEON_2RM_VRINTN: - case NEON_2RM_VRINTA: - case NEON_2RM_VRINTM: - case NEON_2RM_VRINTP: - case NEON_2RM_VRINTZ: - { - TCGv_i32 tcg_rmode; - TCGv_ptr fpstatus = get_fpstatus_ptr(1); - int rmode; - - if (op == NEON_2RM_VRINTZ) { - rmode = FPROUNDING_ZERO; - } else { - rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1]; - } - - tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); - gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, - cpu_env); - gen_helper_rints(tmp, tmp, fpstatus); - gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, - cpu_env); - tcg_temp_free_ptr(fpstatus); - tcg_temp_free_i32(tcg_rmode); - break; - } case NEON_2RM_VCVTAU: case NEON_2RM_VCVTAS: case NEON_2RM_VCVTNU: