From 6812dfdc6b0286730d6f903ebfbdc4f81b80c29b Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 12 May 2020 17:38:54 +0100 Subject: [PATCH] target/arm: Convert Neon VQSHL, VRSHL, VQRSHL 3-reg-same insns to decodetree Convert the VQSHL, VRSHL and VQRSHL insns in the 3-reg-same group to decodetree. We have already implemented the size==0b11 case of these insns; this commit handles the remaining sizes. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200512163904.10918-8-peter.maydell@linaro.org --- target/arm/neon-dp.decode | 30 ++++++++++++++++++----- target/arm/translate-neon.inc.c | 43 +++++++++++++++++++++++++++++++++ target/arm/translate.c | 22 +++-------------- 3 files changed, 70 insertions(+), 25 deletions(-) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 4984a5d4e1..34bce81c43 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -95,12 +95,30 @@ VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev @3same_64_rev .... ... . . . 11 .... .... .... . q:1 . . .... \ &3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3 -VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev -VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev -VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev -VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev -VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev -VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev +{ + VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev + VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_rev +} +{ + VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev + VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_rev +} +{ + VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev + VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_rev +} +{ + VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev + VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_rev +} +{ + VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev + VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev +} +{ + VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev + VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev +} VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index e9da47171c..7097c18f33 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -875,9 +875,52 @@ DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) return do_3same(s, a, gen_##INSN##_3s); \ } +/* + * Some helper functions need to be passed the cpu_env. In order + * to use those with the gvec APIs like tcg_gen_gvec_3() we need + * to create wrapper functions whose prototype is a NeonGenTwoOpFn() + * and which call a NeonGenTwoOpEnvFn(). + */ +#define WRAP_ENV_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \ + { \ + FUNC(d, cpu_env, n, m); \ + } + +#define DO_3SAME_32_ENV(INSN, FUNC) \ + WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \ + WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \ + WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \ + static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static const GVecGen3 ops[4] = { \ + { .fni4 = gen_##INSN##_tramp8 }, \ + { .fni4 = gen_##INSN##_tramp16 }, \ + { .fni4 = gen_##INSN##_tramp32 }, \ + { 0 }, \ + }; \ + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ + } \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size > 2) { \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + DO_3SAME_32(VHADD_S, hadd_s) DO_3SAME_32(VHADD_U, hadd_u) DO_3SAME_32(VHSUB_S, hsub_s) DO_3SAME_32(VHSUB_U, hsub_u) DO_3SAME_32(VRHADD_S, rhadd_s) DO_3SAME_32(VRHADD_U, rhadd_u) +DO_3SAME_32(VRSHL_S, rshl_s) +DO_3SAME_32(VRSHL_U, rshl_u) + +DO_3SAME_32_ENV(VQSHL_S, qshl_s) +DO_3SAME_32_ENV(VQSHL_U, qshl_u) +DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) +DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) diff --git a/target/arm/translate.c b/target/arm/translate.c index ebb899d846..3aabb18720 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -5439,6 +5439,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VHSUB: case NEON_3R_VABD: case NEON_3R_VABA: + case NEON_3R_VQSHL: + case NEON_3R_VRSHL: + case NEON_3R_VQRSHL: /* Already handled by decodetree */ return 1; } @@ -5449,17 +5452,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } pairwise = 0; switch (op) { - case NEON_3R_VQSHL: - case NEON_3R_VRSHL: - case NEON_3R_VQRSHL: - { - int rtmp; - /* Shift instruction operands are reversed. */ - rtmp = rn; - rn = rm; - rm = rtmp; - } - break; case NEON_3R_VPADD_VQRDMLAH: case NEON_3R_VPMAX: case NEON_3R_VPMIN: @@ -5519,14 +5511,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = neon_load_reg(rm, pass); } switch (op) { - case NEON_3R_VQSHL: - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - case NEON_3R_VRSHL: - GEN_NEON_INTEGER_OP(rshl); - break; - case NEON_3R_VQRSHL: - GEN_NEON_INTEGER_OP_ENV(qrshl); break; case NEON_3R_VPMAX: GEN_NEON_INTEGER_OP(pmax);