From 929e521a47b7110339526771720bc2096f69dddf Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 14 Mar 2024 14:56:59 -0300 Subject: [PATCH] target/riscv: always clear vstart for ldst_whole insns Commit 8ff8ac6329 added a conditional to guard the vext_ldst_whole() helper if vstart >= evl. But by skipping the helper we're also not setting vstart = 0 at the end of the insns, which is incorrect. We'll move the conditional to vext_ldst_whole(), following in line with the removal of all brconds vstart >= vl that the next patch will do. The idea is to make the helpers responsible for their own vstart management. Fix ldst_whole insns by: - remove the brcond that skips the helper if vstart is >= evl; - vext_ldst_whole() now does an early exit with the same check, where evl = (vlenb * nf) >> log2_esz, but the early exit will also clear vstart. The 'width' param is now unneeded in ldst_whole_trans() and is also removed. It was used for the evl calculation for the brcond and has no other use now. The 'width' is reflected in vext_ldst_whole() via log2_esz, which is encoded by GEN_VEXT_LD_WHOLE() as "ctzl(sizeof(ETYPE))". 
Suggested-by: Max Chou Fixes: 8ff8ac6329 ("target/riscv: rvv: Add missing early exit condition for whole register load/store") Signed-off-by: Daniel Henrique Barboza Reviewed-by: Alistair Francis Reviewed-by: Max Chou Message-ID: <20240314175704.478276-6-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis --- target/riscv/insn_trans/trans_rvv.c.inc | 52 +++++++++++-------------- target/riscv/vector_helper.c | 5 +++ 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 52c26a7834..1366445e1f 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1097,13 +1097,9 @@ GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check) typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32); static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf, - uint32_t width, gen_helper_ldst_whole *fn, + gen_helper_ldst_whole *fn, DisasContext *s) { - uint32_t evl = s->cfg_ptr->vlenb * nf / width; - TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_GEU, cpu_vstart, evl, over); - TCGv_ptr dest; TCGv base; TCGv_i32 desc; @@ -1120,8 +1116,6 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf, fn(dest, base, tcg_env, desc); - gen_set_label(over); - return true; } @@ -1129,42 +1123,42 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf, * load and store whole register instructions ignore vtype and vl setting. * Thus, we don't need to check vill bit. 
(Section 7.9) */ -#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF, WIDTH) \ +#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF) \ static bool trans_##NAME(DisasContext *s, arg_##NAME * a) \ { \ if (require_rvv(s) && \ QEMU_IS_ALIGNED(a->rd, ARG_NF)) { \ - return ldst_whole_trans(a->rd, a->rs1, ARG_NF, WIDTH, \ + return ldst_whole_trans(a->rd, a->rs1, ARG_NF, \ gen_helper_##NAME, s); \ } \ return false; \ } -GEN_LDST_WHOLE_TRANS(vl1re8_v, 1, 1) -GEN_LDST_WHOLE_TRANS(vl1re16_v, 1, 2) -GEN_LDST_WHOLE_TRANS(vl1re32_v, 1, 4) -GEN_LDST_WHOLE_TRANS(vl1re64_v, 1, 8) -GEN_LDST_WHOLE_TRANS(vl2re8_v, 2, 1) -GEN_LDST_WHOLE_TRANS(vl2re16_v, 2, 2) -GEN_LDST_WHOLE_TRANS(vl2re32_v, 2, 4) -GEN_LDST_WHOLE_TRANS(vl2re64_v, 2, 8) -GEN_LDST_WHOLE_TRANS(vl4re8_v, 4, 1) -GEN_LDST_WHOLE_TRANS(vl4re16_v, 4, 2) -GEN_LDST_WHOLE_TRANS(vl4re32_v, 4, 4) -GEN_LDST_WHOLE_TRANS(vl4re64_v, 4, 8) -GEN_LDST_WHOLE_TRANS(vl8re8_v, 8, 1) -GEN_LDST_WHOLE_TRANS(vl8re16_v, 8, 2) -GEN_LDST_WHOLE_TRANS(vl8re32_v, 8, 4) -GEN_LDST_WHOLE_TRANS(vl8re64_v, 8, 8) +GEN_LDST_WHOLE_TRANS(vl1re8_v, 1) +GEN_LDST_WHOLE_TRANS(vl1re16_v, 1) +GEN_LDST_WHOLE_TRANS(vl1re32_v, 1) +GEN_LDST_WHOLE_TRANS(vl1re64_v, 1) +GEN_LDST_WHOLE_TRANS(vl2re8_v, 2) +GEN_LDST_WHOLE_TRANS(vl2re16_v, 2) +GEN_LDST_WHOLE_TRANS(vl2re32_v, 2) +GEN_LDST_WHOLE_TRANS(vl2re64_v, 2) +GEN_LDST_WHOLE_TRANS(vl4re8_v, 4) +GEN_LDST_WHOLE_TRANS(vl4re16_v, 4) +GEN_LDST_WHOLE_TRANS(vl4re32_v, 4) +GEN_LDST_WHOLE_TRANS(vl4re64_v, 4) +GEN_LDST_WHOLE_TRANS(vl8re8_v, 8) +GEN_LDST_WHOLE_TRANS(vl8re16_v, 8) +GEN_LDST_WHOLE_TRANS(vl8re32_v, 8) +GEN_LDST_WHOLE_TRANS(vl8re64_v, 8) /* * The vector whole register store instructions are encoded similar to * unmasked unit-stride store of elements with EEW=8. 
*/ -GEN_LDST_WHOLE_TRANS(vs1r_v, 1, 1) -GEN_LDST_WHOLE_TRANS(vs2r_v, 2, 1) -GEN_LDST_WHOLE_TRANS(vs4r_v, 4, 1) -GEN_LDST_WHOLE_TRANS(vs8r_v, 8, 1) +GEN_LDST_WHOLE_TRANS(vs1r_v, 1) +GEN_LDST_WHOLE_TRANS(vs2r_v, 2) +GEN_LDST_WHOLE_TRANS(vs4r_v, 4) +GEN_LDST_WHOLE_TRANS(vs8r_v, 8) /* *** Vector Integer Arithmetic Instructions diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index bcc553c0e2..1f4c276b21 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -572,6 +572,11 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, uint32_t vlenb = riscv_cpu_cfg(env)->vlenb; uint32_t max_elems = vlenb >> log2_esz; + if (env->vstart >= ((vlenb * nf) >> log2_esz)) { + env->vstart = 0; + return; + } + k = env->vstart / max_elems; off = env->vstart % max_elems;