target/riscv: always clear vstart for ldst_whole insns

Commit 8ff8ac6329 added a conditional to guard the vext_ldst_whole()
helper if vstart >= evl. But by skipping the helper we're also not
setting vstart = 0 at the end of the insns, which is incorrect.

We'll move the conditional to vext_ldst_whole(), following in line with
the removal of all brconds vstart >= vl that the next patch will do. The
idea is to make the helpers responsible for their own vstart management.

Fix ldst_whole isns by:

- remove the brcond that skips the helper if vstart is >= evl;

- vext_ldst_whole() now does an early exit with the same check, where
  evl = (vlenb * nf) >> log2_esz, but the early exit will also clear
  vstart.

The 'width' param is now unneeded in ldst_whole_trans() and is also
removed. It was used for the evl calculation for the brcond and has no
other use now.  The 'width' is reflected in vext_ldst_whole() via
log2_esz, which is encoded by GEN_VEXT_LD_WHOLE() as
"ctzl(sizeof(ETYPE))".

Suggested-by: Max Chou <max.chou@sifive.com>
Fixes: 8ff8ac6329 ("target/riscv: rvv: Add missing early exit condition for whole register load/store")
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Max Chou <max.chou@sifive.com>
Message-ID: <20240314175704.478276-6-dbarboza@ventanamicro.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
This commit is contained in:
Daniel Henrique Barboza 2024-03-14 14:56:59 -03:00 committed by Alistair Francis
parent 7e53e3ddf6
commit 929e521a47
2 changed files with 28 additions and 29 deletions

View File

@ -1097,13 +1097,9 @@ GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check)
typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32);
static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
uint32_t width, gen_helper_ldst_whole *fn,
gen_helper_ldst_whole *fn,
DisasContext *s)
{
uint32_t evl = s->cfg_ptr->vlenb * nf / width;
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_GEU, cpu_vstart, evl, over);
TCGv_ptr dest;
TCGv base;
TCGv_i32 desc;
@ -1120,8 +1116,6 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
fn(dest, base, tcg_env, desc);
gen_set_label(over);
return true;
}
@ -1129,42 +1123,42 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
* load and store whole register instructions ignore vtype and vl setting.
* Thus, we don't need to check vill bit. (Section 7.9)
*/
#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF, WIDTH) \
#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF) \
static bool trans_##NAME(DisasContext *s, arg_##NAME * a) \
{ \
if (require_rvv(s) && \
QEMU_IS_ALIGNED(a->rd, ARG_NF)) { \
return ldst_whole_trans(a->rd, a->rs1, ARG_NF, WIDTH, \
return ldst_whole_trans(a->rd, a->rs1, ARG_NF, \
gen_helper_##NAME, s); \
} \
return false; \
}
GEN_LDST_WHOLE_TRANS(vl1re8_v, 1, 1)
GEN_LDST_WHOLE_TRANS(vl1re16_v, 1, 2)
GEN_LDST_WHOLE_TRANS(vl1re32_v, 1, 4)
GEN_LDST_WHOLE_TRANS(vl1re64_v, 1, 8)
GEN_LDST_WHOLE_TRANS(vl2re8_v, 2, 1)
GEN_LDST_WHOLE_TRANS(vl2re16_v, 2, 2)
GEN_LDST_WHOLE_TRANS(vl2re32_v, 2, 4)
GEN_LDST_WHOLE_TRANS(vl2re64_v, 2, 8)
GEN_LDST_WHOLE_TRANS(vl4re8_v, 4, 1)
GEN_LDST_WHOLE_TRANS(vl4re16_v, 4, 2)
GEN_LDST_WHOLE_TRANS(vl4re32_v, 4, 4)
GEN_LDST_WHOLE_TRANS(vl4re64_v, 4, 8)
GEN_LDST_WHOLE_TRANS(vl8re8_v, 8, 1)
GEN_LDST_WHOLE_TRANS(vl8re16_v, 8, 2)
GEN_LDST_WHOLE_TRANS(vl8re32_v, 8, 4)
GEN_LDST_WHOLE_TRANS(vl8re64_v, 8, 8)
GEN_LDST_WHOLE_TRANS(vl1re8_v, 1)
GEN_LDST_WHOLE_TRANS(vl1re16_v, 1)
GEN_LDST_WHOLE_TRANS(vl1re32_v, 1)
GEN_LDST_WHOLE_TRANS(vl1re64_v, 1)
GEN_LDST_WHOLE_TRANS(vl2re8_v, 2)
GEN_LDST_WHOLE_TRANS(vl2re16_v, 2)
GEN_LDST_WHOLE_TRANS(vl2re32_v, 2)
GEN_LDST_WHOLE_TRANS(vl2re64_v, 2)
GEN_LDST_WHOLE_TRANS(vl4re8_v, 4)
GEN_LDST_WHOLE_TRANS(vl4re16_v, 4)
GEN_LDST_WHOLE_TRANS(vl4re32_v, 4)
GEN_LDST_WHOLE_TRANS(vl4re64_v, 4)
GEN_LDST_WHOLE_TRANS(vl8re8_v, 8)
GEN_LDST_WHOLE_TRANS(vl8re16_v, 8)
GEN_LDST_WHOLE_TRANS(vl8re32_v, 8)
GEN_LDST_WHOLE_TRANS(vl8re64_v, 8)
/*
* The vector whole register store instructions are encoded similar to
* unmasked unit-stride store of elements with EEW=8.
*/
GEN_LDST_WHOLE_TRANS(vs1r_v, 1, 1)
GEN_LDST_WHOLE_TRANS(vs2r_v, 2, 1)
GEN_LDST_WHOLE_TRANS(vs4r_v, 4, 1)
GEN_LDST_WHOLE_TRANS(vs8r_v, 8, 1)
GEN_LDST_WHOLE_TRANS(vs1r_v, 1)
GEN_LDST_WHOLE_TRANS(vs2r_v, 2)
GEN_LDST_WHOLE_TRANS(vs4r_v, 4)
GEN_LDST_WHOLE_TRANS(vs8r_v, 8)
/*
*** Vector Integer Arithmetic Instructions

View File

@ -572,6 +572,11 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
uint32_t max_elems = vlenb >> log2_esz;
if (env->vstart >= ((vlenb * nf) >> log2_esz)) {
env->vstart = 0;
return;
}
k = env->vstart / max_elems;
off = env->vstart % max_elems;