target/arm: Reuse sve_probe_page for scatter stores

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200508154359.7494-18-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2020-05-08 08:43:57 -07:00 committed by Peter Maydell
parent 50de9b78ce
commit 88a660a48e
1 changed file with 111 additions and 71 deletions

View File

@@ -5413,94 +5413,134 @@ DO_LDFF1_ZPZ_D(dd_be, zd, MO_64)
/* Stores with a vector index. */
/*
 * NOTE(review): this region appears to be a diff rendering whose +/- markers
 * were stripped: lines of the removed sve_st1_zs()/sve_st1_zd() seem to be
 * interleaved with the added sve_st1_z(). The comments added below describe
 * only the sve_st1_z() lines -- confirm against the real file.
 */
static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
target_ulong base, uint32_t desc, uintptr_t ra,
zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn)
/*
 * sve_st1_z: store the predicated elements of vd to addresses formed from
 * base plus per-element offsets, in two passes (probe everything, then store).
 *
 * vg      predicate, read as 64-bit words; bit (reg_off & 63) of word
 *         (reg_off >> 6) enables the element at byte offset reg_off.
 * vm      offset source handed to off_fn(vm, reg_off).
 * base    added to each offset after the offset is shifted left by the
 *         2-bit scale field extracted from desc.
 * retaddr passed on to the probe, watchpoint and tlb_fn calls.
 * esize   step applied to reg_off per element (the macros in this file
 *         pass 4 or 8).
 * msize   bytes accessed per element in memory (the macros pass 1 << MSZ).
 * off_fn  returns the offset for the element at reg_off.
 * host_fn store through a host pointer recorded in the first pass.
 * tlb_fn  store used when no host pointer was recorded.
 */
static inline QEMU_ALWAYS_INLINE
void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
target_ulong base, uint32_t desc, uintptr_t retaddr,
int esize, int msize, zreg_off_fn *off_fn,
sve_ldst1_host_fn *host_fn,
sve_ldst1_tlb_fn *tlb_fn)
{
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
intptr_t i, oprsz = simd_oprsz(desc);
const int mmu_idx = cpu_mmu_index(env, false);
const intptr_t reg_max = simd_oprsz(desc);
/* One slot per element; NULL unless the first pass recorded a host address. */
void *host[ARM_MAX_VQ * 4];
intptr_t reg_off, i;
SVEHostPage info, info2;
for (i = 0; i < oprsz; ) {
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
/*
* Probe all of the elements for host addresses and flags.
*/
i = reg_off = 0;
do {
/* Predicate word covering the current 64 bytes of the register. */
uint64_t pg = vg[reg_off >> 6];
do {
if (likely(pg & 1)) {
target_ulong off = off_fn(vm, i);
tlb_fn(env, vd, i, base + (off << scale), ra);
target_ulong addr = base + (off_fn(vm, reg_off) << scale);
/*
 * NOTE(review): presumably the number of bytes from addr to the end of its
 * page -- assumes TARGET_PAGE_MASK has every bit at and above the page size
 * set; confirm against its definition.
 */
target_ulong in_page = -(addr | TARGET_PAGE_MASK);
host[i] = NULL;
if (likely((pg >> (reg_off & 63)) & 1)) {
/* The whole element fits in one page: keep the probed host address. */
if (likely(in_page >= msize)) {
sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE,
mmu_idx, retaddr);
host[i] = info.host;
} else {
/*
* Element crosses the page boundary.
* Probe both pages, but do not record the host address,
* so that we use the slow path.
*/
sve_probe_page(&info, false, env, addr, 0,
MMU_DATA_STORE, mmu_idx, retaddr);
sve_probe_page(&info2, false, env, addr + in_page, 0,
MMU_DATA_STORE, mmu_idx, retaddr);
/* Merge the flags so the watchpoint test below covers both pages. */
info.flags |= info2.flags;
}
if (unlikely(info.flags & TLB_WATCHPOINT)) {
cpu_check_watchpoint(env_cpu(env), addr, msize,
info.attrs, BP_MEM_WRITE, retaddr);
}
/* TODO: MTE check. */
}
i += 4, pg >>= 4;
} while (i & 15);
}
}
i += 1;
reg_off += esize;
/*
 * The inner loop ends once reg_off reaches a multiple of 64, i.e. when the
 * next predicate word has to be loaded.
 */
} while (reg_off & 63);
} while (reg_off < reg_max);
static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
target_ulong base, uint32_t desc, uintptr_t ra,
zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn)
{
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
intptr_t i, oprsz = simd_oprsz(desc) / 8;
for (i = 0; i < oprsz; i++) {
uint8_t pg = *(uint8_t *)(vg + H1(i));
if (likely(pg & 1)) {
target_ulong off = off_fn(vm, i * 8);
tlb_fn(env, vd, i * 8, base + (off << scale), ra);
/*
* Now that we have recognized all exceptions except SyncExternal
* (from TLB_MMIO), which we cannot avoid, perform all of the stores.
*
* Note for the common case of an element in RAM, not crossing a page
* boundary, we have stored the host address in host[]. This doubles
* as a first-level check against the predicate, since only enabled
* elements have non-null host addresses.
*/
i = reg_off = 0;
do {
void *h = host[i];
if (likely(h != NULL)) {
host_fn(vd, reg_off, h);
/*
 * No host address was recorded: re-check the predicate, because NULL is
 * also what the first pass stores for inactive elements.
 */
} else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) {
target_ulong addr = base + (off_fn(vm, reg_off) << scale);
tlb_fn(env, vd, reg_off, addr, retaddr);
}
}
i += 1;
reg_off += esize;
} while (reg_off < reg_max);
}
/*
 * DO_ST1_ZPZ_S: define HELPER(sve_st<MEM>_<OFS>).
 * The three-parameter form forwards to sve_st1_z() with esize 4 and
 * msize 1 << MSZ, using off_<OFS>_s as the offset function and
 * sve_st1<MEM>_host / sve_st1<MEM>_tlb as the store functions.
 * NOTE(review): the two-parameter form that calls sve_st1_zs() looks like the
 * removed side of a diff whose markers were stripped -- confirm against the
 * real file.
 */
#define DO_ST1_ZPZ_S(MEM, OFS) \
void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \
(CPUARMState *env, void *vd, void *vg, void *vm, \
target_ulong base, uint32_t desc) \
{ \
sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \
off_##OFS##_s, sve_st1##MEM##_tlb); \
#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \
void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
void *vm, target_ulong base, uint32_t desc) \
{ \
sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \
off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
}
/*
 * DO_ST1_ZPZ_D: define HELPER(sve_st<MEM>_<OFS>).
 * The three-parameter form forwards to sve_st1_z() with esize 8 and
 * msize 1 << MSZ, using off_<OFS>_d as the offset function and
 * sve_st1<MEM>_host / sve_st1<MEM>_tlb as the store functions.
 * NOTE(review): the two-parameter form that calls sve_st1_zd() looks like the
 * removed side of a diff whose markers were stripped -- confirm against the
 * real file.
 */
#define DO_ST1_ZPZ_D(MEM, OFS) \
void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \
(CPUARMState *env, void *vd, void *vg, void *vm, \
target_ulong base, uint32_t desc) \
{ \
sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \
off_##OFS##_d, sve_st1##MEM##_tlb); \
#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \
void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
void *vm, target_ulong base, uint32_t desc) \
{ \
sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \
off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
}
/*
 * Instantiate the scatter-store helpers. Each (MEM, OFS) pair is listed
 * twice: once in two-argument form and once in three-argument form, whose
 * last argument (MO_8 .. MO_64) is the MSZ that the three-parameter macros
 * turn into msize = 1 << MSZ.
 * NOTE(review): the two-argument lines look like the removed side of a diff
 * whose markers were stripped -- confirm against the real file.
 */
DO_ST1_ZPZ_S(bs, zsu)
DO_ST1_ZPZ_S(hs_le, zsu)
DO_ST1_ZPZ_S(hs_be, zsu)
DO_ST1_ZPZ_S(ss_le, zsu)
DO_ST1_ZPZ_S(ss_be, zsu)
DO_ST1_ZPZ_S(bs, zsu, MO_8)
DO_ST1_ZPZ_S(hs_le, zsu, MO_16)
DO_ST1_ZPZ_S(hs_be, zsu, MO_16)
DO_ST1_ZPZ_S(ss_le, zsu, MO_32)
DO_ST1_ZPZ_S(ss_be, zsu, MO_32)
DO_ST1_ZPZ_S(bs, zss)
DO_ST1_ZPZ_S(hs_le, zss)
DO_ST1_ZPZ_S(hs_be, zss)
DO_ST1_ZPZ_S(ss_le, zss)
DO_ST1_ZPZ_S(ss_be, zss)
DO_ST1_ZPZ_S(bs, zss, MO_8)
DO_ST1_ZPZ_S(hs_le, zss, MO_16)
DO_ST1_ZPZ_S(hs_be, zss, MO_16)
DO_ST1_ZPZ_S(ss_le, zss, MO_32)
DO_ST1_ZPZ_S(ss_be, zss, MO_32)
DO_ST1_ZPZ_D(bd, zsu)
DO_ST1_ZPZ_D(hd_le, zsu)
DO_ST1_ZPZ_D(hd_be, zsu)
DO_ST1_ZPZ_D(sd_le, zsu)
DO_ST1_ZPZ_D(sd_be, zsu)
DO_ST1_ZPZ_D(dd_le, zsu)
DO_ST1_ZPZ_D(dd_be, zsu)
DO_ST1_ZPZ_D(bd, zsu, MO_8)
DO_ST1_ZPZ_D(hd_le, zsu, MO_16)
DO_ST1_ZPZ_D(hd_be, zsu, MO_16)
DO_ST1_ZPZ_D(sd_le, zsu, MO_32)
DO_ST1_ZPZ_D(sd_be, zsu, MO_32)
DO_ST1_ZPZ_D(dd_le, zsu, MO_64)
DO_ST1_ZPZ_D(dd_be, zsu, MO_64)
DO_ST1_ZPZ_D(bd, zss)
DO_ST1_ZPZ_D(hd_le, zss)
DO_ST1_ZPZ_D(hd_be, zss)
DO_ST1_ZPZ_D(sd_le, zss)
DO_ST1_ZPZ_D(sd_be, zss)
DO_ST1_ZPZ_D(dd_le, zss)
DO_ST1_ZPZ_D(dd_be, zss)
DO_ST1_ZPZ_D(bd, zss, MO_8)
DO_ST1_ZPZ_D(hd_le, zss, MO_16)
DO_ST1_ZPZ_D(hd_be, zss, MO_16)
DO_ST1_ZPZ_D(sd_le, zss, MO_32)
DO_ST1_ZPZ_D(sd_be, zss, MO_32)
DO_ST1_ZPZ_D(dd_le, zss, MO_64)
DO_ST1_ZPZ_D(dd_be, zss, MO_64)
DO_ST1_ZPZ_D(bd, zd)
DO_ST1_ZPZ_D(hd_le, zd)
DO_ST1_ZPZ_D(hd_be, zd)
DO_ST1_ZPZ_D(sd_le, zd)
DO_ST1_ZPZ_D(sd_be, zd)
DO_ST1_ZPZ_D(dd_le, zd)
DO_ST1_ZPZ_D(dd_be, zd)
DO_ST1_ZPZ_D(bd, zd, MO_8)
DO_ST1_ZPZ_D(hd_le, zd, MO_16)
DO_ST1_ZPZ_D(hd_be, zd, MO_16)
DO_ST1_ZPZ_D(sd_le, zd, MO_32)
DO_ST1_ZPZ_D(sd_be, zd, MO_32)
DO_ST1_ZPZ_D(dd_le, zd, MO_64)
DO_ST1_ZPZ_D(dd_be, zd, MO_64)
#undef DO_ST1_ZPZ_S
#undef DO_ST1_ZPZ_D