target/arm: Reuse sve_probe_page for gather loads

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200508154359.7494-19-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Richard Henderson authored on 2020-05-08 08:43:58 -07:00; committed by Peter Maydell
parent 88a660a48e
commit 10a85e2c8a
1 changed file with 109 additions and 99 deletions
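For orientation before the diff: an SVE gather load forms one address per active element, base + (offset[i] << scale), and performs an independent msize-byte load for each; the patch below changes how each of those per-element accesses is validated (via sve_probe_page) and performed. A minimal standalone sketch of that addressing model, in plain C with made-up names (toy_gather, pred, offs are illustrative only, not QEMU code):

/* Toy model of a predicated gather load: illustrative only, not QEMU code. */
#include <stdint.h>
#include <stdio.h>

static void toy_gather(uint32_t *dst, const uint8_t *mem,
                       uint64_t base, const uint32_t *offs,
                       int scale, const uint8_t *pred, int nelem)
{
    for (int i = 0; i < nelem; i++) {
        if (pred[i]) {                      /* predicate: is this element active? */
            uint64_t addr = base + ((uint64_t)offs[i] << scale);
            dst[i] = mem[addr];             /* one independent load per element */
        } else {
            dst[i] = 0;                     /* inactive elements are zeroed */
        }
    }
}

int main(void)
{
    uint8_t mem[64];
    for (int i = 0; i < 64; i++) {
        mem[i] = (uint8_t)i;
    }
    uint32_t offs[4] = { 3, 0, 7, 1 };
    uint8_t pred[4] = { 1, 0, 1, 1 };
    uint32_t dst[4];

    toy_gather(dst, mem, 8, offs, 1, pred, 4);  /* base 8, scale 1, i.e. offset << 1 */
    for (int i = 0; i < 4; i++) {
        printf("dst[%d] = %u\n", i, dst[i]);
    }
    return 0;
}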


@@ -5124,130 +5124,140 @@ static target_ulong off_zd_d(void *reg, intptr_t reg_ofs)
     return *(uint64_t *)(reg + reg_ofs);
 }
 
-static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
-                       target_ulong base, uint32_t desc, uintptr_t ra,
-                       zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn)
+static inline QEMU_ALWAYS_INLINE
+void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+               target_ulong base, uint32_t desc, uintptr_t retaddr,
+               int esize, int msize, zreg_off_fn *off_fn,
+               sve_ldst1_host_fn *host_fn,
+               sve_ldst1_tlb_fn *tlb_fn)
 {
     const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
-    intptr_t i, oprsz = simd_oprsz(desc);
-    ARMVectorReg scratch = { };
+    const int mmu_idx = cpu_mmu_index(env, false);
+    const intptr_t reg_max = simd_oprsz(desc);
+    ARMVectorReg scratch;
+    intptr_t reg_off;
+    SVEHostPage info, info2;
 
-    for (i = 0; i < oprsz; ) {
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+    memset(&scratch, 0, reg_max);
+    reg_off = 0;
+    do {
+        uint64_t pg = vg[reg_off >> 6];
         do {
             if (likely(pg & 1)) {
-                target_ulong off = off_fn(vm, i);
-                tlb_fn(env, &scratch, i, base + (off << scale), ra);
+                target_ulong addr = base + (off_fn(vm, reg_off) << scale);
+                target_ulong in_page = -(addr | TARGET_PAGE_MASK);
+
+                sve_probe_page(&info, false, env, addr, 0, MMU_DATA_LOAD,
+                               mmu_idx, retaddr);
+
+                if (likely(in_page >= msize)) {
+                    if (unlikely(info.flags & TLB_WATCHPOINT)) {
+                        cpu_check_watchpoint(env_cpu(env), addr, msize,
+                                             info.attrs, BP_MEM_READ, retaddr);
+                    }
+                    /* TODO: MTE check */
+                    host_fn(&scratch, reg_off, info.host);
+                } else {
+                    /* Element crosses the page boundary. */
+                    sve_probe_page(&info2, false, env, addr + in_page, 0,
+                                   MMU_DATA_LOAD, mmu_idx, retaddr);
+                    if (unlikely((info.flags | info2.flags) & TLB_WATCHPOINT)) {
+                        cpu_check_watchpoint(env_cpu(env), addr,
+                                             msize, info.attrs,
+                                             BP_MEM_READ, retaddr);
+                    }
+                    /* TODO: MTE check */
+                    tlb_fn(env, &scratch, reg_off, addr, retaddr);
+                }
             }
-            i += 4, pg >>= 4;
-        } while (i & 15);
-    }
+            reg_off += esize;
+            pg >>= esize;
+        } while (reg_off & 63);
+    } while (reg_off < reg_max);
 
     /* Wait until all exceptions have been raised to write back.  */
-    memcpy(vd, &scratch, oprsz);
+    memcpy(vd, &scratch, reg_max);
 }
 
-static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
-                       target_ulong base, uint32_t desc, uintptr_t ra,
-                       zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn)
-{
-    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
-    intptr_t i, oprsz = simd_oprsz(desc) / 8;
-    ARMVectorReg scratch = { };
-
-    for (i = 0; i < oprsz; i++) {
-        uint8_t pg = *(uint8_t *)(vg + H1(i));
-        if (likely(pg & 1)) {
-            target_ulong off = off_fn(vm, i * 8);
-            tlb_fn(env, &scratch, i * 8, base + (off << scale), ra);
-        }
-    }
-
-    /* Wait until all exceptions have been raised to write back.  */
-    memcpy(vd, &scratch, oprsz * 8);
+#define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \
+void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg,       \
+                                 void *vm, target_ulong base, uint32_t desc) \
+{                                                                            \
+    sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ,             \
+              off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb);       \
 }
 
-#define DO_LD1_ZPZ_S(MEM, OFS) \
-void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \
-    (CPUARMState *env, void *vd, void *vg, void *vm, \
-     target_ulong base, uint32_t desc) \
-{ \
-    sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \
-              off_##OFS##_s, sve_ld1##MEM##_tlb); \
+#define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \
+void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg,       \
+                                 void *vm, target_ulong base, uint32_t desc) \
+{                                                                            \
+    sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ,             \
+              off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb);       \
 }
 
-#define DO_LD1_ZPZ_D(MEM, OFS) \
-void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \
-    (CPUARMState *env, void *vd, void *vg, void *vm, \
-     target_ulong base, uint32_t desc) \
-{ \
-    sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \
-               off_##OFS##_d, sve_ld1##MEM##_tlb); \
-}
 
-DO_LD1_ZPZ_S(bsu, zsu)
-DO_LD1_ZPZ_S(bsu, zss)
-DO_LD1_ZPZ_D(bdu, zsu)
-DO_LD1_ZPZ_D(bdu, zss)
-DO_LD1_ZPZ_D(bdu, zd)
+DO_LD1_ZPZ_S(bsu, zsu, MO_8)
+DO_LD1_ZPZ_S(bsu, zss, MO_8)
+DO_LD1_ZPZ_D(bdu, zsu, MO_8)
+DO_LD1_ZPZ_D(bdu, zss, MO_8)
+DO_LD1_ZPZ_D(bdu, zd, MO_8)
 
-DO_LD1_ZPZ_S(bss, zsu)
-DO_LD1_ZPZ_S(bss, zss)
-DO_LD1_ZPZ_D(bds, zsu)
-DO_LD1_ZPZ_D(bds, zss)
-DO_LD1_ZPZ_D(bds, zd)
+DO_LD1_ZPZ_S(bss, zsu, MO_8)
+DO_LD1_ZPZ_S(bss, zss, MO_8)
+DO_LD1_ZPZ_D(bds, zsu, MO_8)
+DO_LD1_ZPZ_D(bds, zss, MO_8)
+DO_LD1_ZPZ_D(bds, zd, MO_8)
 
-DO_LD1_ZPZ_S(hsu_le, zsu)
-DO_LD1_ZPZ_S(hsu_le, zss)
-DO_LD1_ZPZ_D(hdu_le, zsu)
-DO_LD1_ZPZ_D(hdu_le, zss)
-DO_LD1_ZPZ_D(hdu_le, zd)
+DO_LD1_ZPZ_S(hsu_le, zsu, MO_16)
+DO_LD1_ZPZ_S(hsu_le, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_le, zsu, MO_16)
+DO_LD1_ZPZ_D(hdu_le, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_le, zd, MO_16)
 
-DO_LD1_ZPZ_S(hsu_be, zsu)
-DO_LD1_ZPZ_S(hsu_be, zss)
-DO_LD1_ZPZ_D(hdu_be, zsu)
-DO_LD1_ZPZ_D(hdu_be, zss)
-DO_LD1_ZPZ_D(hdu_be, zd)
+DO_LD1_ZPZ_S(hsu_be, zsu, MO_16)
+DO_LD1_ZPZ_S(hsu_be, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_be, zsu, MO_16)
+DO_LD1_ZPZ_D(hdu_be, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_be, zd, MO_16)
 
-DO_LD1_ZPZ_S(hss_le, zsu)
-DO_LD1_ZPZ_S(hss_le, zss)
-DO_LD1_ZPZ_D(hds_le, zsu)
-DO_LD1_ZPZ_D(hds_le, zss)
-DO_LD1_ZPZ_D(hds_le, zd)
+DO_LD1_ZPZ_S(hss_le, zsu, MO_16)
+DO_LD1_ZPZ_S(hss_le, zss, MO_16)
+DO_LD1_ZPZ_D(hds_le, zsu, MO_16)
+DO_LD1_ZPZ_D(hds_le, zss, MO_16)
+DO_LD1_ZPZ_D(hds_le, zd, MO_16)
 
-DO_LD1_ZPZ_S(hss_be, zsu)
-DO_LD1_ZPZ_S(hss_be, zss)
-DO_LD1_ZPZ_D(hds_be, zsu)
-DO_LD1_ZPZ_D(hds_be, zss)
-DO_LD1_ZPZ_D(hds_be, zd)
+DO_LD1_ZPZ_S(hss_be, zsu, MO_16)
+DO_LD1_ZPZ_S(hss_be, zss, MO_16)
+DO_LD1_ZPZ_D(hds_be, zsu, MO_16)
+DO_LD1_ZPZ_D(hds_be, zss, MO_16)
+DO_LD1_ZPZ_D(hds_be, zd, MO_16)
 
-DO_LD1_ZPZ_S(ss_le, zsu)
-DO_LD1_ZPZ_S(ss_le, zss)
-DO_LD1_ZPZ_D(sdu_le, zsu)
-DO_LD1_ZPZ_D(sdu_le, zss)
-DO_LD1_ZPZ_D(sdu_le, zd)
+DO_LD1_ZPZ_S(ss_le, zsu, MO_32)
+DO_LD1_ZPZ_S(ss_le, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_le, zsu, MO_32)
+DO_LD1_ZPZ_D(sdu_le, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_le, zd, MO_32)
 
-DO_LD1_ZPZ_S(ss_be, zsu)
-DO_LD1_ZPZ_S(ss_be, zss)
-DO_LD1_ZPZ_D(sdu_be, zsu)
-DO_LD1_ZPZ_D(sdu_be, zss)
-DO_LD1_ZPZ_D(sdu_be, zd)
+DO_LD1_ZPZ_S(ss_be, zsu, MO_32)
+DO_LD1_ZPZ_S(ss_be, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_be, zsu, MO_32)
+DO_LD1_ZPZ_D(sdu_be, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_be, zd, MO_32)
 
-DO_LD1_ZPZ_D(sds_le, zsu)
-DO_LD1_ZPZ_D(sds_le, zss)
-DO_LD1_ZPZ_D(sds_le, zd)
+DO_LD1_ZPZ_D(sds_le, zsu, MO_32)
+DO_LD1_ZPZ_D(sds_le, zss, MO_32)
+DO_LD1_ZPZ_D(sds_le, zd, MO_32)
 
-DO_LD1_ZPZ_D(sds_be, zsu)
-DO_LD1_ZPZ_D(sds_be, zss)
-DO_LD1_ZPZ_D(sds_be, zd)
+DO_LD1_ZPZ_D(sds_be, zsu, MO_32)
+DO_LD1_ZPZ_D(sds_be, zss, MO_32)
+DO_LD1_ZPZ_D(sds_be, zd, MO_32)
 
-DO_LD1_ZPZ_D(dd_le, zsu)
-DO_LD1_ZPZ_D(dd_le, zss)
-DO_LD1_ZPZ_D(dd_le, zd)
+DO_LD1_ZPZ_D(dd_le, zsu, MO_64)
+DO_LD1_ZPZ_D(dd_le, zss, MO_64)
+DO_LD1_ZPZ_D(dd_le, zd, MO_64)
 
-DO_LD1_ZPZ_D(dd_be, zsu)
-DO_LD1_ZPZ_D(dd_be, zss)
-DO_LD1_ZPZ_D(dd_be, zd)
+DO_LD1_ZPZ_D(dd_be, zsu, MO_64)
+DO_LD1_ZPZ_D(dd_be, zss, MO_64)
+DO_LD1_ZPZ_D(dd_be, zd, MO_64)
 
 #undef DO_LD1_ZPZ_S
 #undef DO_LD1_ZPZ_D
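
A detail worth calling out in the new sve_ld1_z above: in_page = -(addr | TARGET_PAGE_MASK) works because TARGET_PAGE_MASK has every bit set above the page-offset bits, so the negation yields the number of bytes from addr up to the end of its page; in_page >= msize then means the element lies entirely within one page and can be loaded through the host pointer, otherwise it crosses the boundary and falls back to tlb_fn. A standalone sketch of just that arithmetic, assuming a hypothetical 4 KiB page (PAGE_MASK here stands in for TARGET_PAGE_MASK and is not a QEMU definition):

/* Demonstrates the page-remainder trick used by sve_ld1_z above.
 * PAGE_MASK stands in for TARGET_PAGE_MASK with an assumed 4 KiB page. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096
#define PAGE_MASK (~(uint64_t)(PAGE_SIZE - 1))

int main(void)
{
    uint64_t addrs[] = { 0x1000, 0x1ffe, 0x1fff, 0x2345 };

    for (int i = 0; i < 4; i++) {
        uint64_t addr = addrs[i];
        /* All bits above the page offset are set, so negating gives
         * PAGE_SIZE - (addr & (PAGE_SIZE - 1)): bytes left in the page. */
        uint64_t in_page = -(addr | PAGE_MASK);
        printf("addr=0x%-6" PRIx64 " bytes to end of page=%4" PRIu64
               "  (8-byte element %s)\n",
               addr, in_page,
               in_page >= 8 ? "fits in page" : "crosses page");
    }
    return 0;
}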