target/arm: Use SVEContLdSt for contiguous stores

Follow the model set up for contiguous loads.  This handles
watchpoints correctly for contiguous stores, recognizing the
exception before any changes to memory.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200508154359.7494-16-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2020-05-08 08:43:55 -07:00 committed by Peter Maydell
parent c647673ce4
commit 0fa476c1bb
1 changed files with 159 additions and 126 deletions

View File

@ -3995,6 +3995,10 @@ static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \
*(TYPEE *)(vd + H(reg_off)) = val; \
}
/*
 * Generate sve_<NAME>_host(vd, reg_off, host): a store primitive that
 * writes one element directly through a host pointer.
 *
 * The element is read from vd at byte offset H(reg_off) as a TYPEE,
 * converted to TYPEM, and passed to HOST together with the host pointer.
 * H and HOST are supplied by the instantiating macro.
 */
#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \
static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \
{ HOST(host, (TYPEM)*(TYPEE *)(vd + H(reg_off))); }
#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \
static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
target_ulong addr, uintptr_t ra) \
@ -4022,6 +4026,7 @@ DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t)
DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t)
/*
 * Instantiate the pair of store primitives for st1<NAME>:
 *   - sve_st1<NAME>_host, built by DO_ST_HOST around stb_p;
 *   - the matching primitive built by DO_ST_TLB around cpu_stb_data_ra
 *     (DO_ST_TLB is defined outside this view).
 * H is the register-offset adjustment macro, TE the element type held in
 * the register, and TM the type the value is converted to before storing.
 */
#define DO_ST_PRIM_1(NAME, H, TE, TM) \
DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \
DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra)
DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t)
@ -4036,6 +4041,8 @@ DO_ST_PRIM_1(bd, , uint64_t, uint8_t)
DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra)
/*
 * Instantiate the store primitives for st1<NAME> in both "_be" and "_le"
 * flavours.  For each flavour this expands DO_ST_HOST with ST##_<be|le>_p
 * and DO_ST_TLB with cpu_##ST##_<be|le>_data_ra, so ST is the common stem
 * of the store helper names.  H, TE and TM are forwarded unchanged to
 * the underlying generator macros.
 */
#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \
DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \
DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \
DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \
DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra)
@ -4908,151 +4915,177 @@ DO_LDFF1_LDNF1_2(dd, MO_64, MO_64)
#undef DO_LDFF1_LDNF1_2
/*
* Common helpers for all contiguous 1,2,3,4-register predicated stores.
* Common helper for all contiguous 1,2,3,4-register predicated stores.
*/
static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr,
uint32_t desc, const uintptr_t ra,
const int esize, const int msize,
sve_ldst1_tlb_fn *tlb_fn)
static inline QEMU_ALWAYS_INLINE
void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc,
const uintptr_t retaddr, const int esz,
const int msz, const int N,
sve_ldst1_host_fn *host_fn,
sve_ldst1_tlb_fn *tlb_fn)
{
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
intptr_t i, oprsz = simd_oprsz(desc);
void *vd = &env->vfp.zregs[rd];
const intptr_t reg_max = simd_oprsz(desc);
intptr_t reg_off, reg_last, mem_off;
SVEContLdSt info;
void *host;
int i, flags;
for (i = 0; i < oprsz; ) {
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
do {
if (pg & 1) {
tlb_fn(env, vd, i, addr, ra);
/* Find the active elements. */
if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, N << msz)) {
/* The entire predicate was false; no store occurs. */
return;
}
/* Probe the page(s). Exit with exception for any invalid page. */
sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, retaddr);
/* Handle watchpoints for all active elements. */
sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz,
BP_MEM_WRITE, retaddr);
/* TODO: MTE check. */
flags = info.page[0].flags | info.page[1].flags;
if (unlikely(flags != 0)) {
#ifdef CONFIG_USER_ONLY
g_assert_not_reached();
#else
/*
* At least one page includes MMIO.
* Any bus operation can fail with cpu_transaction_failed,
* which for ARM will raise SyncExternal. We cannot avoid
* this fault and will leave with the store incomplete.
*/
mem_off = info.mem_off_first[0];
reg_off = info.reg_off_first[0];
reg_last = info.reg_off_last[1];
if (reg_last < 0) {
reg_last = info.reg_off_split;
if (reg_last < 0) {
reg_last = info.reg_off_last[0];
}
i += esize, pg >>= esize;
addr += msize;
} while (i & 15);
}
do {
uint64_t pg = vg[reg_off >> 6];
do {
if ((pg >> (reg_off & 63)) & 1) {
for (i = 0; i < N; ++i) {
tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off,
addr + mem_off + (i << msz), retaddr);
}
}
reg_off += 1 << esz;
mem_off += N << msz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
return;
#endif
}
mem_off = info.mem_off_first[0];
reg_off = info.reg_off_first[0];
reg_last = info.reg_off_last[0];
host = info.page[0].host;
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
if ((pg >> (reg_off & 63)) & 1) {
for (i = 0; i < N; ++i) {
host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off,
host + mem_off + (i << msz));
}
}
reg_off += 1 << esz;
mem_off += N << msz;
} while (reg_off <= reg_last && (reg_off & 63));
}
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
*/
mem_off = info.mem_off_split;
if (unlikely(mem_off >= 0)) {
reg_off = info.reg_off_split;
for (i = 0; i < N; ++i) {
tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off,
addr + mem_off + (i << msz), retaddr);
}
}
mem_off = info.mem_off_first[1];
if (unlikely(mem_off >= 0)) {
reg_off = info.reg_off_first[1];
reg_last = info.reg_off_last[1];
host = info.page[1].host;
do {
uint64_t pg = vg[reg_off >> 6];
do {
if ((pg >> (reg_off & 63)) & 1) {
for (i = 0; i < N; ++i) {
host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off,
host + mem_off + (i << msz));
}
}
reg_off += 1 << esz;
mem_off += N << msz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
}
}
/*
 * Predicated contiguous store of two consecutive vector registers.
 *
 * The first register number is taken from desc; the second is the next
 * one, wrapping modulo 32.  The register is walked in 16-byte chunks,
 * each governed by 16 predicate bits read from vg.  For every element
 * whose low predicate bit is set, one element from each register is
 * stored through tlb_fn at consecutive msize-spaced addresses.  The
 * memory address advances by 2 * msize per element whether or not the
 * element is active.
 *
 * esize is the register element size in bytes and msize the memory
 * element size in bytes; ra is forwarded untouched to tlb_fn.
 */
static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
                      uint32_t desc, const uintptr_t ra,
                      const int esize, const int msize,
                      sve_ldst1_tlb_fn *tlb_fn)
{
    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
    const intptr_t reg_max = simd_oprsz(desc);
    void *zreg[2] = {
        &env->vfp.zregs[rd],
        &env->vfp.zregs[(rd + 1) & 31],
    };
    intptr_t reg_off = 0;

    while (reg_off < reg_max) {
        /* Predicate bits for the next 16 bytes of register data. */
        uint16_t pg = *(uint16_t *)(vg + H1_2(reg_off >> 3));

        do {
            if (pg & 1) {
                int k;

                /* Store the element of each register, in register order. */
                for (k = 0; k < 2; k++) {
                    tlb_fn(env, zreg[k], reg_off, addr + k * msize, ra);
                }
            }
            reg_off += esize;
            pg >>= esize;
            addr += 2 * msize;
        } while (reg_off & 15);
    }
}
/*
 * Predicated contiguous store of three consecutive vector registers.
 *
 * The first register number is taken from desc; the following two are
 * the next registers, wrapping modulo 32.  The register is walked in
 * 16-byte chunks, each governed by 16 predicate bits read from vg.  For
 * every element whose low predicate bit is set, one element from each
 * register is stored through tlb_fn at consecutive msize-spaced
 * addresses.  The memory address advances by 3 * msize per element
 * whether or not the element is active.
 *
 * esize is the register element size in bytes and msize the memory
 * element size in bytes; ra is forwarded untouched to tlb_fn.
 */
static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
                      uint32_t desc, const uintptr_t ra,
                      const int esize, const int msize,
                      sve_ldst1_tlb_fn *tlb_fn)
{
    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
    const intptr_t reg_max = simd_oprsz(desc);
    void *zreg[3] = {
        &env->vfp.zregs[rd],
        &env->vfp.zregs[(rd + 1) & 31],
        &env->vfp.zregs[(rd + 2) & 31],
    };
    intptr_t reg_off = 0;

    while (reg_off < reg_max) {
        /* Predicate bits for the next 16 bytes of register data. */
        uint16_t pg = *(uint16_t *)(vg + H1_2(reg_off >> 3));

        do {
            if (pg & 1) {
                int k;

                /* Store the element of each register, in register order. */
                for (k = 0; k < 3; k++) {
                    tlb_fn(env, zreg[k], reg_off, addr + k * msize, ra);
                }
            }
            reg_off += esize;
            pg >>= esize;
            addr += 3 * msize;
        } while (reg_off & 15);
    }
}
/*
 * Predicated contiguous store of four consecutive vector registers.
 *
 * The first register number is taken from desc; the following three are
 * the next registers, wrapping modulo 32.  The register is walked in
 * 16-byte chunks, each governed by 16 predicate bits read from vg.  For
 * every element whose low predicate bit is set, one element from each
 * register is stored through tlb_fn at consecutive msize-spaced
 * addresses.  The memory address advances by 4 * msize per element
 * whether or not the element is active.
 *
 * esize is the register element size in bytes and msize the memory
 * element size in bytes; ra is forwarded untouched to tlb_fn.
 */
static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
                      uint32_t desc, const uintptr_t ra,
                      const int esize, const int msize,
                      sve_ldst1_tlb_fn *tlb_fn)
{
    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
    const intptr_t reg_max = simd_oprsz(desc);
    void *zreg[4] = {
        &env->vfp.zregs[rd],
        &env->vfp.zregs[(rd + 1) & 31],
        &env->vfp.zregs[(rd + 2) & 31],
        &env->vfp.zregs[(rd + 3) & 31],
    };
    intptr_t reg_off = 0;

    while (reg_off < reg_max) {
        /* Predicate bits for the next 16 bytes of register data. */
        uint16_t pg = *(uint16_t *)(vg + H1_2(reg_off >> 3));

        do {
            if (pg & 1) {
                int k;

                /* Store the element of each register, in register order. */
                for (k = 0; k < 4; k++) {
                    tlb_fn(env, zreg[k], reg_off, addr + k * msize, ra);
                }
            }
            reg_off += esize;
            pg >>= esize;
            addr += 4 * msize;
        } while (reg_off & 15);
    }
}
#define DO_STN_1(N, NAME, ESIZE) \
void QEMU_FLATTEN HELPER(sve_st##N##NAME##_r) \
(CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
#define DO_STN_1(N, NAME, ESZ) \
void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \
{ \
sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \
sve_st1##NAME##_tlb); \
sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \
sve_st1##NAME##_host, sve_st1##NAME##_tlb); \
}
#define DO_STN_2(N, NAME, ESIZE, MSIZE) \
void QEMU_FLATTEN HELPER(sve_st##N##NAME##_le_r) \
(CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
#define DO_STN_2(N, NAME, ESZ, MSZ) \
void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \
{ \
sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \
sve_st1##NAME##_le_tlb); \
sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \
sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \
} \
void QEMU_FLATTEN HELPER(sve_st##N##NAME##_be_r) \
(CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \
{ \
sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \
sve_st1##NAME##_be_tlb); \
sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \
sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \
}
DO_STN_1(1, bb, 1)
DO_STN_1(1, bh, 2)
DO_STN_1(1, bs, 4)
DO_STN_1(1, bd, 8)
DO_STN_1(2, bb, 1)
DO_STN_1(3, bb, 1)
DO_STN_1(4, bb, 1)
DO_STN_1(1, bb, MO_8)
DO_STN_1(1, bh, MO_16)
DO_STN_1(1, bs, MO_32)
DO_STN_1(1, bd, MO_64)
DO_STN_1(2, bb, MO_8)
DO_STN_1(3, bb, MO_8)
DO_STN_1(4, bb, MO_8)
DO_STN_2(1, hh, 2, 2)
DO_STN_2(1, hs, 4, 2)
DO_STN_2(1, hd, 8, 2)
DO_STN_2(2, hh, 2, 2)
DO_STN_2(3, hh, 2, 2)
DO_STN_2(4, hh, 2, 2)
DO_STN_2(1, hh, MO_16, MO_16)
DO_STN_2(1, hs, MO_32, MO_16)
DO_STN_2(1, hd, MO_64, MO_16)
DO_STN_2(2, hh, MO_16, MO_16)
DO_STN_2(3, hh, MO_16, MO_16)
DO_STN_2(4, hh, MO_16, MO_16)
DO_STN_2(1, ss, 4, 4)
DO_STN_2(1, sd, 8, 4)
DO_STN_2(2, ss, 4, 4)
DO_STN_2(3, ss, 4, 4)
DO_STN_2(4, ss, 4, 4)
DO_STN_2(1, ss, MO_32, MO_32)
DO_STN_2(1, sd, MO_64, MO_32)
DO_STN_2(2, ss, MO_32, MO_32)
DO_STN_2(3, ss, MO_32, MO_32)
DO_STN_2(4, ss, MO_32, MO_32)
DO_STN_2(1, dd, 8, 8)
DO_STN_2(2, dd, 8, 8)
DO_STN_2(3, dd, 8, 8)
DO_STN_2(4, dd, 8, 8)
DO_STN_2(1, dd, MO_64, MO_64)
DO_STN_2(2, dd, MO_64, MO_64)
DO_STN_2(3, dd, MO_64, MO_64)
DO_STN_2(4, dd, MO_64, MO_64)
#undef DO_STN_1
#undef DO_STN_2