tcg: Add tcg_gen_{non}atomic_cmpxchg_i128

This will allow targets to avoid rolling their own.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2022-11-09 00:23:44 +11:00
parent cb48f3654e
commit 123ae5683c
4 changed files with 146 additions and 0 deletions

View File

@ -55,8 +55,53 @@ CMPXCHG_HELPER(cmpxchgq_be, uint64_t)
CMPXCHG_HELPER(cmpxchgq_le, uint64_t)
#endif
#ifdef CONFIG_CMPXCHG128
CMPXCHG_HELPER(cmpxchgo_be, Int128)
CMPXCHG_HELPER(cmpxchgo_le, Int128)
#endif
#undef CMPXCHG_HELPER
Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
Int128 cmpv, Int128 newv, uint32_t oi)
{
#if TCG_TARGET_REG_BITS == 32
uintptr_t ra = GETPC();
Int128 oldv;
oldv = cpu_ld16_be_mmu(env, addr, oi, ra);
if (int128_eq(oldv, cmpv)) {
cpu_st16_be_mmu(env, addr, newv, oi, ra);
} else {
/* Even with comparison failure, still need a write cycle. */
probe_write(env, addr, 16, get_mmuidx(oi), ra);
}
return oldv;
#else
g_assert_not_reached();
#endif
}
Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
Int128 cmpv, Int128 newv, uint32_t oi)
{
#if TCG_TARGET_REG_BITS == 32
uintptr_t ra = GETPC();
Int128 oldv;
oldv = cpu_ld16_le_mmu(env, addr, oi, ra);
if (int128_eq(oldv, cmpv)) {
cpu_st16_le_mmu(env, addr, newv, oi, ra);
} else {
/* Even with comparison failure, still need a write cycle. */
probe_write(env, addr, 16, get_mmuidx(oi), ra);
}
return oldv;
#else
g_assert_not_reached();
#endif
}
#define ATOMIC_HELPER(OP, TYPE) \
TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \
TYPE val, uint32_t oi) \

View File

@ -55,6 +55,17 @@ DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
i64, env, tl, i64, i64, i32)
#endif
#ifdef CONFIG_CMPXCHG128
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG,
i128, env, tl, i128, i128, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
i128, env, tl, i128, i128, i32)
#endif
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_be, TCG_CALL_NO_WG,
i128, env, tl, i128, i128, i32)
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_le, TCG_CALL_NO_WG,
i128, env, tl, i128, i128, i32)
#ifdef CONFIG_ATOMIC64
#define GEN_ATOMIC_HELPERS(NAME) \

View File

@ -907,6 +907,11 @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
TCGArg, MemOp);
void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
TCGArg, MemOp);
void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
TCGArg, MemOp);
void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
TCGArg, MemOp);
void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);

View File

@ -3295,6 +3295,8 @@ typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
TCGv_i128, TCGv_i128, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
@ -3305,6 +3307,11 @@ typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
#else
# define WITH_ATOMIC64(X)
#endif
#ifdef CONFIG_CMPXCHG128
# define WITH_ATOMIC128(X) X,
#else
# define WITH_ATOMIC128(X)
#endif
static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_8] = gen_helper_atomic_cmpxchgb,
@ -3314,6 +3321,8 @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
@ -3412,6 +3421,82 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
}
}
void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
TCGv_i128 newv, TCGArg idx, MemOp memop)
{
if (TCG_TARGET_REG_BITS == 32) {
/* Inline expansion below is simply too large for 32-bit hosts. */
gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
? gen_helper_nonatomic_cmpxchgo_le
: gen_helper_nonatomic_cmpxchgo_be);
MemOpIdx oi = make_memop_idx(memop, idx);
tcg_debug_assert((memop & MO_SIZE) == MO_128);
tcg_debug_assert((memop & MO_SIGN) == 0);
gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
} else {
TCGv_i128 oldv = tcg_temp_new_i128();
TCGv_i128 tmpv = tcg_temp_new_i128();
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
TCGv_i64 z = tcg_constant_i64(0);
tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
/* Compare i128 */
tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
tcg_gen_or_i64(t0, t0, t1);
/* tmpv = equal ? newv : oldv */
tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
TCGV128_LOW(newv), TCGV128_LOW(oldv));
tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
/* Unconditional writeback. */
tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
tcg_gen_mov_i128(retv, oldv);
tcg_temp_free_i64(t0);
tcg_temp_free_i64(t1);
tcg_temp_free_i128(tmpv);
tcg_temp_free_i128(oldv);
}
}
void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
TCGv_i128 newv, TCGArg idx, MemOp memop)
{
gen_atomic_cx_i128 gen;
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
return;
}
tcg_debug_assert((memop & MO_SIZE) == MO_128);
tcg_debug_assert((memop & MO_SIGN) == 0);
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
if (gen) {
MemOpIdx oi = make_memop_idx(memop, idx);
gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
return;
}
gen_helper_exit_atomic(cpu_env);
/*
* Produce a result for a well-formed opcode stream. This satisfies
* liveness for set before used, which happens before this dead code
* is removed.
*/
tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
}
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
TCGArg idx, MemOp memop, bool new_val,
void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))