tcg: Reduce serial context atomicity earlier

Reduce atomicity while emitting opcodes, instead of later
during code generation.  This ensures that any helper called
also sees the reduced atomicity requirement.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2034
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20231212193542.149117-1-richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2023-12-12 11:35:42 -08:00
parent b5e0d5d22f
commit cbb145567c
2 changed files with 25 additions and 12 deletions

View File

@@ -77,6 +77,13 @@ static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
     if (st) {
         op &= ~MO_SIGN;
     }
+
+    /* In serial mode, reduce atomicity. */
+    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
+        op &= ~MO_ATOM_MASK;
+        op |= MO_ATOM_NONE;
+    }
+
     return op;
 }
@@ -428,8 +435,7 @@ static bool use_two_i64_for_i128(MemOp mop)
     case MO_ATOM_SUBALIGN:
     case MO_ATOM_WITHIN16:
     case MO_ATOM_WITHIN16_PAIR:
-        /* In a serialized context, no atomicity is required. */
-        return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
+        return false;
     default:
         g_assert_not_reached();
     }
@@ -499,13 +505,20 @@ static void maybe_free_addr64(TCGv_i64 a64)
 static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
                                      TCGArg idx, MemOp memop)
 {
-    const MemOpIdx orig_oi = make_memop_idx(memop, idx);
+    MemOpIdx orig_oi;
     TCGv_i64 ext_addr = NULL;
     TCGOpcode opc;
 
     check_max_alignment(get_alignment_bits(memop));
     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
 
+    /* In serial mode, reduce atomicity. */
+    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
+        memop &= ~MO_ATOM_MASK;
+        memop |= MO_ATOM_NONE;
+    }
+    orig_oi = make_memop_idx(memop, idx);
+
     /* TODO: For now, force 32-bit hosts to use the helper. */
     if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
         TCGv_i64 lo, hi;
@@ -608,13 +621,20 @@ void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
 static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
                                      TCGArg idx, MemOp memop)
 {
-    const MemOpIdx orig_oi = make_memop_idx(memop, idx);
+    MemOpIdx orig_oi;
     TCGv_i64 ext_addr = NULL;
     TCGOpcode opc;
 
     check_max_alignment(get_alignment_bits(memop));
     tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
 
+    /* In serial mode, reduce atomicity. */
+    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
+        memop &= ~MO_ATOM_MASK;
+        memop |= MO_ATOM_NONE;
+    }
+    orig_oi = make_memop_idx(memop, idx);
+
     /* TODO: For now, force 32-bit hosts to use the helper. */
     if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {

View File

@@ -5440,15 +5440,8 @@ static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
     MemOp align = get_alignment_bits(opc);
     MemOp size = opc & MO_SIZE;
     MemOp half = size ? size - 1 : 0;
+    MemOp atom = opc & MO_ATOM_MASK;
     MemOp atmax;
-    MemOp atom;
-
-    /* When serialized, no further atomicity required. */
-    if (s->gen_tb->cflags & CF_PARALLEL) {
-        atom = opc & MO_ATOM_MASK;
-    } else {
-        atom = MO_ATOM_NONE;
-    }
 
     switch (atom) {
     case MO_ATOM_NONE: