diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index c654279a6c..8433010d5c 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -787,6 +787,8 @@ static void add_qemu_cpu_model_features(S390FeatBitmap fbm)
         S390_FEAT_ETF2_ENH,
         S390_FEAT_STORE_CLOCK_FAST,
         S390_FEAT_MOVE_WITH_OPTIONAL_SPEC,
+        S390_FEAT_COMPARE_AND_SWAP_AND_STORE,
+        S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2,
         S390_FEAT_GENERAL_INSTRUCTIONS_EXT,
         S390_FEAT_EXECUTE_EXT,
         S390_FEAT_FLOATING_POINT_SUPPPORT_ENH,
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 964097b2ce..23e8d1d551 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -33,6 +33,7 @@ DEF_HELPER_3(celgb, i64, env, i64, i32)
 DEF_HELPER_3(cdlgb, i64, env, i64, i32)
 DEF_HELPER_3(cxlgb, i64, env, i64, i32)
 DEF_HELPER_4(cdsg, void, env, i64, i32, i32)
+DEF_HELPER_4(csst, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(adb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index d3bb8516ed..6ac12b8b97 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -265,6 +265,8 @@
     D(0xbb00, CDS,     RS_a,  Z,   r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
     D(0xeb31, CDSY,    RSY_a, LD,  r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
     C(0xeb3e, CDSG,    RSY_a, Z,   0, 0, 0, 0, cdsg, 0)
+/* COMPARE AND SWAP AND STORE */
+    C(0xc802, CSST,    SSF,   CASS, la1, a2, 0, 0, csst, 0)
 /* COMPARE AND TRAP */
     D(0xb972, CRT,     RRF_c, GIE, r1_32s, r2_32s, 0, 0, ct, 0, 0)
     D(0xb973, CLRT,    RRF_c, GIE, r1_32u, r2_32u, 0, 0, ct, 0, 1)
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index ede84711d1..7424f7f62f 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -1353,6 +1353,202 @@ void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
     env->regs[r1 + 1] = int128_getlo(oldv);
 }
 
+uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1,
+                      uint64_t a2)
+{
+#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
+    uint32_t mem_idx = cpu_mmu_index(env, false);
+#endif
+    uintptr_t ra = GETPC();
+    uint32_t fc = extract32(env->regs[0], 0, 8);
+    uint32_t sc = extract32(env->regs[0], 8, 8);
+    uint64_t pl = get_address(env, 1) & -16;
+    uint64_t svh, svl;
+    uint32_t cc;
+
+    /* Sanity check the function code and storage characteristic.  */
+    if (fc > 1 || sc > 3) {
+        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
+            goto spec_exception;
+        }
+        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
+            goto spec_exception;
+        }
+    }
+
+    /* Sanity check the alignments: the first operand must be aligned on
+       its (4 << fc)-byte size and the store address on its (1 << sc)-byte
+       size.  */
+    if ((a1 & ((4 << fc) - 1)) || (a2 & ((1 << sc) - 1))) {
+        goto spec_exception;
+    }
+
+    /* Sanity check writability of the store address.  */
+#ifndef CONFIG_USER_ONLY
+    probe_write(env, a2, mem_idx, ra);
+#endif
+
+    /* Note that the compare-and-swap is atomic, and the store is atomic,
+       but the complete operation is not.  Therefore we do not need to
+       assert serial context in order to implement this.  That said,
+       restart early if we can't support either operation that is supposed
+       to be atomic.  */
+    if (parallel_cpus) {
+        int mask = 0;
+#if !defined(CONFIG_ATOMIC64)
+        mask = -8;
+#elif !defined(CONFIG_ATOMIC128)
+        mask = -16;
+#endif
+        if (((4 << fc) | (1 << sc)) & mask) {
+            cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+        }
+    }
+
+    /* All loads happen before all stores.  For simplicity, load the entire
+       store value area from the parameter list.  */
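+    /* Bytes 0-15 of the parameter list hold the replacement value,
+       bytes 16-31 the store value, each left-justified in its field.  */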
+    svh = cpu_ldq_data_ra(env, pl + 16, ra);
+    svl = cpu_ldq_data_ra(env, pl + 24, ra);
+
+    switch (fc) {
+    case 0:
+        {
+            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
+            uint32_t cv = env->regs[r3];
+            uint32_t ov;
+
+            if (parallel_cpus) {
+#ifdef CONFIG_USER_ONLY
+                uint32_t *haddr = g2h(a1);
+                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
+#else
+                TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
+                ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
+#endif
+            } else {
+                ov = cpu_ldl_data_ra(env, a1, ra);
+                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
+            }
+            cc = (ov != cv);
+            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
+        }
+        break;
+
+    case 1:
+        {
+            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
+            uint64_t cv = env->regs[r3];
+            uint64_t ov;
+
+            if (parallel_cpus) {
+#ifdef CONFIG_ATOMIC64
+# ifdef CONFIG_USER_ONLY
+                uint64_t *haddr = g2h(a1);
+                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
+# else
+                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
+                ov = helper_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
+# endif
+#else
+                /* Note that we asserted !parallel_cpus above.  */
+                g_assert_not_reached();
+#endif
+            } else {
+                ov = cpu_ldq_data_ra(env, a1, ra);
+                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
+            }
+            cc = (ov != cv);
+            env->regs[r3] = ov;
+        }
+        break;
+
+    case 2:
+        {
+            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
+            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
+            Int128 nv = int128_make128(nvl, nvh);
+            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
+            Int128 ov;
+
+            if (parallel_cpus) {
+#ifdef CONFIG_ATOMIC128
+                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
+                cc = !int128_eq(ov, cv);
+#else
+                /* Note that we asserted !parallel_cpus above.  */
+                g_assert_not_reached();
+#endif
+            } else {
+                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
+                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
+
+                ov = int128_make128(ol, oh);
+                cc = !int128_eq(ov, cv);
+                if (cc) {
+                    nv = ov;
+                }
+
+                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
+                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
+            }
+
+            env->regs[r3 + 0] = int128_gethi(ov);
+            env->regs[r3 + 1] = int128_getlo(ov);
+        }
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    /* Store only if the comparison succeeded.  Note that above we use a
+       pair of 64-bit big-endian loads, so for sc < 3 we must extract the
+       value from the most-significant bits of svh.  */
+    if (cc == 0) {
+        switch (sc) {
+        case 0:
+            cpu_stb_data_ra(env, a2, svh >> 56, ra);
+            break;
+        case 1:
+            cpu_stw_data_ra(env, a2, svh >> 48, ra);
+            break;
+        case 2:
+            cpu_stl_data_ra(env, a2, svh >> 32, ra);
+            break;
+        case 3:
+            cpu_stq_data_ra(env, a2, svh, ra);
+            break;
+        case 4:
+            if (parallel_cpus) {
+#ifdef CONFIG_ATOMIC128
+                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+                Int128 sv = int128_make128(svl, svh);
+                helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
+#else
+                /* Note that we asserted !parallel_cpus above.  */
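+                /* (Without CONFIG_ATOMIC128, the restart test above
+                   already routed sc == 4 to the serial path.)  */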
+                g_assert_not_reached();
+#endif
+            } else {
+                cpu_stq_data_ra(env, a2 + 0, svh, ra);
+                cpu_stq_data_ra(env, a2 + 8, svl, ra);
+            }
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
+
+    return cc;
+
+ spec_exception:
+    cpu_restore_state(ENV_GET_CPU(env), ra);
+    program_interrupt(env, PGM_SPECIFICATION, 6);
+    g_assert_not_reached();
+}
+
 #if !defined(CONFIG_USER_ONLY)
 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
 {
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 592d6b0f38..e7395256d2 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -2033,6 +2033,18 @@ static ExitStatus op_cdsg(DisasContext *s, DisasOps *o)
     return NO_EXIT;
 }
 
+static ExitStatus op_csst(DisasContext *s, DisasOps *o)
+{
+    int r3 = get_field(s->fields, r3);
+    TCGv_i32 t_r3 = tcg_const_i32(r3);
+
+    gen_helper_csst(cc_op, cpu_env, t_r3, o->in1, o->in2);
+    tcg_temp_free_i32(t_r3);
+
+    set_cc_static(s);
+    return NO_EXIT;
+}
+
 #ifndef CONFIG_USER_ONLY
 static ExitStatus op_csp(DisasContext *s, DisasOps *o)
 {
@@ -5437,7 +5449,6 @@ enum DisasInsnEnum {
 /* Give smaller names to the various facilities. */
 #define FAC_Z           S390_FEAT_ZARCH
 #define FAC_CASS        S390_FEAT_COMPARE_AND_SWAP_AND_STORE
-#define FAC_CASS2       S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2
 #define FAC_DFP         S390_FEAT_DFP
 #define FAC_DFPR        S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* DFP-rounding */
 #define FAC_DO          S390_FEAT_STFLE_45 /* distinct-operands */