From d450bd0157be43d273116c3e3617883c8a0ac3d1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 6 Jun 2023 10:19:35 +0100 Subject: [PATCH] target/arm: Use tcg_gen_qemu_{st, ld}_i128 for do_fp_{st, ld} While we don't require 16-byte atomicity here, using a single larger operation simplifies the code. Introduce finalize_memop_asimd for this. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20230530191438.411344-6-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/tcg/translate-a64.c | 35 +++++++++++----------------------- target/arm/tcg/translate.h | 24 +++++++++++++++++++++++ 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 1fff74c73a..3674fc1bc1 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -911,26 +911,20 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) { /* This writes the bottom N bits of a 128 bit wide vector to memory */ TCGv_i64 tmplo = tcg_temp_new_i64(); - MemOp mop; + MemOp mop = finalize_memop_asimd(s, size); tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64)); - if (size < 4) { - mop = finalize_memop(s, size); + if (size < MO_128) { tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); } else { - bool be = s->be_data == MO_BE; - TCGv_i64 tcg_hiaddr = tcg_temp_new_i64(); TCGv_i64 tmphi = tcg_temp_new_i64(); + TCGv_i128 t16 = tcg_temp_new_i128(); tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx)); + tcg_gen_concat_i64_i128(t16, tmplo, tmphi); - mop = s->be_data | MO_UQ; - tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s), - mop | (s->align_mem ? MO_ALIGN_16 : 0)); - tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr, - get_mem_index(s), mop); + tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); } } @@ -942,24 +936,17 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) /* This always zero-extends and writes to a full 128 bit wide vector */ TCGv_i64 tmplo = tcg_temp_new_i64(); TCGv_i64 tmphi = NULL; - MemOp mop; + MemOp mop = finalize_memop_asimd(s, size); - if (size < 4) { - mop = finalize_memop(s, size); + if (size < MO_128) { tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); } else { - bool be = s->be_data == MO_BE; - TCGv_i64 tcg_hiaddr; + TCGv_i128 t16 = tcg_temp_new_i128(); + + tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); tmphi = tcg_temp_new_i64(); - tcg_hiaddr = tcg_temp_new_i64(); - - mop = s->be_data | MO_UQ; - tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s), - mop | (s->align_mem ? MO_ALIGN_16 : 0)); - tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr, - get_mem_index(s), mop); + tcg_gen_extr_i128_i64(tmplo, tmphi, t16); } tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64)); diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h index c1e57a52ca..3aa486a1ab 100644 --- a/target/arm/tcg/translate.h +++ b/target/arm/tcg/translate.h @@ -609,6 +609,30 @@ static inline MemOp finalize_memop_pair(DisasContext *s, MemOp opc) return finalize_memop_atom(s, opc, atom); } +/** + * finalize_memop_asimd: + * @s: DisasContext + * @opc: size+sign+align of the memory operation + * + * Like finalize_memop_atom, but with atomicity of AccessType_ASIMD. + */ +static inline MemOp finalize_memop_asimd(DisasContext *s, MemOp opc) +{ + /* + * In the pseudocode for Mem[], with AccessType_ASIMD, size == 16, + * if IsAligned(8), the first case provides separate atomicity for + * the pair of 64-bit accesses. If !IsAligned(8), the middle cases + * do not apply, and we're left with the final case of no atomicity. + * Thus MO_ATOM_IFALIGN_PAIR. + * + * For other sizes, normal LSE2 rules apply. + */ + if ((opc & MO_SIZE) == MO_128) { + return finalize_memop_atom(s, opc, MO_ATOM_IFALIGN_PAIR); + } + return finalize_memop(s, opc); +} + /** * asimd_imm_const: Expand an encoded SIMD constant value *