diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h
index 2326405300..6dfcafe179 100644
--- a/target/arm/translate-a32.h
+++ b/target/arm/translate-a32.h
@@ -47,6 +47,8 @@ long neon_full_reg_offset(unsigned reg);
 long neon_element_offset(int reg, int element, MemOp memop);
 void gen_rev16(TCGv_i32 dest, TCGv_i32 var);
 void clear_eci_state(DisasContext *s);
+bool mve_eci_check(DisasContext *s);
+void mve_update_and_store_eci(DisasContext *s);
 
 static inline TCGv_i32 load_cpu_offset(int offset)
 {
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index 04d84e8846..67462bdf27 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -57,7 +57,7 @@ static bool mve_check_qreg_bank(DisasContext *s, int qmask)
     return qmask < 8;
 }
 
-static bool mve_eci_check(DisasContext *s)
+bool mve_eci_check(DisasContext *s)
 {
     /*
      * This is a beatwise insn: check that ECI is valid (not a
@@ -91,7 +91,7 @@ static void mve_update_eci(DisasContext *s)
     }
 }
 
-static void mve_update_and_store_eci(DisasContext *s)
+void mve_update_and_store_eci(DisasContext *s)
 {
     /*
      * For insns which don't call a helper function that will call
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
index 86e43c02dc..b2991e21ec 100644
--- a/target/arm/translate-vfp.c
+++ b/target/arm/translate-vfp.c
@@ -581,6 +581,48 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
     return true;
 }
 
+static bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
+{
+    /*
+     * In a CPU with MVE, the VMOV (vector lane to general-purpose register)
+     * and VMOV (general-purpose register to vector lane) insns are not
+     * predicated, but they are subject to beatwise execution if they are
+     * not in an IT block.
+     *
+     * Since our implementation always executes all 4 beats in one tick,
+     * this means only that if PSR.ECI says we should not be executing
+     * the beat corresponding to the lane of the vector register being
+     * accessed then we should skip performing the move, and that we need
+     * to do the usual check for bad ECI state and advance of ECI state.
+     *
+     * Note that if PSR.ECI is non-zero then we cannot be in an IT block.
+     *
+     * Return true if this VMOV scalar <-> gpreg should be skipped because
+     * the MVE PSR.ECI state says we skip the beat where the store happens.
+     */
+
+    /* Calculate the byte offset into Qn which we're going to access */
+    int ofs = (index << size) + ((vn & 1) * 8);
+
+    if (!dc_isar_feature(aa32_mve, s)) {
+        return false;
+    }
+
+    switch (s->eci) {
+    case ECI_NONE:
+        return false;
+    case ECI_A0:
+        return ofs < 4;
+    case ECI_A0A1:
+        return ofs < 8;
+    case ECI_A0A1A2:
+    case ECI_A0A1A2B0:
+        return ofs < 12;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
 {
     /* VMOV scalar to general purpose register */
@@ -603,14 +645,26 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
         return false;
     }
 
+    if (dc_isar_feature(aa32_mve, s)) {
+        if (!mve_eci_check(s)) {
+            return true;
+        }
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
 
-    tmp = tcg_temp_new_i32();
-    read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
-    store_reg(s, a->rt, tmp);
+    if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
+        tmp = tcg_temp_new_i32();
+        read_neon_element32(tmp, a->vn, a->index,
+                            a->size | (a->u ? 0 : MO_SIGN));
+        store_reg(s, a->rt, tmp);
+    }
+
+    if (dc_isar_feature(aa32_mve, s)) {
+        mve_update_and_store_eci(s);
+    }
     return true;
 }
@@ -636,14 +690,25 @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
         return false;
     }
 
+    if (dc_isar_feature(aa32_mve, s)) {
+        if (!mve_eci_check(s)) {
+            return true;
+        }
+    }
+
     if (!vfp_access_check(s)) {
         return true;
     }
 
-    tmp = load_reg(s, a->rt);
-    write_neon_element32(tmp, a->vn, a->index, a->size);
-    tcg_temp_free_i32(tmp);
+    if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
+        tmp = load_reg(s, a->rt);
+        write_neon_element32(tmp, a->vn, a->index, a->size);
+        tcg_temp_free_i32(tmp);
+    }
+
+    if (dc_isar_feature(aa32_mve, s)) {
+        mve_update_and_store_eci(s);
+    }
     return true;
 }
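
The offset-to-beat mapping in mve_skip_vmov() is the heart of the patch: each of the four MVE beats covers 4 bytes of the 16-byte Q register, and the odd D register of a Q pair holds bytes 8..15 (hence the "+ 8" for odd vn). The following is a standalone sketch, not part of the patch, illustrating that mapping outside of QEMU; the ECIState enum and skip_vmov() are stand-ins for the patch's DisasContext state and assume the conventional MemOp encoding (MO_8 = 0, MO_16 = 1, MO_32 = 2).

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative stand-in for QEMU's ECI states (s->eci in the patch) */
    typedef enum {
        ECI_NONE,      /* no beats of this insn executed yet */
        ECI_A0,        /* beat 0 already executed: skip bytes 0..3 */
        ECI_A0A1,      /* beats 0-1 already executed: skip bytes 0..7 */
        ECI_A0A1A2,    /* beats 0-2 already executed: skip bytes 0..11 */
        ECI_A0A1A2B0,  /* beats 0-2, plus beat 0 of the next insn */
    } ECIState;

    /* Mirrors mve_skip_vmov(): vn is the D register, index/size pick the lane */
    static bool skip_vmov(ECIState eci, int vn, int index, int size)
    {
        /* Byte offset of the accessed lane within the 16-byte Q register */
        int ofs = (index << size) + ((vn & 1) * 8);

        switch (eci) {
        case ECI_NONE:
            return false;
        case ECI_A0:
            return ofs < 4;
        case ECI_A0A1:
            return ofs < 8;
        case ECI_A0A1A2:
        case ECI_A0A1A2B0:
            return ofs < 12;
        default:
            assert(0);
        }
    }

    int main(void)
    {
        /*
         * VMOV.16 r0, Q1[5]: Q1 = D2:D3, and halfword lane 5 of the Q
         * register is lane 1 of D3, so vn = 3, index = 1, size = MO_16.
         * ofs = (1 << 1) + 8 = 10, i.e. beat 2 (bytes 8..11).
         */
        printf("%d\n", skip_vmov(ECI_A0A1, 3, 1, 1));   /* 0: beat 2 still runs */
        printf("%d\n", skip_vmov(ECI_A0A1A2, 3, 1, 1)); /* 1: beat 2 already done */
        return 0;
    }

This also shows why the check is purely static: the lane is fixed by the instruction encoding, so at translate time the beat is known and the whole move can be elided rather than predicated at runtime.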