diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 1f4f3e0485..1e36a839ee 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3158,6 +3158,11 @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1) * the same thing as the current security state of the processor! */ FIELD(TBFLAG_A32, NS, 10, 1) +/* + * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. + * This requires an SME trap from AArch32 mode when using NEON. + */ +FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1) /* * Bit usage when in AArch32 state, for M-profile only. @@ -3195,6 +3200,8 @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2) FIELD(TBFLAG_A64, PSTATE_SM, 22, 1) FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1) FIELD(TBFLAG_A64, SVL, 24, 4) +/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */ +FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1) /* * Helpers for using the above. diff --git a/target/arm/helper.c b/target/arm/helper.c index e6f37e160f..73a5b2b86d 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -6098,6 +6098,32 @@ int sme_exception_el(CPUARMState *env, int el) return 0; } +/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */ +static bool sme_fa64(CPUARMState *env, int el) +{ + if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) { + return false; + } + + if (el <= 1 && !el_is_in_host(env, el)) { + if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) { + return false; + } + } + if (el <= 2 && arm_is_el2_enabled(env)) { + if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) { + return false; + } + } + if (arm_feature(env, ARM_FEATURE_EL3)) { + if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) { + return false; + } + } + + return true; +} + /* * Given that SVE is enabled, return the vector length for EL. 
*/ @@ -10801,6 +10827,20 @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el, DP_TBFLAG_ANY(flags, PSTATE__IL, 1); } + /* + * The SME exception we are testing for is raised via + * AArch64.CheckFPAdvSIMDEnabled(), as called from + * AArch32.CheckAdvSIMDOrFPEnabled(). + */ + if (el == 0 + && FIELD_EX64(env->svcr, SVCR, SM) + && (!arm_is_el2_enabled(env) + || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE))) + && arm_el_is_aa64(env, 1) + && !sme_fa64(env, el)) { + DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1); + } + return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); } @@ -10850,6 +10890,7 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, } if (FIELD_EX64(env->svcr, SVCR, SM)) { DP_TBFLAG_A64(flags, PSTATE_SM, 1); + DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el)); } DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA)); } diff --git a/target/arm/meson.build b/target/arm/meson.build index 6dd7e93643..87e911b27f 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -1,6 +1,7 @@ gen = [ decodetree.process('sve.decode', extra_args: '--decode=disas_sve'), decodetree.process('sme.decode', extra_args: '--decode=disas_sme'), + decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'), decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'), decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'), decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'), diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode new file mode 100644 index 0000000000..3d90837fc7 --- /dev/null +++ b/target/arm/sme-fa64.decode @@ -0,0 +1,90 @@ +# AArch64 SME allowed instruction decoding +# +# Copyright (c) 2022 Linaro, Ltd +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free 
Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. + +# +# This file is processed by scripts/decodetree.py +# + +# These patterns are taken from Appendix E1.1 of DDI0616 A.a, +# Arm Architecture Reference Manual Supplement, +# The Scalable Matrix Extension (SME), for Armv9-A + +{ + [ + OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0] + OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0] + OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0] + OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0] + OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0] + OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0] + OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0] + ] + FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations +} + +{ + [ + OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar) + OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16) + OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar) + OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16) + ] + FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations +} + +FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store +FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions +FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS + +# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions +# We don't
actually need to include these, as the default is OK. +# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations +# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers +# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal) +# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm) +# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) +# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) + +FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR +FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA +FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT +FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS +FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR +FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP +FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result) +FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA +FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL +FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD +FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA +FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA +FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions +FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar) +FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm) +FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector) +FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm) +FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector) +FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector) +FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector) +FAIL 
1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar) +FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm) +FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar) +FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm) +FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch +FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar) +FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar) +FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector) +FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index a5f8a6c771..7fab7f64f8 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -1155,7 +1155,7 @@ static void do_vec_ld(DisasContext *s, int destidx, int element, * unallocated-encoding checks (otherwise the syndrome information * for the resulting exception will be incorrect). */ -static bool fp_access_check(DisasContext *s) +static bool fp_access_check_only(DisasContext *s) { if (s->fp_excp_el) { assert(!s->fp_access_checked); @@ -1170,6 +1170,19 @@ static bool fp_access_check(DisasContext *s) return true; } +static bool fp_access_check(DisasContext *s) +{ + if (!fp_access_check_only(s)) { + return false; + } + if (s->sme_trap_nonstreaming && s->is_nonstreaming) { + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, + syn_smetrap(SME_ET_Streaming, false)); + return false; + } + return true; +} + /* Check that SVE access is enabled. If it is, return true. * If not, emit code to generate an appropriate exception and return false. 
*/ @@ -1994,7 +2007,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, default: g_assert_not_reached(); } - if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) { + if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { return; } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { return; @@ -14530,6 +14543,23 @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn) } } +/* + * Include the generated SME FA64 decoder. + */ + +#include "decode-sme-fa64.c.inc" + +static bool trans_OK(DisasContext *s, arg_OK *a) +{ + return true; +} + +static bool trans_FAIL(DisasContext *s, arg_OK *a) +{ + s->is_nonstreaming = true; + return true; +} + /** * is_guarded_page: * @env: The cpu environment @@ -14657,6 +14687,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); + dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; @@ -14805,6 +14836,11 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) } } + s->is_nonstreaming = false; + if (s->sme_trap_nonstreaming) { + disas_sme_fa64(s, insn); + } + switch (extract32(insn, 25, 4)) { case 0x0: if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) { diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c index 82fdbcae53..bd5ae27d09 100644 --- a/target/arm/translate-vfp.c +++ b/target/arm/translate-vfp.c @@ -234,6 +234,18 @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled) return false; } + /* + * Note that rebuild_hflags_a32 has already accounted for being in EL0 + * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not + * appear to be any insns which touch VFP which are allowed. 
+ */ + if (s->sme_trap_nonstreaming) { + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, + syn_smetrap(SME_ET_Streaming, + s->base.pc_next - s->pc_curr == 2)); + return false; + } + if (!s->vfp_enabled && !ignore_vfp_enabled) { assert(!arm_dc_feature(s, ARM_FEATURE_M)); unallocated_encoding(s); diff --git a/target/arm/translate.c b/target/arm/translate.c index 6617de775f..4ffb095c73 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -9378,6 +9378,8 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN); dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE); } + dc->sme_trap_nonstreaming = + EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING); } dc->cp_regs = cpu->cp_regs; dc->features = env->features; diff --git a/target/arm/translate.h b/target/arm/translate.h index 22fd882368..cbc907c751 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -102,6 +102,10 @@ typedef struct DisasContext { bool pstate_sm; /* True if PSTATE.ZA is set. */ bool pstate_za; + /* True if non-streaming insns should raise an SME Streaming exception. */ + bool sme_trap_nonstreaming; + /* True if the current instruction is non-streaming. */ + bool is_nonstreaming; /* True if MVE insns are definitely not predicated by VPR or LTPSIZE */ bool mve_no_pred; /*