target/arm: Handle SVE vector length changes in system mode
SVE vector length can change when changing EL, or when writing to one of the ZCR_ELn registers. For correctness, our implementation requires that predicate bits that are inaccessible are never set, which means noticing length changes and zeroing the appropriate register bits. Tested-by: Laurent Desnogues <laurent.desnogues@gmail.com> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20181005175350.30752-5-richard.henderson@linaro.org Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
2de7ace292
commit
0ab5953b00
@ -910,6 +910,10 @@ int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
|
||||
int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
|
||||
int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
|
||||
void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
|
||||
void aarch64_sve_change_el(CPUARMState *env, int old_el, int new_el);
|
||||
#else
|
||||
static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
|
||||
static inline void aarch64_sve_change_el(CPUARMState *env, int o, int n) { }
|
||||
#endif
|
||||
|
||||
target_ulong do_arm_semihosting(CPUARMState *env);
|
||||
|
@ -410,45 +410,3 @@ static void aarch64_cpu_register_types(void)
|
||||
}
|
||||
|
||||
type_init(aarch64_cpu_register_types)
|
||||
|
||||
/* The manual says that when SVE is enabled and VQ is widened the
|
||||
* implementation is allowed to zero the previously inaccessible
|
||||
* portion of the registers. The corollary to that is that when
|
||||
* SVE is enabled and VQ is narrowed we are also allowed to zero
|
||||
* the now inaccessible portion of the registers.
|
||||
*
|
||||
* The intent of this is that no predicate bit beyond VQ is ever set.
|
||||
* Which means that some operations on predicate registers themselves
|
||||
* may operate on full uint64_t or even unrolled across the maximum
|
||||
* uint64_t[4]. Performing 4 bits of host arithmetic unconditionally
|
||||
* may well be cheaper than conditionals to restrict the operation
|
||||
* to the relevant portion of a uint16_t[16].
|
||||
*
|
||||
* TODO: Need to call this for changes to the real system registers
|
||||
* and EL state changes.
|
||||
*/
|
||||
void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
|
||||
{
|
||||
int i, j;
|
||||
uint64_t pmask;
|
||||
|
||||
assert(vq >= 1 && vq <= ARM_MAX_VQ);
|
||||
assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
|
||||
|
||||
/* Zap the high bits of the zregs. */
|
||||
for (i = 0; i < 32; i++) {
|
||||
memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
|
||||
}
|
||||
|
||||
/* Zap the high bits of the pregs and ffr. */
|
||||
pmask = 0;
|
||||
if (vq & 3) {
|
||||
pmask = ~(-1ULL << (16 * (vq & 3)));
|
||||
}
|
||||
for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
|
||||
for (i = 0; i < 17; ++i) {
|
||||
env->vfp.pregs[i].p[j] &= pmask;
|
||||
}
|
||||
pmask = 0;
|
||||
}
|
||||
}
|
||||
|
@ -4461,11 +4461,44 @@ static int sve_exception_el(CPUARMState *env, int el)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given that SVE is enabled, return the vector length for EL.
|
||||
*/
|
||||
static uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
|
||||
{
|
||||
ARMCPU *cpu = arm_env_get_cpu(env);
|
||||
uint32_t zcr_len = cpu->sve_max_vq - 1;
|
||||
|
||||
if (el <= 1) {
|
||||
zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
|
||||
}
|
||||
if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
|
||||
zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
|
||||
}
|
||||
if (el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
|
||||
zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
|
||||
}
|
||||
return zcr_len;
|
||||
}
|
||||
|
||||
/*
 * Write handler for the ZCR_ELn registers.
 *
 * Stores the low four bits of VALUE, then compares the effective vector
 * length at the current EL before and after the store.  If the length
 * shrank, the register state that is no longer accessible is zeroed.
 */
static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                      uint64_t value)
{
    const int el = arm_current_el(env);
    const int len_before = sve_zcr_len_for_el(env, el);
    int len_after;

    /* Only bits [3:0] are writable; everything else is RAZ/WI. */
    raw_write(env, ri, value & 0xf);

    /*
     * Reaching this point implies that both FP and SVE are enabled,
     * since the access to ZCR_ELn would otherwise have trapped.
     */
    len_after = sve_zcr_len_for_el(env, el);
    if (len_after >= len_before) {
        return;
    }

    /* Lengths are in LEN form (quadwords minus one); convert to VQ. */
    aarch64_sve_narrow_vq(env, len_after + 1);
}
|
||||
|
||||
static const ARMCPRegInfo zcr_el1_reginfo = {
|
||||
@ -8304,8 +8337,11 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
|
||||
unsigned int new_el = env->exception.target_el;
|
||||
target_ulong addr = env->cp15.vbar_el[new_el];
|
||||
unsigned int new_mode = aarch64_pstate_mode(new_el, true);
|
||||
unsigned int cur_el = arm_current_el(env);
|
||||
|
||||
if (arm_current_el(env) < new_el) {
|
||||
aarch64_sve_change_el(env, cur_el, new_el);
|
||||
|
||||
if (cur_el < new_el) {
|
||||
/* Entry vector offset depends on whether the implemented EL
|
||||
* immediately lower than the target level is using AArch32 or AArch64
|
||||
*/
|
||||
@ -12597,18 +12633,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
|
||||
if (sve_el != 0 && fp_el == 0) {
|
||||
zcr_len = 0;
|
||||
} else {
|
||||
ARMCPU *cpu = arm_env_get_cpu(env);
|
||||
|
||||
zcr_len = cpu->sve_max_vq - 1;
|
||||
if (current_el <= 1) {
|
||||
zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
|
||||
}
|
||||
if (current_el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
|
||||
zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
|
||||
}
|
||||
if (current_el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
|
||||
zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
|
||||
}
|
||||
zcr_len = sve_zcr_len_for_el(env, current_el);
|
||||
}
|
||||
flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT;
|
||||
flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT;
|
||||
@ -12664,3 +12689,85 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
|
||||
*pflags = flags;
|
||||
*cs_base = 0;
|
||||
}
|
||||
|
||||
#ifdef TARGET_AARCH64
|
||||
/*
|
||||
* The manual says that when SVE is enabled and VQ is widened the
|
||||
* implementation is allowed to zero the previously inaccessible
|
||||
* portion of the registers. The corollary to that is that when
|
||||
* SVE is enabled and VQ is narrowed we are also allowed to zero
|
||||
* the now inaccessible portion of the registers.
|
||||
*
|
||||
* The intent of this is that no predicate bit beyond VQ is ever set.
|
||||
* Which means that some operations on predicate registers themselves
|
||||
* may operate on full uint64_t or even unrolled across the maximum
|
||||
* uint64_t[4]. Performing 4 bits of host arithmetic unconditionally
|
||||
* may well be cheaper than conditionals to restrict the operation
|
||||
* to the relevant portion of a uint16_t[16].
|
||||
*/
|
||||
void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
|
||||
{
|
||||
int i, j;
|
||||
uint64_t pmask;
|
||||
|
||||
assert(vq >= 1 && vq <= ARM_MAX_VQ);
|
||||
assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
|
||||
|
||||
/* Zap the high bits of the zregs. */
|
||||
for (i = 0; i < 32; i++) {
|
||||
memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
|
||||
}
|
||||
|
||||
/* Zap the high bits of the pregs and ffr. */
|
||||
pmask = 0;
|
||||
if (vq & 3) {
|
||||
pmask = ~(-1ULL << (16 * (vq & 3)));
|
||||
}
|
||||
for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
|
||||
for (i = 0; i < 17; ++i) {
|
||||
env->vfp.pregs[i].p[j] &= pmask;
|
||||
}
|
||||
pmask = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Notice a change in SVE vector size when changing EL.
|
||||
*/
|
||||
void aarch64_sve_change_el(CPUARMState *env, int old_el, int new_el)
|
||||
{
|
||||
int old_len, new_len;
|
||||
|
||||
/* Nothing to do if no SVE. */
|
||||
if (!arm_feature(env, ARM_FEATURE_SVE)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Nothing to do if FP is disabled in either EL. */
|
||||
if (fp_exception_el(env, old_el) || fp_exception_el(env, new_el)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
|
||||
* at ELx, or not available because the EL is in AArch32 state, then
|
||||
* for all purposes other than a direct read, the ZCR_ELx.LEN field
|
||||
* has an effective value of 0".
|
||||
*
|
||||
* Consider EL2 (aa64, vq=4) -> EL0 (aa32) -> EL1 (aa64, vq=0).
|
||||
* If we ignore aa32 state, we would fail to see the vq4->vq0 transition
|
||||
* from EL2->EL1. Thus we go ahead and narrow when entering aa32 so that
|
||||
* we already have the correct register contents when encountering the
|
||||
* vq0->vq0 transition between EL0->EL1.
|
||||
*/
|
||||
old_len = (arm_el_is_aa64(env, old_el) && !sve_exception_el(env, old_el)
|
||||
? sve_zcr_len_for_el(env, old_el) : 0);
|
||||
new_len = (arm_el_is_aa64(env, new_el) && !sve_exception_el(env, new_el)
|
||||
? sve_zcr_len_for_el(env, new_el) : 0);
|
||||
|
||||
/* When changing vector length, clear inaccessible state. */
|
||||
if (new_len < old_len) {
|
||||
aarch64_sve_narrow_vq(env, new_len + 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -1082,6 +1082,7 @@ void HELPER(exception_return)(CPUARMState *env)
|
||||
"AArch64 EL%d PC 0x%" PRIx64 "\n",
|
||||
cur_el, new_el, env->pc);
|
||||
}
|
||||
aarch64_sve_change_el(env, cur_el, new_el);
|
||||
|
||||
qemu_mutex_lock_iothread();
|
||||
arm_call_el_change_hook(arm_env_get_cpu(env));
|
||||
|
Loading…
x
Reference in New Issue
Block a user