linux-user: Implement aarch64 PR_SVE_SET/GET_VL

As an implementation choice, widening the VL zeroes the
previously inaccessible portion of the SVE registers.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20180303143823.27055-2-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Commit 85fc716732 by Richard Henderson, 2018-03-09 17:09:43 +00:00
Committed by Peter Maydell; parent 843361ed04
4 changed files with 72 additions and 0 deletions
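
For context, a guest program running under qemu-aarch64 can exercise the new prctl pair roughly as follows. This is a minimal illustrative sketch, not part of the patch; it assumes the Linux ABI values of the PR_SVE_* constants (falling back to the 50/51 values added below when the headers lack them) and a PR_SVE_VL_LEN_MASK of 0xffff.

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SVE_SET_VL
#define PR_SVE_SET_VL       50
#define PR_SVE_GET_VL       51
#endif
#ifndef PR_SVE_VL_LEN_MASK
#define PR_SVE_VL_LEN_MASK  0xffff
#endif

int main(void)
{
    /* Query the current SVE vector length in bytes. */
    int vl = prctl(PR_SVE_GET_VL);
    if (vl < 0) {
        perror("PR_SVE_GET_VL");
        return 1;
    }
    printf("current VL: %d bytes\n", vl & PR_SVE_VL_LEN_MASK);

    /* Request a 256-bit (32-byte) vector length; the return value is
     * the VL actually granted, which may be smaller than requested. */
    int ret = prctl(PR_SVE_SET_VL, 32);
    if (ret < 0) {
        perror("PR_SVE_SET_VL");
        return 1;
    }
    printf("granted VL: %d bytes\n", ret & PR_SVE_VL_LEN_MASK);
    return 0;
}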

@@ -19,4 +19,7 @@ struct target_pt_regs {
 #define TARGET_MLOCKALL_MCL_CURRENT 1
 #define TARGET_MLOCKALL_MCL_FUTURE 2
 
+#define TARGET_PR_SVE_SET_VL 50
+#define TARGET_PR_SVE_GET_VL 51
+
 #endif /* AARCH64_TARGET_SYSCALL_H */

@@ -10672,6 +10672,33 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             break;
         }
 #endif
+#ifdef TARGET_AARCH64
+        case TARGET_PR_SVE_SET_VL:
+            /* We cannot support either PR_SVE_SET_VL_ONEXEC
+               or PR_SVE_VL_INHERIT.  Therefore, anything above
+               ARM_MAX_VQ results in EINVAL.  */
+            ret = -TARGET_EINVAL;
+            if (arm_feature(cpu_env, ARM_FEATURE_SVE)
+                && arg2 >= 0 && arg2 <= ARM_MAX_VQ * 16 && !(arg2 & 15)) {
+                CPUARMState *env = cpu_env;
+                int old_vq = (env->vfp.zcr_el[1] & 0xf) + 1;
+                int vq = MAX(arg2 / 16, 1);
+
+                if (vq < old_vq) {
+                    aarch64_sve_narrow_vq(env, vq);
+                }
+                env->vfp.zcr_el[1] = vq - 1;
+                ret = vq * 16;
+            }
+            break;
+        case TARGET_PR_SVE_GET_VL:
+            ret = -TARGET_EINVAL;
+            if (arm_feature(cpu_env, ARM_FEATURE_SVE)) {
+                CPUARMState *env = cpu_env;
+                ret = ((env->vfp.zcr_el[1] & 0xf) + 1) * 16;
+            }
+            break;
+#endif /* AARCH64 */
         case PR_GET_SECCOMP:
         case PR_SET_SECCOMP:
             /* Disable seccomp to prevent the target disabling syscalls we
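
To make the VL/VQ bookkeeping above concrete: VL is the vector length in bytes, VQ counts 128-bit (16-byte) quadwords, and the ZCR_EL1.LEN field stores VQ - 1, which is why the handler converts with "/ 16", "& 0xf" and "+ 1". A standalone sketch of the two conversions follows; the helper names are illustrative, not part of the patch.

#include <assert.h>

#define ARM_MAX_VQ 16   /* QEMU's SVE maximum, i.e. 2048-bit vectors */

/* VL in bytes -> value stored in the ZCR LEN field (VQ - 1). */
static int vl_to_zcr_len(int vl_bytes)
{
    assert(vl_bytes >= 16 && vl_bytes <= ARM_MAX_VQ * 16 && (vl_bytes & 15) == 0);
    return vl_bytes / 16 - 1;            /* e.g. VL = 32 bytes -> LEN = 1 */
}

/* ZCR LEN field -> VL in bytes, as returned by TARGET_PR_SVE_GET_VL. */
static int zcr_len_to_vl(int zcr_len)
{
    return ((zcr_len & 0xf) + 1) * 16;   /* e.g. LEN = 1 -> VL = 32 bytes */
}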


@@ -866,6 +866,7 @@ int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
 #ifdef TARGET_AARCH64
 int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
 #endif
 
 target_ulong do_arm_semihosting(CPUARMState *env);

@@ -368,3 +368,44 @@ static void aarch64_cpu_register_types(void)
 }
 
 type_init(aarch64_cpu_register_types)
+
+/* The manual says that when SVE is enabled and VQ is widened the
+ * implementation is allowed to zero the previously inaccessible
+ * portion of the registers.  The corollary to that is that when
+ * SVE is enabled and VQ is narrowed we are also allowed to zero
+ * the now inaccessible portion of the registers.
+ *
+ * The intent of this is that no predicate bit beyond VQ is ever set.
+ * Which means that some operations on predicate registers themselves
+ * may operate on full uint64_t or even unrolled across the maximum
+ * uint64_t[4].  Performing 4 bits of host arithmetic unconditionally
+ * may well be cheaper than conditionals to restrict the operation
+ * to the relevant portion of a uint16_t[16].
+ *
+ * TODO: Need to call this for changes to the real system registers
+ * and EL state changes.
+ */
+void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
+{
+    int i, j;
+    uint64_t pmask;
+
+    assert(vq >= 1 && vq <= ARM_MAX_VQ);
+
+    /* Zap the high bits of the zregs.  */
+    for (i = 0; i < 32; i++) {
+        memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
+    }
+
+    /* Zap the high bits of the pregs and ffr.  */
+    pmask = 0;
+    if (vq & 3) {
+        pmask = ~(-1ULL << (16 * (vq & 3)));
+    }
+    for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
+        for (i = 0; i < 17; ++i) {
+            env->vfp.pregs[i].p[j] &= pmask;
+        }
+        pmask = 0;
+    }
+}
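
As an illustration of the predicate-mask loop above (not part of the patch): each predicate register is stored as uint64_t p[ARM_MAX_VQ / 4], packing 16 predicate bits per 128-bit vector quadword, so four quadwords fit in one uint64_t. The standalone sketch below prints the masks applied when narrowing to an example vq of 5.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define ARM_MAX_VQ 16

int main(void)
{
    unsigned vq = 5;        /* example target: 5 quadwords = 640-bit vectors */
    uint64_t pmask = 0;

    /* Same mask construction as aarch64_sve_narrow_vq(): keep only the
     * predicate bits of the quadwords that survive in the partial word. */
    if (vq & 3) {
        pmask = ~(-1ULL << (16 * (vq & 3)));
    }
    for (unsigned j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
        printf("p[%u] &= 0x%016" PRIx64 "\n", j, pmask);
        pmask = 0;
    }
    /* Prints:
     *   p[1] &= 0x000000000000ffff   (quadword 4 survives in the low 16 bits)
     *   p[2] &= 0x0000000000000000
     *   p[3] &= 0x0000000000000000
     * p[0] (quadwords 0-3) is untouched because j starts at vq / 4 == 1.
     */
    return 0;
}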