target-i386: Add XSAVE extension

This includes XSAVE, XRSTOR, XGETBV, and XSETBV, which are all related,
as well as the associated CPUID bits.

Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
Richard Henderson 2015-07-02 14:53:40 +01:00
parent 121f315788
commit 19dc85dba2
6 changed files with 241 additions and 24 deletions

View File

@ -331,14 +331,14 @@ static const char *cpuid_6_feature_name[] = {
#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */ \
CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
/* missing:
CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
CPUID_EXT_RDRAND */
CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AVX,
CPUID_EXT_F16C, CPUID_EXT_RDRAND */
#ifdef TARGET_X86_64
#define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM)
@ -2323,10 +2323,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ebx = (cpu->apic_id << 24) |
8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
*ecx = env->features[FEAT_1_ECX];
if ((*ecx & CPUID_EXT_XSAVE) && (env->cr[4] & CR4_OSXSAVE_MASK)) {
*ecx |= CPUID_EXT_OSXSAVE;
}
*edx = env->features[FEAT_1_EDX];
if (cs->nr_cores * cs->nr_threads > 1) {
*ebx |= (cs->nr_cores * cs->nr_threads) << 16;
*edx |= 1 << 28; /* HTT bit */
*edx |= CPUID_HT;
}
break;
case 2:
@ -2450,7 +2453,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 0xD: {
KVMState *s = cs->kvm_state;
uint64_t kvm_mask;
uint64_t ena_mask;
int i;
/* Processor Extended State */
@ -2458,35 +2461,39 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ebx = 0;
*ecx = 0;
*edx = 0;
if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) || !kvm_enabled()) {
if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
break;
}
kvm_mask =
kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX) |
((uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32);
if (kvm_enabled()) {
ena_mask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX);
ena_mask <<= 32;
ena_mask |= kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
} else {
ena_mask = -1;
}
if (count == 0) {
*ecx = 0x240;
for (i = 2; i < ARRAY_SIZE(ext_save_areas); i++) {
const ExtSaveArea *esa = &ext_save_areas[i];
if ((env->features[esa->feature] & esa->bits) == esa->bits &&
(kvm_mask & (1 << i)) != 0) {
if ((env->features[esa->feature] & esa->bits) == esa->bits
&& ((ena_mask >> i) & 1) != 0) {
if (i < 32) {
*eax |= 1 << i;
*eax |= 1u << i;
} else {
*edx |= 1 << (i - 32);
*edx |= 1u << (i - 32);
}
*ecx = MAX(*ecx, esa->offset + esa->size);
}
}
*eax |= kvm_mask & (XSTATE_FP | XSTATE_SSE);
*eax |= ena_mask & (XSTATE_FP | XSTATE_SSE);
*ebx = *ecx;
} else if (count == 1) {
*eax = env->features[FEAT_XSAVE];
} else if (count < ARRAY_SIZE(ext_save_areas)) {
const ExtSaveArea *esa = &ext_save_areas[count];
if ((env->features[esa->feature] & esa->bits) == esa->bits &&
(kvm_mask & (1 << count)) != 0) {
if ((env->features[esa->feature] & esa->bits) == esa->bits
&& ((ena_mask >> count) & 1) != 0) {
*eax = esa->size;
*ebx = esa->offset;
}

View File

@ -1190,6 +1190,45 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr)
}
}
/*
 * Return the XINUSE bitmap: one bit per extended-state component that is
 * currently considered "in use".
 *
 * @env is accepted for interface symmetry but is not consulted; every
 * component is always reported as in use.
 */
static uint64_t get_xinuse(CPUX86State *env)
{
    /*
     * XINUSE is not tracked.  It could be computed on demand, but simply
     * reporting every component as in use is probably less work.
     */
    return ~(uint64_t)0;
}
/*
 * XSAVE helper: store the state components selected by @rfbm (further
 * restricted to those enabled in XCR0) into the save area at guest
 * address @ptr, then update the XSTATE_BV header field at @ptr + 512.
 *
 * Raises EXCP06_ILLOP when CR4.OSXSAVE is clear and EXCP0D_GPF when @ptr
 * is not 64-byte aligned; both checks happen before any guest memory is
 * touched.
 */
void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    const uintptr_t retaddr = GETPC();
    const target_ulong bv_addr = ptr + 512;
    uint64_t bv;

    /* XSAVE is only usable once the OS has set CR4.OSXSAVE.  */
    if ((env->cr[4] & CR4_OSXSAVE_MASK) == 0) {
        raise_exception_ra(env, EXCP06_ILLOP, retaddr);
    }

    /* The save area has to sit on a 64-byte boundary.  */
    if ((ptr & 63) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    /* Components that XCR0 does not enable are never written.  */
    rfbm &= env->xcr0;

    if ((rfbm & XSTATE_FP) != 0) {
        do_xsave_fpu(env, ptr, retaddr);
    }
    if ((rfbm & XSTATE_SSE) != 0) {
        do_xsave_mxcsr(env, ptr, retaddr);
        do_xsave_sse(env, ptr, retaddr);
    }

    /*
     * Refresh XSTATE_BV: bits outside the requested set keep their old
     * value, bits inside it take the current XINUSE value.
     */
    bv = cpu_ldq_data_ra(env, bv_addr, retaddr);
    bv &= ~rfbm;
    bv |= get_xinuse(env) & rfbm;
    cpu_stq_data_ra(env, bv_addr, bv, retaddr);
}
static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
int i, fpus, fptag;
@ -1259,6 +1298,112 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr)
}
}
/*
 * XRSTOR helper (standard form only): reload the state components
 * selected by @rfbm, restricted to those enabled in XCR0, from the save
 * area at guest address @ptr.
 *
 * A component that is requested but whose XSTATE_BV bit is clear is reset
 * instead of loaded.  Raises EXCP06_ILLOP when CR4.OSXSAVE is clear, and
 * EXCP0D_GPF for a misaligned @ptr, for bit 63 set in XSTATE_BV (compact
 * form, not implemented), for XSTATE_BV bits that XCR0 does not enable,
 * or for non-zero data in the 16 bytes following XSTATE_BV.
 */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    const uintptr_t retaddr = GETPC();
    uint64_t hdr_bv, hdr_comp_lo, hdr_comp_hi;

    /* XRSTOR is only usable once the OS has set CR4.OSXSAVE.  */
    if ((env->cr[4] & CR4_OSXSAVE_MASK) == 0) {
        raise_exception_ra(env, EXCP06_ILLOP, retaddr);
    }

    /* The save area has to sit on a 64-byte boundary.  */
    if ((ptr & 63) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    hdr_bv = cpu_ldq_data_ra(env, ptr + 512, retaddr);

    /* FIXME: Compact form.  For now the top bit being set is rejected.  */
    if ((hdr_bv >> 63) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    /* Standard form from here on.  */

    /* XSTATE_BV may not name a component that XCR0 leaves disabled.  */
    if ((hdr_bv & ~env->xcr0) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    /* Both quadwords of the XCOMP field must read as zero.  */
    hdr_comp_lo = cpu_ldq_data_ra(env, ptr + 520, retaddr);
    hdr_comp_hi = cpu_ldq_data_ra(env, ptr + 528, retaddr);
    if ((hdr_comp_lo | hdr_comp_hi) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    /* Only components enabled in XCR0 can be restored.  */
    rfbm &= env->xcr0;

    if ((rfbm & XSTATE_FP) != 0) {
        if ((hdr_bv & XSTATE_FP) != 0) {
            do_xrstor_fpu(env, ptr, retaddr);
        } else {
            /* Requested but absent from the image: reset the x87 state.  */
            helper_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }

    if ((rfbm & XSTATE_SSE) != 0) {
        /*
         * The standard form of XRSTOR always loads MXCSR from memory,
         * regardless of the XSTATE_BV bit.
         */
        do_xrstor_mxcsr(env, ptr, retaddr);
        if ((hdr_bv & XSTATE_SSE) != 0) {
            do_xrstor_sse(env, ptr, retaddr);
        } else {
            /*
             * ??? Once AVX is implemented this clearing may have to
             * become more selective.
             */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
}
/*
 * XGETBV helper: read the extended control register selected by @ecx.
 *
 *   ecx == 0: returns XCR0.
 *   ecx == 1: returns XCR0 & XINUSE, but only when the CPU advertises
 *             XGETBV-with-ECX=1 support, i.e. bit 2 of
 *             CPUID.(EAX=0DH,ECX=1):EAX (held in env->features[FEAT_XSAVE]).
 *
 * Raises EXCP06_ILLOP when CR4.OSXSAVE is clear, and EXCP0D_GPF for any
 * other @ecx value or for ecx == 1 without the CPUID bit.
 */
uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
    /*
     * Bit 2 of CPUID.(EAX=0DH,ECX=1):EAX.
     * NOTE(review): the project headers may already provide a named
     * constant for this bit; prefer it if so.
     */
    const uint32_t xgetbv1_bit = 1u << 2;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    switch (ecx) {
    case 0:
        return env->xcr0;
    case 1:
        /* Reading XCR0 & XINUSE is only legal when CPUID advertises it.  */
        if (env->features[FEAT_XSAVE] & xgetbv1_bit) {
            return env->xcr0 & get_xinuse(env);
        }
        break;
    default:
        break;
    }

    /* Unsupported register index: general-protection fault.  */
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
/*
 * XSETBV helper: write @mask to the extended control register selected
 * by @ecx.  Only XCR0 (ecx == 0) is implemented.
 *
 * Raises EXCP06_ILLOP when CR4.OSXSAVE is clear.  Raises EXCP0D_GPF when
 * @ecx is not 0, when @mask clears the x87 bit (XSTATE_FP), or when @mask
 * sets a bit that CPUID leaf 0xD sub-leaf 0 (EDX:EAX) does not report.
 */
void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t supported_lo, supported_hi, unused;
    uint64_t supported;

    /* XSETBV is only usable once the OS has set CR4.OSXSAVE.  */
    if ((env->cr[4] & CR4_OSXSAVE_MASK) == 0) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present, and the FPU may not be disabled.  */
    if (ecx == 0 && (mask & XSTATE_FP) != 0) {
        /* The set of features that may be enabled comes from CPUID.  */
        cpu_x86_cpuid(env, 0x0d, 0,
                      &supported_lo, &unused, &unused, &supported_hi);
        supported = supported_lo | ((uint64_t)supported_hi << 32);

        /* Accept the write only if no unimplemented feature is requested.  */
        if ((mask & ~supported) == 0) {
            env->xcr0 = mask;
            return;
        }
    }

    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
{
CPU_LDoubleU temp;

View File

@ -647,6 +647,7 @@ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
{
X86CPU *cpu = x86_env_get_cpu(env);
uint32_t hflags;
#if defined(DEBUG_MMU)
printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
@ -656,24 +657,27 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
CR4_SMEP_MASK | CR4_SMAP_MASK)) {
tlb_flush(CPU(cpu), 1);
}
/* Clear bits we're going to recompute. */
hflags = env->hflags & ~(HF_OSFXSR_MASK | HF_SMAP_MASK);
/* SSE handling */
if (!(env->features[FEAT_1_EDX] & CPUID_SSE)) {
new_cr4 &= ~CR4_OSFXSR_MASK;
}
env->hflags &= ~HF_OSFXSR_MASK;
if (new_cr4 & CR4_OSFXSR_MASK) {
env->hflags |= HF_OSFXSR_MASK;
hflags |= HF_OSFXSR_MASK;
}
if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SMAP)) {
new_cr4 &= ~CR4_SMAP_MASK;
}
env->hflags &= ~HF_SMAP_MASK;
if (new_cr4 & CR4_SMAP_MASK) {
env->hflags |= HF_SMAP_MASK;
hflags |= HF_SMAP_MASK;
}
env->cr[4] = new_cr4;
env->hflags = hflags;
}
#if defined(CONFIG_USER_ONLY)

View File

@ -187,6 +187,10 @@ DEF_HELPER_3(fsave, void, env, tl, int)
DEF_HELPER_3(frstor, void, env, tl, int)
DEF_HELPER_FLAGS_2(fxsave, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_FLAGS_2(fxrstor, TCG_CALL_NO_WG, void, env, tl)
/*
 * XSAVE family.  xsave/xrstor take (env, guest address, 64-bit component
 * mask); xgetbv takes (env, 32-bit register index) and returns 64 bits;
 * xsetbv takes (env, 32-bit register index, 64-bit value).
 */
DEF_HELPER_FLAGS_3(xsave, TCG_CALL_NO_WG, void, env, tl, i64)
DEF_HELPER_FLAGS_3(xrstor, TCG_CALL_NO_WG, void, env, tl, i64)
DEF_HELPER_FLAGS_2(xgetbv, TCG_CALL_NO_WG, i64, env, i32)
DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, i64)
DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)

View File

@ -1855,13 +1855,16 @@ static int kvm_get_sregs(X86CPU *cpu)
HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
hflags = env->hflags & HFLAG_COPY_MASK;
hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
(HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
if (env->cr[4] & CR4_OSFXSR_MASK) {
hflags |= HF_OSFXSR_MASK;
}
if (env->efer & MSR_EFER_LMA) {
hflags |= HF_LMA_MASK;
@ -1882,7 +1885,7 @@ static int kvm_get_sregs(X86CPU *cpu)
env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
}
}
env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
env->hflags = hflags;
return 0;
}

View File

@ -7079,6 +7079,36 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
break;
case 0xd0: /* xgetbv */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
|| (s->prefix & (PREFIX_LOCK | PREFIX_DATA
| PREFIX_REPZ | PREFIX_REPNZ))) {
goto illegal_op;
}
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
break;
case 0xd1: /* xsetbv */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
|| (s->prefix & (PREFIX_LOCK | PREFIX_DATA
| PREFIX_REPZ | PREFIX_REPNZ))) {
goto illegal_op;
}
if (s->cpl != 0) {
gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
break;
}
tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
cpu_regs[R_EDX]);
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
/* End TB because translation flags may change. */
gen_jmp_im(s->pc - pc_start);
gen_eob(s);
break;
case 0xd8: /* VMRUN */
if (!(s->flags & HF_SVME_MASK) || !s->pe) {
goto illegal_op;
@ -7580,6 +7610,30 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
break;
CASE_MEM_OP(4): /* xsave */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
|| (prefixes & (PREFIX_LOCK | PREFIX_DATA
| PREFIX_REPZ | PREFIX_REPNZ))) {
goto illegal_op;
}
gen_lea_modrm(env, s, modrm);
tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
cpu_regs[R_EDX]);
gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
break;
CASE_MEM_OP(5): /* xrstor */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
|| (prefixes & (PREFIX_LOCK | PREFIX_DATA
| PREFIX_REPZ | PREFIX_REPNZ))) {
goto illegal_op;
}
gen_lea_modrm(env, s, modrm);
tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
cpu_regs[R_EDX]);
gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
break;
CASE_MEM_OP(6): /* clwb */
if (prefixes & PREFIX_LOCK) {
goto illegal_op;