x86: Implement SMEP and SMAP

This patch implements Supervisor Mode Execution Prevention (SMEP) and
Supervisor Mode Access Prevention (SMAP) for x86.  The purpose of the
patch, obviously, is to help kernel developers debug the support for
those features.

A fair bit of the code relates to the handling of CPUID features.  The
CPUID code probably would get greatly simplified if all the feature
bit words were unified into a single vector object, but in the
interest of producing a minimal patch for SMEP/SMAP, and because I had
very limited time for this project, I followed the existing style.

[ v2: don't change the definition of the qemu64 CPU shorthand, since
  that breaks loading old snapshots.  Per Anthony Liguori this can be
  fixed once the CPU feature set is saved as part of the snapshot.

  Change the coding style slightly to conform to checkpatch.pl. ]

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
H. Peter Anvin 2012-09-26 13:18:43 -07:00 committed by Anthony Liguori
parent 4a19e505df
commit a9321a4d49
6 changed files with 207 additions and 49 deletions

View File

@ -353,6 +353,16 @@ void helper_sti(CPUX86State *env)
env->eflags |= IF_MASK;
}
/* CLAC helper: clear the AC bit in the emulated EFLAGS register. */
void helper_clac(CPUX86State *env)
{
    env->eflags = env->eflags & ~AC_MASK;
}
/* STAC helper: set the AC bit in the emulated EFLAGS register. */
void helper_stac(CPUX86State *env)
{
    env->eflags = env->eflags | AC_MASK;
}
#if 0
/* vm86plus instructions */
void helper_cli_vm(CPUX86State *env)

View File

@ -104,6 +104,13 @@ static const char *svm_feature_name[] = {
NULL, NULL, NULL, NULL,
};
/*
 * Flag names for the 32 bits of the cpuid_7_0_ebx feature word, indexed
 * by bit position.  Bits without a name stay NULL.
 */
static const char *cpuid_7_0_ebx_feature_name[32] = {
    [7]  = "smep",
    [20] = "smap",
};
/* collects per-function cpuid data
*/
typedef struct model_features_t {
@ -219,14 +226,17 @@ static void add_flagname_to_bitmaps(const char *flagname, uint32_t *features,
uint32_t *ext2_features,
uint32_t *ext3_features,
uint32_t *kvm_features,
uint32_t *svm_features)
uint32_t *svm_features,
uint32_t *cpuid_7_0_ebx_features)
{
if (!lookup_feature(features, flagname, NULL, feature_name) &&
!lookup_feature(ext_features, flagname, NULL, ext_feature_name) &&
!lookup_feature(ext2_features, flagname, NULL, ext2_feature_name) &&
!lookup_feature(ext3_features, flagname, NULL, ext3_feature_name) &&
!lookup_feature(kvm_features, flagname, NULL, kvm_feature_name) &&
!lookup_feature(svm_features, flagname, NULL, svm_feature_name))
!lookup_feature(svm_features, flagname, NULL, svm_feature_name) &&
!lookup_feature(cpuid_7_0_ebx_features, flagname, NULL,
cpuid_7_0_ebx_feature_name))
fprintf(stderr, "CPU feature %s not found\n", flagname);
}
@ -287,6 +297,7 @@ typedef struct x86_def_t {
#define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
#define TCG_SVM_FEATURES 0
#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP)
/* maintains list of cpu model definitions
*/
@ -1097,10 +1108,12 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
uint32_t plus_features = 0, plus_ext_features = 0;
uint32_t plus_ext2_features = 0, plus_ext3_features = 0;
uint32_t plus_kvm_features = 0, plus_svm_features = 0;
uint32_t plus_7_0_ebx_features = 0;
/* Features to be removed */
uint32_t minus_features = 0, minus_ext_features = 0;
uint32_t minus_ext2_features = 0, minus_ext3_features = 0;
uint32_t minus_kvm_features = 0, minus_svm_features = 0;
uint32_t minus_7_0_ebx_features = 0;
uint32_t numvalue;
for (def = x86_defs; def; def = def->next)
@ -1127,8 +1140,8 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
#endif
add_flagname_to_bitmaps("hypervisor", &plus_features,
&plus_ext_features, &plus_ext2_features, &plus_ext3_features,
&plus_kvm_features, &plus_svm_features);
&plus_ext_features, &plus_ext2_features, &plus_ext3_features,
&plus_kvm_features, &plus_svm_features, &plus_7_0_ebx_features);
featurestr = strtok(NULL, ",");
@ -1138,12 +1151,12 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
add_flagname_to_bitmaps(featurestr + 1, &plus_features,
&plus_ext_features, &plus_ext2_features,
&plus_ext3_features, &plus_kvm_features,
&plus_svm_features);
&plus_svm_features, &plus_7_0_ebx_features);
} else if (featurestr[0] == '-') {
add_flagname_to_bitmaps(featurestr + 1, &minus_features,
&minus_ext_features, &minus_ext2_features,
&minus_ext3_features, &minus_kvm_features,
&minus_svm_features);
&minus_svm_features, &minus_7_0_ebx_features);
} else if ((val = strchr(featurestr, '='))) {
*val = 0; val++;
if (!strcmp(featurestr, "family")) {
@ -1249,16 +1262,21 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
x86_cpu_def->ext3_features |= plus_ext3_features;
x86_cpu_def->kvm_features |= plus_kvm_features;
x86_cpu_def->svm_features |= plus_svm_features;
x86_cpu_def->cpuid_7_0_ebx_features |= plus_7_0_ebx_features;
x86_cpu_def->features &= ~minus_features;
x86_cpu_def->ext_features &= ~minus_ext_features;
x86_cpu_def->ext2_features &= ~minus_ext2_features;
x86_cpu_def->ext3_features &= ~minus_ext3_features;
x86_cpu_def->kvm_features &= ~minus_kvm_features;
x86_cpu_def->svm_features &= ~minus_svm_features;
x86_cpu_def->cpuid_7_0_ebx_features &= ~minus_7_0_ebx_features;
if (check_cpuid) {
if (check_features_against_host(x86_cpu_def) && enforce_cpuid)
goto error;
}
if (x86_cpu_def->cpuid_7_0_ebx_features && x86_cpu_def->level < 7) {
x86_cpu_def->level = 7;
}
g_free(s);
return 0;
@ -1374,7 +1392,7 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
env->cpuid_kvm_features = def->kvm_features;
env->cpuid_svm_features = def->svm_features;
env->cpuid_ext4_features = def->ext4_features;
env->cpuid_7_0_ebx = def->cpuid_7_0_ebx_features;
env->cpuid_7_0_ebx_features = def->cpuid_7_0_ebx_features;
env->cpuid_xlevel2 = def->xlevel2;
object_property_set_int(OBJECT(cpu), (int64_t)def->tsc_khz * 1000,
"tsc-frequency", &error);
@ -1562,7 +1580,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
/* Structured Extended Feature Flags Enumeration Leaf */
if (count == 0) {
*eax = 0; /* Maximum ECX value for sub-leaves */
*ebx = env->cpuid_7_0_ebx; /* Feature flags */
*ebx = env->cpuid_7_0_ebx_features; /* Feature flags */
*ecx = 0; /* Reserved */
*edx = 0; /* Reserved */
} else {

View File

@ -123,8 +123,8 @@
/* hidden flags - used internally by qemu to represent additional cpu
states. Only the CPL, INHIBIT_IRQ, SMM and SVMI are not
redundant. We avoid using the IOPL_MASK, TF_MASK and VM_MASK bit
position to ease oring with eflags. */
redundant. We avoid using the IOPL_MASK, TF_MASK, VM_MASK and AC_MASK
bit positions to ease oring with eflags. */
/* current cpl */
#define HF_CPL_SHIFT 0
/* true if soft mmu is being used */
@ -147,10 +147,12 @@
#define HF_CS64_SHIFT 15 /* only used on x86_64: 64 bit code segment */
#define HF_RF_SHIFT 16 /* must be same as eflags */
#define HF_VM_SHIFT 17 /* must be same as eflags */
#define HF_AC_SHIFT 18 /* must be same as eflags */
#define HF_SMM_SHIFT 19 /* CPU in SMM mode */
#define HF_SVME_SHIFT 20 /* SVME enabled (copy of EFER.SVME) */
#define HF_SVMI_SHIFT 21 /* SVM intercepts are active */
#define HF_OSFXSR_SHIFT 22 /* CR4.OSFXSR */
#define HF_SMAP_SHIFT 23 /* CR4.SMAP */
#define HF_CPL_MASK (3 << HF_CPL_SHIFT)
#define HF_SOFTMMU_MASK (1 << HF_SOFTMMU_SHIFT)
@ -168,10 +170,12 @@
#define HF_CS64_MASK (1 << HF_CS64_SHIFT)
#define HF_RF_MASK (1 << HF_RF_SHIFT)
#define HF_VM_MASK (1 << HF_VM_SHIFT)
#define HF_AC_MASK (1 << HF_AC_SHIFT)
#define HF_SMM_MASK (1 << HF_SMM_SHIFT)
#define HF_SVME_MASK (1 << HF_SVME_SHIFT)
#define HF_SVMI_MASK (1 << HF_SVMI_SHIFT)
#define HF_OSFXSR_MASK (1 << HF_OSFXSR_SHIFT)
#define HF_SMAP_MASK (1 << HF_SMAP_SHIFT)
/* hflags2 */
@ -210,6 +214,13 @@
#define CR4_OSFXSR_SHIFT 9
#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT)
#define CR4_OSXMMEXCPT_MASK (1 << 10)
#define CR4_VMXE_MASK (1 << 13)
#define CR4_SMXE_MASK (1 << 14)
#define CR4_FSGSBASE_MASK (1 << 16)
#define CR4_PCIDE_MASK (1 << 17)
#define CR4_OSXSAVE_MASK (1 << 18)
#define CR4_SMEP_MASK (1 << 20)
#define CR4_SMAP_MASK (1 << 21)
#define DR6_BD (1 << 13)
#define DR6_BS (1 << 14)
@ -474,6 +485,9 @@
#define CPUID_SVM_PAUSEFILTER (1 << 10)
#define CPUID_SVM_PFTHRESHOLD (1 << 12)
/* Feature flag bits reported in CPUID[EAX=7,ECX=0].EBX */
#define CPUID_7_0_EBX_SMEP (1 << 7)
#define CPUID_7_0_EBX_SMAP (1 << 20)
#define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
#define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
#define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */
@ -649,7 +663,7 @@ typedef struct {
#define CPU_NB_REGS CPU_NB_REGS32
#endif
#define NB_MMU_MODES 2
#define NB_MMU_MODES 3
typedef enum TPRAccess {
TPR_ACCESS_READ,
@ -779,7 +793,7 @@ typedef struct CPUX86State {
uint32_t cpuid_xlevel2;
uint32_t cpuid_ext4_features;
/* Flags from CPUID[EAX=7,ECX=0].EBX */
uint32_t cpuid_7_0_ebx;
uint32_t cpuid_7_0_ebx_features;
/* MTRRs */
uint64_t mtrr_fixed[11];
@ -1018,10 +1032,15 @@ static inline CPUX86State *cpu_init(const char *cpu_model)
/* MMU modes definitions */
#define MMU_MODE0_SUFFIX _kernel
#define MMU_MODE1_SUFFIX _user
#define MMU_USER_IDX 1
#define MMU_MODE2_SUFFIX _ksmap /* Kernel with SMAP override */
#define MMU_KERNEL_IDX 0
#define MMU_USER_IDX 1
#define MMU_KSMAP_IDX 2
static inline int cpu_mmu_index (CPUX86State *env)
{
return (env->hflags & HF_CPL_MASK) == 3 ? 1 : 0;
return (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER_IDX :
((env->hflags & HF_SMAP_MASK) && (env->eflags & AC_MASK))
? MMU_KSMAP_IDX : MMU_KERNEL_IDX;
}
#undef EAX
@ -1107,7 +1126,7 @@ static inline void cpu_get_tb_cpu_state(CPUX86State *env, target_ulong *pc,
*cs_base = env->segs[R_CS].base;
*pc = *cs_base + env->eip;
*flags = env->hflags |
(env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK));
(env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK));
}
void do_cpu_init(X86CPU *cpu);

View File

@ -443,17 +443,27 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
#if defined(DEBUG_MMU)
printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
#endif
if ((new_cr4 & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK)) !=
(env->cr[4] & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK))) {
if ((new_cr4 ^ env->cr[4]) &
(CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK |
CR4_SMEP_MASK | CR4_SMAP_MASK)) {
tlb_flush(env, 1);
}
/* SSE handling */
if (!(env->cpuid_features & CPUID_SSE))
if (!(env->cpuid_features & CPUID_SSE)) {
new_cr4 &= ~CR4_OSFXSR_MASK;
if (new_cr4 & CR4_OSFXSR_MASK)
}
env->hflags &= ~HF_OSFXSR_MASK;
if (new_cr4 & CR4_OSFXSR_MASK) {
env->hflags |= HF_OSFXSR_MASK;
else
env->hflags &= ~HF_OSFXSR_MASK;
}
if (!(env->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)) {
new_cr4 &= ~CR4_SMAP_MASK;
}
env->hflags &= ~HF_SMAP_MASK;
if (new_cr4 & CR4_SMAP_MASK) {
env->hflags |= HF_SMAP_MASK;
}
env->cr[4] = new_cr4;
}
@ -591,17 +601,38 @@ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
/* 2 MB page */
page_size = 2048 * 1024;
ptep ^= PG_NX_MASK;
if ((ptep & PG_NX_MASK) && is_write1 == 2)
if ((ptep & PG_NX_MASK) && is_write1 == 2) {
goto do_fault_protect;
if (is_user) {
if (!(ptep & PG_USER_MASK))
}
switch (mmu_idx) {
case MMU_USER_IDX:
if (!(ptep & PG_USER_MASK)) {
goto do_fault_protect;
if (is_write && !(ptep & PG_RW_MASK))
}
if (is_write && !(ptep & PG_RW_MASK)) {
goto do_fault_protect;
} else {
}
break;
case MMU_KERNEL_IDX:
if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
(ptep & PG_USER_MASK)) {
goto do_fault_protect;
}
/* fall through */
case MMU_KSMAP_IDX:
if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
(ptep & PG_USER_MASK)) {
goto do_fault_protect;
}
if ((env->cr[0] & CR0_WP_MASK) &&
is_write && !(ptep & PG_RW_MASK))
is_write && !(ptep & PG_RW_MASK)) {
goto do_fault_protect;
}
break;
default: /* cannot happen */
break;
}
is_dirty = is_write && !(pde & PG_DIRTY_MASK);
if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
@ -635,15 +666,35 @@ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
ptep ^= PG_NX_MASK;
if ((ptep & PG_NX_MASK) && is_write1 == 2)
goto do_fault_protect;
if (is_user) {
if (!(ptep & PG_USER_MASK))
switch (mmu_idx) {
case MMU_USER_IDX:
if (!(ptep & PG_USER_MASK)) {
goto do_fault_protect;
if (is_write && !(ptep & PG_RW_MASK))
}
if (is_write && !(ptep & PG_RW_MASK)) {
goto do_fault_protect;
} else {
}
break;
case MMU_KERNEL_IDX:
if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
(ptep & PG_USER_MASK)) {
goto do_fault_protect;
}
/* fall through */
case MMU_KSMAP_IDX:
if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
(ptep & PG_USER_MASK)) {
goto do_fault_protect;
}
if ((env->cr[0] & CR0_WP_MASK) &&
is_write && !(ptep & PG_RW_MASK))
is_write && !(ptep & PG_RW_MASK)) {
goto do_fault_protect;
}
break;
default: /* cannot happen */
break;
}
is_dirty = is_write && !(pte & PG_DIRTY_MASK);
if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
@ -670,15 +721,35 @@ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
/* if PSE bit is set, then we use a 4MB page */
if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
page_size = 4096 * 1024;
if (is_user) {
if (!(pde & PG_USER_MASK))
switch (mmu_idx) {
case MMU_USER_IDX:
if (!(pde & PG_USER_MASK)) {
goto do_fault_protect;
if (is_write && !(pde & PG_RW_MASK))
}
if (is_write && !(pde & PG_RW_MASK)) {
goto do_fault_protect;
} else {
}
break;
case MMU_KERNEL_IDX:
if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
(pde & PG_USER_MASK)) {
goto do_fault_protect;
}
/* fall through */
case MMU_KSMAP_IDX:
if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
(pde & PG_USER_MASK)) {
goto do_fault_protect;
}
if ((env->cr[0] & CR0_WP_MASK) &&
is_write && !(pde & PG_RW_MASK))
is_write && !(pde & PG_RW_MASK)) {
goto do_fault_protect;
}
break;
default: /* cannot happen */
break;
}
is_dirty = is_write && !(pde & PG_DIRTY_MASK);
if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
@ -707,15 +778,35 @@ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
}
/* combine pde and pte user and rw protections */
ptep = pte & pde;
if (is_user) {
if (!(ptep & PG_USER_MASK))
switch (mmu_idx) {
case MMU_USER_IDX:
if (!(ptep & PG_USER_MASK)) {
goto do_fault_protect;
if (is_write && !(ptep & PG_RW_MASK))
}
if (is_write && !(ptep & PG_RW_MASK)) {
goto do_fault_protect;
} else {
}
break;
case MMU_KERNEL_IDX:
if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
(ptep & PG_USER_MASK)) {
goto do_fault_protect;
}
/* fall through */
case MMU_KSMAP_IDX:
if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
(ptep & PG_USER_MASK)) {
goto do_fault_protect;
}
if ((env->cr[0] & CR0_WP_MASK) &&
is_write && !(ptep & PG_RW_MASK))
is_write && !(ptep & PG_RW_MASK)) {
goto do_fault_protect;
}
break;
default: /* cannot happen */
break;
}
is_dirty = is_write && !(pte & PG_DIRTY_MASK);
if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
@ -762,8 +853,9 @@ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
if (is_user)
error_code |= PG_ERROR_U_MASK;
if (is_write1 == 2 &&
(env->efer & MSR_EFER_NXE) &&
(env->cr[4] & CR4_PAE_MASK))
(((env->efer & MSR_EFER_NXE) &&
(env->cr[4] & CR4_PAE_MASK)) ||
(env->cr[4] & CR4_SMEP_MASK)))
error_code |= PG_ERROR_I_D_MASK;
if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
/* cr2 is not modified in case of exceptions */

View File

@ -67,6 +67,8 @@ DEF_HELPER_3(raise_interrupt, void, env, int, int)
DEF_HELPER_2(raise_exception, void, env, int)
DEF_HELPER_1(cli, void, env)
DEF_HELPER_1(sti, void, env)
DEF_HELPER_1(clac, void, env)
DEF_HELPER_1(stac, void, env)
DEF_HELPER_1(set_inhibit_irq, void, env)
DEF_HELPER_1(reset_inhibit_irq, void, env)
DEF_HELPER_3(boundw, void, env, tl, int)

View File

@ -107,6 +107,7 @@ typedef struct DisasContext {
int cpuid_ext_features;
int cpuid_ext2_features;
int cpuid_ext3_features;
int cpuid_7_0_ebx_features;
} DisasContext;
static void gen_eob(DisasContext *s);
@ -6556,7 +6557,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
}
gen_pop_update(s);
s->cc_op = CC_OP_EFLAGS;
/* abort translation because TF flag may change */
/* abort translation because TF/AC flag may change */
gen_jmp_im(s->pc - s->cs_base);
gen_eob(s);
}
@ -7206,6 +7207,24 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
gen_eob(s);
break;
case 2: /* clac */
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
s->cpl != 0) {
goto illegal_op;
}
gen_helper_clac(cpu_env);
gen_jmp_im(s->pc - s->cs_base);
gen_eob(s);
break;
case 3: /* stac */
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
s->cpl != 0) {
goto illegal_op;
}
gen_helper_stac(cpu_env);
gen_jmp_im(s->pc - s->cs_base);
gen_eob(s);
break;
default:
goto illegal_op;
}
@ -7901,15 +7920,13 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
/* select memory access functions */
dc->mem_index = 0;
if (flags & HF_SOFTMMU_MASK) {
if (dc->cpl == 3)
dc->mem_index = 2 * 4;
else
dc->mem_index = 1 * 4;
dc->mem_index = (cpu_mmu_index(env) + 1) << 2;
}
dc->cpuid_features = env->cpuid_features;
dc->cpuid_ext_features = env->cpuid_ext_features;
dc->cpuid_ext2_features = env->cpuid_ext2_features;
dc->cpuid_ext3_features = env->cpuid_ext3_features;
dc->cpuid_7_0_ebx_features = env->cpuid_7_0_ebx_features;
#ifdef TARGET_X86_64
dc->lma = (flags >> HF_LMA_SHIFT) & 1;
dc->code64 = (flags >> HF_CS64_SHIFT) & 1;