diff --git a/MAINTAINERS b/MAINTAINERS index 61a323a6b2cf..1d5b4becab6f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6252,8 +6252,8 @@ S: Maintained F: tools/testing/selftests KERNEL VIRTUAL MACHINE (KVM) -M: Gleb Natapov M: Paolo Bonzini +M: Radim Krčmář L: kvm@vger.kernel.org W: http://www.linux-kvm.org T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index b5384311dec4..dded1b763c16 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1112,10 +1112,17 @@ static void __init hyp_cpu_pm_init(void) { cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } +static void __init hyp_cpu_pm_exit(void) +{ + cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); +} #else static inline void hyp_cpu_pm_init(void) { } +static inline void hyp_cpu_pm_exit(void) +{ +} #endif static void teardown_common_resources(void) @@ -1141,9 +1148,7 @@ static int init_subsystems(void) /* * Register CPU Hotplug notifier */ - cpu_notifier_register_begin(); - err = __register_cpu_notifier(&hyp_init_cpu_nb); - cpu_notifier_register_done(); + err = register_cpu_notifier(&hyp_init_cpu_nb); if (err) { kvm_err("Cannot register KVM init CPU notifier (%d)\n", err); return err; @@ -1193,6 +1198,8 @@ static void teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); + unregister_cpu_notifier(&hyp_init_cpu_nb); + hyp_cpu_pm_exit(); } static int init_vhe_mode(void) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 4150fd8bae01..3f29887995bc 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -151,8 +151,7 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ - VTCR_EL2_RES1) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) #else /* @@ -163,8 +162,7 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ - VTCR_EL2_RES1) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) #endif diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index eb7490d232a0..40a0a24e6c98 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -54,7 +54,7 @@ extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); -extern void __init_stage2_translation(void); +extern u32 __init_stage2_translation(void); #endif diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b7e82a795ac9..f5c6bd2541ef 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -369,11 +369,12 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -/* #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) */ - static inline void __cpu_init_stage2(void) { - kvm_call_hyp(__init_stage2_translation); + u32 parange = kvm_call_hyp(__init_stage2_translation); + + WARN_ONCE(parange < 40, + "PARange is %d bits, unsupported configuration!", parange); } #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index 5a9f3bf542b0..bcbe761a5a3d 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -20,9 +20,10 @@ #include #include -void __hyp_text __init_stage2_translation(void) +u32 __hyp_text __init_stage2_translation(void) { u64 val = VTCR_EL2_FLAGS; + u64 parange; u64 tmp; /* @@ -30,7 +31,39 @@ void __hyp_text __init_stage2_translation(void) * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2... */ - val |= (read_sysreg(id_aa64mmfr0_el1) & 7) << 16; + parange = read_sysreg(id_aa64mmfr0_el1) & 7; + val |= parange << 16; + + /* Compute the actual PARange... */ + switch (parange) { + case 0: + parange = 32; + break; + case 1: + parange = 36; + break; + case 2: + parange = 40; + break; + case 3: + parange = 42; + break; + case 4: + parange = 44; + break; + case 5: + default: + parange = 48; + break; + } + + /* + * ... and clamp it to 40 bits, unless we have some braindead + * HW that implements less than that. In all cases, we'll + * return that value for the rest of the kernel to decide what + * to do. + */ + val |= 64 - (parange > 40 ? 40 : parange); /* * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS @@ -42,4 +75,6 @@ void __hyp_text __init_stage2_translation(void) VTCR_EL2_VS_8BIT; write_sysreg(val, vtcr_el2); + + return parange; } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8efb839948e5..bbbaa802d13e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -534,6 +534,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, do_cpuid_1_ent(&entry[i], function, idx); if (idx == 1) { entry[i].eax &= kvm_cpuid_D_1_eax_x86_features; + cpuid_mask(&entry[i].eax, CPUID_D_1_EAX); entry[i].ebx = 0; if (entry[i].eax & (F(XSAVES)|F(XSAVEC))) entry[i].ebx = diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index b70df72e2b33..66b33b96a31b 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -173,10 +173,9 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index = (pfec >> 1) + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); bool fault = (mmu->permissions[index] >> pte_access) & 1; + u32 errcode = PFERR_PRESENT_MASK; WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK)); - pfec |= PFERR_PRESENT_MASK; - if (unlikely(mmu->pkru_mask)) { u32 pkru_bits, offset; @@ -189,15 +188,15 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3; /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */ - offset = pfec - 1 + + offset = (pfec & ~1) + ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT)); pkru_bits &= mmu->pkru_mask >> offset; - pfec |= -pkru_bits & PFERR_PK_MASK; + errcode |= -pkru_bits & PFERR_PK_MASK; fault |= (pkru_bits != 0); } - return -(uint32_t)fault & pfec; + return -(u32)fault & errcode; } void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 1d971c7553c3..bc019f70e0b6 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -360,7 +360,7 @@ retry_walk: goto error; if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) { - errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; + errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK; goto error; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0a2c70e43bc8..9b7798c7b210 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -700,7 +700,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) return 1; } - kvm_put_guest_xcr0(vcpu); vcpu->arch.xcr0 = xcr0; if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) @@ -6590,8 +6589,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->prepare_guest_switch(vcpu); if (vcpu->fpu_active) kvm_load_guest_fpu(vcpu); - kvm_load_guest_xcr0(vcpu); - vcpu->mode = IN_GUEST_MODE; srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); @@ -6618,6 +6615,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto cancel_injection; } + kvm_load_guest_xcr0(vcpu); + if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); @@ -6667,6 +6666,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); + kvm_put_guest_xcr0(vcpu); + /* Interrupt is enabled by handle_external_intr() */ kvm_x86_ops->handle_external_intr(vcpu); @@ -7314,7 +7315,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) * and assume host would use all available bits. * Guest xcr0 would be loaded later. */ - kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); @@ -7323,8 +7323,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - kvm_put_guest_xcr0(vcpu); - if (!vcpu->guest_fpu_loaded) { vcpu->fpu_counter = 0; return; diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index a9ad4fe3f68f..9aaa35dd9144 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -91,6 +91,8 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); vcpu->arch.timer_cpu.armed = false; + WARN_ON(!kvm_timer_should_fire(vcpu)); + /* * If the vcpu is blocked we want to wake it up so that it will see * the timer has expired when entering the guest. @@ -98,10 +100,46 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) kvm_vcpu_kick(vcpu); } +static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) +{ + cycle_t cval, now; + + cval = vcpu->arch.timer_cpu.cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + if (now < cval) { + u64 ns; + + ns = cyclecounter_cyc2ns(timecounter->cc, + cval - now, + timecounter->mask, + &timecounter->frac); + return ns; + } + + return 0; +} + static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) { struct arch_timer_cpu *timer; + struct kvm_vcpu *vcpu; + u64 ns; + timer = container_of(hrt, struct arch_timer_cpu, timer); + vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); + + /* + * Check that the timer has really expired from the guest's + * PoV (NTP on the host may have forced it to expire + * early). If we should have slept longer, restart it. + */ + ns = kvm_timer_compute_delta(vcpu); + if (unlikely(ns)) { + hrtimer_forward_now(hrt, ns_to_ktime(ns)); + return HRTIMER_RESTART; + } + queue_work(wqueue, &timer->expired); return HRTIMER_NORESTART; } @@ -176,8 +214,6 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) void kvm_timer_schedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - u64 ns; - cycle_t cval, now; BUG_ON(timer_is_armed(timer)); @@ -197,14 +233,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) return; /* The timer has not yet expired, schedule a background timer */ - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - ns = cyclecounter_cyc2ns(timecounter->cc, - cval - now, - timecounter->mask, - &timecounter->frac); - timer_arm(timer, ns); + timer_arm(timer, kvm_timer_compute_delta(vcpu)); } void kvm_timer_unschedule(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index b5754c6c5508..575c7aa30d7e 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -193,11 +193,12 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0; - if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) + if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0); reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1); reg &= kvm_pmu_valid_counter_mask(vcpu); + } return reg; }