From b41fb528dd8784a87414e3af5579674badefa76b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 4 May 2019 06:51:45 +0000 Subject: [PATCH 01/33] KVM: s390: fix typo in parameter description Fix typo in parameter description. Fixes: 8b905d28ee17 ("KVM: s390: provide kvm_arch_no_poll function") Signed-off-by: Wei Yongjun Message-Id: <20190504065145.53665-1-weiyongjun1@huawei.com> Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8d6d75db8de6..ac6163c334d6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -181,7 +181,7 @@ MODULE_PARM_DESC(hpage, "1m huge page backing support"); /* maximum percentage of steal time for polling. >100 is treated like 100 */ static u8 halt_poll_max_steal = 10; module_param(halt_poll_max_steal, byte, 0644); -MODULE_PARM_DESC(hpage, "Maximum percentage of steal time to allow polling"); +MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling"); /* * For now we handle at most 16 double words as this is what the s390 base From 6e9b622d1c3628fae65217465a148b157a361c2a Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 15 May 2019 09:59:08 +0200 Subject: [PATCH 02/33] KVM: s390: change default halt poll time to 50us Recent measurements indicate that using 50us results in a reduced CPU consumption, while still providing the benefit of halt polling. Let's use 50us instead. Acked-by: David Hildenbrand Acked-by: Janosch Frank Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index bdbc81b5bc91..2b00a3ebee08 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -36,7 +36,7 @@ */ #define KVM_NR_IRQCHIPS 1 #define KVM_IRQCHIP_NUM_PINS 4096 -#define KVM_HALT_POLL_NS_DEFAULT 80000 +#define KVM_HALT_POLL_NS_DEFAULT 50000 /* s390-specific vcpu->requests bit members */ #define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0) From 493fcbc8435e00db1643852cbdfe0dd6398c57ae Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 21 May 2019 15:25:40 +0200 Subject: [PATCH 03/33] MAINTAINERS: KVM: arm/arm64: Remove myself as maintainer I no longer have time to actively review patches and manage the tree and it's time to make that official. Huge thanks to the incredible Linux community and all the contributors who have put up with me over the past years. I also take this opportunity to remove the website link to the Columbia web page, as that information is no longer up to date and I don't know who manages that anymore. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5cfbea4ce575..4ba271a8e0ef 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8611,14 +8611,12 @@ F: arch/x86/include/asm/svm.h F: arch/x86/kvm/svm.c KERNEL VIRTUAL MACHINE FOR ARM/ARM64 (KVM/arm, KVM/arm64) -M: Christoffer Dall M: Marc Zyngier R: James Morse R: Julien Thierry R: Suzuki K Pouloze L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu -W: http://systems.cs.columbia.edu/projects/kvm-arm T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git S: Maintained F: arch/arm/include/uapi/asm/kvm* From b7c50fab66ab66e2d3e00f809a09578d78232836 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 May 2019 18:47:04 +0100 Subject: [PATCH 04/33] KVM: arm64: Move pmu hyp code under hyp's Makefile to avoid instrumentation KVM's pmu.c contains the __hyp_text needed to switch the pmu registers between host and guest. Because this isn't covered by the 'hyp' Makefile, it can be built with kasan and friends when these are enabled in Kconfig. When starting a guest, this results in: | Kernel panic - not syncing: HYP panic: | PS:a00003c9 PC:000083000028ada0 ESR:86000007 | FAR:000083000028ada0 HPFAR:0000000029df5300 PAR:0000000000000000 | VCPU:000000004e10b7d6 | CPU: 0 PID: 3088 Comm: qemu-system-aar Not tainted 5.2.0-rc1 #11026 | Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Plat | Call trace: | dump_backtrace+0x0/0x200 | show_stack+0x20/0x30 | dump_stack+0xec/0x158 | panic+0x1ec/0x420 | panic+0x0/0x420 | SMP: stopping secondary CPUs | Kernel Offset: disabled | CPU features: 0x002,25006082 | Memory Limit: none | ---[ end Kernel panic - not syncing: HYP panic: This is caused by functions in pmu.c calling the instrumented code, which isn't mapped to hyp. From objdump -r: | RELOCATION RECORDS FOR [.hyp.text]: | OFFSET TYPE VALUE | 0000000000000010 R_AARCH64_CALL26 __sanitizer_cov_trace_pc | 0000000000000018 R_AARCH64_CALL26 __asan_load4_noabort | 0000000000000024 R_AARCH64_CALL26 __asan_load4_noabort Move the affected code to a new file under 'hyp's Makefile. Fixes: 3d91befbb3a0 ("arm64: KVM: Enable !VHE support for :G/:H perf event modifiers") Cc: Andrew Murray Signed-off-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 3 --- arch/arm64/kvm/hyp/switch.c | 39 +++++++++++++++++++++++++++++++ arch/arm64/kvm/pmu.c | 38 ------------------------------ 3 files changed, 39 insertions(+), 41 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2a8d3f8ca22c..4bcd9c1291d5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -592,9 +592,6 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); void kvm_clr_pmu_events(u32 clr); -void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt); -bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt); - void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu); void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); #else diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 22b4c335e0b2..8799e0c267d4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -16,6 +16,7 @@ */ #include +#include #include #include #include @@ -505,6 +506,44 @@ static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) #endif } +/** + * Disable host events, enable guest events + */ +static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenclr_el0); + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenset_el0); + + return (pmu->events_host || pmu->events_guest); +} + +/** + * Disable guest events, enable host events + */ +static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenclr_el0); + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenset_el0); +} + /* Switch to the guest for VHE systems running in EL2 */ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 3da94a5bb6b7..e71d00bb5271 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -53,44 +53,6 @@ void kvm_clr_pmu_events(u32 clr) ctx->pmu_events.events_guest &= ~clr; } -/** - * Disable host events, enable guest events - */ -bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenclr_el0); - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenset_el0); - - return (pmu->events_host || pmu->events_guest); -} - -/** - * Disable guest events, enable host events - */ -void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenclr_el0); - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenset_el0); -} - #define PMEVTYPER_READ_CASE(idx) \ case idx: \ return read_sysreg(pmevtyper##idx##_el0) From 623e1528d4090bd1abaf93ec46f047dee9a6fb32 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 May 2019 18:47:05 +0100 Subject: [PATCH 05/33] KVM: arm/arm64: Move cc/it checks under hyp's Makefile to avoid instrumentation KVM has helpers to handle the condition codes of trapped aarch32 instructions. These are marked __hyp_text and used from HYP, but they aren't built by the 'hyp' Makefile, which has all the runes to avoid ASAN and KCOV instrumentation. Move this code to a new hyp/aarch32.c to avoid a hyp-panic when starting an aarch32 guest on a host built with the ASAN/KCOV debug options. Fixes: 021234ef3752f ("KVM: arm64: Make kvm_condition_valid32() accessible from EL2") Fixes: 8cebe750c4d9a ("arm64: KVM: Make kvm_skip_instr32 available to HYP") Signed-off-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm/kvm/hyp/Makefile | 1 + arch/arm64/kvm/hyp/Makefile | 1 + virt/kvm/arm/aarch32.c | 121 -------------------------------- virt/kvm/arm/hyp/aarch32.c | 136 ++++++++++++++++++++++++++++++++++++ 4 files changed, 138 insertions(+), 121 deletions(-) create mode 100644 virt/kvm/arm/hyp/aarch32.c diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index d2b5ec9c4b92..ba88b1eca93c 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -11,6 +11,7 @@ CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve) obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 82d1904328ad..ea710f674cb6 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,6 +10,7 @@ KVM=../../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 5abbe9b3c652..6880236974b8 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -25,127 +25,6 @@ #include #include -/* - * stolen from arch/arm/kernel/opcodes.c - * - * condition code lookup table - * index into the table is test code: EQ, NE, ... LT, GT, AL, NV - * - * bit position in short is condition code: NZCV - */ -static const unsigned short cc_map[16] = { - 0xF0F0, /* EQ == Z set */ - 0x0F0F, /* NE */ - 0xCCCC, /* CS == C set */ - 0x3333, /* CC */ - 0xFF00, /* MI == N set */ - 0x00FF, /* PL */ - 0xAAAA, /* VS == V set */ - 0x5555, /* VC */ - 0x0C0C, /* HI == C set && Z clear */ - 0xF3F3, /* LS == C clear || Z set */ - 0xAA55, /* GE == (N==V) */ - 0x55AA, /* LT == (N!=V) */ - 0x0A05, /* GT == (!Z && (N==V)) */ - 0xF5FA, /* LE == (Z || (N!=V)) */ - 0xFFFF, /* AL always */ - 0 /* NV */ -}; - -/* - * Check if a trapped instruction should have been executed or not. - */ -bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) -{ - unsigned long cpsr; - u32 cpsr_cond; - int cond; - - /* Top two bits non-zero? Unconditional. */ - if (kvm_vcpu_get_hsr(vcpu) >> 30) - return true; - - /* Is condition field valid? */ - cond = kvm_vcpu_get_condition(vcpu); - if (cond == 0xE) - return true; - - cpsr = *vcpu_cpsr(vcpu); - - if (cond < 0) { - /* This can happen in Thumb mode: examine IT state. */ - unsigned long it; - - it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); - - /* it == 0 => unconditional. */ - if (it == 0) - return true; - - /* The cond for this insn works out as the top 4 bits. */ - cond = (it >> 4); - } - - cpsr_cond = cpsr >> 28; - - if (!((cc_map[cond] >> cpsr_cond) & 1)) - return false; - - return true; -} - -/** - * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block - * @vcpu: The VCPU pointer - * - * When exceptions occur while instructions are executed in Thumb IF-THEN - * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have - * to do this little bit of work manually. The fields map like this: - * - * IT[7:0] -> CPSR[26:25],CPSR[15:10] - */ -static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) -{ - unsigned long itbits, cond; - unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_arm = !(cpsr & PSR_AA32_T_BIT); - - if (is_arm || !(cpsr & PSR_AA32_IT_MASK)) - return; - - cond = (cpsr & 0xe000) >> 13; - itbits = (cpsr & 0x1c00) >> (10 - 2); - itbits |= (cpsr & (0x3 << 25)) >> 25; - - /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ - if ((itbits & 0x7) == 0) - itbits = cond = 0; - else - itbits = (itbits << 1) & 0x1f; - - cpsr &= ~PSR_AA32_IT_MASK; - cpsr |= cond << 13; - cpsr |= (itbits & 0x1c) << (10 - 2); - cpsr |= (itbits & 0x3) << 25; - *vcpu_cpsr(vcpu) = cpsr; -} - -/** - * kvm_skip_instr - skip a trapped instruction and proceed to the next - * @vcpu: The vcpu pointer - */ -void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) -{ - bool is_thumb; - - is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); - if (is_thumb && !is_wide_instr) - *vcpu_pc(vcpu) += 2; - else - *vcpu_pc(vcpu) += 4; - kvm_adjust_itstate(vcpu); -} - /* * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. */ diff --git a/virt/kvm/arm/hyp/aarch32.c b/virt/kvm/arm/hyp/aarch32.c new file mode 100644 index 000000000000..d31f267961e7 --- /dev/null +++ b/virt/kvm/arm/hyp/aarch32.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyp portion of the (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + */ + +#include +#include +#include + +/* + * stolen from arch/arm/kernel/opcodes.c + * + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +/* + * Check if a trapped instruction should have been executed or not. + */ +bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) +{ + unsigned long cpsr; + u32 cpsr_cond; + int cond; + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_hsr(vcpu) >> 30) + return true; + + /* Is condition field valid? */ + cond = kvm_vcpu_get_condition(vcpu); + if (cond == 0xE) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + if (cond < 0) { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. */ + cond = (it >> 4); + } + + cpsr_cond = cpsr >> 28; + + if (!((cc_map[cond] >> cpsr_cond) & 1)) + return false; + + return true; +} + +/** + * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block + * @vcpu: The VCPU pointer + * + * When exceptions occur while instructions are executed in Thumb IF-THEN + * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have + * to do this little bit of work manually. The fields map like this: + * + * IT[7:0] -> CPSR[26:25],CPSR[15:10] + */ +static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) +{ + unsigned long itbits, cond; + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_arm = !(cpsr & PSR_AA32_T_BIT); + + if (is_arm || !(cpsr & PSR_AA32_IT_MASK)) + return; + + cond = (cpsr & 0xe000) >> 13; + itbits = (cpsr & 0x1c00) >> (10 - 2); + itbits |= (cpsr & (0x3 << 25)) >> 25; + + /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ + if ((itbits & 0x7) == 0) + itbits = cond = 0; + else + itbits = (itbits << 1) & 0x1f; + + cpsr &= ~PSR_AA32_IT_MASK; + cpsr |= cond << 13; + cpsr |= (itbits & 0x1c) << (10 - 2); + cpsr |= (itbits & 0x3) << 25; + *vcpu_cpsr(vcpu) = cpsr; +} + +/** + * kvm_skip_instr - skip a trapped instruction and proceed to the next + * @vcpu: The vcpu pointer + */ +void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + bool is_thumb; + + is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); + if (is_thumb && !is_wide_instr) + *vcpu_pc(vcpu) += 2; + else + *vcpu_pc(vcpu) += 4; + kvm_adjust_itstate(vcpu); +} From db80927ea1977a845230a161df643b48fd1e1ea4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 11:55:36 +0200 Subject: [PATCH 06/33] KVM: nVMX: really fix the size checks on KVM_SET_NESTED_STATE The offset for reading the shadow VMCS is sizeof(*kvm_state)+VMCS12_SIZE, so the correct size must be that plus sizeof(*vmcs12). This could lead to KVM reading garbage data from userspace and not reporting an error, but is otherwise not sensitive. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f1a69117ac0f..2fd251ded754 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5427,7 +5427,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, vmcs12->vmcs_link_pointer != -1ull) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); - if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12)) + if (kvm_state->size < sizeof(*kvm_state) + VMCS12_SIZE + sizeof(*vmcs12)) return -EINVAL; if (copy_from_user(shadow_vmcs12, From 21be4ca1ea685ab3ccc2fd18babb89bc477a9e5c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 8 May 2019 11:04:32 -0700 Subject: [PATCH 07/33] KVM: nVMX: Clear nested_run_pending if setting nested state fails VMX's nested_run_pending flag is subtly consumed when stuffing state to enter guest mode, i.e. needs to be set according before KVM knows if setting guest state is successful. If setting guest state fails, clear the flag as a nested run is obviously not pending. Reported-by: Aaron Lewis Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2fd251ded754..6b450839c766 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5423,39 +5423,44 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) return 0; + vmx->nested.nested_run_pending = + !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); + + ret = -EINVAL; if (nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); if (kvm_state->size < sizeof(*kvm_state) + VMCS12_SIZE + sizeof(*vmcs12)) - return -EINVAL; + goto error_guest_mode; if (copy_from_user(shadow_vmcs12, user_kvm_nested_state->data + VMCS12_SIZE, - sizeof(*vmcs12))) - return -EFAULT; + sizeof(*vmcs12))) { + ret = -EFAULT; + goto error_guest_mode; + } if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || !shadow_vmcs12->hdr.shadow_vmcs) - return -EINVAL; + goto error_guest_mode; } if (nested_vmx_check_controls(vcpu, vmcs12) || nested_vmx_check_host_state(vcpu, vmcs12) || nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) - return -EINVAL; + goto error_guest_mode; vmx->nested.dirty_vmcs12 = true; - vmx->nested.nested_run_pending = - !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); - ret = nested_vmx_enter_non_root_mode(vcpu, false); - if (ret) { - vmx->nested.nested_run_pending = 0; - return -EINVAL; - } + if (ret) + goto error_guest_mode; return 0; + +error_guest_mode: + vmx->nested.nested_run_pending = 0; + return ret; } void nested_vmx_vcpu_setup(void) From 2eb06c306a579853346d22eda73332ed4f3e81e3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 17 May 2019 16:49:49 +0800 Subject: [PATCH 08/33] KVM: Fix spinlock taken warning during host resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WARNING: CPU: 0 PID: 13554 at kvm/arch/x86/kvm//../../../virt/kvm/kvm_main.c:4183 kvm_resume+0x3c/0x40 [kvm] CPU: 0 PID: 13554 Comm: step_after_susp Tainted: G OE 5.1.0-rc4+ #1 RIP: 0010:kvm_resume+0x3c/0x40 [kvm] Call Trace: syscore_resume+0x63/0x2d0 suspend_devices_and_enter+0x9d1/0xa40 pm_suspend+0x33a/0x3b0 state_store+0x82/0xf0 kobj_attr_store+0x12/0x20 sysfs_kf_write+0x4b/0x60 kernfs_fop_write+0x120/0x1a0 __vfs_write+0x1b/0x40 vfs_write+0xcd/0x1d0 ksys_write+0x5f/0xe0 __x64_sys_write+0x1a/0x20 do_syscall_64+0x6f/0x6c0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Commit ca84d1a24 (KVM: x86: Add clock sync request to hardware enable) mentioned that "we always hold kvm_lock when hardware_enable is called. The one place that doesn't need to worry about it is resume, as resuming a frozen CPU, the spinlock won't be taken." However, commit 6706dae9 (virt/kvm: Replace spin_is_locked() with lockdep) introduces a bug, it asserts when the lock is not held which is contrary to the original goal. This patch fixes it by WARN_ON when the lock is held. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Paul E. McKenney Signed-off-by: Wanpeng Li Fixes: 6706dae9 ("virt/kvm: Replace spin_is_locked() with lockdep") [Wrap with #ifdef CONFIG_LOCKDEP - Paolo] Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f0d13d9d125d..1fadfb9cf36e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -4181,7 +4182,9 @@ static int kvm_suspend(void) static void kvm_resume(void) { if (kvm_usage_count) { - lockdep_assert_held(&kvm_count_lock); +#ifdef CONFIG_LOCKDEP + WARN_ON(lockdep_is_held(&kvm_count_lock)); +#endif hardware_enable_nolock(NULL); } } From be7fcf1d1701a5266dd36eab4978476f63d1bd57 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 May 2019 13:34:51 +0300 Subject: [PATCH 09/33] KVM: selftests: Fix a condition in test_hv_cpuid() The code is trying to check that all the padding is zeroed out and it does this: entry->padding[0] == entry->padding[1] == entry->padding[2] == 0 Assume everything is zeroed correctly, then the first comparison is true, the next comparison is false and false is equal to zero so the overall condition is true. This bug doesn't affect run time very badly, but the code should instead just check that all three paddings are zero individually. Also the error message was copy and pasted from an earlier error and it wasn't correct. Fixes: 7edcb7343327 ("KVM: selftests: Add hyperv_cpuid test") Signed-off-by: Dan Carpenter Reviewed-by: Vitaly Kuznetsov Reviewed-by: Thomas Huth Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 9a21e912097c..63b9fc3fdfbe 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -58,9 +58,8 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT(entry->flags == 0, ".flags field should be zero"); - TEST_ASSERT(entry->padding[0] == entry->padding[1] - == entry->padding[2] == 0, - ".index field should be zero"); + TEST_ASSERT(!entry->padding[0] && !entry->padding[1] && + !entry->padding[2], "padding should be zero"); /* * If needed for debug: From 3b339e2527a6db257fa8f2a4cab3c510432a98d5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 12:02:16 +0200 Subject: [PATCH 10/33] kvm: selftests: avoid type punning Avoid warnings from -Wstrict-aliasing by using memcpy. Reviewed-by: Thomas Huth Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/ucall.c | 2 +- tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c index a2ab38be2f47..b701a01cfcb6 100644 --- a/tools/testing/selftests/kvm/lib/ucall.c +++ b/tools/testing/selftests/kvm/lib/ucall.c @@ -142,7 +142,7 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) vm_vaddr_t gva; TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, "Unexpected ucall exit mmio address access"); - gva = *(vm_vaddr_t *)run->mmio.data; + memcpy(&gva, run->mmio.data, sizeof(gva)); memcpy(uc, addr_gva2hva(vm, gva), sizeof(*uc)); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index 61a2163cf9f1..9d62e2c7e024 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -75,7 +75,7 @@ void set_revision_id_for_vmcs12(struct kvm_nested_state *state, u32 vmcs12_revision) { /* Set revision_id in vmcs12 to vmcs12_revision. */ - *(u32 *)(state->data) = vmcs12_revision; + memcpy(state->data, &vmcs12_revision, sizeof(u32)); } void set_default_state(struct kvm_nested_state *state) From 319f6f97e3a16c38795168753db8740f77b156c9 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 17 May 2019 11:04:45 +0200 Subject: [PATCH 11/33] KVM: selftests: Compile code with warnings enabled So far the KVM selftests are compiled without any compiler warnings enabled. That's quite bad, since we miss a lot of possible bugs this way. Let's enable at least "-Wall" and some other useful warning flags now, and fix at least the trivial problems in the code (like unused variables). Signed-off-by: Thomas Huth Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 4 +++- tools/testing/selftests/kvm/dirty_log_test.c | 6 +++++- tools/testing/selftests/kvm/lib/kvm_util.c | 3 --- tools/testing/selftests/kvm/lib/x86_64/processor.c | 4 +--- tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 1 + tools/testing/selftests/kvm/x86_64/evmcs_test.c | 7 +------ tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 1 - tools/testing/selftests/kvm/x86_64/platform_info_test.c | 1 - tools/testing/selftests/kvm/x86_64/smm_test.c | 3 +-- tools/testing/selftests/kvm/x86_64/state_test.c | 7 +------ .../selftests/kvm/x86_64/vmx_close_while_nested_test.c | 5 +---- tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 5 ++--- 12 files changed, 16 insertions(+), 31 deletions(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 79c524395ebe..d113eaf2d570 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -34,7 +34,9 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -CFLAGS += -O2 -g -std=gnu99 -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(page_size; @@ -1334,7 +1332,6 @@ void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index dc7fae9fa424..21f3040d90cb 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -229,8 +229,6 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs, void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) { - int rc; - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -549,7 +547,6 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) struct pageDirectoryPointerEntry *pdpe; struct pageDirectoryEntry *pde; struct pageTableEntry *pte; - void *hva; TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -582,6 +579,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) unmapped_gva: TEST_ASSERT(false, "No mapping for vm virtual address, " "gva: 0x%lx", gva); + exit(EXIT_FAILURE); } static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot, diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 7c2c4d4055a8..63cc9c3f5ab6 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -87,6 +87,7 @@ int main(int argc, char *argv[]) while (1) { rc = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 36669684eca5..b38260e29775 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -19,8 +19,6 @@ #define VCPU_ID 5 -static bool have_nested_state; - void l2_guest_code(void) { GUEST_SYNC(6); @@ -73,7 +71,6 @@ void guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - struct vmx_pages *vmx_pages = NULL; vm_vaddr_t vmx_pages_gva = 0; struct kvm_regs regs1, regs2; @@ -88,8 +85,6 @@ int main(int argc, char *argv[]) .args[0] = (unsigned long)&evmcs_ver }; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); @@ -113,7 +108,7 @@ int main(int argc, char *argv[]) vcpu_regs_get(vm, VCPU_ID, ®s1); - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); for (stage = 1;; stage++) { diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 63b9fc3fdfbe..071fe32dc84b 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -89,7 +89,6 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm) { int nent = 20; /* should be enough */ static struct kvm_cpuid2 *cpuid; - int ret; cpuid = malloc(sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2)); diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index eb3e7a838cb4..40050e44ec0a 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -81,7 +81,6 @@ static void test_msr_platform_info_disabled(struct kvm_vm *vm) int main(int argc, char *argv[]) { struct kvm_vm *vm; - struct kvm_run *state; int rv; uint64_t msr_platform_info; diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index fb8086964d83..4daf520bada1 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -87,7 +87,6 @@ void guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - struct vmx_pages *vmx_pages = NULL; vm_vaddr_t vmx_pages_gva = 0; struct kvm_regs regs; @@ -115,7 +114,7 @@ int main(int argc, char *argv[]) vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA); if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); } else { printf("will skip SMM test with VMX enabled\n"); diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index e0a3c0204b7c..2a4121f4de01 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -22,8 +22,6 @@ #define VCPU_ID 5 -static bool have_nested_state; - void l2_guest_code(void) { GUEST_SYNC(6); @@ -122,7 +120,6 @@ void guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - struct vmx_pages *vmx_pages = NULL; vm_vaddr_t vmx_pages_gva = 0; struct kvm_regs regs1, regs2; @@ -132,8 +129,6 @@ int main(int argc, char *argv[]) struct ucall uc; int stage; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); @@ -142,7 +137,7 @@ int main(int argc, char *argv[]) vcpu_regs_get(vm, VCPU_ID, ®s1); if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); } else { printf("will skip nested state checks\n"); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index 6edec6fd790b..97182b47b10c 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -39,8 +39,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; - uintptr_t save_cr3; GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); GUEST_ASSERT(load_vmcs(vmx_pages)); @@ -55,7 +53,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - struct vmx_pages *vmx_pages; vm_vaddr_t vmx_pages_gva; struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); @@ -68,7 +65,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); for (;;) { diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index 18fa64db0d7a..6d37a3173956 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -121,7 +121,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } -void report(int64_t val) +static void report(int64_t val) { printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); @@ -129,7 +129,6 @@ void report(int64_t val) int main(int argc, char *argv[]) { - struct vmx_pages *vmx_pages; vm_vaddr_t vmx_pages_gva; struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); @@ -142,7 +141,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); for (;;) { From 32a243df82c8dc04ccba7fc6c6564ae9261cb738 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 27 Mar 2019 13:15:36 -0700 Subject: [PATCH 12/33] kvm: x86: Include multiple indices with CPUID leaf 0x8000001d Per the APM, "CPUID Fn8000_001D_E[D,C,B,A]X reports cache topology information for the cache enumerated by the value passed to the instruction in ECX, referred to as Cache n in the following description. To gather information for all cache levels, software must repeatedly execute CPUID with 8000_001Dh in EAX and ECX set to increasing values beginning with 0 until a value of 00h is returned in the field CacheType (EAX[4:0]) indicating no more cache descriptions are available for this processor." The termination condition is the same as leaf 4, so we can reuse that code block for leaf 0x8000001d. Fixes: 8765d75329a38 ("KVM: X86: Extend CPUID range to include new leaf") Cc: Brijesh Singh Cc: Borislav Petkov Signed-off-by: Jim Mattson Reviewed-by: Marc Orr Reviewed-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 80a642a0143d..3c96ce8fbb96 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -456,8 +456,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } break; } - /* function 4 has additional index. */ - case 4: { + /* functions 4 and 0x8000001d have additional index. */ + case 4: + case 0x8000001d: { int i, cache_type; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; @@ -702,8 +703,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, break; case 0x8000001a: break; - case 0x8000001d: - break; /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: /*Just support up to 0xC0000004 now*/ From 382409b4c43e5b44ae4a869ff793d3cf01d12004 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 27 Mar 2019 13:15:37 -0700 Subject: [PATCH 13/33] kvm: x86: Include CPUID leaf 0x8000001e in kvm's supported CPUID Kvm now supports extended CPUID functions through 0x8000001f. CPUID leaf 0x8000001e is AMD's Processor Topology Information leaf. This contains similar information to CPUID leaf 0xb (Intel's Extended Topology Enumeration leaf), and should be included in the output of KVM_GET_SUPPORTED_CPUID, even though userspace is likely to override some of this information based upon the configuration of the particular VM. Cc: Brijesh Singh Cc: Borislav Petkov Fixes: 8765d75329a38 ("KVM: X86: Extend CPUID range to include new leaf") Signed-off-by: Jim Mattson Reviewed-by: Marc Orr Reviewed-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 3c96ce8fbb96..e18a9f9f65b5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -702,6 +702,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx = entry->edx = 0; break; case 0x8000001a: + case 0x8000001e: break; /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: From d30b214d1d0addb7b2c9c78178d1501cd39a01fb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 12:06:36 +0200 Subject: [PATCH 14/33] kvm: fix compilation on s390 s390 does not have memremap, even though in this particular case it would be useful. Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1fadfb9cf36e..134ec0283a8a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1761,8 +1761,10 @@ static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, if (pfn_valid(pfn)) { page = pfn_to_page(pfn); hva = kmap(page); +#ifdef CONFIG_HAS_IOMEM } else { hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB); +#endif } if (!hva) From 541e886f7972cc647804dbb4909189e67987a945 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 17 May 2019 16:49:50 +0800 Subject: [PATCH 15/33] KVM: nVMX: Fix using __this_cpu_read() in preemptible context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BUG: using __this_cpu_read() in preemptible [00000000] code: qemu-system-x86/4590 caller is nested_vmx_enter_non_root_mode+0xebd/0x1790 [kvm_intel] CPU: 4 PID: 4590 Comm: qemu-system-x86 Tainted: G OE 5.1.0-rc4+ #1 Call Trace: dump_stack+0x67/0x95 __this_cpu_preempt_check+0xd2/0xe0 nested_vmx_enter_non_root_mode+0xebd/0x1790 [kvm_intel] nested_vmx_run+0xda/0x2b0 [kvm_intel] handle_vmlaunch+0x13/0x20 [kvm_intel] vmx_handle_exit+0xbd/0x660 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xa2c/0x1e50 [kvm] kvm_vcpu_ioctl+0x3ad/0x6d0 [kvm] do_vfs_ioctl+0xa5/0x6e0 ksys_ioctl+0x6d/0x80 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x6f/0x6c0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Accessing per-cpu variable should disable preemption, this patch extends the preemption disable region for __this_cpu_read(). Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Fixes: 52017608da33 ("KVM: nVMX: add option to perform early consistency checks via H/W") Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 6b450839c766..1032f068f0b9 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2784,14 +2784,13 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) : "cc", "memory" ); - preempt_enable(); - if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); if (vmx->msr_autoload.guest.nr) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); if (vm_fail) { + preempt_enable(); WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; @@ -2803,6 +2802,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) local_irq_enable(); if (hw_breakpoint_active()) set_debugreg(__this_cpu_read(cpu_dr7), 7); + preempt_enable(); /* * A non-failing VMEntry means we somehow entered guest mode with From 4d259965655c92053f3255aca14d81aab1e21219 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 20 May 2019 12:27:47 +0800 Subject: [PATCH 16/33] kvm: vmx: Fix -Wmissing-prototypes warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We get a warning when build kernel W=1: arch/x86/kvm/vmx/vmx.c:6365:6: warning: no previous prototype for ‘vmx_update_host_rsp’ [-Wmissing-prototypes] void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) Add the missing declaration to fix this. Signed-off-by: Yi Wang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 63d37ccce3dc..61128b48c503 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -319,6 +319,7 @@ void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct vcpu_vmx *vmx); +void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); #define POSTED_INTR_ON 0 #define POSTED_INTR_SN 1 From 0e6edceb8f18a4e31526d83e6099fef1f29c3af5 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 May 2019 16:18:06 +0800 Subject: [PATCH 17/33] KVM: LAPIC: Fix lapic_timer_advance_ns parameter overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit c3941d9e0 (KVM: lapic: Allow user to disable adaptive tuning of timer advancement), '-1' enables adaptive tuning starting from default advancment of 1000ns. However, we should expose an int instead of an overflow uint module parameter. Before patch: /sys/module/kvm/parameters/lapic_timer_advance_ns:4294967295 After patch: /sys/module/kvm/parameters/lapic_timer_advance_ns:-1 Fixes: c3941d9e0 (KVM: lapic: Allow user to disable adaptive tuning of timer advancement) Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Sean Christopherson Cc: Liran Alon Reviewed-by: Sean Christopherson Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 536b78c4af6e..e7e57de50a3c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -143,7 +143,7 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); * tuning, i.e. allows priveleged userspace to set an exact advancement time. */ static int __read_mostly lapic_timer_advance_ns = -1; -module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); +module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR); static bool __read_mostly vector_hashing = true; module_param(vector_hashing, bool, S_IRUGO); From 16ba3ab4e15ca031270deb3cbfd38416c719e291 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 May 2019 16:18:07 +0800 Subject: [PATCH 18/33] KVM: LAPIC: Expose per-vCPU timer_advance_ns to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose per-vCPU timer_advance_ns to userspace, so it is able to query the auto-adjusted value. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Sean Christopherson Cc: Liran Alon Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/debugfs.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index c19c7ede9bd6..a2f3432ce090 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -9,12 +9,22 @@ */ #include #include +#include "lapic.h" bool kvm_arch_has_vcpu_debugfs(void) { return true; } +static int vcpu_get_timer_advance_ns(void *data, u64 *val) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; + *val = vcpu->arch.apic->lapic_timer.timer_advance_ns; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(vcpu_timer_advance_ns_fops, vcpu_get_timer_advance_ns, NULL, "%llu\n"); + static int vcpu_get_tsc_offset(void *data, u64 *val) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; @@ -51,6 +61,14 @@ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) if (!ret) return -ENOMEM; + if (lapic_in_kernel(vcpu)) { + ret = debugfs_create_file("lapic_timer_advance_ns", 0444, + vcpu->debugfs_dentry, + vcpu, &vcpu_timer_advance_ns_fops); + if (!ret) + return -ENOMEM; + } + if (kvm_has_tsc_control) { ret = debugfs_create_file("tsc-scaling-ratio", 0444, vcpu->debugfs_dentry, From 12e9612cae0c272e0dabfc570a6d855f07361914 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 20 May 2019 12:55:11 +0200 Subject: [PATCH 19/33] KVM: selftests: Remove duplicated TEST_ASSERT in hyperv_cpuid.c The check for entry->index == 0 is done twice. One time should be sufficient. Suggested-by: Vitaly Kuznetsov Signed-off-by: Thomas Huth Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 071fe32dc84b..f72b3043db0e 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -52,9 +52,6 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT(entry->index == 0, ".index field should be zero"); - TEST_ASSERT(entry->index == 0, - ".index field should be zero"); - TEST_ASSERT(entry->flags == 0, ".flags field should be zero"); From 204c91eff798a78498cc7cbf1bc76892badfa96d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 13:31:02 +0200 Subject: [PATCH 20/33] KVM: selftests: do not blindly clobber registers in guest asm The guest_code of sync_regs_test is assuming that the compiler will not touch %r11 outside the asm that increments it, which is a bit brittle. Instead, we can increment a variable and use a dummy asm to ensure the increment is not optimized away. However, we also need to use a callee-save register or the compiler will insert a save/restore around the vmexit, breaking the whole idea behind the test. (Yes, "if it ain't broken...", but I would like the test to be clean before it is copied into the upcoming s390 selftests). Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/sync_regs_test.c | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index c8478ce9ea77..25cacd3316f6 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -25,9 +25,15 @@ void guest_code(void) { + /* + * use a callee-save register, otherwise the compiler + * saves it around the call to GUEST_SYNC. + */ + register u32 stage asm("rbx"); for (;;) { GUEST_SYNC(0); - asm volatile ("inc %r11"); + stage++; + asm volatile ("" : : "r" (stage)); } } @@ -147,7 +153,7 @@ int main(int argc, char *argv[]) compare_vcpu_events(&events, &run->s.regs.events); /* Set and verify various register values. */ - run->s.regs.regs.r11 = 0xBAD1DEA; + run->s.regs.regs.rbx = 0xBAD1DEA; run->s.regs.sregs.apic_base = 1 << 11; /* TODO run->s.regs.events.XYZ = ABC; */ @@ -158,9 +164,9 @@ int main(int argc, char *argv[]) "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); + TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11, "apic_base sync regs value incorrect 0x%llx.", run->s.regs.sregs.apic_base); @@ -179,15 +185,15 @@ int main(int argc, char *argv[]) */ run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = 0; - run->s.regs.regs.r11 = 0xDEADBEEF; + run->s.regs.regs.rbx = 0xDEADBEEF; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); + TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); /* Clear kvm_valid_regs bits and kvm_dirty_bits. * Verify s.regs values are not overwritten with existing guest values @@ -195,21 +201,21 @@ int main(int argc, char *argv[]) */ run->kvm_valid_regs = 0; run->kvm_dirty_regs = 0; - run->s.regs.regs.r11 = 0xAAAA; - regs.r11 = 0xBAC0; + run->s.regs.regs.rbx = 0xAAAA; + regs.rbx = 0xBAC0; vcpu_regs_set(vm, VCPU_ID, ®s); rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); + TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); vcpu_regs_get(vm, VCPU_ID, ®s); - TEST_ASSERT(regs.r11 == 0xBAC0 + 1, - "r11 guest value incorrect 0x%llx.", - regs.r11); + TEST_ASSERT(regs.rbx == 0xBAC0 + 1, + "rbx guest value incorrect 0x%llx.", + regs.rbx); /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten * with existing guest values but that guest values are overwritten @@ -217,19 +223,19 @@ int main(int argc, char *argv[]) */ run->kvm_valid_regs = 0; run->kvm_dirty_regs = TEST_SYNC_FIELDS; - run->s.regs.regs.r11 = 0xBBBB; + run->s.regs.regs.rbx = 0xBBBB; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); + TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); vcpu_regs_get(vm, VCPU_ID, ®s); - TEST_ASSERT(regs.r11 == 0xBBBB + 1, - "r11 guest value incorrect 0x%llx.", - regs.r11); + TEST_ASSERT(regs.rbx == 0xBBBB + 1, + "rbx guest value incorrect 0x%llx.", + regs.rbx); kvm_vm_free(vm); From c9bcd3e3335d0a29d89fabd2c385e1b989e6f1b0 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 14 May 2019 15:49:52 +0000 Subject: [PATCH 21/33] kvm: svm/avic: fix off-by-one in checking host APIC ID Current logic does not allow VCPU to be loaded onto CPU with APIC ID 255. This should be allowed since the host physical APIC ID field in the AVIC Physical APIC table entry is an 8-bit value, and APIC ID 255 is valid in system with x2APIC enabled. Instead, do not allow VCPU load if the host APIC ID cannot be represented by an 8-bit value. Also, use the more appropriate AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK instead of AVIC_MAX_PHYSICAL_ID_COUNT. Signed-off-by: Suravee Suthikulpanit Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a849dcb7fbc5..a9e553a1317f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2024,7 +2024,11 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (!kvm_vcpu_apicv_active(vcpu)) return; - if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT)) + /* + * Since the host physical APIC id is 8 bits, + * we can support host APIC ID upto 255. + */ + if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) return; entry = READ_ONCE(*(svm->avic_physical_id_cache)); From 654f1f13ea56b92bacade8ce2725aea0457f91c0 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Sun, 5 May 2019 16:56:42 +0800 Subject: [PATCH 22/33] kvm: Check irqchip mode before assign irqfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When assigning kvm irqfd we didn't check the irqchip mode but we allow KVM_IRQFD to succeed with all the irqchip modes. However it does not make much sense to create irqfd even without the kernel chips. Let's provide a arch-dependent helper to check whether a specific irqfd is allowed by the arch. At least for x86, it should make sense to check: - when irqchip mode is NONE, all irqfds should be disallowed, and, - when irqchip mode is SPLIT, irqfds that are with resamplefd should be disallowed. For either of the case, previously we'll silently ignore the irq or the irq ack event if the irqchip mode is incorrect. However that can cause misterious guest behaviors and it can be hard to triage. Let's fail KVM_IRQFD even earlier to detect these incorrect configurations. CC: Paolo Bonzini CC: Radim Krčmář CC: Alex Williamson CC: Eduardo Habkost Signed-off-by: Peter Xu Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq.c | 7 +++++++ arch/x86/kvm/irq.h | 1 + virt/kvm/eventfd.c | 9 +++++++++ 3 files changed, 17 insertions(+) diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index faa264822cee..007bc654f928 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -172,3 +172,10 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu) __kvm_migrate_apic_timer(vcpu); __kvm_migrate_pit_timer(vcpu); } + +bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args) +{ + bool resample = args->flags & KVM_IRQFD_FLAG_RESAMPLE; + + return resample ? irqchip_kernel(kvm) : irqchip_in_kernel(kvm); +} diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index d5005cc26521..fd210cdd4983 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -114,6 +114,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm) return mode != KVM_IRQCHIP_NONE; } +bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 001aeda4c154..3972a9564c76 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -44,6 +44,12 @@ static struct workqueue_struct *irqfd_cleanup_wq; +bool __attribute__((weak)) +kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args) +{ + return true; +} + static void irqfd_inject(struct work_struct *work) { @@ -297,6 +303,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) if (!kvm_arch_intc_initialized(kvm)) return -EAGAIN; + if (!kvm_arch_irqfd_allowed(kvm, args)) + return -EINVAL; + irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT); if (!irqfd) return -ENOMEM; From 6f2f84532c153d32a5e53b6083b06a3e24368d30 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 15:34:35 +0200 Subject: [PATCH 23/33] KVM: x86: do not spam dmesg with VMCS/VMCB dumps Userspace can easily set up invalid processor state in such a way that dmesg will be filled with VMCS or VMCB dumps. Disable this by default using a module parameter. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 9 ++++++++- arch/x86/kvm/vmx/vmx.c | 26 +++++++++++++++++++------- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a9e553a1317f..735b8c01895e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -379,6 +379,9 @@ module_param(vgif, int, 0444); static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); +static bool __read_mostly dump_invalid_vmcb = 0; +module_param(dump_invalid_vmcb, bool, 0644); + static u8 rsm_ins_bytes[] = "\x0f\xaa"; static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); @@ -4828,6 +4831,11 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; + if (!dump_invalid_vmcb) { + pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n"); + return; + } + pr_err("VMCB Control Area:\n"); pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); @@ -4986,7 +4994,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason = svm->vmcb->control.exit_code; - pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); dump_vmcb(vcpu); return 0; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1ac167614032..b93e36ddee5e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -114,6 +114,9 @@ static u64 __read_mostly host_xss; bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +static bool __read_mostly dump_invalid_vmcs = 0; +module_param(dump_invalid_vmcs, bool, 0644); + #define MSR_BITMAP_MODE_X2APIC 1 #define MSR_BITMAP_MODE_X2APIC_APICV 2 @@ -5607,15 +5610,24 @@ static void vmx_dump_dtsel(char *name, uint32_t limit) void dump_vmcs(void) { - u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); - u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); - u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); - u32 secondary_exec_control = 0; - unsigned long cr4 = vmcs_readl(GUEST_CR4); - u64 efer = vmcs_read64(GUEST_IA32_EFER); + u32 vmentry_ctl, vmexit_ctl; + u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; + unsigned long cr4; + u64 efer; int i, n; + if (!dump_invalid_vmcs) { + pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n"); + return; + } + + vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); + vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); + cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); + cr4 = vmcs_readl(GUEST_CR4); + efer = vmcs_read64(GUEST_IA32_EFER); + secondary_exec_control = 0; if (cpu_has_secondary_exec_ctrls()) secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); From a80c4ec10ed9632c44c829452dc40a0443ff4e85 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 8 May 2019 19:02:48 +0200 Subject: [PATCH 24/33] x86/kvm/pmu: Set AMD's virt PMU version to 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit: 672ff6cff80c ("KVM: x86: Raise #GP when guest vCPU do not support PMU") my AMD guests started #GPing like this: general protection fault: 0000 [#1] PREEMPT SMP CPU: 1 PID: 4355 Comm: bash Not tainted 5.1.0-rc6+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 RIP: 0010:x86_perf_event_update+0x3b/0xa0 with Code: pointing to RDPMC. It is RDPMC because the guest has the hardware watchdog CONFIG_HARDLOCKUP_DETECTOR_PERF enabled which uses perf. Instrumenting kvm_pmu_rdpmc() some, showed that it fails due to: if (!pmu->version) return 1; which the above commit added. Since AMD's PMU leaves the version at 0, that causes the #GP injection into the guest. Set pmu->version arbitrarily to 1 and move it above the non-applicable struct kvm_pmu members. Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Janakarajan Natarajan Cc: kvm@vger.kernel.org Cc: Liran Alon Cc: Mihai Carabas Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: "Radim Krčmář" Cc: Thomas Gleixner Cc: Tom Lendacky Cc: x86@kernel.org Cc: stable@vger.kernel.org Fixes: 672ff6cff80c ("KVM: x86: Raise #GP when guest vCPU do not support PMU") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 1495a735b38e..50fa9450fcf1 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -269,10 +269,10 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; pmu->reserved_bits = 0xffffffff00200000ull; + pmu->version = 1; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->nr_arch_fixed_counters = 0; - pmu->version = 0; pmu->global_status = 0; } From 0e6f467ee28ec97f68c7b74e35ec1601bb1368a7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 17:20:40 +0200 Subject: [PATCH 25/33] KVM: x86/pmu: mask the result of rdpmc according to the width of the counters This patch will simplify the changes in the next, by enforcing the masking of the counters to RDPMC and RDMSR. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.c | 10 +++------- arch/x86/kvm/pmu.h | 3 ++- arch/x86/kvm/pmu_amd.c | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 13 +++++++++---- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index e39741997893..dd745b58ffd8 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -283,7 +283,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) bool fast_mode = idx & (1u << 31); struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; - u64 ctr_val; + u64 mask = fast_mode ? ~0u : ~0ull; if (!pmu->version) return 1; @@ -291,15 +291,11 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) if (is_vmware_backdoor_pmc(idx)) return kvm_pmu_rdpmc_vmware(vcpu, idx, data); - pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx); + pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask); if (!pmc) return 1; - ctr_val = pmc_read_counter(pmc); - if (fast_mode) - ctr_val = (u32)ctr_val; - - *data = ctr_val; + *data = pmc_read_counter(pmc) & mask; return 0; } diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index ba8898e1a854..22dff661145a 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -25,7 +25,8 @@ struct kvm_pmu_ops { unsigned (*find_fixed_event)(int idx); bool (*pmc_is_enabled)(struct kvm_pmc *pmc); struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); - struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx); + struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx, + u64 *mask); int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx); bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 50fa9450fcf1..d3118088f1cd 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -186,7 +186,7 @@ static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) } /* idx is the ECX register of RDPMC instruction */ -static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx) +static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *counters; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index f8502c376b37..b6f5157445fe 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -126,7 +126,7 @@ static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) } static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, - unsigned idx) + unsigned idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); @@ -138,6 +138,7 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, if (fixed && idx >= pmu->nr_arch_fixed_counters) return NULL; counters = fixed ? pmu->fixed_counters : pmu->gp_counters; + *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP]; return &counters[idx]; } @@ -183,9 +184,13 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) *data = pmu->global_ovf_ctrl; return 0; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || - (pmc = get_fixed_pmc(pmu, msr))) { - *data = pmc_read_counter(pmc); + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { + u64 val = pmc_read_counter(pmc); + *data = val & pmu->counter_bitmask[KVM_PMC_GP]; + return 0; + } else if ((pmc = get_fixed_pmc(pmu, msr))) { + u64 val = pmc_read_counter(pmc); + *data = val & pmu->counter_bitmask[KVM_PMC_FIXED]; return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { *data = pmc->eventsel; From 2924b52117b2812e9633d5ea337333299166d373 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 May 2019 17:34:30 +0200 Subject: [PATCH 26/33] KVM: x86/pmu: do not mask the value that is written to fixed PMUs According to the SDM, for MSR_IA32_PERFCTR0/1 "the lower-order 32 bits of each MSR may be written with any value, and the high-order 8 bits are sign-extended according to the value of bit 31", but the fixed counters in real hardware are limited to the width of the fixed counters ("bits beyond the width of the fixed-function counter are reserved and must be written as zeros"). Fix KVM to do the same. Reported-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/pmu_intel.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index b6f5157445fe..a99613a060dd 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -240,11 +240,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || - (pmc = get_fixed_pmc(pmu, msr))) { - if (!msr_info->host_initiated) - data = (s64)(s32)data; - pmc->counter += data - pmc_read_counter(pmc); + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { + if (msr_info->host_initiated) + pmc->counter = data; + else + pmc->counter = (s32)data; + return 0; + } else if ((pmc = get_fixed_pmc(pmu, msr))) { + pmc->counter = data; return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { if (data == pmc->eventsel) From 19ec166c3f39fe1d3789888a74cc95544ac266d4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 24 May 2019 16:06:23 +0200 Subject: [PATCH 27/33] KVM: s390: fix memory slot handling for KVM_SET_USER_MEMORY_REGION kselftests exposed a problem in the s390 handling for memory slots. Right now we only do proper memory slot handling for creation of new memory slots. Neither MOVE, nor DELETION are handled properly. Let us implement those. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- arch/s390/kvm/kvm-s390.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ac6163c334d6..e5e8eb29e68e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4524,21 +4524,28 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { - int rc; + int rc = 0; - /* If the basics of the memslot do not change, we do not want - * to update the gmap. Every update causes several unnecessary - * segment translation exceptions. This is usually handled just - * fine by the normal fault handler + gmap, but it will also - * cause faults on the prefix page of running guest CPUs. - */ - if (old->userspace_addr == mem->userspace_addr && - old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && - old->npages * PAGE_SIZE == mem->memory_size) - return; - - rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, - mem->guest_phys_addr, mem->memory_size); + switch (change) { + case KVM_MR_DELETE: + rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, + old->npages * PAGE_SIZE); + break; + case KVM_MR_MOVE: + rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, + old->npages * PAGE_SIZE); + if (rc) + break; + /* FALLTHROUGH */ + case KVM_MR_CREATE: + rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, + mem->guest_phys_addr, mem->memory_size); + break; + case KVM_MR_FLAGS_ONLY: + break; + default: + WARN(1, "Unknown KVM MR CHANGE: %d\n", change); + } if (rc) pr_warn("failed to commit memory region\n"); return; From bffed38d4fb536c5d5d6c37846a7fb8fde1452fa Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 23 May 2019 11:34:05 +0200 Subject: [PATCH 28/33] kvm: selftests: aarch64: dirty_log_test: fix unaligned memslot size The memory slot size must be aligned to the host's page size. When testing a guest with a 4k page size on a host with a 64k page size, then 3 guest pages are not host page size aligned. Since we just need a nearly arbitrary number of extra pages to ensure the memslot is not aligned to a 64 host-page boundary for this test, then we can use 16, as that's 64k aligned, but not 64 * 64k aligned. Fixes: 76d58e0f07ec ("KVM: fix KVM_CLEAR_DIRTY_LOG for memory slots of unaligned size", 2019-04-17) Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 4a2bdaf616fb..fc27f890155b 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -293,7 +293,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * A little more than 1G of guest page sized pages. Cover the * case where the size is not aligned to 64 pages. */ - guest_num_pages = (1ul << (30 - guest_page_shift)) + 3; + guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; host_page_size = getpagesize(); host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); From 55eda003f02f075bab0223a188e548dbf3ac8dfe Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 23 May 2019 13:05:46 +0200 Subject: [PATCH 29/33] kvm: selftests: aarch64: fix default vm mode VM_MODE_P52V48_4K is not a valid mode for AArch64. Replace its use in vm_create_default() with a mode that works and represents a good AArch64 default. (We didn't ever see a problem with this because we don't have any unit tests using vm_create_default(), but it's good to get it fixed in advance.) Reported-by: Thomas Huth Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/aarch64/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index e8c42506a09d..fa6cd340137c 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -226,7 +226,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2; struct kvm_vm *vm; - vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = vm_create(VM_MODE_P40V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); vm_vcpu_add_default(vm, vcpuid, guest_code); From 98e683443ba298685205c0c5157df49e42c1c0b1 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 23 May 2019 12:16:34 +0200 Subject: [PATCH 30/33] kvm: selftests: aarch64: compile with warnings on aarch64 fixups needed to compile with warnings as errors. Reviewed-by: Thomas Huth Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/aarch64/processor.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index fa6cd340137c..19e667911496 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -7,6 +7,8 @@ #define _GNU_SOURCE /* for program_invocation_name */ +#include + #include "kvm_util.h" #include "../kvm_util_internal.h" #include "processor.h" @@ -67,15 +69,13 @@ static uint64_t ptrs_per_pgd(struct kvm_vm *vm) return 1 << (vm->va_bits - shift); } -static uint64_t ptrs_per_pte(struct kvm_vm *vm) +static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) { return 1 << (vm->page_shift - 3); } void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) { - int rc; - if (!vm->pgd_created) { vm_paddr_t paddr = vm_phy_pages_alloc(vm, page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size, @@ -181,6 +181,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) unmapped_gva: TEST_ASSERT(false, "No mapping for vm virtual address, " "gva: 0x%lx", gva); + exit(1); } static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) @@ -312,6 +313,6 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate); get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc); - fprintf(stream, "%*spstate: 0x%.16llx pc: 0x%.16llx\n", + fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n", indent, "", pstate, pc); } From c795720629ae1bfcbaef7a934a4cc1ce8c2e2834 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 23 May 2019 11:31:14 +0200 Subject: [PATCH 31/33] KVM: selftests: Wrap vcpu_nested_state_get/set functions with x86 guard struct kvm_nested_state is only available on x86 so far. To be able to compile the code on other architectures as well, we need to wrap the related code with #ifdefs. Signed-off-by: Thomas Huth Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 2 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 8c6b9619797d..a5a4b28f14d8 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -118,10 +118,12 @@ void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); +#ifdef __x86_64__ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_nested_state *state); int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_nested_state *state, bool ignore_error); +#endif const char *exit_reason_str(unsigned int exit_reason); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index cf62de377310..633b22df46a4 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1248,6 +1248,7 @@ void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, ret, errno); } +#ifdef __x86_64__ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_nested_state *state) { @@ -1279,6 +1280,7 @@ int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, return ret; } +#endif /* * VM VCPU System Regs Get From 883d25e70b2f699fed9017e509d1ef8e36229b89 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Sun, 21 Apr 2019 15:26:24 +0200 Subject: [PATCH 32/33] tools/kvm_stat: fix fields filter for child events The fields filter would not work with child fields, as the respective parents would not be included. No parents displayed == no childs displayed. To reproduce, run on s390 (would work on other platforms, too, but would require a different filter name): - Run 'kvm_stat -d' - Press 'f' - Enter 'instruct' Notice that events like instruction_diag_44 or instruction_diag_500 are not displayed - the output remains empty. With this patch, we will filter by matching events and their parents. However, consider the following example where we filter by instruction_diag_44: kvm statistics - summary regex filter: instruction_diag_44 Event Total %Total CurAvg/s exit_instruction 276 100.0 12 instruction_diag_44 256 92.8 11 Total 276 12 Note that the parent ('exit_instruction') displays the total events, but the childs listed do not match its total (256 instead of 276). This is intended (since we're filtering all but one child), but might be confusing on first sight. Signed-off-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 16 ++++++++++++---- tools/kvm/kvm_stat/kvm_stat.txt | 2 ++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 2ed395b817cb..bc508dae286c 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -575,8 +575,12 @@ class TracepointProvider(Provider): def update_fields(self, fields_filter): """Refresh fields, applying fields_filter""" self.fields = [field for field in self._get_available_fields() - if self.is_field_wanted(fields_filter, field) or - ARCH.tracepoint_is_child(field)] + if self.is_field_wanted(fields_filter, field)] + # add parents for child fields - otherwise we won't see any output! + for field in self._fields: + parent = ARCH.tracepoint_is_child(field) + if (parent and parent not in self._fields): + self.fields.append(parent) @staticmethod def _get_online_cpus(): @@ -735,8 +739,12 @@ class DebugfsProvider(Provider): def update_fields(self, fields_filter): """Refresh fields, applying fields_filter""" self._fields = [field for field in self._get_available_fields() - if self.is_field_wanted(fields_filter, field) or - ARCH.debugfs_is_child(field)] + if self.is_field_wanted(fields_filter, field)] + # add parents for child fields - otherwise we won't see any output! + for field in self._fields: + parent = ARCH.debugfs_is_child(field) + if (parent and parent not in self._fields): + self.fields.append(parent) @property def fields(self): diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index 0811d860fe75..c057ba52364e 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -34,6 +34,8 @@ INTERACTIVE COMMANDS *c*:: clear filter *f*:: filter by regular expression + :: *Note*: Child events pull in their parents, and parents' stats summarize + all child events, not just the filtered ones *g*:: filter by guest name/PID From 66f61c92889ff3ca365161fb29dd36d6354682ba Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 24 May 2019 21:52:46 +0200 Subject: [PATCH 33/33] KVM: x86: fix return value for reserved EFER Commit 11988499e62b ("KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes", 2019-04-02) introduced a "return false" in a function returning int, and anyway set_efer has a "nonzero on error" conventon so it should be returning 1. Reported-by: Pavel Machek Fixes: 11988499e62b ("KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes") Cc: Sean Christopherson Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e7e57de50a3c..acb179f78fdc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1298,7 +1298,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u64 efer = msr_info->data; if (efer & efer_reserved_bits) - return false; + return 1; if (!msr_info->host_initiated) { if (!__kvm_valid_efer(vcpu, efer))