From f4e23cf9477452640d2522f3286db5df6fcfe45a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 3 Oct 2019 18:02:08 +0100 Subject: [PATCH 1/4] KVM: arm64: pmu: Fix cycle counter truncation When a counter is disabled, its value is sampled before the event is being disabled, and the value written back in the shadow register. In that process, the value gets truncated to 32bit, which is adequate for any counter but the cycle counter (defined as a 64bit counter). This obviously results in a corrupted counter, and things like "perf record -e cycles" not working at all when run in a guest... A similar, but less critical bug exists in kvm_pmu_get_counter_value. Make the truncation conditional on the counter not being the cycle counter, which results in a minor code reorganisation. Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters") Reviewed-by: Andrew Murray Reported-by: Julien Thierry Signed-off-by: Marc Zyngier --- virt/kvm/arm/pmu.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 362a01886bab..c30c3a74fc7f 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -146,8 +146,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) if (kvm_pmu_pmc_is_chained(pmc) && kvm_pmu_idx_is_high_counter(select_idx)) counter = upper_32_bits(counter); - - else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) + else if (select_idx != ARMV8_PMU_CYCLE_IDX) counter = lower_32_bits(counter); return counter; @@ -193,7 +192,7 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) */ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) { - u64 counter, reg; + u64 counter, reg, val; pmc = kvm_pmu_get_canonical_pmc(pmc); if (!pmc->perf_event) @@ -201,16 +200,19 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); - if (kvm_pmu_pmc_is_chained(pmc)) { - reg = PMEVCNTR0_EL0 + pmc->idx; - __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter); - __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); + if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { + reg = PMCCNTR_EL0; + val = counter; } else { - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter); + reg = PMEVCNTR0_EL0 + pmc->idx; + val = lower_32_bits(counter); } + __vcpu_sys_reg(vcpu, reg) = val; + + if (kvm_pmu_pmc_is_chained(pmc)) + __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); + kvm_pmu_release_perf_event(pmc); } From 6f16371453476fd094760ea3d6f00144e9ae3057 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 4 Oct 2019 11:03:09 +0100 Subject: [PATCH 2/4] arm64: KVM: Handle PMCR_EL0.LC as RES1 on pure AArch64 systems Of PMCR_EL0.LC, the ARMv8 ARM says: "In an AArch64 only implementation, this field is RES 1." So be it. Fixes: ab9468340d2bc ("arm64: KVM: Add access handler for PMCR register") Reviewed-by: Andrew Murray Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2071260a275b..46822afc57e0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -632,6 +632,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); + if (!system_supports_32bit_el0()) + val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, r->reg) = val; } @@ -682,6 +684,8 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; + if (!system_supports_32bit_el0()) + val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); kvm_vcpu_pmu_restore_guest(vcpu); From 725ce66979fb6da5c1aec5b064d0871bedc23bf7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Oct 2019 15:09:55 +0100 Subject: [PATCH 3/4] KVM: arm64: pmu: Set the CHAINED attribute before creating the in-kernel event The current convention for KVM to request a chained event from the host PMU is to set bit[0] in attr.config1 (PERF_ATTR_CFG1_KVM_PMU_CHAINED). But as it turns out, this bit gets set *after* we create the kernel event that backs our virtual counter, meaning that we never get a 64bit counter. Moving the setting to an earlier point solves the problem. Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters") Reviewed-by: Andrew Murray Signed-off-by: Marc Zyngier --- virt/kvm/arm/pmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index c30c3a74fc7f..f291d4ac3519 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -569,12 +569,12 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) * high counter. */ attr.sample_period = (-counter) & GENMASK(63, 0); + if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1)) + attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; + event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc + 1); - - if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1)) - attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; } else { /* The initial sample period (overflow count) of an event. */ if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) From 8c3252c06516eac22c4f8e2506122171abedcc09 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 6 Oct 2019 10:28:50 +0100 Subject: [PATCH 4/4] KVM: arm64: pmu: Reset sample period on overflow handling The PMU emulation code uses the perf event sample period to trigger the overflow detection. This works fine for the *first* overflow handling, but results in a huge number of interrupts on the host, unrelated to the number of interrupts handled in the guest (a x20 factor is pretty common for the cycle counter). On a slow system (such as a SW model), this can result in the guest only making forward progress at a glacial pace. It turns out that the clue is in the name. The sample period is exactly that: a period. And once the an overflow has occured, the following period should be the full width of the associated counter, instead of whatever the guest had initially programed. Reset the sample period to the architected value in the overflow handler, which now results in a number of host interrupts that is much closer to the number of interrupts in the guest. Fixes: b02386eb7dac ("arm64: KVM: Add PMU overflow interrupt routing") Reviewed-by: Andrew Murray Signed-off-by: Marc Zyngier --- virt/kvm/arm/pmu.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index f291d4ac3519..8731dfeced8b 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -442,8 +443,25 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct pt_regs *regs) { struct kvm_pmc *pmc = perf_event->overflow_handler_context; + struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu); struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); int idx = pmc->idx; + u64 period; + + cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE); + + /* + * Reset the sample period to the architectural limit, + * i.e. the point where the counter overflows. + */ + period = -(local64_read(&perf_event->count)); + + if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) + period &= GENMASK(31, 0); + + local64_set(&perf_event->hw.period_left, 0); + perf_event->attr.sample_period = period; + perf_event->hw.sample_period = period; __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); @@ -451,6 +469,8 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); } + + cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD); } /**