perf_counter, x86: Make NMI lockups more robust
We have a debug check that detects stuck NMIs and returns with the PMU disabled in the global ctrl MSR - but I managed to trigger a situation where this was not enough to deassert the NMI. So clear/reset the full PMU and keep the disable count balanced when exiting from here. This way the box produces a debug warning but stays up and is more debuggable. [ Impact: in case of PMU related bugs, recover more gracefully ] Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: John Kacur <jkacur@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
79202ba9ff
commit
aaba98018b
|
@ -724,6 +724,30 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter)
|
||||||
intel_pmu_enable_counter(hwc, idx);
|
intel_pmu_enable_counter(hwc, idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void intel_pmu_reset(void)
|
||||||
|
{
|
||||||
|
unsigned long flags;
|
||||||
|
int idx;
|
||||||
|
|
||||||
|
if (!x86_pmu.num_counters)
|
||||||
|
return;
|
||||||
|
|
||||||
|
local_irq_save(flags);
|
||||||
|
|
||||||
|
printk("clearing PMU state on CPU#%d\n", smp_processor_id());
|
||||||
|
|
||||||
|
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
||||||
|
checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
|
||||||
|
checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
|
||||||
|
}
|
||||||
|
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
|
||||||
|
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
|
||||||
|
}
|
||||||
|
|
||||||
|
local_irq_restore(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This handler is triggered by the local APIC, so the APIC IRQ handling
|
* This handler is triggered by the local APIC, so the APIC IRQ handling
|
||||||
* rules apply:
|
* rules apply:
|
||||||
|
@ -750,6 +774,8 @@ again:
|
||||||
if (++loops > 100) {
|
if (++loops > 100) {
|
||||||
WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
|
WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
|
||||||
perf_counter_print_debug();
|
perf_counter_print_debug();
|
||||||
|
intel_pmu_reset();
|
||||||
|
perf_enable();
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue