x86/mce: Add mechanism to safely save information in MCE handler

Machine checks on Intel cpus interrupt execution on all cpus, regardless of interrupt masking. We have a need to save some data about the cause of the machine check (physical address) in the machine check handler that can be retrieved later to attempt recovery in a more flexible execution state. Signed-off-by: Tony Luck <tony.luck@intel.com>
2011-12-14 15:55:20 -08:00 · 2011-12-14 15:55:20 -08:00 · af104e394e
parent 85f92694af
commit af104e394e
1 changed files with 43 additions and 0 deletions
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@ -886,6 +886,49 @@ static void mce_clear_state(unsigned long *toclear)
 	}
 }

+/*
+ * Need to save faulting physical address associated with a process
+ * in the machine check handler some place where we can grab it back
+ * later in mce_notify_process()
+ */
+#define	MCE_INFO_MAX	16
+
+struct mce_info {
+	atomic_t		inuse;
+	struct task_struct	*t;
+	__u64			paddr;
+} mce_info[MCE_INFO_MAX];
+
+static void mce_save_info(__u64 addr)
+{
+	struct mce_info *mi;
+
+	for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
+		if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
+			mi->t = current;
+			mi->paddr = addr;
+			return;
+		}
+	}
+
+	mce_panic("Too many concurrent recoverable errors", NULL, NULL);
+}
+
+static struct mce_info *mce_find_info(void)
+{
+	struct mce_info *mi;
+
+	for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
+		if (atomic_read(&mi->inuse) && mi->t == current)
+			return mi;
+	return NULL;
+}
+
+static void mce_clear_info(struct mce_info *mi)
+{
+	atomic_set(&mi->inuse, 0);
+}
+
 /*
 * The actual machine check handler. This only handles real
 * exceptions when something got corrupted coming in through int 18.