From 5359534505c74841dbb2c6baf41db1a395acd34d Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 28 Apr 2016 07:52:11 -0700 Subject: [PATCH] EDAC, i7core: Remove double buffering of error records In the bad old days the functions from x86_mce_decoder_chain could be called in machine check context. So we used to carefully copy them and defer processing until later. But in f29a7aff4bd60 ("x86/mce: Avoid potential deadlock due to printk() in MCE context") we switched the logging code to save the record in a genpool, and call the functions that registered to be notified later from a work queue. So drop all the double buffering and do all the work we want to do as soon as i7core_mce_check_error() is called. Signed-off-by: Tony Luck Acked-by: Mauro Carvalho Chehab Cc: linux-edac Link: http://lkml.kernel.org/r/29ab2c370915c6e132fc5d88e7b72cb834bedbfe.1461855008.git.tony.luck@intel.com Signed-off-by: Borislav Petkov --- drivers/edac/i7core_edac.c | 81 +++----------------------------------- 1 file changed, 5 insertions(+), 76 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 01087a38da22..60e0bb53e9c9 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -271,16 +271,6 @@ struct i7core_pvt { bool is_registered, enable_scrub; - /* Fifo double buffers */ - struct mce mce_entry[MCE_LOG_LEN]; - struct mce mce_outentry[MCE_LOG_LEN]; - - /* Fifo in/out counters */ - unsigned mce_in, mce_out; - - /* Count indicator to show errors not got */ - unsigned mce_overrun; - /* DCLK Frequency used for computing scrub rate */ int dclk_freq; @@ -1792,56 +1782,15 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, * i7core_check_error Retrieve and process errors reported by the * hardware. Called by the Core module. */ -static void i7core_check_error(struct mem_ctl_info *mci) +static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m) { struct i7core_pvt *pvt = mci->pvt_info; - int i; - unsigned count = 0; - struct mce *m; - /* - * MCE first step: Copy all mce errors into a temporary buffer - * We use a double buffering here, to reduce the risk of - * losing an error. - */ - smp_rmb(); - count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in) - % MCE_LOG_LEN; - if (!count) - goto check_ce_error; - - m = pvt->mce_outentry; - if (pvt->mce_in + count > MCE_LOG_LEN) { - unsigned l = MCE_LOG_LEN - pvt->mce_in; - - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l); - smp_wmb(); - pvt->mce_in = 0; - count -= l; - m += l; - } - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count); - smp_wmb(); - pvt->mce_in += count; - - smp_rmb(); - if (pvt->mce_overrun) { - i7core_printk(KERN_ERR, "Lost %d memory errors\n", - pvt->mce_overrun); - smp_wmb(); - pvt->mce_overrun = 0; - } - - /* - * MCE second step: parse errors and display - */ - for (i = 0; i < count; i++) - i7core_mce_output_error(mci, &pvt->mce_outentry[i]); + i7core_mce_output_error(mci, m); /* * Now, let's increment CE error counts */ -check_ce_error: if (!pvt->is_registered) i7core_udimm_check_mc_ecc_err(mci); else @@ -1849,12 +1798,8 @@ check_ce_error: } /* - * i7core_mce_check_error Replicates mcelog routine to get errors - * This routine simply queues mcelog errors, and - * return. The error itself should be handled later - * by i7core_check_error. - * WARNING: As this routine should be called at NMI time, extra care should - * be taken to avoid deadlocks, and to be as fast as possible. + * Check that logging is enabled and that this is the right type + * of error for us to handle. */ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) @@ -1882,21 +1827,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->bank != 8) return NOTIFY_DONE; - smp_rmb(); - if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { - smp_wmb(); - pvt->mce_overrun++; - return NOTIFY_DONE; - } - - /* Copy memory error at the ringbuffer */ - memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce)); - smp_wmb(); - pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN; - - /* Handle fatal errors immediately */ - if (mce->mcgstatus & 1) - i7core_check_error(mci); + i7core_check_error(mci, mce); /* Advise mcelog that the errors were handled */ return NOTIFY_STOP; @@ -2243,8 +2174,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) get_dimm_config(mci); /* record ptr to the generic device */ mci->pdev = &i7core_dev->pdev[0]->dev; - /* Set the function pointer to an actual operation function */ - mci->edac_check = i7core_check_error; /* Enable scrubrate setting */ if (pvt->enable_scrub)