* skx_edac: Address translation for NVDIMMs (Tony Luck and Qiuxu Zhuo)

* ACPI_ADXL build fix
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAlvcKnEACgkQEsHwGGHe
 VUr45g/9E67lU84Dz41ly6zFDTmQdYBNPRayz9QgIGHfIMwIN8aAVoezC8B4NCqc
 8rQ3W48REkewLmPO2GoEVld4UnHbvVlZYZ4bGcxhYzWL5dcleoJIFVupF4Ifo6/Y
 SPbVUyihtw4FFr+Ft8x/bOJQ6QQ4CIiVX3mJBdwdqQ6Lm9yoEz6AlSbTJiyyzr8I
 gGfcKD5TcmJWpsDzRXJ/xWddfA+hfUpKxkuJqPIRZvmKnJpy79af8MlQAZwXuXVS
 361wj5SzP0LzktT5JQn73V04NzSSDTbFSycnWXUex3lxIsE6KolsEwfglccdhvIy
 Nz/Du4kJn1Ye/zbsO27qtkGCXSz0qYKsdfUg1RY+MnZPe4mFmmAm0izTKH3EltQx
 +OQWtcBz5vvNf3Odwnfw1nNvYrbnzaDNIsjHspopVrsmD2oRefmpsYsOEzGyAGDw
 PtUC3l70u7i4e2NtF7Doo23g1yIyXWQZtrEDDFmQZGUo7YKxE7AkGPzINxpNSQME
 z11ny9GyISaUc/Zf5zUHYmIFcYtiCngecl4F8hCXvfyp4MbxYgTI+5YY185NfLSU
 pQHwyMGKLifI6ndhWd3sO9KSCqFzSZKaVF/DdKScSqB+v1NqflMyjzfulR625/5U
 cSWWQP8Zu6H7os/X3+o5KCC/Pzbs5Nx/QPLwCabrOwcI3kdmQ2E=
 =JQUK
 -----END PGP SIGNATURE-----

Merge tag 'edac_for_4.20_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

Pull more EDAC updates from Borislav Petkov:
 "The second part of the EDAC pile which contains the ADXL user and a
  build fix which addresses a not-so-sensical .config but fixes
  randconfig builds people do:

   - skx_edac: Address translation for NVDIMMs (Tony Luck and Qiuxu Zhuo)

   - ACPI_ADXL build fix"

[ I don't think "sensical" is a word, particularly when used in the
  context of actually meaning "nonsensical", but I like it   - Linus ]

* tag 'edac_for_4.20_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  EDAC, skx: Fix randconfig builds
  EDAC, skx_edac: Add address translation for non-volatile DIMMs
This commit is contained in:
Linus Torvalds 2018-11-02 11:17:22 -07:00
commit 0b21f21ae0
3 changed files with 186 additions and 13 deletions

View File

@ -234,6 +234,7 @@ config EDAC_SKX
depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
select DMI select DMI
select ACPI_ADXL if ACPI
help help
Support for error detection and correction the Intel Support for error detection and correction the Intel
Skylake server Integrated Memory Controllers. If your Skylake server Integrated Memory Controllers. If your

View File

@ -26,6 +26,7 @@
#include <linux/bitmap.h> #include <linux/bitmap.h>
#include <linux/math64.h> #include <linux/math64.h>
#include <linux/mod_devicetable.h> #include <linux/mod_devicetable.h>
#include <linux/adxl.h>
#include <acpi/nfit.h> #include <acpi/nfit.h>
#include <asm/cpu_device_id.h> #include <asm/cpu_device_id.h>
#include <asm/intel-family.h> #include <asm/intel-family.h>
@ -35,6 +36,7 @@
#include "edac_module.h" #include "edac_module.h"
#define EDAC_MOD_STR "skx_edac" #define EDAC_MOD_STR "skx_edac"
#define MSG_SIZE 1024
/* /*
* Debug macros * Debug macros
@ -54,6 +56,29 @@
static LIST_HEAD(skx_edac_list); static LIST_HEAD(skx_edac_list);
static u64 skx_tolm, skx_tohm; static u64 skx_tolm, skx_tohm;
/* Error-message buffer of MSG_SIZE bytes; allocated in skx_init(), freed in skx_exit(). */
static char *skx_msg;
/* Incremented on every get_nvdimm_info() call; non-zero makes skx_init() call skx_adxl_get(). */
static unsigned int nvdimm_count;
/* Slots of the address components this driver needs from the ADXL decode. */
enum {
INDEX_SOCKET,
INDEX_MEMCTRL,
INDEX_CHANNEL,
INDEX_DIMM,
INDEX_MAX
};
/* Component names that skx_adxl_get() looks up (via strcmp) in the firmware-provided name list. */
static const char * const component_names[] = {
[INDEX_SOCKET] = "ProcessorSocketId",
[INDEX_MEMCTRL] = "MemoryControllerId",
[INDEX_CHANNEL] = "ChannelId",
[INDEX_DIMM] = "DimmSlotId",
};
/* For each INDEX_* slot, the position of that component in the firmware name/value arrays. */
static int component_indices[ARRAY_SIZE(component_names)];
/* Number of firmware component names; non-zero selects the ADXL decode path. */
static int adxl_component_count;
/* Name list returned by adxl_get_component_names(). */
static const char * const *adxl_component_names;
/* Output buffer handed to adxl_decode(); holds adxl_component_count entries. */
static u64 *adxl_values;
/* MSG_SIZE-byte text of the decoded components, built by skx_adxl_decode(). */
static char *adxl_msg;
#define NUM_IMC 2 /* memory controllers per socket */ #define NUM_IMC 2 /* memory controllers per socket */
#define NUM_CHANNELS 3 /* channels per memory controller */ #define NUM_CHANNELS 3 /* channels per memory controller */
@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
u16 flags; u16 flags;
u64 size = 0; u64 size = 0;
nvdimm_count++;
dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc, dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
imc->src_id, 0); imc->src_id, 0);
@ -941,12 +968,46 @@ static void teardown_skx_debug(void)
} }
#endif /*CONFIG_EDAC_DEBUG*/ #endif /*CONFIG_EDAC_DEBUG*/
/*
 * skx_adxl_decode() - translate a system address with the ADXL firmware
 * interface.
 * @res: decode request/result. res->addr is the input; socket, imc, channel
 *       and dimm are filled in on success.
 *
 * Addresses at or above skx_tohm, or in the range from skx_tolm up to (but
 * not including) BIT_ULL(32), are rejected without calling the firmware.
 *
 * On success adxl_msg holds a " name:0xvalue" entry for every component whose
 * value is not all ones, truncated to MSG_SIZE bytes.
 *
 * Return: true if the address was decoded, false otherwise.
 */
static bool skx_adxl_decode(struct decoded_addr *res)
{
	int i, len = 0;

	if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
				      res->addr < BIT_ULL(32))) {
		edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
		return false;
	}

	if (adxl_decode(res->addr, adxl_values)) {
		edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
		return false;
	}

	res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
	res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
	res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
	res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];

	/*
	 * Start from an empty string: if every component is skipped below,
	 * nothing is written and the text left over from a previous decode
	 * would otherwise be reported again.
	 */
	adxl_msg[0] = '\0';

	for (i = 0; i < adxl_component_count; i++) {
		/* All ones is skipped; presumably the "not present" marker. */
		if (adxl_values[i] == ~0x0ull)
			continue;

		/*
		 * snprintf() returns the untruncated length, so len can pass
		 * MSG_SIZE; stop before the remaining size goes non-positive.
		 */
		len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
				adxl_component_names[i], adxl_values[i]);
		if (MSG_SIZE - len <= 0)
			break;
	}

	return true;
}
static void skx_mce_output_error(struct mem_ctl_info *mci, static void skx_mce_output_error(struct mem_ctl_info *mci,
const struct mce *m, const struct mce *m,
struct decoded_addr *res) struct decoded_addr *res)
{ {
enum hw_event_mc_err_type tp_event; enum hw_event_mc_err_type tp_event;
char *type, *optype, msg[256]; char *type, *optype;
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62); bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
break; break;
} }
} }
if (adxl_component_count) {
snprintf(skx_msg, MSG_SIZE, "%s%s err_code:%04x:%04x %s",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode, adxl_msg);
} else {
snprintf(skx_msg, MSG_SIZE,
"%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode,
res->socket, res->imc, res->rank,
res->bank_group, res->bank_address, res->row, res->column);
}
snprintf(msg, sizeof(msg), edac_dbg(0, "%s\n", skx_msg);
"%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode,
res->socket, res->imc, res->rank,
res->bank_group, res->bank_address, res->row, res->column);
edac_dbg(0, "%s\n", msg);
/* Call the helper to output message */ /* Call the helper to output message */
edac_mc_handle_error(tp_event, mci, core_err_cnt, edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
res->channel, res->dimm, -1, res->channel, res->dimm, -1,
optype, msg); optype, skx_msg);
}
/*
 * get_mci() - find the memory controller instance for a socket/IMC pair.
 * @src_id: compared against imc[0].src_id of each device on skx_edac_list.
 * @lmc:    index into the matching device's imc[] array; valid values are
 *          0 .. NUM_IMC - 1.
 *
 * Return: the mci pointer stored for that controller, or NULL (after logging
 * an error) when @lmc is out of range or no device matches @src_id.
 */
static struct mem_ctl_info *get_mci(int src_id, int lmc)
{
	struct skx_dev *d;

	/*
	 * The caller passes a firmware-decoded 64-bit value narrowed to int,
	 * so @lmc can be negative; reject that as well as too-large values
	 * before it is used as an array index.
	 */
	if (lmc < 0 || lmc > NUM_IMC - 1) {
		skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
		return NULL;
	}

	list_for_each_entry(d, &skx_edac_list, list) {
		if (d->imc[0].src_id == src_id)
			return d->imc[lmc].mci;
	}

	skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);

	return NULL;
}
static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
return NOTIFY_DONE; return NOTIFY_DONE;
memset(&res, 0, sizeof(res));
res.addr = mce->addr; res.addr = mce->addr;
if (!skx_decode(&res))
if (adxl_component_count) {
if (!skx_adxl_decode(&res))
return NOTIFY_DONE;
mci = get_mci(res.socket, res.imc);
} else {
if (!skx_decode(&res))
return NOTIFY_DONE;
mci = res.dev->imc[res.imc].mci;
}
if (!mci)
return NOTIFY_DONE; return NOTIFY_DONE;
mci = res.dev->imc[res.imc].mci;
if (mce->mcgstatus & MCG_STATUS_MCIP) if (mce->mcgstatus & MCG_STATUS_MCIP)
type = "Exception"; type = "Exception";
@ -1094,6 +1193,62 @@ static void skx_remove(void)
} }
} }
/*
 * skx_adxl_get() - set up ADXL based address translation.
 *
 * Fetches the firmware component name list, records in component_indices[]
 * where each name from component_names[] sits in that list, counts the names
 * into adxl_component_count and allocates adxl_values and adxl_msg.
 *
 * On any failure adxl_component_count is left at (or reset to) zero, which is
 * what the decode and reporting paths test before using the ADXL buffers.
 */
static void __init skx_adxl_get(void)
{
	const char * const *names;
	int i, j;

	names = adxl_get_component_names();
	if (!names) {
		skx_printk(KERN_NOTICE, "No firmware support for address translation.");
		skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
		return;
	}

	/* Every required component must be present in the firmware list. */
	for (i = 0; i < INDEX_MAX; i++) {
		for (j = 0; names[j]; j++) {
			if (!strcmp(component_names[i], names[j])) {
				component_indices[i] = j;
				break;
			}
		}

		/* Reached the NULL terminator: component_names[i] is missing. */
		if (!names[j])
			goto err;
	}

	adxl_component_names = names;
	while (*names++)
		adxl_component_count++;

	adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
			      GFP_KERNEL);
	if (!adxl_values) {
		adxl_component_count = 0;
		return;
	}

	adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
	if (!adxl_msg) {
		adxl_component_count = 0;
		kfree(adxl_values);
		/*
		 * skx_adxl_put() frees adxl_values again at module exit;
		 * clear the pointer so that second kfree() is a no-op
		 * instead of a double free.
		 */
		adxl_values = NULL;
	}

	return;
err:
	skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
		   component_names[i]);
	for (j = 0; names[j]; j++)
		skx_printk(KERN_CONT, "%s ", names[j]);
	skx_printk(KERN_CONT, "\n");
}
/*
 * skx_adxl_put() - release the ADXL message and value buffers.
 *
 * Either pointer may be NULL; kfree() ignores NULL.
 */
static void __exit skx_adxl_put(void)
{
	kfree(adxl_msg);
	kfree(adxl_values);
}
/* /*
* skx_init: * skx_init:
* make sure we are running on the correct cpu model * make sure we are running on the correct cpu model
@ -1158,6 +1313,15 @@ static int __init skx_init(void)
} }
} }
skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
if (!skx_msg) {
rc = -ENOMEM;
goto fail;
}
if (nvdimm_count)
skx_adxl_get();
/* Ensure that the OPSTATE is set correctly for POLL or NMI */ /* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init(); opstate_init();
@ -1176,6 +1340,9 @@ static void __exit skx_exit(void)
edac_dbg(2, "\n"); edac_dbg(2, "\n");
mce_unregister_decode_chain(&skx_mce_dec); mce_unregister_decode_chain(&skx_mce_dec);
skx_remove(); skx_remove();
if (nvdimm_count)
skx_adxl_put();
kfree(skx_msg);
teardown_skx_debug(); teardown_skx_debug();
} }

View File

@ -7,7 +7,12 @@
#ifndef _LINUX_ADXL_H #ifndef _LINUX_ADXL_H
#define _LINUX_ADXL_H #define _LINUX_ADXL_H
#ifdef CONFIG_ACPI_ADXL
const char * const *adxl_get_component_names(void); const char * const *adxl_get_component_names(void);
int adxl_decode(u64 addr, u64 component_values[]); int adxl_decode(u64 addr, u64 component_values[]);
#else
static inline const char * const *adxl_get_component_names(void) { return NULL; }
static inline int adxl_decode(u64 addr, u64 component_values[]) { return -EOPNOTSUPP; }
#endif
#endif /* _LINUX_ADXL_H */ #endif /* _LINUX_ADXL_H */