amd64_edac: Check ECC capabilities initially

Rework the code to check the hardware ECC capabilities at PCI probing
time. We do all further initialization only if we actually can/have ECC
enabled.

While at it:
0. Fix function naming.
1. Simplify/clarify debug output.
2. Remove amd64_ prefix from the static functions
3. Reorganize code.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
This commit is contained in:
Borislav Petkov 2010-10-15 17:44:04 +02:00
parent ae7bb7c679
commit 2299ef7114
1 changed files with 74 additions and 65 deletions

View File

@ -2188,18 +2188,19 @@ static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
static int amd64_init_csrows(struct mem_ctl_info *mci) static int amd64_init_csrows(struct mem_ctl_info *mci)
{ {
struct csrow_info *csrow; struct csrow_info *csrow;
struct amd64_pvt *pvt; struct amd64_pvt *pvt = mci->pvt_info;
u64 input_addr_min, input_addr_max, sys_addr; u64 input_addr_min, input_addr_max, sys_addr;
u32 val;
int i, empty = 1; int i, empty = 1;
pvt = mci->pvt_info; amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &val);
amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &pvt->nbcfg); pvt->nbcfg = val;
pvt->ctl_error_info.nbcfg = val;
debugf0("NBCFG= 0x%x CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg, debugf0("node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
(pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", pvt->mc_node_id, val,
(pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled" !!(val & K8_NBCFG_CHIPKILL), !!(val & K8_NBCFG_ECC_ENABLE));
);
for (i = 0; i < pvt->cs_count; i++) { for (i = 0; i < pvt->cs_count; i++) {
csrow = &mci->csrows[i]; csrow = &mci->csrows[i];
@ -2294,7 +2295,7 @@ out:
return ret; return ret;
} }
static int amd64_toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
{ {
cpumask_var_t cmask; cpumask_var_t cmask;
int cpu; int cpu;
@ -2332,30 +2333,31 @@ static int amd64_toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool o
return 0; return 0;
} }
static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
struct pci_dev *F3)
{ {
struct amd64_pvt *pvt = mci->pvt_info; bool ret = true;
u8 nid = pvt->mc_node_id;
struct ecc_settings *s = ecc_stngs[nid];
u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
amd64_read_pci_cfg(pvt->F3, K8_NBCTL, &value); if (toggle_ecc_err_reporting(s, nid, ON)) {
amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
return false;
}
amd64_read_pci_cfg(F3, K8_NBCTL, &value);
/* turn on UECCEn and CECCEn bits */ /* turn on UECCEn and CECCEn bits */
s->old_nbctl = value & mask; s->old_nbctl = value & mask;
s->nbctl_valid = true; s->nbctl_valid = true;
value |= mask; value |= mask;
pci_write_config_dword(pvt->F3, K8_NBCTL, value); pci_write_config_dword(F3, K8_NBCTL, value);
if (amd64_toggle_ecc_err_reporting(s, nid, ON)) amd64_read_pci_cfg(F3, K8_NBCFG, &value);
amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value); debugf0("1: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
nid, value,
debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, !!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE));
(value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
(value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
if (!(value & K8_NBCFG_ECC_ENABLE)) { if (!(value & K8_NBCFG_ECC_ENABLE)) {
amd64_warn("DRAM ECC disabled on this node, enabling...\n"); amd64_warn("DRAM ECC disabled on this node, enabling...\n");
@ -2364,13 +2366,14 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
/* Attempt to turn on DRAM ECC Enable */ /* Attempt to turn on DRAM ECC Enable */
value |= K8_NBCFG_ECC_ENABLE; value |= K8_NBCFG_ECC_ENABLE;
pci_write_config_dword(pvt->F3, K8_NBCFG, value); pci_write_config_dword(F3, K8_NBCFG, value);
amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value); amd64_read_pci_cfg(F3, K8_NBCFG, &value);
if (!(value & K8_NBCFG_ECC_ENABLE)) { if (!(value & K8_NBCFG_ECC_ENABLE)) {
amd64_warn("Hardware rejected DRAM ECC enable," amd64_warn("Hardware rejected DRAM ECC enable,"
"check memory DIMM configuration.\n"); "check memory DIMM configuration.\n");
ret = false;
} else { } else {
amd64_info("Hardware accepted DRAM ECC Enable\n"); amd64_info("Hardware accepted DRAM ECC Enable\n");
} }
@ -2378,11 +2381,11 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
s->flags.nb_ecc_prev = 1; s->flags.nb_ecc_prev = 1;
} }
debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, debugf0("2: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
(value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", nid, value,
(value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); !!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE));
pvt->ctl_error_info.nbcfg = value; return ret;
} }
static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
@ -2408,15 +2411,15 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
} }
/* restore the NB Enable MCGCTL bit */ /* restore the NB Enable MCGCTL bit */
if (amd64_toggle_ecc_err_reporting(s, nid, OFF)) if (toggle_ecc_err_reporting(s, nid, OFF))
amd64_warn("Error restoring NB MCGCTL settings!\n"); amd64_warn("Error restoring NB MCGCTL settings!\n");
} }
/* /*
* EDAC requires that the BIOS have ECC enabled before taking over the * EDAC requires that the BIOS have ECC enabled before
* processing of ECC errors. This is because the BIOS can properly initialize * taking over the processing of ECC errors. A command line
* the memory system completely. A command line option allows to force-enable * option allows to force-enable hardware ECC later in
* hardware ECC later in amd64_enable_ecc_error_reporting(). * enable_ecc_error_reporting().
*/ */
static const char *ecc_msg = static const char *ecc_msg =
"ECC disabled in the BIOS or no ECC capability, module will not load.\n" "ECC disabled in the BIOS or no ECC capability, module will not load.\n"
@ -2424,33 +2427,28 @@ static const char *ecc_msg =
"'ecc_enable_override'.\n" "'ecc_enable_override'.\n"
" (Note that use of the override may cause unknown side effects.)\n"; " (Note that use of the override may cause unknown side effects.)\n";
static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) static bool ecc_enabled(struct pci_dev *F3, u8 nid)
{ {
u32 value; u32 value;
u8 ecc_enabled = 0; u8 ecc_en = 0;
bool nb_mce_en = false; bool nb_mce_en = false;
amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value); amd64_read_pci_cfg(F3, K8_NBCFG, &value);
ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); ecc_en = !!(value & K8_NBCFG_ECC_ENABLE);
amd64_info("DRAM ECC %s.\n", (ecc_enabled ? "enabled" : "disabled")); amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));
nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id); nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid);
if (!nb_mce_en) if (!nb_mce_en)
amd64_notice("NB MCE bank disabled, " amd64_notice("NB MCE bank disabled, set MSR "
"set MSR 0x%08x[4] on node %d to enable.\n", "0x%08x[4] on node %d to enable.\n",
MSR_IA32_MCG_CTL, pvt->mc_node_id); MSR_IA32_MCG_CTL, nid);
if (!ecc_enabled || !nb_mce_en) { if (!ecc_en || !nb_mce_en) {
if (!ecc_enable_override) { amd64_notice("%s", ecc_msg);
amd64_notice("%s", ecc_msg); return false;
return -ENODEV;
} else {
amd64_warn("Forcing ECC on!\n");
}
} }
return true;
return 0;
} }
struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) + struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
@ -2536,7 +2534,7 @@ static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt)
return fam_type; return fam_type;
} }
static int amd64_probe_one_instance(struct pci_dev *F2) static int amd64_init_one_instance(struct pci_dev *F2)
{ {
struct amd64_pvt *pvt = NULL; struct amd64_pvt *pvt = NULL;
struct amd64_family_type *fam_type = NULL; struct amd64_family_type *fam_type = NULL;
@ -2561,11 +2559,6 @@ static int amd64_probe_one_instance(struct pci_dev *F2)
if (err) if (err)
goto err_free; goto err_free;
ret = -EINVAL;
err = amd64_check_ecc_enabled(pvt);
if (err)
goto err_put;
/* /*
* Save the pointer to the private data for use in 2nd initialization * Save the pointer to the private data for use in 2nd initialization
* stage * stage
@ -2574,9 +2567,6 @@ static int amd64_probe_one_instance(struct pci_dev *F2)
return 0; return 0;
err_put:
amd64_free_mc_sibling_devices(pvt);
err_free: err_free:
kfree(pvt); kfree(pvt);
@ -2618,7 +2608,6 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
if (amd64_init_csrows(mci)) if (amd64_init_csrows(mci))
mci->edac_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE;
amd64_enable_ecc_error_reporting(mci);
amd64_set_mc_sysfs_attributes(mci); amd64_set_mc_sysfs_attributes(mci);
ret = -ENODEV; ret = -ENODEV;
@ -2655,12 +2644,13 @@ err_exit:
} }
static int __devinit amd64_init_one_instance(struct pci_dev *pdev, static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
const struct pci_device_id *mc_type) const struct pci_device_id *mc_type)
{ {
int ret = 0;
u8 nid = get_node_id(pdev); u8 nid = get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s; struct ecc_settings *s;
int ret = 0;
ret = pci_enable_device(pdev); ret = pci_enable_device(pdev);
if (ret < 0) { if (ret < 0) {
@ -2671,15 +2661,34 @@ static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
ret = -ENOMEM; ret = -ENOMEM;
s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
if (!s) if (!s)
return ret; goto err_out;
ecc_stngs[nid] = s; ecc_stngs[nid] = s;
ret = amd64_probe_one_instance(pdev); if (!ecc_enabled(F3, nid)) {
ret = -ENODEV;
if (!ecc_enable_override)
goto err_enable;
amd64_warn("Forcing ECC on!\n");
if (!enable_ecc_error_reporting(s, nid, F3))
goto err_enable;
}
ret = amd64_init_one_instance(pdev);
if (ret < 0) if (ret < 0)
amd64_err("Error probing instance: %d\n", nid); amd64_err("Error probing instance: %d\n", nid);
return ret; return ret;
err_enable:
kfree(s);
ecc_stngs[nid] = NULL;
err_out:
return ret;
} }
static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
@ -2741,7 +2750,7 @@ MODULE_DEVICE_TABLE(pci, amd64_pci_table);
static struct pci_driver amd64_pci_driver = { static struct pci_driver amd64_pci_driver = {
.name = EDAC_MOD_STR, .name = EDAC_MOD_STR,
.probe = amd64_init_one_instance, .probe = amd64_probe_one_instance,
.remove = __devexit_p(amd64_remove_one_instance), .remove = __devexit_p(amd64_remove_one_instance),
.id_table = amd64_pci_table, .id_table = amd64_pci_table,
}; };