SCI reporting for other error types, not only correctable ones

+ APEI GHES cleanups
 + mce timer fix
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.15 (GNU/Linux)
 
 iQIcBAABAgAGBQJS0qbXAAoJEBLB8Bhh3lVK0G4P/1fjGcBzCiC4qp0vUzhdvu1d
 CXVTtF20fdGP+hgwVU/1DLuIeUhyBLFM57rPgz0FxfvPo/bTYq92Lw9sElQpiVad
 trIRpw3bemjDY/8E91vR94SqLKTddoJifldK5ZTUpRJN9up06MwLky4IurdL2ixE
 QLaI20ZQIDjxe+pmh4wHtyV5qPdtkiXY2ICcdTXwAW7RdiG7pXXd5gLljL4oFUMF
 bp0QzM074szkDvhBgZg6KTAy1zfNzZVM9hUBOAesi1tfXqkT7pCEbUkYKZ6QFNVV
 haQNt+RtkVvYcFcZK+9TkRM32KHSy1MVuicv2W6eNvRDtauGFtSEwBBuvyq3Imjb
 PTY6Vd5g/hR/+o968ieYUYdFua4xaza2wqC3mwL6WuNoWGzzb/f4dk9+wU/x0Khu
 th1NMSwy7VfNqi8pbTTy13LG1yjqqorrMLAEAfsNAMjZPIPsBREFYu8J5kko4ird
 1aEsaENUdkzgoDEUxwO7vQEfAL3RBOC8kr4SrBGd5jQtbN4SYRyP0bYVIsQL3Psz
 fo0H+9sVLwkSxcx7MM0bboBcv0NYazvTS5aIiivSdwMq0uxDsRWRmUCr70AlJDp9
 h7EfyovynCpfOcAOIz212A9bRzBcfW46KnQm6xkcgVdKKhEbc5EWhx6Nz3kCMNQZ
 Or1B6ZtUe7Acdg2kRTRL
 =+eFf
 -----END PGP SIGNATURE-----

Merge tag 'ras_for_3.14_p2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/ras

Pull RAS updates from Borislav Petkov:

 " SCI reporting for other error types, not only correctable ones
   + APEI GHES cleanups
   + mce timer fix
 "

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2014-01-12 17:56:54 +01:00
commit b769e014f3
6 changed files with 47 additions and 39 deletions

View File

@ -33,22 +33,28 @@
#include <linux/acpi.h>
#include <linux/cper.h>
#include <acpi/apei.h>
#include <acpi/ghes.h>
#include <asm/mce.h>
#include "mce-internal.h"
void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
{
struct mce m;
/* Only corrected MC is reported */
if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA))
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;
mce_setup(&m);
m.bank = 1;
/* Fake a memory read corrected error with unknown channel */
/* Fake a memory read error with unknown channel */
m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
if (severity >= GHES_SEV_RECOVERABLE)
m.status |= MCI_STATUS_UC;
if (severity >= GHES_SEV_PANIC)
m.status |= MCI_STATUS_PCC;
m.addr = mem_err->physical_addr;
mce_log(&m);
mce_notify_irq();

View File

@ -1638,15 +1638,15 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
static void mce_start_timer(unsigned int cpu, struct timer_list *t)
{
unsigned long iv = mce_adjust_timer(check_interval * HZ);
__this_cpu_write(mce_next_interval, iv);
unsigned long iv = check_interval * HZ;
if (mca_cfg.ignore_ce || !iv)
return;
per_cpu(mce_next_interval, cpu) = iv;
t->expires = round_jiffies(jiffies + iv);
add_timer_on(t, smp_processor_id());
add_timer_on(t, cpu);
}
static void __mcheck_cpu_init_timer(void)

View File

@ -41,6 +41,7 @@
#include <linux/rculist.h>
#include <linux/interrupt.h>
#include <linux/debugfs.h>
#include <asm/unaligned.h>
#include "apei-internal.h"
@ -567,8 +568,7 @@ static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr,
bit_offset = reg->bit_offset;
access_size_code = reg->access_width;
space_id = reg->space_id;
/* Handle possible alignment issues */
memcpy(paddr, &reg->address, sizeof(*paddr));
*paddr = get_unaligned(&reg->address);
if (!*paddr) {
pr_warning(FW_BUG APEI_PFX
"Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n",

View File

@ -34,6 +34,7 @@
#include <linux/delay.h>
#include <linux/mm.h>
#include <acpi/acpi.h>
#include <asm/unaligned.h>
#include "apei-internal.h"
@ -216,7 +217,7 @@ static void check_vendor_extension(u64 paddr,
static void *einj_get_parameter_address(void)
{
int i;
u64 paddrv4 = 0, paddrv5 = 0;
u64 pa_v4 = 0, pa_v5 = 0;
struct acpi_whea_header *entry;
entry = EINJ_TAB_ENTRY(einj_tab);
@ -225,30 +226,28 @@ static void *einj_get_parameter_address(void)
entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
entry->register_region.space_id ==
ACPI_ADR_SPACE_SYSTEM_MEMORY)
memcpy(&paddrv4, &entry->register_region.address,
sizeof(paddrv4));
pa_v4 = get_unaligned(&entry->register_region.address);
if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
entry->register_region.space_id ==
ACPI_ADR_SPACE_SYSTEM_MEMORY)
memcpy(&paddrv5, &entry->register_region.address,
sizeof(paddrv5));
pa_v5 = get_unaligned(&entry->register_region.address);
entry++;
}
if (paddrv5) {
if (pa_v5) {
struct set_error_type_with_address *v5param;
v5param = acpi_os_map_memory(paddrv5, sizeof(*v5param));
v5param = acpi_os_map_memory(pa_v5, sizeof(*v5param));
if (v5param) {
acpi5 = 1;
check_vendor_extension(paddrv5, v5param);
check_vendor_extension(pa_v5, v5param);
return v5param;
}
}
if (param_extension && paddrv4) {
if (param_extension && pa_v4) {
struct einj_parameter *v4param;
v4param = acpi_os_map_memory(paddrv4, sizeof(*v4param));
v4param = acpi_os_map_memory(pa_v4, sizeof(*v4param));
if (!v4param)
return NULL;
if (v4param->reserved1 || v4param->reserved2) {

View File

@ -611,7 +611,7 @@ static void __erst_record_id_cache_compact(void)
if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
continue;
if (wpos != i)
memcpy(&entries[wpos], &entries[i], sizeof(entries[i]));
entries[wpos] = entries[i];
wpos++;
}
erst_record_id_cache.len = wpos;

View File

@ -413,27 +413,31 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
unsigned long pfn;
int flags = -1;
int sec_sev = ghes_severity(gdata->error_severity);
struct cper_sec_mem_err *mem_err;
mem_err = (struct cper_sec_mem_err *)(gdata + 1);
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;
pfn = mem_err->physical_addr >> PAGE_SHIFT;
if (!pfn_valid(pfn)) {
pr_warn_ratelimited(FW_WARN GHES_PFX
"Invalid address in generic error data: %#llx\n",
mem_err->physical_addr);
return;
}
/* iff following two events can be handled properly by now */
if (sec_sev == GHES_SEV_CORRECTED &&
(gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) &&
(mem_err->validation_bits & CPER_MEM_VALID_PA)) {
pfn = mem_err->physical_addr >> PAGE_SHIFT;
if (pfn_valid(pfn))
memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE);
else if (printk_ratelimit())
pr_warn(FW_WARN GHES_PFX
"Invalid address in generic error data: %#llx\n",
mem_err->physical_addr);
}
if (sev == GHES_SEV_RECOVERABLE &&
sec_sev == GHES_SEV_RECOVERABLE &&
mem_err->validation_bits & CPER_MEM_VALID_PA) {
pfn = mem_err->physical_addr >> PAGE_SHIFT;
memory_failure_queue(pfn, 0, 0);
}
(gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
flags = MF_SOFT_OFFLINE;
if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
flags = 0;
if (flags != -1)
memory_failure_queue(pfn, 0, flags);
#endif
}
@ -453,8 +457,7 @@ static void ghes_do_proc(struct ghes *ghes,
ghes_edac_report_mem_error(ghes, sev, mem_err);
#ifdef CONFIG_X86_MCE
apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
mem_err);
apei_mce_report_mem_error(sev, mem_err);
#endif
ghes_handle_memory_failure(gdata, sev);
}