Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar:
 "Various RAS updates:

   - AMD MCE support updates for future CPUs, fixes and 'SMCA' (Scalable
     MCA) error decoding support (Aravind Gopalakrishnan)

   - x86 memcpy_mcsafe() support, to enable smart(er) hardware error
     recovery in NVDIMM drivers, based on an extension of the x86
     exception handling code. (Tony Luck)"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  EDAC/sb_edac: Fix computation of channel address
  x86/mm, x86/mce: Add memcpy_mcsafe()
  x86/mce/AMD: Document some functionality
  x86/mce: Clarify comments regarding deferred error
  x86/mce/AMD: Fix logic to obtain block address
  x86/mce/AMD, EDAC: Enable error decoding of Scalable MCA errors
  x86/mce: Move MCx_CONFIG MSR definitions
  x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries
  x86/mm: Expand the exception table logic to allow new handling options
  x86/mce/AMD: Set MCAX Enable bit
  x86/mce/AMD: Carve out threshold block preparation
  x86/mce/AMD: Fix LVT offset configuration for thresholding
  x86/mce/AMD: Reduce number of blocks scanned per bank
  x86/mce/AMD: Do not perform shared bank check for future processors
  x86/mce: Fix order of AMD MCE init function call
This commit is contained in:
commit
d88bfe1d68
|
@ -290,3 +290,38 @@ Due to the way that the exception table is built and needs to be ordered,
|
||||||
only use exceptions for code in the .text section. Any other section
|
only use exceptions for code in the .text section. Any other section
|
||||||
will cause the exception table to not be sorted correctly, and the
|
will cause the exception table to not be sorted correctly, and the
|
||||||
exceptions will fail.
|
exceptions will fail.
|
||||||
|
|
||||||
|
Things changed when 64-bit support was added to x86 Linux. Rather than
|
||||||
|
double the size of the exception table by expanding the two entries
|
||||||
|
from 32 bits to 64 bits, a clever trick was used to store addresses
|
||||||
|
as relative offsets from the table itself. The assembly code changed
|
||||||
|
from:
|
||||||
|
.long 1b,3b
|
||||||
|
to:
|
||||||
|
.long (from) - .
|
||||||
|
.long (to) - .
|
||||||
|
|
||||||
|
and the C-code that uses these values converts back to absolute addresses
|
||||||
|
like this:
|
||||||
|
|
||||||
|
ex_insn_addr(const struct exception_table_entry *x)
|
||||||
|
{
|
||||||
|
return (unsigned long)&x->insn + x->insn;
|
||||||
|
}
|
||||||
|
|
||||||
|
In v4.6 the exception table entry was expanded with a new field "handler".
|
||||||
|
This is also 32 bits wide and contains a third relative function
|
||||||
|
pointer which points to one of:
|
||||||
|
|
||||||
|
1) int ex_handler_default(const struct exception_table_entry *fixup)
|
||||||
|
This is the legacy case that just jumps to the fixup code.
|
||||||
|
2) int ex_handler_fault(const struct exception_table_entry *fixup)
|
||||||
|
This case provides the fault number of the trap that occurred at
|
||||||
|
entry->insn. It is used to distinguish page faults from machine
|
||||||
|
checks.
|
||||||
|
3) int ex_handler_ext(const struct exception_table_entry *fixup)
|
||||||
|
This case is used for uaccess_err ... we need to set a flag
|
||||||
|
in the task structure. Before the handler functions existed this
|
||||||
|
case was handled by adding a large offset to the fixup to tag
|
||||||
|
it as special.
|
||||||
|
More functions can easily be added.
|
||||||
|
|
|
@ -27,15 +27,23 @@ struct amd_l3_cache {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct threshold_block {
|
struct threshold_block {
|
||||||
unsigned int block;
|
unsigned int block; /* Number within bank */
|
||||||
unsigned int bank;
|
unsigned int bank; /* MCA bank the block belongs to */
|
||||||
unsigned int cpu;
|
unsigned int cpu; /* CPU which controls MCA bank */
|
||||||
u32 address;
|
u32 address; /* MSR address for the block */
|
||||||
u16 interrupt_enable;
|
u16 interrupt_enable; /* Enable/Disable APIC interrupt */
|
||||||
bool interrupt_capable;
|
bool interrupt_capable; /* Bank can generate an interrupt. */
|
||||||
u16 threshold_limit;
|
|
||||||
struct kobject kobj;
|
u16 threshold_limit; /*
|
||||||
struct list_head miscj;
|
* Value upon which threshold
|
||||||
|
* interrupt is generated.
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct kobject kobj; /* sysfs object */
|
||||||
|
struct list_head miscj; /*
|
||||||
|
* List of threshold blocks
|
||||||
|
* within a bank.
|
||||||
|
*/
|
||||||
};
|
};
|
||||||
|
|
||||||
struct threshold_bank {
|
struct threshold_bank {
|
||||||
|
|
|
@ -44,19 +44,22 @@
|
||||||
|
|
||||||
/* Exception table entry */
|
/* Exception table entry */
|
||||||
#ifdef __ASSEMBLY__
|
#ifdef __ASSEMBLY__
|
||||||
# define _ASM_EXTABLE(from,to) \
|
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
|
||||||
.pushsection "__ex_table","a" ; \
|
.pushsection "__ex_table","a" ; \
|
||||||
.balign 8 ; \
|
.balign 4 ; \
|
||||||
.long (from) - . ; \
|
.long (from) - . ; \
|
||||||
.long (to) - . ; \
|
.long (to) - . ; \
|
||||||
|
.long (handler) - . ; \
|
||||||
.popsection
|
.popsection
|
||||||
|
|
||||||
# define _ASM_EXTABLE_EX(from,to) \
|
# define _ASM_EXTABLE(from, to) \
|
||||||
.pushsection "__ex_table","a" ; \
|
_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
|
||||||
.balign 8 ; \
|
|
||||||
.long (from) - . ; \
|
# define _ASM_EXTABLE_FAULT(from, to) \
|
||||||
.long (to) - . + 0x7ffffff0 ; \
|
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
|
||||||
.popsection
|
|
||||||
|
# define _ASM_EXTABLE_EX(from, to) \
|
||||||
|
_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
|
||||||
|
|
||||||
# define _ASM_NOKPROBE(entry) \
|
# define _ASM_NOKPROBE(entry) \
|
||||||
.pushsection "_kprobe_blacklist","aw" ; \
|
.pushsection "_kprobe_blacklist","aw" ; \
|
||||||
|
@ -89,19 +92,24 @@
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
#else
|
#else
|
||||||
# define _ASM_EXTABLE(from,to) \
|
# define _EXPAND_EXTABLE_HANDLE(x) #x
|
||||||
|
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
|
||||||
" .pushsection \"__ex_table\",\"a\"\n" \
|
" .pushsection \"__ex_table\",\"a\"\n" \
|
||||||
" .balign 8\n" \
|
" .balign 4\n" \
|
||||||
" .long (" #from ") - .\n" \
|
" .long (" #from ") - .\n" \
|
||||||
" .long (" #to ") - .\n" \
|
" .long (" #to ") - .\n" \
|
||||||
|
" .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
|
||||||
" .popsection\n"
|
" .popsection\n"
|
||||||
|
|
||||||
# define _ASM_EXTABLE_EX(from,to) \
|
# define _ASM_EXTABLE(from, to) \
|
||||||
" .pushsection \"__ex_table\",\"a\"\n" \
|
_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
|
||||||
" .balign 8\n" \
|
|
||||||
" .long (" #from ") - .\n" \
|
# define _ASM_EXTABLE_FAULT(from, to) \
|
||||||
" .long (" #to ") - . + 0x7ffffff0\n" \
|
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
|
||||||
" .popsection\n"
|
|
||||||
|
# define _ASM_EXTABLE_EX(from, to) \
|
||||||
|
_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
|
||||||
|
|
||||||
/* For C file, we already have NOKPROBE_SYMBOL macro */
|
/* For C file, we already have NOKPROBE_SYMBOL macro */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -40,8 +40,20 @@
|
||||||
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
|
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
|
||||||
|
|
||||||
/* AMD-specific bits */
|
/* AMD-specific bits */
|
||||||
#define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */
|
#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */
|
||||||
#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
|
#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
|
||||||
|
#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* McaX field if set indicates a given bank supports MCA extensions:
|
||||||
|
* - Deferred error interrupt type is specifiable by bank.
|
||||||
|
* - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers,
|
||||||
|
* but should not be used to determine MSR numbers.
|
||||||
|
* - TCC bit is present in MCx_STATUS.
|
||||||
|
*/
|
||||||
|
#define MCI_CONFIG_MCAX 0x1
|
||||||
|
#define MCI_IPID_MCATYPE 0xFFFF0000
|
||||||
|
#define MCI_IPID_HWID 0xFFF
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note that the full MCACOD field of IA32_MCi_STATUS MSR is
|
* Note that the full MCACOD field of IA32_MCi_STATUS MSR is
|
||||||
|
@ -91,6 +103,16 @@
|
||||||
#define MCE_LOG_LEN 32
|
#define MCE_LOG_LEN 32
|
||||||
#define MCE_LOG_SIGNATURE "MACHINECHECK"
|
#define MCE_LOG_SIGNATURE "MACHINECHECK"
|
||||||
|
|
||||||
|
/* AMD Scalable MCA */
|
||||||
|
#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
|
||||||
|
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
|
||||||
|
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
|
||||||
|
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
|
||||||
|
#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
|
||||||
|
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
|
||||||
|
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
|
||||||
|
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This structure contains all data related to the MCE log. Also
|
* This structure contains all data related to the MCE log. Also
|
||||||
* carries a signature to make it easier to find from external
|
* carries a signature to make it easier to find from external
|
||||||
|
@ -287,4 +309,49 @@ struct cper_sec_mem_err;
|
||||||
extern void apei_mce_report_mem_error(int corrected,
|
extern void apei_mce_report_mem_error(int corrected,
|
||||||
struct cper_sec_mem_err *mem_err);
|
struct cper_sec_mem_err *mem_err);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Enumerate new IP types and HWID values in AMD processors which support
|
||||||
|
* Scalable MCA.
|
||||||
|
*/
|
||||||
|
#ifdef CONFIG_X86_MCE_AMD
|
||||||
|
enum amd_ip_types {
|
||||||
|
SMCA_F17H_CORE = 0, /* Core errors */
|
||||||
|
SMCA_DF, /* Data Fabric */
|
||||||
|
SMCA_UMC, /* Unified Memory Controller */
|
||||||
|
SMCA_PB, /* Parameter Block */
|
||||||
|
SMCA_PSP, /* Platform Security Processor */
|
||||||
|
SMCA_SMU, /* System Management Unit */
|
||||||
|
N_AMD_IP_TYPES
|
||||||
|
};
|
||||||
|
|
||||||
|
struct amd_hwid {
|
||||||
|
const char *name;
|
||||||
|
unsigned int hwid;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
|
||||||
|
|
||||||
|
enum amd_core_mca_blocks {
|
||||||
|
SMCA_LS = 0, /* Load Store */
|
||||||
|
SMCA_IF, /* Instruction Fetch */
|
||||||
|
SMCA_L2_CACHE, /* L2 cache */
|
||||||
|
SMCA_DE, /* Decoder unit */
|
||||||
|
RES, /* Reserved */
|
||||||
|
SMCA_EX, /* Execution unit */
|
||||||
|
SMCA_FP, /* Floating Point */
|
||||||
|
SMCA_L3_CACHE, /* L3 cache */
|
||||||
|
N_CORE_MCA_BLOCKS
|
||||||
|
};
|
||||||
|
|
||||||
|
extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
|
||||||
|
|
||||||
|
enum amd_df_mca_blocks {
|
||||||
|
SMCA_CS = 0, /* Coherent Slave */
|
||||||
|
SMCA_PIE, /* Power management, Interrupts, etc */
|
||||||
|
N_DF_BLOCKS
|
||||||
|
};
|
||||||
|
|
||||||
|
extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _ASM_X86_MCE_H */
|
#endif /* _ASM_X86_MCE_H */
|
||||||
|
|
|
@ -78,6 +78,19 @@ int strcmp(const char *cs, const char *ct);
|
||||||
#define memset(s, c, n) __memset(s, c, n)
|
#define memset(s, c, n) __memset(s, c, n)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* memcpy_mcsafe - copy memory with indication if a machine check happened
|
||||||
|
*
|
||||||
|
* @dst: destination address
|
||||||
|
* @src: source address
|
||||||
|
* @cnt: number of bytes to copy
|
||||||
|
*
|
||||||
|
* Low level memory copy function that catches machine checks
|
||||||
|
*
|
||||||
|
* Return true on success, false on failure.
|
||||||
|
*/
|
||||||
|
bool memcpy_mcsafe(void *dst, const void *src, size_t cnt);
|
||||||
|
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
|
|
||||||
#endif /* _ASM_X86_STRING_64_H */
|
#endif /* _ASM_X86_STRING_64_H */
|
||||||
|
|
|
@ -90,12 +90,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
|
||||||
likely(!__range_not_ok(addr, size, user_addr_max()))
|
likely(!__range_not_ok(addr, size, user_addr_max()))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The exception table consists of pairs of addresses relative to the
|
* The exception table consists of triples of addresses relative to the
|
||||||
* exception table enty itself: the first is the address of an
|
* exception table entry itself. The first address is of an instruction
|
||||||
* instruction that is allowed to fault, and the second is the address
|
* that is allowed to fault, the second is the target at which the program
|
||||||
* at which the program should continue. No registers are modified,
|
* should continue. The third is a handler function to deal with the fault
|
||||||
* so it is entirely up to the continuation code to figure out what to
|
* caused by the instruction in the first field.
|
||||||
* do.
|
|
||||||
*
|
*
|
||||||
* All the routines below use bits of fixup code that are out of line
|
* All the routines below use bits of fixup code that are out of line
|
||||||
* with the main instruction path. This means when everything is well,
|
* with the main instruction path. This means when everything is well,
|
||||||
|
@ -104,13 +103,14 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct exception_table_entry {
|
struct exception_table_entry {
|
||||||
int insn, fixup;
|
int insn, fixup, handler;
|
||||||
};
|
};
|
||||||
/* This is not the generic standard exception_table_entry format */
|
/* This is not the generic standard exception_table_entry format */
|
||||||
#define ARCH_HAS_SORT_EXTABLE
|
#define ARCH_HAS_SORT_EXTABLE
|
||||||
#define ARCH_HAS_SEARCH_EXTABLE
|
#define ARCH_HAS_SEARCH_EXTABLE
|
||||||
|
|
||||||
extern int fixup_exception(struct pt_regs *regs);
|
extern int fixup_exception(struct pt_regs *regs, int trapnr);
|
||||||
|
extern bool ex_has_fault_handler(unsigned long ip);
|
||||||
extern int early_fixup_exception(unsigned long *ip);
|
extern int early_fixup_exception(unsigned long *ip);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/debugfs.h>
|
#include <linux/debugfs.h>
|
||||||
#include <asm/mce.h>
|
#include <asm/mce.h>
|
||||||
|
#include <asm/uaccess.h>
|
||||||
|
|
||||||
#include "mce-internal.h"
|
#include "mce-internal.h"
|
||||||
|
|
||||||
|
@ -29,7 +30,7 @@
|
||||||
* panic situations)
|
* panic situations)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
enum context { IN_KERNEL = 1, IN_USER = 2 };
|
enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
|
||||||
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
|
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
|
||||||
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
|
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
|
||||||
|
|
||||||
|
@ -48,6 +49,7 @@ static struct severity {
|
||||||
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
|
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
|
||||||
#define KERNEL .context = IN_KERNEL
|
#define KERNEL .context = IN_KERNEL
|
||||||
#define USER .context = IN_USER
|
#define USER .context = IN_USER
|
||||||
|
#define KERNEL_RECOV .context = IN_KERNEL_RECOV
|
||||||
#define SER .ser = SER_REQUIRED
|
#define SER .ser = SER_REQUIRED
|
||||||
#define NOSER .ser = NO_SER
|
#define NOSER .ser = NO_SER
|
||||||
#define EXCP .excp = EXCP_CONTEXT
|
#define EXCP .excp = EXCP_CONTEXT
|
||||||
|
@ -86,6 +88,10 @@ static struct severity {
|
||||||
PANIC, "In kernel and no restart IP",
|
PANIC, "In kernel and no restart IP",
|
||||||
EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
|
EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
|
||||||
),
|
),
|
||||||
|
MCESEV(
|
||||||
|
PANIC, "In kernel and no restart IP",
|
||||||
|
EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
|
||||||
|
),
|
||||||
MCESEV(
|
MCESEV(
|
||||||
DEFERRED, "Deferred error",
|
DEFERRED, "Deferred error",
|
||||||
NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
|
NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
|
||||||
|
@ -122,6 +128,11 @@ static struct severity {
|
||||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
|
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
|
||||||
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
|
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
|
||||||
),
|
),
|
||||||
|
MCESEV(
|
||||||
|
AR, "Action required: data load in error recoverable area of kernel",
|
||||||
|
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
|
||||||
|
KERNEL_RECOV
|
||||||
|
),
|
||||||
MCESEV(
|
MCESEV(
|
||||||
AR, "Action required: data load error in a user process",
|
AR, "Action required: data load error in a user process",
|
||||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
|
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
|
||||||
|
@ -170,6 +181,9 @@ static struct severity {
|
||||||
) /* always matches. keep at end */
|
) /* always matches. keep at end */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
|
||||||
|
(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If mcgstatus indicated that ip/cs on the stack were
|
* If mcgstatus indicated that ip/cs on the stack were
|
||||||
* no good, then "m->cs" will be zero and we will have
|
* no good, then "m->cs" will be zero and we will have
|
||||||
|
@ -183,7 +197,11 @@ static struct severity {
|
||||||
*/
|
*/
|
||||||
static int error_context(struct mce *m)
|
static int error_context(struct mce *m)
|
||||||
{
|
{
|
||||||
return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
|
if ((m->cs & 3) == 3)
|
||||||
|
return IN_USER;
|
||||||
|
if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
|
||||||
|
return IN_KERNEL_RECOV;
|
||||||
|
return IN_KERNEL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -961,6 +961,20 @@ static void mce_clear_state(unsigned long *toclear)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int do_memory_failure(struct mce *m)
|
||||||
|
{
|
||||||
|
int flags = MF_ACTION_REQUIRED;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
|
||||||
|
if (!(m->mcgstatus & MCG_STATUS_RIPV))
|
||||||
|
flags |= MF_MUST_KILL;
|
||||||
|
ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
|
||||||
|
if (ret)
|
||||||
|
pr_err("Memory error not recovered");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The actual machine check handler. This only handles real
|
* The actual machine check handler. This only handles real
|
||||||
* exceptions when something got corrupted coming in through int 18.
|
* exceptions when something got corrupted coming in through int 18.
|
||||||
|
@ -998,8 +1012,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
||||||
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
|
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
|
||||||
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
|
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
|
||||||
char *msg = "Unknown";
|
char *msg = "Unknown";
|
||||||
u64 recover_paddr = ~0ull;
|
|
||||||
int flags = MF_ACTION_REQUIRED;
|
|
||||||
int lmce = 0;
|
int lmce = 0;
|
||||||
|
|
||||||
/* If this CPU is offline, just bail out. */
|
/* If this CPU is offline, just bail out. */
|
||||||
|
@ -1136,22 +1148,13 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* At insane "tolerant" levels we take no action. Otherwise
|
* If tolerant is at an insane level we drop requests to kill
|
||||||
* we only die if we have no other choice. For less serious
|
* processes and continue even when there is no way out.
|
||||||
* issues we try to recover, or limit damage to the current
|
|
||||||
* process.
|
|
||||||
*/
|
*/
|
||||||
if (cfg->tolerant < 3) {
|
if (cfg->tolerant == 3)
|
||||||
if (no_way_out)
|
kill_it = 0;
|
||||||
mce_panic("Fatal machine check on current CPU", &m, msg);
|
else if (no_way_out)
|
||||||
if (worst == MCE_AR_SEVERITY) {
|
mce_panic("Fatal machine check on current CPU", &m, msg);
|
||||||
recover_paddr = m.addr;
|
|
||||||
if (!(m.mcgstatus & MCG_STATUS_RIPV))
|
|
||||||
flags |= MF_MUST_KILL;
|
|
||||||
} else if (kill_it) {
|
|
||||||
force_sig(SIGBUS, current);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (worst > 0)
|
if (worst > 0)
|
||||||
mce_report_event(regs);
|
mce_report_event(regs);
|
||||||
|
@ -1159,25 +1162,24 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
||||||
out:
|
out:
|
||||||
sync_core();
|
sync_core();
|
||||||
|
|
||||||
if (recover_paddr == ~0ull)
|
if (worst != MCE_AR_SEVERITY && !kill_it)
|
||||||
goto done;
|
goto out_ist;
|
||||||
|
|
||||||
pr_err("Uncorrected hardware memory error in user-access at %llx",
|
/* Fault was in user mode and we need to take some action */
|
||||||
recover_paddr);
|
if ((m.cs & 3) == 3) {
|
||||||
/*
|
ist_begin_non_atomic(regs);
|
||||||
* We must call memory_failure() here even if the current process is
|
local_irq_enable();
|
||||||
* doomed. We still need to mark the page as poisoned and alert any
|
|
||||||
* other users of the page.
|
if (kill_it || do_memory_failure(&m))
|
||||||
*/
|
force_sig(SIGBUS, current);
|
||||||
ist_begin_non_atomic(regs);
|
local_irq_disable();
|
||||||
local_irq_enable();
|
ist_end_non_atomic();
|
||||||
if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
|
} else {
|
||||||
pr_err("Memory error not recovered");
|
if (!fixup_exception(regs, X86_TRAP_MC))
|
||||||
force_sig(SIGBUS, current);
|
mce_panic("Failed kernel mode recovery", &m, NULL);
|
||||||
}
|
}
|
||||||
local_irq_disable();
|
|
||||||
ist_end_non_atomic();
|
out_ist:
|
||||||
done:
|
|
||||||
ist_exit(regs);
|
ist_exit(regs);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(do_machine_check);
|
EXPORT_SYMBOL_GPL(do_machine_check);
|
||||||
|
@ -1617,10 +1619,10 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
|
||||||
case X86_VENDOR_AMD: {
|
case X86_VENDOR_AMD: {
|
||||||
u32 ebx = cpuid_ebx(0x80000007);
|
u32 ebx = cpuid_ebx(0x80000007);
|
||||||
|
|
||||||
mce_amd_feature_init(c);
|
|
||||||
mce_flags.overflow_recov = !!(ebx & BIT(0));
|
mce_flags.overflow_recov = !!(ebx & BIT(0));
|
||||||
mce_flags.succor = !!(ebx & BIT(1));
|
mce_flags.succor = !!(ebx & BIT(1));
|
||||||
mce_flags.smca = !!(ebx & BIT(3));
|
mce_flags.smca = !!(ebx & BIT(3));
|
||||||
|
mce_amd_feature_init(c);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* (c) 2005-2015 Advanced Micro Devices, Inc.
|
* (c) 2005-2016 Advanced Micro Devices, Inc.
|
||||||
* Your use of this code is subject to the terms and conditions of the
|
* Your use of this code is subject to the terms and conditions of the
|
||||||
* GNU general public license version 2. See "COPYING" or
|
* GNU general public license version 2. See "COPYING" or
|
||||||
* http://www.gnu.org/licenses/gpl.html
|
* http://www.gnu.org/licenses/gpl.html
|
||||||
|
@ -28,7 +28,7 @@
|
||||||
#include <asm/msr.h>
|
#include <asm/msr.h>
|
||||||
#include <asm/trace/irq_vectors.h>
|
#include <asm/trace/irq_vectors.h>
|
||||||
|
|
||||||
#define NR_BLOCKS 9
|
#define NR_BLOCKS 5
|
||||||
#define THRESHOLD_MAX 0xFFF
|
#define THRESHOLD_MAX 0xFFF
|
||||||
#define INT_TYPE_APIC 0x00020000
|
#define INT_TYPE_APIC 0x00020000
|
||||||
#define MASK_VALID_HI 0x80000000
|
#define MASK_VALID_HI 0x80000000
|
||||||
|
@ -49,6 +49,19 @@
|
||||||
#define DEF_LVT_OFF 0x2
|
#define DEF_LVT_OFF 0x2
|
||||||
#define DEF_INT_TYPE_APIC 0x2
|
#define DEF_INT_TYPE_APIC 0x2
|
||||||
|
|
||||||
|
/* Scalable MCA: */
|
||||||
|
|
||||||
|
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
|
||||||
|
#define SMCA_THR_LVT_OFF 0xF000
|
||||||
|
|
||||||
|
/*
|
||||||
|
* OS is required to set the MCAX bit to acknowledge that it is now using the
|
||||||
|
* new MSR ranges and new registers under each bank. It also means that the OS
|
||||||
|
* will configure deferred errors in the new MCx_CONFIG register. If the bit is
|
||||||
|
* not set, uncorrectable errors will cause a system panic.
|
||||||
|
*/
|
||||||
|
#define SMCA_MCAX_EN_OFF 0x1
|
||||||
|
|
||||||
static const char * const th_names[] = {
|
static const char * const th_names[] = {
|
||||||
"load_store",
|
"load_store",
|
||||||
"insn_fetch",
|
"insn_fetch",
|
||||||
|
@ -58,6 +71,35 @@ static const char * const th_names[] = {
|
||||||
"execution_unit",
|
"execution_unit",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Define HWID to IP type mappings for Scalable MCA */
|
||||||
|
struct amd_hwid amd_hwids[] = {
|
||||||
|
[SMCA_F17H_CORE] = { "f17h_core", 0xB0 },
|
||||||
|
[SMCA_DF] = { "data_fabric", 0x2E },
|
||||||
|
[SMCA_UMC] = { "umc", 0x96 },
|
||||||
|
[SMCA_PB] = { "param_block", 0x5 },
|
||||||
|
[SMCA_PSP] = { "psp", 0xFF },
|
||||||
|
[SMCA_SMU] = { "smu", 0x1 },
|
||||||
|
};
|
||||||
|
EXPORT_SYMBOL_GPL(amd_hwids);
|
||||||
|
|
||||||
|
const char * const amd_core_mcablock_names[] = {
|
||||||
|
[SMCA_LS] = "load_store",
|
||||||
|
[SMCA_IF] = "insn_fetch",
|
||||||
|
[SMCA_L2_CACHE] = "l2_cache",
|
||||||
|
[SMCA_DE] = "decode_unit",
|
||||||
|
[RES] = "",
|
||||||
|
[SMCA_EX] = "execution_unit",
|
||||||
|
[SMCA_FP] = "floating_point",
|
||||||
|
[SMCA_L3_CACHE] = "l3_cache",
|
||||||
|
};
|
||||||
|
EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
|
||||||
|
|
||||||
|
const char * const amd_df_mcablock_names[] = {
|
||||||
|
[SMCA_CS] = "coherent_slave",
|
||||||
|
[SMCA_PIE] = "pie",
|
||||||
|
};
|
||||||
|
EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
|
||||||
|
|
||||||
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
|
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
|
||||||
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
|
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
|
||||||
|
|
||||||
|
@ -84,6 +126,13 @@ struct thresh_restart {
|
||||||
|
|
||||||
static inline bool is_shared_bank(int bank)
|
static inline bool is_shared_bank(int bank)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Scalable MCA provides for only one core to have access to the MSRs of
|
||||||
|
* a shared bank.
|
||||||
|
*/
|
||||||
|
if (mce_flags.smca)
|
||||||
|
return false;
|
||||||
|
|
||||||
/* Bank 4 is for northbridge reporting and is thus shared */
|
/* Bank 4 is for northbridge reporting and is thus shared */
|
||||||
return (bank == 4);
|
return (bank == 4);
|
||||||
}
|
}
|
||||||
|
@ -135,6 +184,14 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (apic != msr) {
|
if (apic != msr) {
|
||||||
|
/*
|
||||||
|
* On SMCA CPUs, LVT offset is programmed at a different MSR, and
|
||||||
|
* the BIOS provides the value. The original field where LVT offset
|
||||||
|
* was set is reserved. Return early here:
|
||||||
|
*/
|
||||||
|
if (mce_flags.smca)
|
||||||
|
return 0;
|
||||||
|
|
||||||
pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
|
pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
|
||||||
"for bank %d, block %d (MSR%08X=0x%x%08x)\n",
|
"for bank %d, block %d (MSR%08X=0x%x%08x)\n",
|
||||||
b->cpu, apic, b->bank, b->block, b->address, hi, lo);
|
b->cpu, apic, b->bank, b->block, b->address, hi, lo);
|
||||||
|
@ -144,10 +201,7 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
|
||||||
return 1;
|
return 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/* Reprogram MCx_MISC MSR behind this threshold bank. */
|
||||||
* Called via smp_call_function_single(), must be called with correct
|
|
||||||
* cpu affinity.
|
|
||||||
*/
|
|
||||||
static void threshold_restart_bank(void *_tr)
|
static void threshold_restart_bank(void *_tr)
|
||||||
{
|
{
|
||||||
struct thresh_restart *tr = _tr;
|
struct thresh_restart *tr = _tr;
|
||||||
|
@ -247,27 +301,116 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
|
||||||
wrmsr(MSR_CU_DEF_ERR, low, high);
|
wrmsr(MSR_CU_DEF_ERR, low, high);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
|
||||||
|
unsigned int bank, unsigned int block)
|
||||||
|
{
|
||||||
|
u32 addr = 0, offset = 0;
|
||||||
|
|
||||||
|
if (mce_flags.smca) {
|
||||||
|
if (!block) {
|
||||||
|
addr = MSR_AMD64_SMCA_MCx_MISC(bank);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* For SMCA enabled processors, BLKPTR field of the
|
||||||
|
* first MISC register (MCx_MISC0) indicates presence of
|
||||||
|
* additional MISC register set (MISC1-4).
|
||||||
|
*/
|
||||||
|
u32 low, high;
|
||||||
|
|
||||||
|
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
|
||||||
|
return addr;
|
||||||
|
|
||||||
|
if (!(low & MCI_CONFIG_MCAX))
|
||||||
|
return addr;
|
||||||
|
|
||||||
|
if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
|
||||||
|
(low & MASK_BLKPTR_LO))
|
||||||
|
addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
|
||||||
|
}
|
||||||
|
return addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fall back to method we used for older processors: */
|
||||||
|
switch (block) {
|
||||||
|
case 0:
|
||||||
|
addr = MSR_IA32_MCx_MISC(bank);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
offset = ((low & MASK_BLKPTR_LO) >> 21);
|
||||||
|
if (offset)
|
||||||
|
addr = MCG_XBLK_ADDR + offset;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
addr = ++current_addr;
|
||||||
|
}
|
||||||
|
return addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
|
||||||
|
int offset, u32 misc_high)
|
||||||
|
{
|
||||||
|
unsigned int cpu = smp_processor_id();
|
||||||
|
struct threshold_block b;
|
||||||
|
int new;
|
||||||
|
|
||||||
|
if (!block)
|
||||||
|
per_cpu(bank_map, cpu) |= (1 << bank);
|
||||||
|
|
||||||
|
memset(&b, 0, sizeof(b));
|
||||||
|
b.cpu = cpu;
|
||||||
|
b.bank = bank;
|
||||||
|
b.block = block;
|
||||||
|
b.address = addr;
|
||||||
|
b.interrupt_capable = lvt_interrupt_supported(bank, misc_high);
|
||||||
|
|
||||||
|
if (!b.interrupt_capable)
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
b.interrupt_enable = 1;
|
||||||
|
|
||||||
|
if (mce_flags.smca) {
|
||||||
|
u32 smca_low, smca_high;
|
||||||
|
u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
|
||||||
|
|
||||||
|
if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
|
||||||
|
smca_high |= SMCA_MCAX_EN_OFF;
|
||||||
|
wrmsr(smca_addr, smca_low, smca_high);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Gather LVT offset for thresholding: */
|
||||||
|
if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
|
||||||
|
} else {
|
||||||
|
new = (misc_high & MASK_LVTOFF_HI) >> 20;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = setup_APIC_mce_threshold(offset, new);
|
||||||
|
|
||||||
|
if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
|
||||||
|
mce_threshold_vector = amd_threshold_interrupt;
|
||||||
|
|
||||||
|
done:
|
||||||
|
mce_threshold_block_init(&b, offset);
|
||||||
|
|
||||||
|
out:
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
/* cpu init entry point, called from mce.c with preempt off */
|
/* cpu init entry point, called from mce.c with preempt off */
|
||||||
void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||||
{
|
{
|
||||||
struct threshold_block b;
|
|
||||||
unsigned int cpu = smp_processor_id();
|
|
||||||
u32 low = 0, high = 0, address = 0;
|
u32 low = 0, high = 0, address = 0;
|
||||||
unsigned int bank, block;
|
unsigned int bank, block;
|
||||||
int offset = -1, new;
|
int offset = -1;
|
||||||
|
|
||||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||||
for (block = 0; block < NR_BLOCKS; ++block) {
|
for (block = 0; block < NR_BLOCKS; ++block) {
|
||||||
if (block == 0)
|
address = get_block_address(address, low, high, bank, block);
|
||||||
address = MSR_IA32_MCx_MISC(bank);
|
if (!address)
|
||||||
else if (block == 1) {
|
break;
|
||||||
address = (low & MASK_BLKPTR_LO) >> 21;
|
|
||||||
if (!address)
|
|
||||||
break;
|
|
||||||
|
|
||||||
address += MCG_XBLK_ADDR;
|
|
||||||
} else
|
|
||||||
++address;
|
|
||||||
|
|
||||||
if (rdmsr_safe(address, &low, &high))
|
if (rdmsr_safe(address, &low, &high))
|
||||||
break;
|
break;
|
||||||
|
@ -279,29 +422,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||||
(high & MASK_LOCKED_HI))
|
(high & MASK_LOCKED_HI))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!block)
|
offset = prepare_threshold_block(bank, block, address, offset, high);
|
||||||
per_cpu(bank_map, cpu) |= (1 << bank);
|
|
||||||
|
|
||||||
memset(&b, 0, sizeof(b));
|
|
||||||
b.cpu = cpu;
|
|
||||||
b.bank = bank;
|
|
||||||
b.block = block;
|
|
||||||
b.address = address;
|
|
||||||
b.interrupt_capable = lvt_interrupt_supported(bank, high);
|
|
||||||
|
|
||||||
if (!b.interrupt_capable)
|
|
||||||
goto init;
|
|
||||||
|
|
||||||
b.interrupt_enable = 1;
|
|
||||||
new = (high & MASK_LVTOFF_HI) >> 20;
|
|
||||||
offset = setup_APIC_mce_threshold(offset, new);
|
|
||||||
|
|
||||||
if ((offset == new) &&
|
|
||||||
(mce_threshold_vector != amd_threshold_interrupt))
|
|
||||||
mce_threshold_vector = amd_threshold_interrupt;
|
|
||||||
|
|
||||||
init:
|
|
||||||
mce_threshold_block_init(&b, offset);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -394,16 +515,9 @@ static void amd_threshold_interrupt(void)
|
||||||
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
|
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
|
||||||
continue;
|
continue;
|
||||||
for (block = 0; block < NR_BLOCKS; ++block) {
|
for (block = 0; block < NR_BLOCKS; ++block) {
|
||||||
if (block == 0) {
|
address = get_block_address(address, low, high, bank, block);
|
||||||
address = MSR_IA32_MCx_MISC(bank);
|
if (!address)
|
||||||
} else if (block == 1) {
|
break;
|
||||||
address = (low & MASK_BLKPTR_LO) >> 21;
|
|
||||||
if (!address)
|
|
||||||
break;
|
|
||||||
address += MCG_XBLK_ADDR;
|
|
||||||
} else {
|
|
||||||
++address;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rdmsr_safe(address, &low, &high))
|
if (rdmsr_safe(address, &low, &high))
|
||||||
break;
|
break;
|
||||||
|
@ -623,16 +737,11 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
|
||||||
if (err)
|
if (err)
|
||||||
goto out_free;
|
goto out_free;
|
||||||
recurse:
|
recurse:
|
||||||
if (!block) {
|
address = get_block_address(address, low, high, bank, ++block);
|
||||||
address = (low & MASK_BLKPTR_LO) >> 21;
|
if (!address)
|
||||||
if (!address)
|
return 0;
|
||||||
return 0;
|
|
||||||
address += MCG_XBLK_ADDR;
|
|
||||||
} else {
|
|
||||||
++address;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = allocate_threshold_blocks(cpu, bank, ++block, address);
|
err = allocate_threshold_blocks(cpu, bank, block, address);
|
||||||
if (err)
|
if (err)
|
||||||
goto out_free;
|
goto out_free;
|
||||||
|
|
||||||
|
|
|
@ -988,7 +988,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
|
||||||
* In case the user-specified fault handler returned
|
* In case the user-specified fault handler returned
|
||||||
* zero, try to fix up.
|
* zero, try to fix up.
|
||||||
*/
|
*/
|
||||||
if (fixup_exception(regs))
|
if (fixup_exception(regs, trapnr))
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -199,7 +199,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!user_mode(regs)) {
|
if (!user_mode(regs)) {
|
||||||
if (!fixup_exception(regs)) {
|
if (!fixup_exception(regs, trapnr)) {
|
||||||
tsk->thread.error_code = error_code;
|
tsk->thread.error_code = error_code;
|
||||||
tsk->thread.trap_nr = trapnr;
|
tsk->thread.trap_nr = trapnr;
|
||||||
die(str, regs, error_code);
|
die(str, regs, error_code);
|
||||||
|
@ -453,7 +453,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
|
||||||
|
|
||||||
tsk = current;
|
tsk = current;
|
||||||
if (!user_mode(regs)) {
|
if (!user_mode(regs)) {
|
||||||
if (fixup_exception(regs))
|
if (fixup_exception(regs, X86_TRAP_GP))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
tsk->thread.error_code = error_code;
|
tsk->thread.error_code = error_code;
|
||||||
|
@ -699,7 +699,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
|
||||||
conditional_sti(regs);
|
conditional_sti(regs);
|
||||||
|
|
||||||
if (!user_mode(regs)) {
|
if (!user_mode(regs)) {
|
||||||
if (!fixup_exception(regs)) {
|
if (!fixup_exception(regs, trapnr)) {
|
||||||
task->thread.error_code = error_code;
|
task->thread.error_code = error_code;
|
||||||
task->thread.trap_nr = trapnr;
|
task->thread.trap_nr = trapnr;
|
||||||
die(str, regs, error_code);
|
die(str, regs, error_code);
|
||||||
|
|
|
@ -37,6 +37,8 @@ EXPORT_SYMBOL(__copy_user_nocache);
|
||||||
EXPORT_SYMBOL(_copy_from_user);
|
EXPORT_SYMBOL(_copy_from_user);
|
||||||
EXPORT_SYMBOL(_copy_to_user);
|
EXPORT_SYMBOL(_copy_to_user);
|
||||||
|
|
||||||
|
EXPORT_SYMBOL_GPL(memcpy_mcsafe);
|
||||||
|
|
||||||
EXPORT_SYMBOL(copy_page);
|
EXPORT_SYMBOL(copy_page);
|
||||||
EXPORT_SYMBOL(clear_page);
|
EXPORT_SYMBOL(clear_page);
|
||||||
|
|
||||||
|
|
|
@ -177,3 +177,120 @@ ENTRY(memcpy_orig)
|
||||||
.Lend:
|
.Lend:
|
||||||
retq
|
retq
|
||||||
ENDPROC(memcpy_orig)
|
ENDPROC(memcpy_orig)
|
||||||
|
|
||||||
|
#ifndef CONFIG_UML
/*
 * memcpy_mcsafe - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
 * Writes to target are posted and don't generate machine checks.
 *
 * Register use, as read from the code below:
 *   %rdi - destination
 *   %rsi - source
 *   %edx - number of bytes to copy (only the low 32 bits are examined)
 *
 * Result in %rax: 0 when the whole copy completed, 1 when a fault on one of
 * the annotated source loads diverted execution to .L_memcpy_mcsafe_fail.
 */
ENTRY(memcpy_mcsafe)
	cmpl $8, %edx
	/* Less than 8 bytes? Go to byte copy loop */
	jb .L_no_whole_words

	/* Check for bad alignment of source */
	testl $7, %esi
	/* Already aligned */
	jz .L_8byte_aligned

	/* Copy one byte at a time until source is 8-byte aligned */
	movl %esi, %ecx
	andl $7, %ecx
	subl $8, %ecx
	negl %ecx
	subl %ecx, %edx
.L_copy_leading_bytes:
	movb (%rsi), %al
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_copy_leading_bytes

.L_8byte_aligned:
	/* Figure out how many whole cache lines (64-bytes) to copy */
	movl %edx, %ecx
	andl $63, %edx
	shrl $6, %ecx
	jz .L_no_whole_cache_lines

	/* Loop copying whole cache lines */
.L_cache_w0: movq (%rsi), %r8
.L_cache_w1: movq 1*8(%rsi), %r9
.L_cache_w2: movq 2*8(%rsi), %r10
.L_cache_w3: movq 3*8(%rsi), %r11
	movq %r8, (%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
.L_cache_w4: movq 4*8(%rsi), %r8
.L_cache_w5: movq 5*8(%rsi), %r9
.L_cache_w6: movq 6*8(%rsi), %r10
.L_cache_w7: movq 7*8(%rsi), %r11
	movq %r8, 4*8(%rdi)
	movq %r9, 5*8(%rdi)
	movq %r10, 6*8(%rdi)
	movq %r11, 7*8(%rdi)
	leaq 64(%rsi), %rsi
	leaq 64(%rdi), %rdi
	decl %ecx
	jnz .L_cache_w0

	/* Are there any trailing 8-byte words? */
.L_no_whole_cache_lines:
	movl %edx, %ecx
	andl $7, %edx
	shrl $3, %ecx
	jz .L_no_whole_words

	/* Copy trailing words */
.L_copy_trailing_words:
	movq (%rsi), %r8
	mov %r8, (%rdi)
	leaq 8(%rsi), %rsi
	leaq 8(%rdi), %rdi
	decl %ecx
	jnz .L_copy_trailing_words

	/* Any trailing bytes? */
.L_no_whole_words:
	andl %edx, %edx
	jz .L_done_memcpy_trap

	/* Copy trailing bytes */
	movl %edx, %ecx
.L_copy_trailing_bytes:
	movb (%rsi), %al
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_copy_trailing_bytes

	/* Copy successful. Return zero */
.L_done_memcpy_trap:
	xorq %rax, %rax
	ret
ENDPROC(memcpy_mcsafe)

	.section .fixup, "ax"
	/* Return non-zero (1) for any failure */
.L_memcpy_mcsafe_fail:
	mov $1, %rax
	ret

	.previous

	/*
	 * One entry per instruction that loads from the source. Each of the
	 * eight loads in the cache-line loop (w0..w7) needs its own entry;
	 * a load without one has no fixup to divert to.
	 */
	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
#endif
|
||||||
|
|
|
@ -3,6 +3,9 @@
|
||||||
#include <linux/sort.h>
|
#include <linux/sort.h>
|
||||||
#include <asm/uaccess.h>
|
#include <asm/uaccess.h>
|
||||||
|
|
||||||
|
typedef bool (*ex_handler_t)(const struct exception_table_entry *,
|
||||||
|
struct pt_regs *, int);
|
||||||
|
|
||||||
static inline unsigned long
|
static inline unsigned long
|
||||||
ex_insn_addr(const struct exception_table_entry *x)
|
ex_insn_addr(const struct exception_table_entry *x)
|
||||||
{
|
{
|
||||||
|
@ -13,11 +16,56 @@ ex_fixup_addr(const struct exception_table_entry *x)
|
||||||
{
|
{
|
||||||
return (unsigned long)&x->fixup + x->fixup;
|
return (unsigned long)&x->fixup + x->fixup;
|
||||||
}
|
}
|
||||||
|
static inline ex_handler_t
|
||||||
int fixup_exception(struct pt_regs *regs)
|
ex_fixup_handler(const struct exception_table_entry *x)
|
||||||
{
|
{
|
||||||
const struct exception_table_entry *fixup;
|
return (ex_handler_t)((unsigned long)&x->handler + x->handler);
|
||||||
unsigned long new_ip;
|
}
|
||||||
|
|
||||||
|
bool ex_handler_default(const struct exception_table_entry *fixup,
|
||||||
|
struct pt_regs *regs, int trapnr)
|
||||||
|
{
|
||||||
|
regs->ip = ex_fixup_addr(fixup);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(ex_handler_default);
|
||||||
|
|
||||||
|
bool ex_handler_fault(const struct exception_table_entry *fixup,
|
||||||
|
struct pt_regs *regs, int trapnr)
|
||||||
|
{
|
||||||
|
regs->ip = ex_fixup_addr(fixup);
|
||||||
|
regs->ax = trapnr;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(ex_handler_fault);
|
||||||
|
|
||||||
|
bool ex_handler_ext(const struct exception_table_entry *fixup,
|
||||||
|
struct pt_regs *regs, int trapnr)
|
||||||
|
{
|
||||||
|
/* Special hack for uaccess_err */
|
||||||
|
current_thread_info()->uaccess_err = 1;
|
||||||
|
regs->ip = ex_fixup_addr(fixup);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(ex_handler_ext);
|
||||||
|
|
||||||
|
bool ex_has_fault_handler(unsigned long ip)
|
||||||
|
{
|
||||||
|
const struct exception_table_entry *e;
|
||||||
|
ex_handler_t handler;
|
||||||
|
|
||||||
|
e = search_exception_tables(ip);
|
||||||
|
if (!e)
|
||||||
|
return false;
|
||||||
|
handler = ex_fixup_handler(e);
|
||||||
|
|
||||||
|
return handler == ex_handler_fault;
|
||||||
|
}
|
||||||
|
|
||||||
|
int fixup_exception(struct pt_regs *regs, int trapnr)
|
||||||
|
{
|
||||||
|
const struct exception_table_entry *e;
|
||||||
|
ex_handler_t handler;
|
||||||
|
|
||||||
#ifdef CONFIG_PNPBIOS
|
#ifdef CONFIG_PNPBIOS
|
||||||
if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
|
if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
|
||||||
|
@ -33,42 +81,34 @@ int fixup_exception(struct pt_regs *regs)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
fixup = search_exception_tables(regs->ip);
|
e = search_exception_tables(regs->ip);
|
||||||
if (fixup) {
|
if (!e)
|
||||||
new_ip = ex_fixup_addr(fixup);
|
return 0;
|
||||||
|
|
||||||
if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
|
handler = ex_fixup_handler(e);
|
||||||
/* Special hack for uaccess_err */
|
return handler(e, regs, trapnr);
|
||||||
current_thread_info()->uaccess_err = 1;
|
|
||||||
new_ip -= 0x7ffffff0;
|
|
||||||
}
|
|
||||||
regs->ip = new_ip;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Restricted version used during very early boot */
|
/* Restricted version used during very early boot */
|
||||||
int __init early_fixup_exception(unsigned long *ip)
|
int __init early_fixup_exception(unsigned long *ip)
|
||||||
{
|
{
|
||||||
const struct exception_table_entry *fixup;
|
const struct exception_table_entry *e;
|
||||||
unsigned long new_ip;
|
unsigned long new_ip;
|
||||||
|
ex_handler_t handler;
|
||||||
|
|
||||||
fixup = search_exception_tables(*ip);
|
e = search_exception_tables(*ip);
|
||||||
if (fixup) {
|
if (!e)
|
||||||
new_ip = ex_fixup_addr(fixup);
|
return 0;
|
||||||
|
|
||||||
if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
|
new_ip = ex_fixup_addr(e);
|
||||||
/* uaccess handling not supported during early boot */
|
handler = ex_fixup_handler(e);
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
*ip = new_ip;
|
/* special handling not supported during early boot */
|
||||||
return 1;
|
if (handler != ex_handler_default)
|
||||||
}
|
return 0;
|
||||||
|
|
||||||
return 0;
|
*ip = new_ip;
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -133,6 +173,8 @@ void sort_extable(struct exception_table_entry *start,
|
||||||
i += 4;
|
i += 4;
|
||||||
p->fixup += i;
|
p->fixup += i;
|
||||||
i += 4;
|
i += 4;
|
||||||
|
p->handler += i;
|
||||||
|
i += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
sort(start, finish - start, sizeof(struct exception_table_entry),
|
sort(start, finish - start, sizeof(struct exception_table_entry),
|
||||||
|
@ -145,6 +187,8 @@ void sort_extable(struct exception_table_entry *start,
|
||||||
i += 4;
|
i += 4;
|
||||||
p->fixup -= i;
|
p->fixup -= i;
|
||||||
i += 4;
|
i += 4;
|
||||||
|
p->handler -= i;
|
||||||
|
i += 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -663,7 +663,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
|
||||||
int sig;
|
int sig;
|
||||||
|
|
||||||
/* Are we prepared to handle this kernel fault? */
|
/* Are we prepared to handle this kernel fault? */
|
||||||
if (fixup_exception(regs)) {
|
if (fixup_exception(regs, X86_TRAP_PF)) {
|
||||||
/*
|
/*
|
||||||
* Any interrupt that takes a fault gets the fixup. This makes
|
* Any interrupt that takes a fault gets the fixup. This makes
|
||||||
* the below recursive fault logic only apply to a faults from
|
* the below recursive fault logic only apply to a faults from
|
||||||
|
|
|
@ -147,6 +147,135 @@ static const char * const mc6_mce_desc[] = {
|
||||||
"Status Register File",
|
"Status Register File",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Scalable MCA error strings */
|
||||||
|
/*
 * Error descriptions used for mca_type SMCA_LS, indexed by extended error
 * code. Index 4 is reserved and intentionally left as an empty string.
 */
static const char * const f17h_ls_mce_desc[] = {
	[0]  = "Load queue parity",
	[1]  = "Store queue parity",
	[2]  = "Miss address buffer payload parity",
	[3]  = "L1 TLB parity",
	[4]  = "",						/* reserved */
	[5]  = "DC tag error type 6",
	[6]  = "DC tag error type 1",
	[7]  = "Internal error type 1",
	[8]  = "Internal error type 2",
	[9]  = "Sys Read data error thread 0",
	[10] = "Sys read data error thread 1",
	[11] = "DC tag error type 2",
	[12] = "DC data error type 1 (poison consumption)",
	[13] = "DC data error type 2",
	[14] = "DC data error type 3",
	[15] = "DC tag error type 4",
	[16] = "L2 TLB parity",
	[17] = "PDC parity error",
	[18] = "DC tag error type 3",
	[19] = "DC tag error type 5",
	[20] = "L2 fill data error",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for mca_type SMCA_IF, indexed by extended error code. */
static const char * const f17h_if_mce_desc[] = {
	[0]  = "microtag probe port parity error",
	[1]  = "IC microtag or full tag multi-hit error",
	[2]  = "IC full tag parity",
	[3]  = "IC data array parity",
	[4]  = "Decoupling queue phys addr parity error",
	[5]  = "L0 ITLB parity error",
	[6]  = "L1 ITLB parity error",
	[7]  = "L2 ITLB parity error",
	[8]  = "BPQ snoop parity on Thread 0",
	[9]  = "BPQ snoop parity on Thread 1",
	[10] = "L1 BTB multi-match error",
	[11] = "L2 BTB multi-match error",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for mca_type SMCA_L2_CACHE, indexed by extended error code. */
static const char * const f17h_l2_mce_desc[] = {
	[0] = "L2M tag multi-way-hit error",
	[1] = "L2M tag ECC error",
	[2] = "L2M data ECC error",
	[3] = "HW assert",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for mca_type SMCA_DE, indexed by extended error code. */
static const char * const f17h_de_mce_desc[] = {
	[0] = "uop cache tag parity error",
	[1] = "uop cache data parity error",
	[2] = "Insn buffer parity error",
	[3] = "Insn dispatch queue parity error",
	[4] = "Fetch address FIFO parity",
	[5] = "Patch RAM data parity",
	[6] = "Patch RAM sequencer parity",
	[7] = "uop buffer parity",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for mca_type SMCA_EX, indexed by extended error code. */
static const char * const f17h_ex_mce_desc[] = {
	[0] = "Watchdog timeout error",
	[1] = "Phy register file parity",
	[2] = "Flag register file parity",
	[3] = "Immediate displacement register file parity",
	[4] = "Address generator payload parity",
	[5] = "EX payload parity",
	[6] = "Checkpoint queue parity",
	[7] = "Retire dispatch queue parity",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for mca_type SMCA_FP, indexed by extended error code. */
static const char * const f17h_fp_mce_desc[] = {
	[0] = "Physical register file parity",
	[1] = "Freelist parity error",
	[2] = "Schedule queue parity",
	[3] = "NSQ parity error",
	[4] = "Retire queue parity",
	[5] = "Status register file parity",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for mca_type SMCA_L3_CACHE, indexed by extended error code. */
static const char * const f17h_l3_mce_desc[] = {
	[0] = "Shadow tag macro ECC error",
	[1] = "Shadow tag macro multi-way-hit error",
	[2] = "L3M tag ECC error",
	[3] = "L3M tag multi-way-hit error",
	[4] = "L3M data ECC error",
	[5] = "XI parity, L3 fill done channel error",
	[6] = "L3 victim queue parity",
	[7] = "L3 HW assert",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for mca_type SMCA_CS, indexed by extended error code. */
static const char * const f17h_cs_mce_desc[] = {
	[0] = "Illegal request from transport layer",
	[1] = "Address violation",
	[2] = "Security violation",
	[3] = "Illegal response from transport layer",
	[4] = "Unexpected response",
	[5] = "Parity error on incoming request or probe response data",
	[6] = "Parity error on incoming read response data",
	[7] = "Atomic request parity",
	[8] = "ECC error on probe filter access",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for mca_type SMCA_PIE, indexed by extended error code. */
static const char * const f17h_pie_mce_desc[] = {
	[0] = "HW assert",
	[1] = "Internal PIE register security violation",
	[2] = "Error on GMI link",
	[3] = "Poison data written to internal PIE register",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for the SMCA_UMC hardware ID, indexed by extended error code. */
static const char * const f17h_umc_mce_desc[] = {
	[0] = "DRAM ECC error",
	[1] = "Data poison error on DRAM",
	[2] = "SDP parity error",
	[3] = "Advanced peripheral bus error",
	[4] = "Command/address parity error",
	[5] = "Write data CRC error",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for the SMCA_PB hardware ID, indexed by extended error code. */
static const char * const f17h_pb_mce_desc[] = {
	[0] = "Parameter Block RAM ECC error",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for the SMCA_PSP hardware ID, indexed by extended error code. */
static const char * const f17h_psp_mce_desc[] = {
	[0] = "PSP RAM ECC or parity error",
};
|
||||||
|
|
||||||
|
/* Error descriptions used for the SMCA_SMU hardware ID, indexed by extended error code. */
static const char * const f17h_smu_mce_desc[] = {
	[0] = "SMU RAM ECC or parity error",
};
|
||||||
|
|
||||||
static bool f12h_mc0_mce(u16 ec, u8 xec)
|
static bool f12h_mc0_mce(u16 ec, u8 xec)
|
||||||
{
|
{
|
||||||
bool ret = false;
|
bool ret = false;
|
||||||
|
@ -691,6 +820,177 @@ static void decode_mc6_mce(struct mce *m)
|
||||||
pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
|
pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Print the description of a core-side error.
 *
 * @ip_name:  label printed in front of "Error:"
 * @xec:      extended error code, used as index into the description table
 * @mca_type: selects which description table applies
 *
 * The "<ip_name> Error: " prefix is always emitted first; every path then
 * finishes the line with pr_cont().
 */
static void decode_f17h_core_errors(const char *ip_name, u8 xec,
				    unsigned int mca_type)
{
	const char * const *descs;
	size_t nr_descs;

	pr_emerg(HW_ERR "%s Error: ", ip_name);

	switch (mca_type) {
	case SMCA_LS:
		descs	 = f17h_ls_mce_desc;
		nr_descs = ARRAY_SIZE(f17h_ls_mce_desc);
		break;

	case SMCA_IF:
		descs	 = f17h_if_mce_desc;
		nr_descs = ARRAY_SIZE(f17h_if_mce_desc);
		break;

	case SMCA_L2_CACHE:
		descs	 = f17h_l2_mce_desc;
		nr_descs = ARRAY_SIZE(f17h_l2_mce_desc);
		break;

	case SMCA_DE:
		descs	 = f17h_de_mce_desc;
		nr_descs = ARRAY_SIZE(f17h_de_mce_desc);
		break;

	case SMCA_EX:
		descs	 = f17h_ex_mce_desc;
		nr_descs = ARRAY_SIZE(f17h_ex_mce_desc);
		break;

	case SMCA_FP:
		descs	 = f17h_fp_mce_desc;
		nr_descs = ARRAY_SIZE(f17h_fp_mce_desc);
		break;

	case SMCA_L3_CACHE:
		descs	 = f17h_l3_mce_desc;
		nr_descs = ARRAY_SIZE(f17h_l3_mce_desc);
		break;

	default:
		pr_cont("Corrupted MCA core error info.\n");
		return;
	}

	/* Code 4 of the LS table is a reserved slot with no description. */
	if (mca_type == SMCA_LS && xec == 0x4) {
		pr_cont("Unrecognized LS MCA error code.\n");
		return;
	}

	if (xec >= nr_descs) {
		pr_cont("Unrecognized %s MCA bank error code.\n",
			amd_core_mcablock_names[mca_type]);
		return;
	}

	pr_cont("%s.\n", descs[xec]);
}
|
||||||
|
|
||||||
|
/*
 * Print the description of a Data Fabric error.
 *
 * @xec:      extended error code, used as index into the description table
 * @mca_type: selects the table (SMCA_CS or SMCA_PIE); any other value is
 *            reported as corrupted info
 */
static void decode_df_errors(u8 xec, unsigned int mca_type)
{
	const char * const *descs;
	size_t nr_descs;

	pr_emerg(HW_ERR "Data Fabric Error: ");

	if (mca_type == SMCA_CS) {
		descs	 = f17h_cs_mce_desc;
		nr_descs = ARRAY_SIZE(f17h_cs_mce_desc);
	} else if (mca_type == SMCA_PIE) {
		descs	 = f17h_pie_mce_desc;
		nr_descs = ARRAY_SIZE(f17h_pie_mce_desc);
	} else {
		pr_cont("Corrupted MCA Data Fabric info.\n");
		return;
	}

	if (xec >= nr_descs) {
		pr_cont("Unrecognized %s MCA bank error code.\n",
			amd_df_mcablock_names[mca_type]);
		return;
	}

	pr_cont("%s.\n", descs[xec]);
}
|
||||||
|
|
||||||
|
/* Decode errors according to Scalable MCA specification */
|
||||||
|
static void decode_smca_errors(struct mce *m)
|
||||||
|
{
|
||||||
|
u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
|
||||||
|
unsigned int hwid, mca_type, i;
|
||||||
|
u8 xec = XEC(m->status, xec_mask);
|
||||||
|
const char * const *error_desc_array;
|
||||||
|
const char *ip_name;
|
||||||
|
u32 low, high;
|
||||||
|
size_t len;
|
||||||
|
|
||||||
|
if (rdmsr_safe(addr, &low, &high)) {
|
||||||
|
pr_emerg("Invalid IP block specified, error information is unreliable.\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
hwid = high & MCI_IPID_HWID;
|
||||||
|
mca_type = (high & MCI_IPID_MCATYPE) >> 16;
|
||||||
|
|
||||||
|
pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Based on hwid and mca_type values, decode errors from respective IPs.
|
||||||
|
* Note: mca_type values make sense only in the context of an hwid.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
|
||||||
|
if (amd_hwids[i].hwid == hwid)
|
||||||
|
break;
|
||||||
|
|
||||||
|
switch (i) {
|
||||||
|
case SMCA_F17H_CORE:
|
||||||
|
ip_name = (mca_type == SMCA_L3_CACHE) ?
|
||||||
|
"L3 Cache" : "F17h Core";
|
||||||
|
return decode_f17h_core_errors(ip_name, xec, mca_type);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SMCA_DF:
|
||||||
|
return decode_df_errors(xec, mca_type);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SMCA_UMC:
|
||||||
|
error_desc_array = f17h_umc_mce_desc;
|
||||||
|
len = ARRAY_SIZE(f17h_umc_mce_desc) - 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SMCA_PB:
|
||||||
|
error_desc_array = f17h_pb_mce_desc;
|
||||||
|
len = ARRAY_SIZE(f17h_pb_mce_desc) - 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SMCA_PSP:
|
||||||
|
error_desc_array = f17h_psp_mce_desc;
|
||||||
|
len = ARRAY_SIZE(f17h_psp_mce_desc) - 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SMCA_SMU:
|
||||||
|
error_desc_array = f17h_smu_mce_desc;
|
||||||
|
len = ARRAY_SIZE(f17h_smu_mce_desc) - 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ip_name = amd_hwids[i].name;
|
||||||
|
pr_emerg(HW_ERR "%s Error: ", ip_name);
|
||||||
|
|
||||||
|
if (xec > len) {
|
||||||
|
pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
pr_cont("%s.\n", error_desc_array[xec]);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void amd_decode_err_code(u16 ec)
|
static inline void amd_decode_err_code(u16 ec)
|
||||||
{
|
{
|
||||||
if (INT_ERROR(ec)) {
|
if (INT_ERROR(ec)) {
|
||||||
|
@ -752,6 +1052,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
||||||
struct mce *m = (struct mce *)data;
|
struct mce *m = (struct mce *)data;
|
||||||
struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
|
struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
|
||||||
int ecc;
|
int ecc;
|
||||||
|
u32 ebx = cpuid_ebx(0x80000007);
|
||||||
|
|
||||||
if (amd_filter_mce(m))
|
if (amd_filter_mce(m))
|
||||||
return NOTIFY_STOP;
|
return NOTIFY_STOP;
|
||||||
|
@ -769,11 +1070,20 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
||||||
((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
|
((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
|
||||||
((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
|
((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
|
||||||
|
|
||||||
if (c->x86 == 0x15 || c->x86 == 0x16)
|
if (c->x86 >= 0x15)
|
||||||
pr_cont("|%s|%s",
|
pr_cont("|%s|%s",
|
||||||
((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
|
((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
|
||||||
((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
|
((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
|
||||||
|
|
||||||
|
if (!!(ebx & BIT(3))) {
|
||||||
|
u32 low, high;
|
||||||
|
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
|
||||||
|
|
||||||
|
if (!rdmsr_safe(addr, &low, &high) &&
|
||||||
|
(low & MCI_CONFIG_MCAX))
|
||||||
|
pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
|
||||||
|
}
|
||||||
|
|
||||||
/* do the two bits[14:13] together */
|
/* do the two bits[14:13] together */
|
||||||
ecc = (m->status >> 45) & 0x3;
|
ecc = (m->status >> 45) & 0x3;
|
||||||
if (ecc)
|
if (ecc)
|
||||||
|
@ -784,6 +1094,11 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
||||||
if (m->status & MCI_STATUS_ADDRV)
|
if (m->status & MCI_STATUS_ADDRV)
|
||||||
pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
|
pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
|
||||||
|
|
||||||
|
if (!!(ebx & BIT(3))) {
|
||||||
|
decode_smca_errors(m);
|
||||||
|
goto err_code;
|
||||||
|
}
|
||||||
|
|
||||||
if (!fam_ops)
|
if (!fam_ops)
|
||||||
goto err_code;
|
goto err_code;
|
||||||
|
|
||||||
|
@ -834,6 +1149,7 @@ static struct notifier_block amd_mce_dec_nb = {
|
||||||
static int __init mce_amd_init(void)
|
static int __init mce_amd_init(void)
|
||||||
{
|
{
|
||||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||||
|
u32 ebx;
|
||||||
|
|
||||||
if (c->x86_vendor != X86_VENDOR_AMD)
|
if (c->x86_vendor != X86_VENDOR_AMD)
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
@ -888,10 +1204,18 @@ static int __init mce_amd_init(void)
|
||||||
fam_ops->mc2_mce = f16h_mc2_mce;
|
fam_ops->mc2_mce = f16h_mc2_mce;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 0x17:
|
||||||
|
ebx = cpuid_ebx(0x80000007);
|
||||||
|
xec_mask = 0x3f;
|
||||||
|
if (!(ebx & BIT(3))) {
|
||||||
|
printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
|
||||||
|
goto err_out;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
|
printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
|
||||||
kfree(fam_ops);
|
goto err_out;
|
||||||
fam_ops = NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pr_info("MCE: In-kernel MCE decoding enabled.\n");
|
pr_info("MCE: In-kernel MCE decoding enabled.\n");
|
||||||
|
@ -899,6 +1223,11 @@ static int __init mce_amd_init(void)
|
||||||
mce_register_decode_chain(&amd_mce_dec_nb);
|
mce_register_decode_chain(&amd_mce_dec_nb);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
err_out:
|
||||||
|
kfree(fam_ops);
|
||||||
|
fam_ops = NULL;
|
||||||
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
early_initcall(mce_amd_init);
|
early_initcall(mce_amd_init);
|
||||||
|
|
||||||
|
|
|
@ -1839,8 +1839,8 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
|
||||||
edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
|
edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
|
||||||
n_tads, gb, (mb*1000)/1024,
|
n_tads, gb, (mb*1000)/1024,
|
||||||
((u64)tmp_mb) << 20L,
|
((u64)tmp_mb) << 20L,
|
||||||
(u32)TAD_SOCK(reg),
|
(u32)(1 << TAD_SOCK(reg)),
|
||||||
(u32)TAD_CH(reg),
|
(u32)TAD_CH(reg) + 1,
|
||||||
(u32)TAD_TGT0(reg),
|
(u32)TAD_TGT0(reg),
|
||||||
(u32)TAD_TGT1(reg),
|
(u32)TAD_TGT1(reg),
|
||||||
(u32)TAD_TGT2(reg),
|
(u32)TAD_TGT2(reg),
|
||||||
|
@ -2118,7 +2118,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
|
||||||
}
|
}
|
||||||
|
|
||||||
ch_way = TAD_CH(reg) + 1;
|
ch_way = TAD_CH(reg) + 1;
|
||||||
sck_way = TAD_SOCK(reg) + 1;
|
sck_way = 1 << TAD_SOCK(reg);
|
||||||
|
|
||||||
if (ch_way == 3)
|
if (ch_way == 3)
|
||||||
idx = addr >> 6;
|
idx = addr >> 6;
|
||||||
|
@ -2175,7 +2175,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
|
||||||
n_tads,
|
n_tads,
|
||||||
addr,
|
addr,
|
||||||
limit,
|
limit,
|
||||||
(u32)TAD_SOCK(reg),
|
sck_way,
|
||||||
ch_way,
|
ch_way,
|
||||||
offset,
|
offset,
|
||||||
idx,
|
idx,
|
||||||
|
@ -2190,18 +2190,12 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
|
||||||
offset, addr);
|
offset, addr);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
addr -= offset;
|
|
||||||
/* Store the low bits [0:6] of the addr */
|
ch_addr = addr - offset;
|
||||||
ch_addr = addr & 0x7f;
|
ch_addr >>= (6 + shiftup);
|
||||||
/* Remove socket wayness and remove 6 bits */
|
ch_addr /= ch_way * sck_way;
|
||||||
addr >>= 6;
|
ch_addr <<= (6 + shiftup);
|
||||||
addr = div_u64(addr, sck_xch);
|
ch_addr |= addr & ((1 << (6 + shiftup)) - 1);
|
||||||
#if 0
|
|
||||||
/* Divide by channel way */
|
|
||||||
addr = addr / ch_way;
|
|
||||||
#endif
|
|
||||||
/* Recover the last 6 bits */
|
|
||||||
ch_addr |= addr << 6;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Step 3) Decode rank
|
* Step 3) Decode rank
|
||||||
|
|
|
@ -209,6 +209,35 @@ static int compare_relative_table(const void *a, const void *b)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void x86_sort_relative_table(char *extab_image, int image_size)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
while (i < image_size) {
|
||||||
|
uint32_t *loc = (uint32_t *)(extab_image + i);
|
||||||
|
|
||||||
|
w(r(loc) + i, loc);
|
||||||
|
w(r(loc + 1) + i + 4, loc + 1);
|
||||||
|
w(r(loc + 2) + i + 8, loc + 2);
|
||||||
|
|
||||||
|
i += sizeof(uint32_t) * 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort(extab_image, image_size / 12, 12, compare_relative_table);
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
while (i < image_size) {
|
||||||
|
uint32_t *loc = (uint32_t *)(extab_image + i);
|
||||||
|
|
||||||
|
w(r(loc) - i, loc);
|
||||||
|
w(r(loc + 1) - (i + 4), loc + 1);
|
||||||
|
w(r(loc + 2) - (i + 8), loc + 2);
|
||||||
|
|
||||||
|
i += sizeof(uint32_t) * 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void sort_relative_table(char *extab_image, int image_size)
|
static void sort_relative_table(char *extab_image, int image_size)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -281,6 +310,9 @@ do_file(char const *const fname)
|
||||||
break;
|
break;
|
||||||
case EM_386:
|
case EM_386:
|
||||||
case EM_X86_64:
|
case EM_X86_64:
|
||||||
|
custom_sort = x86_sort_relative_table;
|
||||||
|
break;
|
||||||
|
|
||||||
case EM_S390:
|
case EM_S390:
|
||||||
custom_sort = sort_relative_table;
|
custom_sort = sort_relative_table;
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in New Issue