From 1e2ae9ec072f3b7887f456426bc2cf23b80f661a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 15 Apr 2016 15:50:32 +0200 Subject: [PATCH 1/5] x86/hyperv: Avoid reporting bogus NMI status for Gen2 instances Generation2 instances don't support reporting the NMI status on port 0x61, read from there returns 'ff' and we end up reporting nonsensical PCI error (as there is no PCI bus in these instances) on all NMIs: NMI: PCI system error (SERR) for reason ff on CPU 0. Dazed and confused, but trying to continue Fix the issue by overriding x86_platform.get_nmi_reason. Use 'booted on EFI' flag to detect Gen2 instances. Signed-off-by: Vitaly Kuznetsov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Cathy Avery Cc: Haiyang Zhang Cc: Jiri Olsa Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: devel@linuxdriverproject.org Link: http://lkml.kernel.org/r/1460728232-31433-1-git-send-email-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mshyperv.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 4e7c6933691c..10c11b4da31d 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -152,6 +152,11 @@ static struct clocksource hyperv_cs = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +static unsigned char hv_get_nmi_reason(void) +{ + return 0; +} + static void __init ms_hyperv_init_platform(void) { /* @@ -191,6 +196,13 @@ static void __init ms_hyperv_init_platform(void) machine_ops.crash_shutdown = hv_machine_crash_shutdown; #endif mark_tsc_unstable("running on Hyper-V"); + + /* + * Generation 2 instances don't support reading the NMI status from + * 0x61 port. + */ + if (efi_enabled(EFI_BOOT)) + x86_platform.get_nmi_reason = hv_get_nmi_reason; } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { From 78b0634d2851d30ec5d289aefbae23b141de67f0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 22 Apr 2016 09:35:04 +0200 Subject: [PATCH 2/5] x86/doc: Correct limits in Documentation/x86/x86_64/mm.txt Correct the size of the module mapping space and the maximum available physical memory size of current processors. Signed-off-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: corbet@lwn.net Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1461310504-15977-1-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index c518dce7da4d..5aa738346062 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -19,7 +19,7 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ffffffef00000000 - ffffffff00000000 (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space +ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole @@ -31,8 +31,8 @@ vmalloc space is lazily synchronized into the different PML4 pages of the processes using the page fault handler, with init_level4_pgt as reference. -Current X86-64 implementations only support 40 bits of address space, -but we support up to 46 bits. This expands into MBZ space in the page tables. +Current X86-64 implementations support up to 46 bits of address space (64 TB), +which is our current limit. This expands into MBZ space in the page tables. We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual memory window (this size is arbitrary, it can be raised later if needed). From 103f6112f253017d7062cd74d17f4a514ed4485c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 21 Apr 2016 00:27:04 -0600 Subject: [PATCH 3/5] x86/mm/xen: Suppress hugetlbfs in PV guests Huge pages are not normally available to PV guests. Not suppressing hugetlbfs use results in an endless loop of page faults when user mode code tries to access a hugetlbfs mapped area (since the hypervisor denies such PTEs to be created, but error indications can't be propagated out of xen_set_pte_at(), just like for various of its siblings), and - once killed in an oops like this: kernel BUG at .../fs/hugetlbfs/inode.c:428! invalid opcode: 0000 [#1] SMP ... RIP: e030:[] [] remove_inode_hugepages+0x25b/0x320 ... Call Trace: [] hugetlbfs_evict_inode+0x15/0x40 [] evict+0xbd/0x1b0 [] __dentry_kill+0x19a/0x1f0 [] dput+0x1fe/0x220 [] __fput+0x155/0x200 [] task_work_run+0x60/0xa0 [] do_exit+0x160/0x400 [] do_group_exit+0x3b/0xa0 [] get_signal+0x1ed/0x470 [] do_signal+0x14/0x110 [] prepare_exit_to_usermode+0xe9/0xf0 [] retint_user+0x8/0x13 This is CVE-2016-3961 / XSA-174. Reported-by: Vitaly Kuznetsov Signed-off-by: Jan Beulich Cc: Andrew Morton Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Vrabel Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: stable@vger.kernel.org Cc: xen-devel Link: http://lkml.kernel.org/r/57188ED802000078000E431C@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hugetlb.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index f8a29d2c97b0..e6a8613fbfb0 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -4,6 +4,7 @@ #include #include +#define hugepages_supported() cpu_has_pse static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, From ff15e95c82768d589957dbb17d7eb7dba7904659 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 14 Apr 2016 10:21:52 -0700 Subject: [PATCH 4/5] x86 EDAC, sb_edac.c: Repair damage introduced when "fixing" channel address In commit: eb1af3b71f9d ("Fix computation of channel address") I switched the "sck_way" variable from holding the log2 value read from the h/w to instead be the actual number. Unfortunately it is needed in log2 form when used to shift the address. Tested-by: Patrick Geary Signed-off-by: Tony Luck Acked-by: Mauro Carvalho Chehab Cc: Aristeu Rozanski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-edac@vger.kernel.org Cc: stable@vger.kernel.org Fixes: eb1af3b71f9d ("Fix computation of channel address") Signed-off-by: Ingo Molnar --- drivers/edac/sb_edac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 93f0d4120289..f4666b0aeba6 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2118,7 +2118,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, } ch_way = TAD_CH(reg) + 1; - sck_way = 1 << TAD_SOCK(reg); + sck_way = TAD_SOCK(reg); if (ch_way == 3) idx = addr >> 6; @@ -2157,7 +2157,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, switch(ch_way) { case 2: case 4: - sck_xch = 1 << sck_way * (ch_way >> 1); + sck_xch = (1 << sck_way) * (ch_way >> 1); break; default: sprintf(msg, "Invalid mirror set. Can't decode addr"); @@ -2193,7 +2193,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, ch_addr = addr - offset; ch_addr >>= (6 + shiftup); - ch_addr /= ch_way * sck_way; + ch_addr /= sck_xch; ch_addr <<= (6 + shiftup); ch_addr |= addr & ((1 << (6 + shiftup)) - 1); From ea5dfb5fae81939f777ca569d8cfb599252da2e8 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 14 Apr 2016 10:22:02 -0700 Subject: [PATCH 5/5] x86 EDAC, sb_edac.c: Take account of channel hashing when needed Haswell and Broadwell can be configured to hash the channel interleave function using bits [27:12] of the physical address. On those processor models we must check to see if hashing is enabled (bit21 of the HASWELL_HASYSDEFEATURE2 register) and act accordingly. Based on a patch by patrickg Tested-by: Patrick Geary Signed-off-by: Tony Luck Acked-by: Mauro Carvalho Chehab Cc: Aristeu Rozanski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-edac@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/edac/sb_edac.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index f4666b0aeba6..468447aff8eb 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -362,6 +362,7 @@ struct sbridge_pvt { /* Memory type detection */ bool is_mirrored, is_lockstep, is_close_pg; + bool is_chan_hash; /* Fifo double buffers */ struct mce mce_entry[MCE_LOG_LEN]; @@ -1060,6 +1061,20 @@ static inline u8 sad_pkg_ha(u8 pkg) return (pkg >> 2) & 0x1; } +static int haswell_chan_hash(int idx, u64 addr) +{ + int i; + + /* + * XOR even bits from 12:26 to bit0 of idx, + * odd bits from 13:27 to bit1 + */ + for (i = 12; i < 28; i += 2) + idx ^= (addr >> i) & 3; + + return idx; +} + /**************************************************************************** Memory check routines ****************************************************************************/ @@ -1616,6 +1631,10 @@ static int get_dimm_config(struct mem_ctl_info *mci) KNL_MAX_CHANNELS : NUM_CHANNELS; u64 knl_mc_sizes[KNL_MAX_CHANNELS]; + if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) { + pci_read_config_dword(pvt->pci_ha0, HASWELL_HASYSDEFEATURE2, ®); + pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21); + } if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL || pvt->info.type == KNIGHTS_LANDING) pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, ®); @@ -2122,8 +2141,11 @@ static int get_memory_error_data(struct mem_ctl_info *mci, if (ch_way == 3) idx = addr >> 6; - else + else { idx = (addr >> (6 + sck_way + shiftup)) & 0x3; + if (pvt->is_chan_hash) + idx = haswell_chan_hash(idx, addr); + } idx = idx % ch_way; /*