From 5b65c4677a57a1d4414212f9995aa0e46a21ff80 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Sat, 9 Sep 2017 00:56:03 +0300
Subject: [PATCH 01/17] mm, x86/mm: Fix performance regression in
 get_user_pages_fast()

The 0-day test bot found a performance regression that was tracked down to
switching x86 to the generic get_user_pages_fast() implementation:

  http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop

The regression was caused by the fact that we now use local_irq_save() +
local_irq_restore() in get_user_pages_fast() to disable interrupts; the
x86 implementation used local_irq_disable() + local_irq_enable().

The fix is to make get_user_pages_fast() use local_irq_disable(), leaving
local_irq_save() for __get_user_pages_fast(), which can be called with
interrupts disabled.

Numbers for pinning a gigabyte of memory, one page at a time, 20 repeats:

  Before: Average: 14.91 ms, stddev: 0.45 ms
  After:  Average: 10.76 ms, stddev: 0.18 ms

Signed-off-by: Kirill A. Shutemov
Cc: Andrew Morton
Cc: Huang Ying
Cc: Jonathan Corbet
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Thorsten Leemhuis
Cc: linux-mm@kvack.org
Fixes: e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
Link: http://lkml.kernel.org/r/20170908215603.9189-3-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar
---
 mm/gup.c | 97 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 58 insertions(+), 39 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 23f01c40c88f..4a789f1c6a27 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1618,6 +1618,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }
 
+static void gup_pgd_range(unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pgd_t *pgdp;
+
+	pgdp = pgd_offset(current->mm, addr);
+	do {
+		pgd_t pgd = READ_ONCE(*pgdp);
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			return;
+		if (unlikely(pgd_huge(pgd))) {
+			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+					  pages, nr))
+				return;
+		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+					 PGDIR_SHIFT, next, write, pages, nr))
+				return;
+		} else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+			return;
+	} while (pgdp++, addr = next, addr != end);
+}
+
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+	unsigned long len, end;
+
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	return end >= start;
+}
+#endif
+
 /*
  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
  * the regular GUP. It will only return non-negative values.
@@ -1625,10 +1666,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages)
 {
-	struct mm_struct *mm = current->mm;
 	unsigned long addr, len, end;
-	unsigned long next, flags;
-	pgd_t *pgdp;
+	unsigned long flags;
 	int nr = 0;
 
 	start &= PAGE_MASK;
@@ -1652,45 +1691,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	 * block IPIs that come from THPs splitting.
 	 */
-	local_irq_save(flags);
-	pgdp = pgd_offset(mm, addr);
-	do {
-		pgd_t pgd = READ_ONCE(*pgdp);
-
-		next = pgd_addr_end(addr, end);
-		if (pgd_none(pgd))
-			break;
-		if (unlikely(pgd_huge(pgd))) {
-			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
-					  pages, &nr))
-				break;
-		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
-			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
-					 PGDIR_SHIFT, next, write, pages, &nr))
-				break;
-		} else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
-			break;
-	} while (pgdp++, addr = next, addr != end);
-	local_irq_restore(flags);
+	if (gup_fast_permitted(start, nr_pages, write)) {
+		local_irq_save(flags);
+		gup_pgd_range(addr, end, write, pages, &nr);
+		local_irq_restore(flags);
+	}
 
 	return nr;
 }
 
-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
-	unsigned long len, end;
-
-	len = (unsigned long) nr_pages << PAGE_SHIFT;
-	end = start + len;
-	return end >= start;
-}
-#endif
-
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
@@ -1710,12 +1719,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages)
 {
+	unsigned long addr, len, end;
 	int nr = 0, ret = 0;
 
 	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;
 
 	if (gup_fast_permitted(start, nr_pages, write)) {
-		nr = __get_user_pages_fast(start, nr_pages, write, pages);
+		local_irq_disable();
+		gup_pgd_range(addr, end, write, pages, &nr);
+		local_irq_enable();
 		ret = nr;
 	}
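[ Note: the win here comes from the common entry point no longer having to
  preserve the caller's interrupt state. On x86, local_irq_save() must
  first read EFLAGS (a PUSHF/POP sequence) so that local_irq_restore() can
  conditionally re-enable interrupts, while local_irq_disable() /
  local_irq_enable() compile down to bare CLI/STI. A rough sketch of the
  two patterns (simplified; the mnemonics are for illustration only):

	unsigned long flags;

	local_irq_save(flags);		/* pushf; pop flags; cli */
	/* ... walk the page tables ... */
	local_irq_restore(flags);	/* push flags; popf */

	local_irq_disable();		/* cli */
	/* ... walk the page tables ... */
	local_irq_enable();		/* sti */

  get_user_pages_fast() can use the cheaper pair because it is only called
  with interrupts enabled; __get_user_pages_fast() keeps the save/restore
  variant since it may be entered with interrupts already disabled. ]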
From 83e3c48729d9ebb7af5a31a504f3fd6aff0348c4 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Fri, 29 Sep 2017 17:08:16 +0300
Subject: [PATCH 02/17] mm/sparsemem: Allocate mem_section at runtime for
 CONFIG_SPARSEMEM_EXTREME=y

Size of the mem_section[] array depends on the size of the physical
address space. In preparation for boot-time switching between paging
modes on x86-64 we need to make the allocation of mem_section[] dynamic,
because otherwise we waste a lot of RAM: with CONFIG_NODES_SHIFT=10, the
mem_section[] size is 32kB for 4-level paging and 2MB for 5-level paging
mode.

The patch allocates the array on the first call to
sparse_memory_present_with_active_regions().

Signed-off-by: Kirill A. Shutemov
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Cyrill Gorcunov
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170929140821.37654-2-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar
---
 include/linux/mmzone.h |  6 +++++-
 mm/page_alloc.c        | 10 ++++++++++
 mm/sparse.c            | 17 +++++++++++------
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c8f89417740b..e796edf1296f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1150,13 +1150,17 @@ struct mem_section {
 #define SECTION_ROOT_MASK	(SECTIONS_PER_ROOT - 1)
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
-extern struct mem_section *mem_section[NR_SECTION_ROOTS];
+extern struct mem_section **mem_section;
 #else
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
 static inline struct mem_section *__nr_to_section(unsigned long nr)
 {
+#ifdef CONFIG_SPARSEMEM_EXTREME
+	if (!mem_section)
+		return NULL;
+#endif
 	if (!mem_section[SECTION_NR_TO_ROOT(nr)])
 		return NULL;
 	return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 77e4d3c5c57b..8dfd13f724d9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5646,6 +5646,16 @@ void __init sparse_memory_present_with_active_regions(int nid)
 	unsigned long start_pfn, end_pfn;
 	int i, this_nid;
 
+#ifdef CONFIG_SPARSEMEM_EXTREME
+	if (!mem_section) {
+		unsigned long size, align;
+
+		size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+		align = 1 << (INTERNODE_CACHE_SHIFT);
+		mem_section = memblock_virt_alloc(size, align);
+	}
+#endif
+
 	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
 		memory_present(this_nid, start_pfn, end_pfn);
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index 83b3bf6461af..b00a97398795 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -22,8 +22,7 @@
  * 1) mem_section	- memory sections, mem_map's for valid memory
  */
 #ifdef CONFIG_SPARSEMEM_EXTREME
-struct mem_section *mem_section[NR_SECTION_ROOTS]
-	____cacheline_internodealigned_in_smp;
+struct mem_section **mem_section;
 #else
 struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
 	____cacheline_internodealigned_in_smp;
@@ -100,7 +99,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
 int __section_nr(struct mem_section* ms)
 {
 	unsigned long root_nr;
-	struct mem_section* root;
+	struct mem_section *root = NULL;
 
 	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
 		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
@@ -111,7 +110,7 @@ int __section_nr(struct mem_section* ms)
 		break;
 	}
 
-	VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
+	VM_BUG_ON(!root);
 
 	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
 }
@@ -329,11 +328,17 @@ again:
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
 {
 	unsigned long usemap_snr, pgdat_snr;
-	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
-	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+	static unsigned long old_usemap_snr;
+	static unsigned long old_pgdat_snr;
 	struct pglist_data *pgdat = NODE_DATA(nid);
 	int usemap_nid;
 
+	/* First call */
+	if (!old_usemap_snr) {
+		old_usemap_snr = NR_MEM_SECTIONS;
+		old_pgdat_snr = NR_MEM_SECTIONS;
+	}
+
 	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
 	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
 	if (usemap_snr == pgdat_snr)
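[ Note: the 32kB vs 2MB figures follow from the growth of the physical
  address space alone. A back-of-the-envelope check, assuming x86-64's
  usual constants (SECTION_SIZE_BITS == 27, i.e. 128MB sections, 4kB
  pages, SECTIONS_PER_ROOT == 128 -- these values are assumptions of this
  note, not part of the patch):

	4-level: MAX_PHYSMEM_BITS = 46
		 NR_MEM_SECTIONS  = 2^(46-27) = 2^19
		 NR_SECTION_ROOTS = 2^19 / 128 = 4096 roots * 8 bytes = 32kB

	5-level: MAX_PHYSMEM_BITS = 52
		 NR_MEM_SECTIONS  = 2^(52-27) = 2^25
		 NR_SECTION_ROOTS = 2^25 / 128 = 2^18 roots * 8 bytes = 2MB

  Allocating the root table from memblock on the first call means the 2MB
  worst case is only paid when 5-level paging is actually in use. ]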
From 4375c29985f155d7eb2346615d84e62d1b673682 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin
Date: Fri, 29 Sep 2017 17:08:18 +0300
Subject: [PATCH 03/17] x86/kasan: Use the same shadow offset for 4- and
 5-level paging

We are going to support boot-time switching between 4- and 5-level
paging. For KASAN it means we cannot have different KASAN_SHADOW_OFFSET
for different paging modes: the constant is passed to gcc to generate
code and cannot be changed at runtime.

This patch changes KASAN code to use 0xdffffc0000000000 as the shadow
offset for both 4- and 5-level paging. For 5-level paging it means that
the shadow memory region is not aligned to the PGD boundary anymore and
we have to handle unaligned parts of the region properly.

In addition, we have to exclude paravirt code from KASAN instrumentation
as we now use set_pgd() before KASAN is fully ready.

[kirill.shutemov@linux.intel.com: cleanup, changelog message]
Signed-off-by: Andrey Ryabinin
Signed-off-by: Kirill A. Shutemov
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Cyrill Gorcunov
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170929140821.37654-4-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar
---
 Documentation/x86/x86_64/mm.txt |   2 +-
 arch/x86/Kconfig                |   1 -
 arch/x86/kernel/Makefile        |   3 +-
 arch/x86/mm/kasan_init_64.c     | 101 +++++++++++++++++++++++++-------
 4 files changed, 83 insertions(+), 24 deletions(-)

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index b0798e281aa6..3448e675b462 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
-ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
+ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
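[ Note: KASAN_SHADOW_OFFSET has to be a single compile-time constant
  because the compiler bakes it into every instrumented memory access. A
  minimal sketch of the address-to-shadow mapping (one shadow byte covers
  2^KASAN_SHADOW_SCALE_SHIFT == 8 bytes of memory):

	static inline void *kasan_mem_to_shadow(const void *addr)
	{
		return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
			+ KASAN_SHADOW_OFFSET;
	}

  With the offset pinned to 0xdffffc0000000000 for both modes, the
  5-level shadow region simply starts at a non-PGD-aligned address; the
  clear_pgds() and kasan_early_p4d_populate() changes below exist to
  handle exactly those unaligned head and tail parts. ]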
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 971feac13506..32779beb56e2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -302,7 +302,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC config KASAN_SHADOW_OFFSET hex depends on KASAN - default 0xdff8000000000000 if X86_5LEVEL default 0xdffffc0000000000 config HAVE_INTEL_TXT diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index fd0a7895b63f..a97a6b611531 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -24,7 +24,8 @@ endif KASAN_SANITIZE_head$(BITS).o := n KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n -KASAN_SANITIZE_stacktrace.o := n +KASAN_SANITIZE_stacktrace.o := n +KASAN_SANITIZE_paravirt.o := n OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index bc84b73684b7..fe5760db7b19 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -15,6 +15,8 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES]; +static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); + static int __init map_range(struct range *range) { unsigned long start; @@ -30,8 +32,10 @@ static void __init clear_pgds(unsigned long start, unsigned long end) { pgd_t *pgd; + /* See comment in kasan_init() */ + unsigned long pgd_end = end & PGDIR_MASK; - for (; start < end; start += PGDIR_SIZE) { + for (; start < pgd_end; start += PGDIR_SIZE) { pgd = pgd_offset_k(start); /* * With folded p4d, pgd_clear() is nop, use p4d_clear() @@ -42,29 +46,61 @@ static void __init clear_pgds(unsigned long start, else pgd_clear(pgd); } + + pgd = pgd_offset_k(start); + for (; start < end; start += P4D_SIZE) + p4d_clear(p4d_offset(pgd, start)); +} + +static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr) +{ + unsigned long p4d; + + if (!IS_ENABLED(CONFIG_X86_5LEVEL)) + return (p4d_t *)pgd; + + p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; + p4d += __START_KERNEL_map - phys_base; + return (p4d_t *)p4d + p4d_index(addr); +} + +static void __init kasan_early_p4d_populate(pgd_t *pgd, + unsigned long addr, + unsigned long end) +{ + pgd_t pgd_entry; + p4d_t *p4d, p4d_entry; + unsigned long next; + + if (pgd_none(*pgd)) { + pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d)); + set_pgd(pgd, pgd_entry); + } + + p4d = early_p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + + if (!p4d_none(*p4d)) + continue; + + p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud)); + set_p4d(p4d, p4d_entry); + } while (p4d++, addr = next, addr != end && p4d_none(*p4d)); } static void __init kasan_map_early_shadow(pgd_t *pgd) { - int i; - unsigned long start = KASAN_SHADOW_START; + /* See comment in kasan_init() */ + unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK; unsigned long end = KASAN_SHADOW_END; + unsigned long next; - for (i = pgd_index(start); start < end; i++) { - switch (CONFIG_PGTABLE_LEVELS) { - case 4: - pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) | - _KERNPG_TABLE); - break; - case 5: - pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) | - _KERNPG_TABLE); - break; - default: - BUILD_BUG(); - } - start += PGDIR_SIZE; - } + pgd += pgd_index(addr); + do { + next = pgd_addr_end(addr, end); + kasan_early_p4d_populate(pgd, addr, next); + } while (pgd++, addr = next, addr != end); } #ifdef CONFIG_KASAN_INLINE @@ -101,7 +137,7 @@ void __init kasan_early_init(void) for (i = 0; i < PTRS_PER_PUD; i++) kasan_zero_pud[i] = __pud(pud_val); - 
for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++) + for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++) kasan_zero_p4d[i] = __p4d(p4d_val); kasan_map_early_shadow(early_top_pgt); @@ -117,12 +153,35 @@ void __init kasan_init(void) #endif memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); + + /* + * We use the same shadow offset for 4- and 5-level paging to + * facilitate boot-time switching between paging modes. + * As result in 5-level paging mode KASAN_SHADOW_START and + * KASAN_SHADOW_END are not aligned to PGD boundary. + * + * KASAN_SHADOW_START doesn't share PGD with anything else. + * We claim whole PGD entry to make things easier. + * + * KASAN_SHADOW_END lands in the last PGD entry and it collides with + * bunch of things like kernel code, modules, EFI mapping, etc. + * We need to take extra steps to not overwrite them. + */ + if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + void *ptr; + + ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); + memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table)); + set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)], + __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE)); + } + load_cr3(early_top_pgt); __flush_tlb_all(); - clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END); - kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, + kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK), kasan_mem_to_shadow((void *)PAGE_OFFSET)); for (i = 0; i < E820_MAX_ENTRIES; i++) { From 4375c29985f155d7eb2346615d84e62d1b673682 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 29 Sep 2017 17:08:19 +0300 Subject: [PATCH 04/17] x86/xen: Provide pre-built page tables only for CONFIG_XEN_PV=y and CONFIG_XEN_PVH=y Looks like we only need pre-built page tables in the CONFIG_XEN_PV=y and CONFIG_XEN_PVH=y cases. Let's not provide them for other configurations. Signed-off-by: Kirill A. Shutemov Reviewed-by: Juergen Gross Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Cyrill Gorcunov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20170929140821.37654-5-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 513cbb012ecc..2be7d1e7fcf1 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -37,11 +37,12 @@ * */ -#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) PGD_START_KERNEL = pgd_index(__START_KERNEL_map) +#endif L3_START_KERNEL = pud_index(__START_KERNEL_map) .text @@ -361,10 +362,7 @@ NEXT_PAGE(early_dynamic_pgts) .data -#ifndef CONFIG_XEN -NEXT_PAGE(init_top_pgt) - .fill 512,8,0 -#else +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) NEXT_PAGE(init_top_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 @@ -381,6 +379,9 @@ NEXT_PAGE(level2_ident_pgt) * Don't set NX because code runs from these pages. */ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +#else +NEXT_PAGE(init_top_pgt) + .fill 512,8,0 #endif #ifdef CONFIG_X86_5LEVEL From 773dd2fca581b0a80e5a33332cc8ee67e5a79cba Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Fri, 29 Sep 2017 17:08:20 +0300 Subject: [PATCH 05/17] x86/xen: Drop 5-level paging support code from the XEN_PV code It was decided 5-level paging is not going to be supported in XEN_PV. Let's drop the dead code from the XEN_PV code. Tested-by: Juergen Gross Signed-off-by: Kirill A. Shutemov Reviewed-by: Juergen Gross Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Cyrill Gorcunov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20170929140821.37654-6-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu_pv.c | 159 ++++++++++++++++-------------------------- 1 file changed, 60 insertions(+), 99 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 71495f1a86d7..2ccdaba31a07 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -449,7 +449,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); -#if CONFIG_PGTABLE_LEVELS == 4 +#ifdef CONFIG_X86_64 __visible pudval_t xen_pud_val(pud_t pud) { return pte_mfn_to_pfn(pud.pud); @@ -538,7 +538,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) xen_mc_issue(PARAVIRT_LAZY_MMU); } -#endif /* CONFIG_PGTABLE_LEVELS == 4 */ +#endif /* CONFIG_X86_64 */ static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, int (*func)(struct mm_struct *mm, struct page *, enum pt_level), @@ -580,21 +580,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, int (*func)(struct mm_struct *mm, struct page *, enum pt_level), bool last, unsigned long limit) { - int i, nr, flush = 0; + int flush = 0; + pud_t *pud; - nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D; - for (i = 0; i < nr; i++) { - pud_t *pud; - if (p4d_none(p4d[i])) - continue; + if (p4d_none(*p4d)) + return flush; - pud = pud_offset(&p4d[i], 0); - if (PTRS_PER_PUD > 1) - flush |= (*func)(mm, virt_to_page(pud), PT_PUD); - flush |= xen_pud_walk(mm, pud, func, - last && i == nr - 1, limit); - } + pud = pud_offset(p4d, 0); + if (PTRS_PER_PUD > 1) + flush |= (*func)(mm, virt_to_page(pud), PT_PUD); + flush |= xen_pud_walk(mm, pud, func, last, limit); return flush; } @@ -644,8 +640,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, continue; p4d = p4d_offset(&pgd[i], 0); - if (PTRS_PER_P4D > 1) - flush |= (*func)(mm, virt_to_page(p4d), PT_P4D); flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); } @@ -1176,22 +1170,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr) { pgd_t *pgd; p4d_t *p4d; - unsigned int i; bool unpin; unpin = (vaddr == 2 * PGDIR_SIZE); vaddr &= PMD_MASK; pgd = pgd_offset_k(vaddr); p4d = p4d_offset(pgd, 0); - for (i = 0; i < PTRS_PER_P4D; i++) { - if (p4d_none(p4d[i])) - continue; - xen_cleanmfnmap_p4d(p4d + i, unpin); - } - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - set_pgd(pgd, __pgd(0)); - xen_cleanmfnmap_free_pgtbl(p4d, unpin); - } + if (!p4d_none(*p4d)) + xen_cleanmfnmap_p4d(p4d, unpin); } static void __init xen_pagetable_p2m_free(void) @@ -1692,7 +1678,7 @@ static void xen_release_pmd(unsigned long pfn) xen_release_ptpage(pfn, PT_PMD); } -#if CONFIG_PGTABLE_LEVELS >= 4 +#ifdef CONFIG_X86_64 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) { xen_alloc_ptpage(mm, pfn, PT_PUD); @@ -2029,13 +2015,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) */ void __init xen_relocate_p2m(void) { - phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys; + phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys; unsigned long 
p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end; - int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d; + int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud; pte_t *pt; pmd_t *pmd; pud_t *pud; - p4d_t *p4d = NULL; pgd_t *pgd; unsigned long *new_p2m; int save_pud; @@ -2045,11 +2030,7 @@ void __init xen_relocate_p2m(void) n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT; n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT; n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT; - if (PTRS_PER_P4D > 1) - n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT; - else - n_p4d = 0; - n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d; + n_frames = n_pte + n_pt + n_pmd + n_pud; new_area = xen_find_free_area(PFN_PHYS(n_frames)); if (!new_area) { @@ -2065,76 +2046,56 @@ void __init xen_relocate_p2m(void) * To avoid any possible virtual address collision, just use * 2 * PUD_SIZE for the new area. */ - p4d_phys = new_area; - pud_phys = p4d_phys + PFN_PHYS(n_p4d); + pud_phys = new_area; pmd_phys = pud_phys + PFN_PHYS(n_pud); pt_phys = pmd_phys + PFN_PHYS(n_pmd); p2m_pfn = PFN_DOWN(pt_phys) + n_pt; pgd = __va(read_cr3_pa()); new_p2m = (unsigned long *)(2 * PGDIR_SIZE); - idx_p4d = 0; save_pud = n_pud; - do { - if (n_p4d > 0) { - p4d = early_memremap(p4d_phys, PAGE_SIZE); - clear_page(p4d); - n_pud = min(save_pud, PTRS_PER_P4D); - } - for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { - pud = early_memremap(pud_phys, PAGE_SIZE); - clear_page(pud); - for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD); - idx_pmd++) { - pmd = early_memremap(pmd_phys, PAGE_SIZE); - clear_page(pmd); - for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD); - idx_pt++) { - pt = early_memremap(pt_phys, PAGE_SIZE); - clear_page(pt); - for (idx_pte = 0; - idx_pte < min(n_pte, PTRS_PER_PTE); - idx_pte++) { - set_pte(pt + idx_pte, - pfn_pte(p2m_pfn, PAGE_KERNEL)); - p2m_pfn++; - } - n_pte -= PTRS_PER_PTE; - early_memunmap(pt, PAGE_SIZE); - make_lowmem_page_readonly(__va(pt_phys)); - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, - PFN_DOWN(pt_phys)); - set_pmd(pmd + idx_pt, - __pmd(_PAGE_TABLE | pt_phys)); - pt_phys += PAGE_SIZE; + for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { + pud = early_memremap(pud_phys, PAGE_SIZE); + clear_page(pud); + for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD); + idx_pmd++) { + pmd = early_memremap(pmd_phys, PAGE_SIZE); + clear_page(pmd); + for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD); + idx_pt++) { + pt = early_memremap(pt_phys, PAGE_SIZE); + clear_page(pt); + for (idx_pte = 0; + idx_pte < min(n_pte, PTRS_PER_PTE); + idx_pte++) { + set_pte(pt + idx_pte, + pfn_pte(p2m_pfn, PAGE_KERNEL)); + p2m_pfn++; } - n_pt -= PTRS_PER_PMD; - early_memunmap(pmd, PAGE_SIZE); - make_lowmem_page_readonly(__va(pmd_phys)); - pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, - PFN_DOWN(pmd_phys)); - set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); - pmd_phys += PAGE_SIZE; + n_pte -= PTRS_PER_PTE; + early_memunmap(pt, PAGE_SIZE); + make_lowmem_page_readonly(__va(pt_phys)); + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, + PFN_DOWN(pt_phys)); + set_pmd(pmd + idx_pt, + __pmd(_PAGE_TABLE | pt_phys)); + pt_phys += PAGE_SIZE; } - n_pmd -= PTRS_PER_PUD; - early_memunmap(pud, PAGE_SIZE); - make_lowmem_page_readonly(__va(pud_phys)); - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys)); - if (n_p4d > 0) - set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys)); - else - set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys)); - pud_phys += PAGE_SIZE; + n_pt -= PTRS_PER_PMD; + early_memunmap(pmd, PAGE_SIZE); + 
make_lowmem_page_readonly(__va(pmd_phys)); + pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, + PFN_DOWN(pmd_phys)); + set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); + pmd_phys += PAGE_SIZE; } - if (n_p4d > 0) { - save_pud -= PTRS_PER_P4D; - early_memunmap(p4d, PAGE_SIZE); - make_lowmem_page_readonly(__va(p4d_phys)); - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys)); - set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys)); - p4d_phys += PAGE_SIZE; - } - } while (++idx_p4d < n_p4d); + n_pmd -= PTRS_PER_PUD; + early_memunmap(pud, PAGE_SIZE); + make_lowmem_page_readonly(__va(pud_phys)); + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys)); + set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys)); + pud_phys += PAGE_SIZE; + } /* Now copy the old p2m info to the new area. */ memcpy(new_p2m, xen_p2m_addr, size); @@ -2361,7 +2322,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; -#if CONFIG_PGTABLE_LEVELS >= 4 +#ifdef CONFIG_X86_64 pv_mmu_ops.set_p4d = xen_set_p4d; #endif @@ -2371,7 +2332,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.alloc_pmd = xen_alloc_pmd; pv_mmu_ops.release_pte = xen_release_pte; pv_mmu_ops.release_pmd = xen_release_pmd; -#if CONFIG_PGTABLE_LEVELS >= 4 +#ifdef CONFIG_X86_64 pv_mmu_ops.alloc_pud = xen_alloc_pud; pv_mmu_ops.release_pud = xen_release_pud; #endif @@ -2435,14 +2396,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), -#if CONFIG_PGTABLE_LEVELS >= 4 +#ifdef CONFIG_X86_64 .pud_val = PV_CALLEE_SAVE(xen_pud_val), .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_p4d = xen_set_p4d_hyper, .alloc_pud = xen_alloc_pmd_init, .release_pud = xen_release_pmd_init, -#endif /* CONFIG_PGTABLE_LEVELS == 4 */ +#endif /* CONFIG_X86_64 */ .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, From af8e947079a7dab0480b5d6db6b093fd04b86fc9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 27 Oct 2017 13:11:10 +0900 Subject: [PATCH 06/17] x86/build: Beautify build log of syscall headers This makes the build log look nicer. Before: SYSTBL arch/x86/entry/syscalls/../../include/generated/asm/syscalls_32.h SYSHDR arch/x86/entry/syscalls/../../include/generated/asm/unistd_32_ia32.h SYSHDR arch/x86/entry/syscalls/../../include/generated/asm/unistd_64_x32.h SYSTBL arch/x86/entry/syscalls/../../include/generated/asm/syscalls_64.h SYSHDR arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_32.h SYSHDR arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_64.h SYSHDR arch/x86/entry/syscalls/../../include/generated/uapi/asm/unistd_x32.h After: SYSTBL arch/x86/include/generated/asm/syscalls_32.h SYSHDR arch/x86/include/generated/asm/unistd_32_ia32.h SYSHDR arch/x86/include/generated/asm/unistd_64_x32.h SYSTBL arch/x86/include/generated/asm/syscalls_64.h SYSHDR arch/x86/include/generated/uapi/asm/unistd_32.h SYSHDR arch/x86/include/generated/uapi/asm/unistd_64.h SYSHDR arch/x86/include/generated/uapi/asm/unistd_x32.h Signed-off-by: Masahiro Yamada Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: "H. 
Peter Anvin" Cc: linux-kbuild@vger.kernel.org Link: http://lkml.kernel.org/r/1509077470-2735-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Ingo Molnar --- arch/x86/entry/syscalls/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile index 57aa59fd140c..e34c7a931994 100644 --- a/arch/x86/entry/syscalls/Makefile +++ b/arch/x86/entry/syscalls/Makefile @@ -1,5 +1,5 @@ -out := $(obj)/../../include/generated/asm -uapi := $(obj)/../../include/generated/uapi/asm +out := arch/$(SRCARCH)/include/generated/asm +uapi := arch/$(SRCARCH)/include/generated/uapi/asm # Create output directory if not already present _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \ From 15670bfe19905b1dcbb63137f40d718b59d84479 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sat, 28 Oct 2017 09:30:38 +0800 Subject: [PATCH 07/17] x86/mm/64: Rename the register_page_bootmem_memmap() 'size' parameter to 'nr_pages' register_page_bootmem_memmap()'s 3rd 'size' parameter is named in a somewhat misleading fashion - rename it to 'nr_pages' which makes the units of it much clearer. Meanwhile rename the existing local variable 'nr_pages' to 'nr_pmd_pages', a more expressive name, to avoid conflict with new function parameter 'nr_pages'. (Also clean up the unnecessary parentheses in which get_order() is called.) Signed-off-by: Baoquan He Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: akpm@linux-foundation.org Link: http://lkml.kernel.org/r/1509154238-23250-1-git-send-email-bhe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 10 +++++----- include/linux/mm.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 048fbe8fc274..adcea90a2046 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) void register_page_bootmem_memmap(unsigned long section_nr, - struct page *start_page, unsigned long size) + struct page *start_page, unsigned long nr_pages) { unsigned long addr = (unsigned long)start_page; - unsigned long end = (unsigned long)(start_page + size); + unsigned long end = (unsigned long)(start_page + nr_pages); unsigned long next; pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; - unsigned int nr_pages; + unsigned int nr_pmd_pages; struct page *page; for (; addr < end; addr = next) { @@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr, if (pmd_none(*pmd)) continue; - nr_pages = 1 << (get_order(PMD_SIZE)); + nr_pmd_pages = 1 << get_order(PMD_SIZE); page = pmd_page(*pmd); - while (nr_pages--) + while (nr_pmd_pages--) get_page_bootmem(section_nr, page++, SECTION_INFO); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 065d99deb847..b2c7045e9604 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2495,7 +2495,7 @@ void vmemmap_populate_print_last(void); void vmemmap_free(unsigned long start, unsigned long end); #endif void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, - unsigned long size); + unsigned long nr_pages); enum mf_flags { MF_COUNT_INCREASED = 1 << 0, From 06dd688ddda5819025e014b79aea9af6ab475fa2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 2 Nov 2017 13:22:35 +0100 Subject: [PATCH 08/17] x86/cpuid: Replace set/clear_bit32() Peter 
From 06dd688ddda5819025e014b79aea9af6ab475fa2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 2 Nov 2017 13:22:35 +0100
Subject: [PATCH 08/17] x86/cpuid: Replace set/clear_bit32()

Peter pointed out that the set/clear_bit32() variants are broken in
various aspects.

Replace them with open coded set/clear_bit() and type cast
cpu_info::x86_capability as it's done in all other places throughout x86.

Fixes: 0b00de857a64 ("x86/cpuid: Add generic table for CPUID dependencies")
Reported-by: Peter Zijlstra
Signed-off-by: Thomas Gleixner
Cc: Andi Kleen
---
 arch/x86/kernel/cpu/cpuid-deps.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index c21f22d836ad..904b0a3c4e53 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -62,23 +62,19 @@ const static struct cpuid_dep cpuid_deps[] = {
 	{}
 };
 
-static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
-{
-	clear_bit32(bit, c->x86_capability);
-}
-
-static inline void __setup_clear_cpu_cap(unsigned int bit)
-{
-	clear_cpu_cap(&boot_cpu_data, bit);
-	set_bit32(bit, cpu_caps_cleared);
-}
-
 static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
 {
-	if (!c)
-		__setup_clear_cpu_cap(feature);
-	else
-		__clear_cpu_cap(c, feature);
+	/*
+	 * Note: This could use the non atomic __*_bit() variants, but the
+	 * rest of the cpufeature code uses atomics as well, so keep it for
+	 * consistency. Cleanup all of it separately.
+	 */
+	if (!c) {
+		clear_cpu_cap(&boot_cpu_data, feature);
+		set_bit(feature, (unsigned long *)cpu_caps_cleared);
+	} else {
+		clear_bit(feature, (unsigned long *)c->x86_capability);
+	}
 }
 
 /* Take the capabilities and the BUG bits into account */

From 1943dc07b45e347c52c1bfdd4a37e04a86e399aa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 2 Nov 2017 13:30:03 +0100
Subject: [PATCH 09/17] bitops: Revert cbe96375025e ("bitops: Add
 clear/set_bit32() to linux/bitops.h")

These ops are not endian safe and may break on architectures which have
alignment requirements.

Reverts: cbe96375025e ("bitops: Add clear/set_bit32() to linux/bitops.h")
Reported-by: Peter Zijlstra
Signed-off-by: Thomas Gleixner
Cc: Andi Kleen
---
 include/linux/bitops.h | 26 --------------------------
 1 file changed, 26 deletions(-)

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 36794f058ba6..8fbe259b197c 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -227,32 +227,6 @@ static inline unsigned long __ffs64(u64 word)
 	return __ffs((unsigned long)word);
 }
 
-/*
- * clear_bit32 - Clear a bit in memory for u32 array
- * @nr: Bit to clear
- * @addr: u32 * address of bitmap
- *
- * Same as clear_bit, but avoids needing casts for u32 arrays.
- */
-
-static __always_inline void clear_bit32(long nr, volatile u32 *addr)
-{
-	clear_bit(nr, (volatile unsigned long *)addr);
-}
-
-/*
- * set_bit32 - Set a bit in memory for u32 array
- * @nr: Bit to clear
- * @addr: u32 * address of bitmap
- *
- * Same as set_bit, but avoids needing casts for u32 arrays.
- */
-
-static __always_inline void set_bit32(long nr, volatile u32 *addr)
-{
-	set_bit(nr, (volatile unsigned long *)addr);
-}
-
 #ifdef __KERNEL__
 
 #ifndef set_mask_bits

From c7da092a1f243bfd1bfb4124f538e69e941882da Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Fri, 3 Nov 2017 11:20:28 +0100
Subject: [PATCH 10/17] x86/mm: Define _PAGE_TABLE using _KERNPG_TABLE

... so that the difference is obvious.

No functionality change.
Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171103102028.20284-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_types.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f1492473f10e..c33f80da8a79 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -199,10 +199,9 @@ enum page_cache_mode { #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC) #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ _PAGE_DIRTY | _PAGE_ENC) +#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) #define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) #define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) From acbc845ffefd9fb70466182cd8555a26189462b2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 31 Oct 2017 13:17:22 +0100 Subject: [PATCH 11/17] x86/cpufeatures: Re-tabulate the X86_FEATURE definitions Over the years asm/cpufeatures.h has become somewhat of a mess: the original tabulation style was too narrow, while x86 feature names also kept growing in length, creating frequent field width overflows. Re-tabulate it to make it wider and easier to read/modify. Also harmonize the tabulation of the other defines in this file to match it. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171031121723.28524-3-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 508 ++++++++++++++--------------- 1 file changed, 254 insertions(+), 254 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 74370734663c..ad1b835001cc 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,8 +13,8 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 18 /* N 32-bit words worth of info */ -#define NBUGINTS 1 /* N 32-bit bug flags */ +#define NCAPINTS 18 /* N 32-bit words worth of info */ +#define NBUGINTS 1 /* N 32-bit bug flags */ /* * Note: If the comment begins with a quoted string, that string is used @@ -28,163 +28,163 @@ */ /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ 
+#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ /* (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ -#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ -#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ -#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ -#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ -#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ +#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ +#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ +#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ +#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ +#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ +#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ -#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! 
*/ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ +#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ /* cpu types for specific tunings: */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ -#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ -#define 
X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ +#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ +#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ +#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ -#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ -#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ -#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. 
Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ -#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ -#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ +#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ +#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ +#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ +#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ +#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ #define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ +#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 
4*32+31) /* Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ -#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ -#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ -#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ +#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ +#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ +#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ +#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ +#define 
X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -192,152 +192,152 @@ * * Reuse free bits when adding new feature flags! 
*/ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ +#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ +#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ +#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation 
extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ +#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ +#define 
X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ +#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ -#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ +#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ -#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ +#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. 
Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ -#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ -#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ -#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ -#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ -#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ -#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */ -#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */ -#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */ -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ 
+#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ /* * BUG word(s) */ -#define X86_BUG(x) (NCAPINTS*32 + (x)) +#define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ -#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ -#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ +#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional * to avoid confusion. 
*/ -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ #endif /* _ASM_X86_CPUFEATURES_H */ From f3a624e901c633593156f7b00ca743a6204a29bc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 31 Oct 2017 13:17:23 +0100 Subject: [PATCH 12/17] x86/cpufeatures: Fix various details in the feature definitions Kept this commit separate from the re-tabulation changes, to make the changes easier to review: - add better explanation for entries with no explanation - fix/enhance the text of some of the entries - fix the vertical alignment of some of the feature number definitions - fix inconsistent capitalization - ... and lots of other small details i.e. make it all more of a coherent unit, instead of a patchwork of years of additions. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171031121723.28524-4-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 149 ++++++++++++++--------------- 1 file changed, 74 insertions(+), 75 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ad1b835001cc..cdf5be866863 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -20,14 +20,12 @@ * Note: If the comment begins with a quoted string, that string is used * in /proc/cpuinfo instead of the macro name. If the string is "", * this feature bit is not displayed in /proc/cpuinfo at all. - */ - -/* + * * When adding new features here that depend on other features, - * please update the table in kernel/cpu/cpuid-deps.c + * please update the table in kernel/cpu/cpuid-deps.c as well. 
*/ -/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ +/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ #define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ #define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ #define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ @@ -42,8 +40,7 @@ #define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ #define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ #define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ - /* (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ #define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ #define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ #define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ @@ -63,15 +60,15 @@ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ #define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ #define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ #define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ #define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ #define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ #define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ @@ -84,66 +81,67 @@ #define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ #define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ -/* cpu types for specific tunings: */ + +/* CPU types for specific tunings: */ #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ #define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ -#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ +#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ #define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ #define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ #define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ +#define X86_FEATURE_SYSCALL32 ( 
3*32+14) /* "" syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ +#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ #define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ #define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ -/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ +/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ #define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ #define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ -#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ -#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. 
Debug Store */ +#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ #define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ #define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ #define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ #define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ #define X86_FEATURE_CID ( 4*32+10) /* Context ID */ #define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ #define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ #define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ +#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ #define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ #define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ #define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ #define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ #define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ #define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ #define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ @@ -158,10 +156,10 @@ #define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ #define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ -/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ +/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ #define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ #define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ #define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ #define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ #define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ @@ -175,16 +173,16 @@ #define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ #define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ #define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* translation cache 
extension */ +#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ #define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ +#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ #define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -192,7 +190,7 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ #define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ @@ -206,8 +204,8 @@ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ @@ -218,19 +216,19 @@ #define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ #define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ #define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ #define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ #define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation 
extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ @@ -238,8 +236,8 @@ #define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ #define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ #define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ @@ -251,25 +249,25 @@ #define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ #define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ -/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ +/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ #define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ #define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ #define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ -/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ -#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ +/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ +#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ -/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ #define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ #define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ @@ -281,7 +279,7 @@ #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy 
Perf. Preference */ #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ -/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ +/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ #define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ #define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ @@ -296,24 +294,24 @@ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ #define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ #define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ -#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */ -#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */ -#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ +/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ /* * BUG word(s) @@ -340,4 +338,5 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ + #endif /* _ASM_X86_CPUFEATURES_H */ From 693cb5580fdb026922363aa103add64b3ecd572e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 4 Nov 2017 04:19:48 -0700 Subject: [PATCH 13/17] selftests/x86/protection_keys: Fix syscall NR redefinition warnings On new enough glibc, the pkey syscall numbers are available.
Check first before defining them to avoid warnings like: protection_keys.c:198:0: warning: "SYS_pkey_alloc" redefined Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1fbef53a9e6befb7165ff855fc1a7d4788a191d6.1509794321.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/protection_keys.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 555e43ca846b..7a1cc0e56d2d 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -189,17 +189,29 @@ void lots_o_noops_around_write(int *write_to_me) #define u64 uint64_t #ifdef __i386__ -#define SYS_mprotect_key 380 -#define SYS_pkey_alloc 381 -#define SYS_pkey_free 382 + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 380 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 +#endif #define REG_IP_IDX REG_EIP #define si_pkey_offset 0x14 + #else + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 329 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 +#endif #define REG_IP_IDX REG_RIP #define si_pkey_offset 0x20 + #endif void dump_mem(void *dumpme, int len_bytes) From d60ad744c9741586010d4bea286f09a063a90fbd Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 4 Nov 2017 04:19:49 -0700 Subject: [PATCH 14/17] selftests/x86/ldt_gdt: Robustify against set_thread_area() and LAR oddities Bits 19:16 of LAR's result are undefined, and some upcoming improvements to the test case seem to trigger this. Mask off those bits to avoid spurious failures. commit 5b781c7e317f ("x86/tls: Forcibly set the accessed bit in TLS segments") adds a valid case in which LAR's output doesn't quite agree with set_thread_area()'s input. This isn't triggered in the test as is, but it will be if we start calling set_thread_area() with the accessed bit clear. Work around this discrepancy. I've added a Fixes tag so that -stable can pick this up if necessary. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 5b781c7e317f ("x86/tls: Forcibly set the accessed bit in TLS segments") Link: http://lkml.kernel.org/r/b82f3f89c034b53580970ac865139fd8863f44e2.1509794321.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/ldt_gdt.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 961e3ee26c27..b0334338a4b0 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -115,7 +115,15 @@ static void check_valid_segment(uint16_t index, int ldt, return; } - if (ar != expected_ar) { + /* The SDM says "bits 19:16 are undefined". Thanks. */ + ar &= ~0xF0000; + + /* + * NB: Different Linux versions do different things with the + * accessed bit in set_thread_area(). + */ + if (ar != expected_ar && + (ldt || ar != (expected_ar | AR_ACCESSED))) { printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", (ldt ?
"LDT" : "GDT"), index, ar, expected_ar); nerrs++; From d744dcad39094c9187075e274d1cdef79c57c8b5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 4 Nov 2017 04:19:50 -0700 Subject: [PATCH 15/17] selftests/x86/ldt_gdt: Add infrastructure to test set_thread_area() Much of the test design could apply to set_thread_area() (i.e. GDT), not just modify_ldt(). Add set_thread_area() to the install_valid_mode() helper. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/02c23f8fba5547007f741dc24c3926e5284ede02.1509794321.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/ldt_gdt.c | 57 ++++++++++++++++++--------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index b0334338a4b0..45f30249133f 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -137,30 +137,51 @@ static void check_valid_segment(uint16_t index, int ldt, } } -static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, - bool oldmode) +static bool install_valid_mode(const struct user_desc *d, uint32_t ar, + bool oldmode, bool ldt) { - int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, - desc, sizeof(*desc)); - if (ret < -1) - errno = -ret; - if (ret == 0) { - uint32_t limit = desc->limit; - if (desc->limit_in_pages) - limit = (limit << 12) + 4095; - check_valid_segment(desc->entry_number, 1, ar, limit, true); - return true; - } else if (errno == ENOSYS) { - printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + struct user_desc desc = *d; + int ret; + + if (!ldt) { +#ifndef __i386__ + /* No point testing set_thread_area in a 64-bit build */ return false; +#endif + if (!gdt_entry_num) + return false; + desc.entry_number = gdt_entry_num; + + ret = syscall(SYS_set_thread_area, &desc); } else { - if (desc->seg_32bit) { - printf("[FAIL]\tUnexpected modify_ldt failure %d\n", + ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, + &desc, sizeof(desc)); + + if (ret < -1) + errno = -ret; + + if (ret != 0 && errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + return false; + } + } + + if (ret == 0) { + uint32_t limit = desc.limit; + if (desc.limit_in_pages) + limit = (limit << 12) + 4095; + check_valid_segment(desc.entry_number, ldt, ar, limit, true); + return true; + } else { + if (desc.seg_32bit) { + printf("[FAIL]\tUnexpected %s failure %d\n", + ldt ? "modify_ldt" : "set_thread_area", errno); nerrs++; return false; } else { - printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); + printf("[OK]\t%s rejected 16 bit segment\n", + ldt ? "modify_ldt" : "set_thread_area"); return false; } } @@ -168,7 +189,7 @@ static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, static bool install_valid(const struct user_desc *desc, uint32_t ar) { - return install_valid_mode(desc, ar, false); + return install_valid_mode(desc, ar, false, true); } static void install_invalid(const struct user_desc *desc, bool oldmode) From adedf2893c192dd09b1cc2f2dcfdd7cad99ec49d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 4 Nov 2017 04:19:51 -0700 Subject: [PATCH 16/17] selftests/x86/ldt_gdt: Run most existing LDT test cases against the GDT as well Now that the main test infrastructure supports the GDT, run tests that will pass the kernel's GDT permission tests against the GDT. 
Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/686a1eda63414da38fcecc2412db8dba1ae40581.1509794321.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/ldt_gdt.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 45f30249133f..3bb42fff5d66 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -189,7 +189,15 @@ static bool install_valid_mode(const struct user_desc *d, uint32_t ar, static bool install_valid(const struct user_desc *desc, uint32_t ar) { - return install_valid_mode(desc, ar, false, true); + bool ret = install_valid_mode(desc, ar, false, true); + + if (desc->contents <= 1 && desc->seg_32bit && + !desc->seg_not_present) { + /* Should work in the GDT, too. */ + install_valid_mode(desc, ar, false, false); + } + + return ret; } static void install_invalid(const struct user_desc *desc, bool oldmode) From fec8f5ae1715a01c72ad52cb2ecd8aacaf142302 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 4 Nov 2017 04:19:52 -0700 Subject: [PATCH 17/17] selftests/x86/ldt_gdt: Add a few additional tests for limits We weren't testing the .limit and .limit_in_pages fields very well. Add more tests. This addition seems to trigger the "bits 19:16 are undefined" issue that was fixed in an earlier patch. I think that, at least on my CPU, the high nibble of the limit ends up in LAR bits 19:16. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5601c15ea9b3113d288953fd2838b18bedf6bc67.1509794321.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/ldt_gdt.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 3bb42fff5d66..66e5ce5b91f0 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -404,9 +404,24 @@ static void do_simple_tests(void) install_invalid(&desc, false); desc.seg_not_present = 0; - desc.read_exec_only = 0; desc.seg_32bit = 1; + desc.read_exec_only = 0; + desc.limit = 0xfffff; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB); + + desc.limit_in_pages = 1; + + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G); + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G); + desc.contents = 1; + desc.read_exec_only = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G); + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G); + + desc.limit = 0; install_invalid(&desc, true); }
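For reference, a minimal sketch (an illustration under stated assumptions, not part of the patch; the sample AR value is invented) of the limit arithmetic these tests exercise and of the LAR masking added earlier in the series:

#include <stdint.h>
#include <stdio.h>

/* Effective byte limit of a descriptor, mirroring install_valid_mode() in the test */
static uint32_t effective_limit(uint32_t limit, int limit_in_pages)
{
	if (limit_in_pages)
		limit = (limit << 12) + 4095;	/* page granularity: 4 KiB units */
	return limit;
}

int main(void)
{
	/* .limit = 0xfffff with .limit_in_pages = 1 covers the full 4 GiB - 1: */
	printf("0x%08x\n", effective_limit(0xfffff, 1));	/* prints 0xffffffff */

	/* LAR leaves AR bits 19:16 undefined, so mask them before comparing: */
	uint32_t ar = 0x00cff300;	/* sample raw LAR result with junk in bits 19:16 */
	ar &= ~0xF0000;			/* -> 0x00c0f300 */
	printf("0x%08x\n", ar);

	return 0;
}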