x86/irq/64: Split the IRQ stack into its own pages

Currently, the IRQ stack is hardcoded as the first page of the percpu
area, and the stack canary lives on the IRQ stack. The former gets in
the way of adding an IRQ stack guard page, and the latter is a potential
weakness in the stack canary mechanism.

Split the IRQ stack into its own private percpu pages.
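
For illustration, a minimal user-space sketch of the layout after this
change (not kernel code: IRQ_STACK_SIZE is an assumed 16 KB here, and the
plain globals merely stand in for the per-CPU symbols the patch touches):

#include <stddef.h>
#include <stdio.h>

#define IRQ_STACK_SIZE 16384	/* assumed value; the real one is per-arch */

/* Only the GS base / stack canary slot stays at the start of percpu. */
struct fixed_percpu_data {
	char		gs_base[40];	/* canary lands at offset 40, i.e. %gs:40 */
	unsigned long	stack_canary;
};

/* The IRQ stack becomes its own, suitably aligned object. */
struct irq_stack {
	char	stack[IRQ_STACK_SIZE];
} __attribute__((__aligned__(IRQ_STACK_SIZE)));

static struct fixed_percpu_data fixed_percpu_data;
static struct irq_stack irq_stack_backing_store;
static struct irq_stack *hardirq_stack_ptr;

int main(void)
{
	/* Mirrors map_irq_stack(): cache the top of the backing store. */
	void *va = &irq_stack_backing_store;

	hardirq_stack_ptr = (struct irq_stack *)((char *)va + IRQ_STACK_SIZE);

	printf("stack_canary offset: %zu\n",
	       offsetof(struct fixed_percpu_data, stack_canary));
	printf("IRQ stack top - base: %td bytes\n",
	       (char *)hardirq_stack_ptr - irq_stack_backing_store.stack);
	return 0;
}

The hunks below make the same split with the kernel's per-CPU machinery
(DEFINE_PER_CPU_FIRST / DEFINE_PER_CPU_PAGE_ALIGNED).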

[ tglx: Make 64 and 32 bit share struct irq_stack ]

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: "Chang S. Bae" <chang.seok.bae@intel.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Feng Tang <feng.tang@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Beulich <JBeulich@suse.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Jordan Borgner <mail@jordan-borgner.de>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Maran Wilson <maran.wilson@oracle.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolai Stange <nstange@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Pu Wen <puwen@hygon.cn>
Cc: "Rafael Ávila de Espíndola" <rafael@espindo.la>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: x86-ml <x86@kernel.org>
Cc: xen-devel@lists.xenproject.org
Link: https://lkml.kernel.org/r/20190414160146.267376656@linutronix.de
commit e6401c1309 (parent 0ac2610420)
Authored by Andy Lutomirski on 2019-04-14 18:00:06 +02:00; committed by Borislav Petkov
11 changed files with 39 additions and 44 deletions

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S

@@ -298,7 +298,7 @@ ENTRY(__switch_to_asm)

 #ifdef CONFIG_STACKPROTECTOR
 	movq	TASK_stack_canary(%rsi), %rbx
-	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+	movq	%rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
 #endif

 #ifdef CONFIG_RETPOLINE
@@ -430,7 +430,7 @@ END(irq_entries_start)
 	 * it before we actually move ourselves to the IRQ stack.
 	 */

-	movq	\old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
+	movq	\old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8)
 	movq	PER_CPU_VAR(hardirq_stack_ptr), %rsp

 #ifdef CONFIG_DEBUG_ENTRY

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h

@@ -367,6 +367,13 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 #define __KERNEL_TSS_LIMIT	\
 	(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)

+/* Per CPU interrupt stacks */
+struct irq_stack {
+	char		stack[IRQ_STACK_SIZE];
+} __aligned(IRQ_STACK_SIZE);
+
+DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
+
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
 #else
@@ -375,28 +382,24 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
 #endif

 #ifdef CONFIG_X86_64
-union irq_stack_union {
-	char irq_stack[IRQ_STACK_SIZE];
+struct fixed_percpu_data {
 	/*
 	 * GCC hardcodes the stack canary as %gs:40. Since the
 	 * irq_stack is the object at %gs:0, we reserve the bottom
 	 * 48 bytes of the irq stack for the canary.
 	 */
-	struct {
-		char gs_base[40];
-		unsigned long stack_canary;
-	};
+	char		gs_base[40];
+	unsigned long	stack_canary;
 };

-DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
-DECLARE_INIT_PER_CPU(irq_stack_union);
+DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
+DECLARE_INIT_PER_CPU(fixed_percpu_data);

 static inline unsigned long cpu_kernelmode_gs_base(int cpu)
 {
-	return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
+	return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
 }

-DECLARE_PER_CPU(char *, hardirq_stack_ptr);
 DECLARE_PER_CPU(unsigned int, irq_count);
 extern asmlinkage void ignore_sysret(void);
@@ -418,14 +421,7 @@ struct stack_canary {
 };
 DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
-/*
- * per-CPU IRQ handling stacks
- */
-struct irq_stack {
-	char		stack[IRQ_STACK_SIZE];
-} __aligned(IRQ_STACK_SIZE);
-
-DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
+/* Per CPU softirq stack pointer */
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
 #endif	/* X86_64 */

diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h

@@ -13,7 +13,7 @@
  * On x86_64, %gs is shared by percpu area and stack canary. All
  * percpu symbols are zero based and %gs points to the base of percpu
  * area. The first occupant of the percpu area is always
- * irq_stack_union which contains stack_canary at offset 40. Userland
+ * fixed_percpu_data which contains stack_canary at offset 40. Userland
  * %gs is always saved and restored on kernel entry and exit using
  * swapgs, so stack protector doesn't add any complexity there.
  *
@@ -64,7 +64,7 @@ static __always_inline void boot_init_stack_canary(void)
 	u64 tsc;

 #ifdef CONFIG_X86_64
-	BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
+	BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
 #endif
 	/*
 	 * We both use the random pool and the current TSC as a source
@@ -79,7 +79,7 @@ static __always_inline void boot_init_stack_canary(void)
 	current->stack_canary = canary;
 #ifdef CONFIG_X86_64
-	this_cpu_write(irq_stack_union.stack_canary, canary);
+	this_cpu_write(fixed_percpu_data.stack_canary, canary);
 #else
 	this_cpu_write(stack_canary.canary, canary);
 #endif

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c

@@ -73,7 +73,7 @@ int main(void)
 	BLANK();
 #ifdef CONFIG_STACKPROTECTOR
-	DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
+	DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
 	BLANK();
 #endif

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c

@@ -1498,9 +1498,9 @@ static __init int setup_clearcpuid(char *arg)
 __setup("clearcpuid=", setup_clearcpuid);

 #ifdef CONFIG_X86_64
-DEFINE_PER_CPU_FIRST(union irq_stack_union,
-		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
-EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
+DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
+		     fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
+EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);

 /*
  * The following percpu variables are hot. Align current_task to
@@ -1510,7 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
 	&init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);

-DEFINE_PER_CPU(char *, hardirq_stack_ptr);
+DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
 DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;

 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S

@@ -265,7 +265,7 @@ ENDPROC(start_cpu0)
 	GLOBAL(initial_code)
 	.quad	x86_64_start_kernel
 	GLOBAL(initial_gs)
-	.quad	INIT_PER_CPU_VAR(irq_stack_union)
+	.quad	INIT_PER_CPU_VAR(fixed_percpu_data)
 	GLOBAL(initial_stack)
 	/*
 	 * The SIZEOF_PTREGS gap is a convention which helps the in-kernel

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c

@@ -23,6 +23,9 @@
 #include <asm/io_apic.h>
 #include <asm/apic.h>

+DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible;
+DECLARE_INIT_PER_CPU(irq_stack_backing_store);
+
 int sysctl_panic_on_stackoverflow;

 /*
@@ -90,7 +93,7 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)

 static int map_irq_stack(unsigned int cpu)
 {
-	void *va = per_cpu_ptr(irq_stack_union.irq_stack, cpu);
+	void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);

 	per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
 	return 0;

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c

@@ -244,11 +244,6 @@ void __init setup_per_cpu_areas(void)
 		per_cpu(x86_cpu_to_logical_apicid, cpu) =
 			early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
 #endif
-#ifdef CONFIG_X86_64
-		per_cpu(hardirq_stack_ptr, cpu) =
-			per_cpu(irq_stack_union.irq_stack, cpu) +
-			IRQ_STACK_SIZE;
-#endif
 #ifdef CONFIG_NUMA
 		per_cpu(x86_cpu_to_node_map, cpu) =
 			early_per_cpu_map(x86_cpu_to_node_map, cpu);

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S

@@ -403,7 +403,8 @@ SECTIONS
  */
 #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
 INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(irq_stack_union);
+INIT_PER_CPU(fixed_percpu_data);
+INIT_PER_CPU(irq_stack_backing_store);

 /*
  * Build-time check on the image size:
@@ -412,8 +413,8 @@ INIT_PER_CPU(irq_stack_union);
 	   "kernel image bigger than KERNEL_IMAGE_SIZE");

 #ifdef CONFIG_SMP
-. = ASSERT((irq_stack_union == 0),
-           "irq_stack_union is not at start of per-cpu area");
+. = ASSERT((fixed_percpu_data == 0),
+           "fixed_percpu_data is not at start of per-cpu area");
 #endif

 #endif /* CONFIG_X86_32 */

diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c

@@ -738,7 +738,7 @@ static void percpu_init(void)
  *	__per_cpu_load
  *
  * The "gold" linker incorrectly associates:
- *	init_per_cpu__irq_stack_union
+ *	init_per_cpu__fixed_percpu_data
  *	init_per_cpu__gdt_page
  */
 static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)

diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S

@@ -40,13 +40,13 @@ ENTRY(startup_xen)
 #ifdef CONFIG_X86_64
 	/* Set up %gs.
 	 *
-	 * The base of %gs always points to the bottom of the irqstack
-	 * union. If the stack protector canary is enabled, it is
-	 * located at %gs:40. Note that, on SMP, the boot cpu uses
-	 * init data section till per cpu areas are set up.
+	 * The base of %gs always points to fixed_percpu_data. If the
+	 * stack protector canary is enabled, it is located at %gs:40.
+	 * Note that, on SMP, the boot cpu uses init data section until
+	 * the per cpu areas are set up.
 	 */
 	movl	$MSR_GS_BASE,%ecx
-	movq	$INIT_PER_CPU_VAR(irq_stack_union),%rax
+	movq	$INIT_PER_CPU_VAR(fixed_percpu_data),%rax
 	cdq
 	wrmsr
 #endif