x86: make percpu symbols zerobased on SMP

[ Based on original patch from Christoph Lameter and Mike Travis. ]

This patch makes percpu symbols zerobased on x86_64 SMP by adding
PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on
the percpu output section and using it in vmlinux_64.lds.S.  A new
PHDR is added as existing ones cannot contain sections near address
zero.  PERCPU_VADDR() also adds a new symbol __per_cpu_load which
always points to the vaddr of the loaded percpu data.init region.

The following adjustments have been made to accomodate the address
change.

* code to locate percpu gdt_page in head_64.S is updated to add the
  load address to the gdt_page offset.

* __per_cpu_load is used in places where access to the init data area
  is necessary.

* pda->data_offset is initialized soon after C code is entered as zero
  value doesn't work anymore.

This patch is mostly taken from Mike Travis' "x86_64: Base percpu
variables at zero" patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Tejun Heo 2009-01-13 20:41:35 +09:00 committed by Ingo Molnar
parent a698c823e1
commit 3e5d8f9784
6 changed files with 88 additions and 10 deletions

View File

@ -44,6 +44,8 @@ void __init x86_64_init_pda(void)
{
_cpu_pda = __cpu_pda;
cpu_pda(0) = &_boot_cpu_pda;
cpu_pda(0)->data_offset =
(unsigned long)(__per_cpu_load - __per_cpu_start);
pda_init(0);
}

View File

@ -204,6 +204,23 @@ ENTRY(secondary_startup_64)
pushq $0
popfq
#ifdef CONFIG_SMP
/*
* early_gdt_base should point to the gdt_page in static percpu init
* data area. Computing this requires two symbols - __per_cpu_load
* and per_cpu__gdt_page. As linker can't do no such relocation, do
* it by hand. As early_gdt_descr is manipulated by C code for
* secondary CPUs, this should be done only once for the boot CPU
* when early_gdt_descr_base contains zero.
*/
movq early_gdt_descr_base(%rip), %rax
testq %rax, %rax
jnz 1f
movq $__per_cpu_load, %rax
addq $per_cpu__gdt_page, %rax
movq %rax, early_gdt_descr_base(%rip)
1:
#endif
/*
* We must switch to a new descriptor in kernel space for the GDT
* because soon the kernel won't have access anymore to the userspace
@ -401,7 +418,12 @@ NEXT_PAGE(level2_spare_pgt)
.globl early_gdt_descr
early_gdt_descr:
.word GDT_ENTRIES*8-1
.quad per_cpu__gdt_page
#ifdef CONFIG_SMP
early_gdt_descr_base:
.quad 0x0000000000000000
#else
.quad per_cpu__gdt_page
#endif
ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */

View File

@ -213,7 +213,7 @@ void __init setup_per_cpu_areas(void)
}
#endif
per_cpu_offset(cpu) = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
}

View File

@ -19,6 +19,9 @@ PHDRS {
data PT_LOAD FLAGS(7); /* RWE */
user PT_LOAD FLAGS(7); /* RWE */
data.init PT_LOAD FLAGS(7); /* RWE */
#ifdef CONFIG_SMP
percpu PT_LOAD FLAGS(7); /* RWE */
#endif
note PT_NOTE FLAGS(0); /* ___ */
}
SECTIONS
@ -208,14 +211,26 @@ SECTIONS
__initramfs_end = .;
#endif
#ifdef CONFIG_SMP
/*
* percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
* output PHDR, so the next output section - __data_nosave - should
* switch it back to data.init.
*/
. = ALIGN(PAGE_SIZE);
PERCPU_VADDR(0, :percpu)
#else
PERCPU(PAGE_SIZE)
#endif
. = ALIGN(PAGE_SIZE);
__init_end = .;
. = ALIGN(PAGE_SIZE);
__nosave_begin = .;
.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
*(.data.nosave)
} :data.init /* switch back to data.init, see PERCPU_VADDR() above */
. = ALIGN(PAGE_SIZE);
__nosave_end = .;

View File

@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[];
extern char __init_begin[], __init_end[];
extern char _sinittext[], _einittext[];
extern char _end[];
extern char __per_cpu_start[], __per_cpu_end[];
extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __start_rodata[], __end_rodata[];

View File

@ -430,12 +430,51 @@
*(.initcall7.init) \
*(.initcall7s.init)
#define PERCPU(align) \
. = ALIGN(align); \
VMLINUX_SYMBOL(__per_cpu_start) = .; \
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
#define PERCPU_PROLOG(vaddr) \
VMLINUX_SYMBOL(__per_cpu_load) = .; \
.data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__per_cpu_start) = .;
#define PERCPU_EPILOG(phdr) \
VMLINUX_SYMBOL(__per_cpu_end) = .; \
} phdr \
. = __per_cpu_load + SIZEOF(.data.percpu);
/**
* PERCPU_VADDR - define output section for percpu area
* @vaddr: explicit base address (optional)
* @phdr: destination PHDR (optional)
*
* Macro which expands to output section for percpu area. If @vaddr
* is not blank, it specifies explicit base address and all percpu
* symbols will be offset from the given address. If blank, @vaddr
* always equals @laddr + LOAD_OFFSET.
*
* @phdr defines the output PHDR to use if not blank. Be warned that
* output PHDR is sticky. If @phdr is specified, the next output
* section in the linker script will go there too. @phdr should have
* a leading colon.
*
* This macro defines three symbols, __per_cpu_load, __per_cpu_start
* and __per_cpu_end. The first one is the vaddr of loaded percpu
* init data. __per_cpu_start equals @vaddr and __per_cpu_end is the
* end offset.
*/
#define PERCPU_VADDR(vaddr, phdr) \
PERCPU_PROLOG(vaddr) \
*(.data.percpu.page_aligned) \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
} \
VMLINUX_SYMBOL(__per_cpu_end) = .;
PERCPU_EPILOG(phdr)
/**
* PERCPU - define output section for percpu area, simple version
* @align: required alignment
*
* Align to @align and outputs output section for percpu area. This
* macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and
* __per_cpu_start will be identical.
*/
#define PERCPU(align) \
. = ALIGN(align); \
PERCPU_VADDR( , )