From 0a7d87a7776e2616334473c4209e277b6ca300e5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 8 Aug 2016 11:12:07 +0100 Subject: [PATCH 001/100] arm64: hibernate: reduce TLB maintenance scope In break_before_make_ttbr_switch we perform broadcast TLB maintenance for the inner shareable domain, and use a DSB ISH to complete this. However, at the point we execute this, secondary CPUs are either physically offline, or executing code outside of the kernel. Upon entering the kernel, secondary CPUs will invalidate their TLBs before enabling their MMUs. Thus we do not need to invalidate TLBs of other CPUs, and as with idmap_cpu_replace_ttbr1 we can reduce the scope of maintenance to the TLBs of the local CPU. This keeps our TLB maintenance code consistent, and is a minor optimisation. Cc: Catalin Marinas Cc: Lorenzo Pieralisi Acked-by: James Morse Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate-asm.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 46f29b6560ec..7734f3e7a1be 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -36,8 +36,8 @@ .macro break_before_make_ttbr_switch zero_page, page_table msr ttbr1_el1, \zero_page isb - tlbi vmalle1is - dsb ish + tlbi vmalle1 + dsb nsh msr ttbr1_el1, \page_table isb .endm From 421dd6fa6709ebee4f888ed89da5c103c77caee1 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Jul 2016 16:48:14 -0400 Subject: [PATCH 002/100] arm64: factor work_pending state machine to C Currently ret_fast_syscall, work_pending, and ret_to_user form an ad-hoc state machine that can be difficult to reason about due to duplicated code and a large number of branch targets. This patch factors the common logic out into the existing do_notify_resume function, converting the code to C in the process, making the code more legible. 
This patch tries to closely mirror the existing behaviour while using the usual C control flow primitives. As local_irq_{disable,enable} may be instrumented, we balance exception entry (where we will most likely enable IRQs) with a call to trace_hardirqs_on just before the return to userspace. Signed-off-by: Chris Metcalf Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 12 ++++-------- arch/arm64/kernel/signal.c | 32 ++++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 441420ca7d08..6a64182822e5 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -707,18 +707,13 @@ ret_fast_syscall_trace: * Ok, we need to do extra processing, enter the slow path. */ work_pending: - tbnz x1, #TIF_NEED_RESCHED, work_resched - /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ mov x0, sp // 'regs' - enable_irq // enable interrupts for do_notify_resume() bl do_notify_resume - b ret_to_user -work_resched: #ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off // the IRQs are off here, inform the tracing code + bl trace_hardirqs_on // enabled while in userspace #endif - bl schedule - + ldr x1, [tsk, #TI_FLAGS] // re-check for single-step + b finish_ret_to_user /* * "slow" syscall return path. 
*/ @@ -727,6 +722,7 @@ ret_to_user: ldr x1, [tsk, #TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending +finish_ret_to_user: enable_step_tsk x1, x2 kernel_exit 0 ENDPROC(ret_to_user) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index a8eafdbc7cb8..404dd67080b9 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -402,15 +402,31 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int thread_flags) { - if (thread_flags & _TIF_SIGPENDING) - do_signal(regs); + /* + * The assembly code enters us with IRQs off, but it hasn't + * informed the tracing code of that for efficiency reasons. + * Update the trace code with the current status. + */ + trace_hardirqs_off(); + do { + if (thread_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + local_irq_enable(); - if (thread_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - } + if (thread_flags & _TIF_SIGPENDING) + do_signal(regs); - if (thread_flags & _TIF_FOREIGN_FPSTATE) - fpsimd_restore_current_state(); + if (thread_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + if (thread_flags & _TIF_FOREIGN_FPSTATE) + fpsimd_restore_current_state(); + } + + local_irq_disable(); + thread_flags = READ_ONCE(current_thread_info()->flags); + } while (thread_flags & _TIF_WORK_MASK); } From d34fdb7081394cbf93fa6571d990086356f4ea9d Mon Sep 17 00:00:00 2001 From: Kwangwoo Lee Date: Tue, 2 Aug 2016 09:50:50 +0900 Subject: [PATCH 003/100] arm64: mm: convert __dma_* routines to use start, size __dma_* routines have been converted to use start and size instead of start and end addresses. The patch was originally for adding __clean_dcache_area_poc() which will be used in the pmem driver to clean dcache to the PoC (Point of Coherency) in arch_wb_cache_pmem(). 
The functionality of __clean_dcache_area_poc() was equivalent to __dma_clean_range(). The difference was __dma_clean_range() uses the end address, but __clean_dcache_area_poc() uses the size to clean. Thus, __clean_dcache_area_poc() has been revised with a fallthrough function of __dma_clean_range() after the change that __dma_* routines use start and size instead of using start and end. As a consequence of using start and size, the name of __dma_* routines has also been altered following the terminology below: area: takes a start and size range: takes a start and end Reviewed-by: Robin Murphy Signed-off-by: Kwangwoo Lee Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 3 +- arch/arm64/mm/cache.S | 86 ++++++++++++++--------------- arch/arm64/mm/dma-mapping.c | 6 +- 3 files changed, 46 insertions(+), 49 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index c64268dbff64..2e5fb976a572 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -68,6 +68,7 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); +extern void __clean_dcache_area_poc(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); @@ -85,7 +86,7 @@ static inline void flush_cache_page(struct vm_area_struct *vma, */ extern void __dma_map_area(const void *, size_t, int); extern void __dma_unmap_area(const void *, size_t, int); -extern void __dma_flush_range(const void *, const void *); +extern void __dma_flush_area(const void *, size_t); /* * Copy user data from/to a page which is mapped into a different diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 07d7352d7c38..58b5a906ff78 100644 --- 
a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -104,20 +104,21 @@ ENTRY(__clean_dcache_area_pou) ret ENDPROC(__clean_dcache_area_pou) +/* + * __dma_inv_area(start, size) + * - start - virtual start address of region + * - size - size in question + */ +__dma_inv_area: + add x1, x1, x0 + /* FALLTHROUGH */ + /* * __inval_cache_range(start, end) * - start - start address of region * - end - end address of region */ ENTRY(__inval_cache_range) - /* FALLTHROUGH */ - -/* - * __dma_inv_range(start, end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -__dma_inv_range: dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -136,46 +137,43 @@ __dma_inv_range: dsb sy ret ENDPIPROC(__inval_cache_range) -ENDPROC(__dma_inv_range) +ENDPROC(__dma_inv_area) /* - * __dma_clean_range(start, end) - * - start - virtual start address of region - * - end - virtual end address of region + * __clean_dcache_area_poc(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned to the PoC. 
+ * + * - kaddr - kernel address + * - size - size in question */ -__dma_clean_range: - dcache_line_size x2, x3 - sub x3, x2, #1 - bic x0, x0, x3 -1: -alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc cvac, x0 -alternative_else - dc civac, x0 -alternative_endif - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy - ret -ENDPROC(__dma_clean_range) +ENTRY(__clean_dcache_area_poc) + /* FALLTHROUGH */ /* - * __dma_flush_range(start, end) + * __dma_clean_area(start, size) * - start - virtual start address of region - * - end - virtual end address of region + * - size - size in question */ -ENTRY(__dma_flush_range) - dcache_line_size x2, x3 - sub x3, x2, #1 - bic x0, x0, x3 -1: dc civac, x0 // clean & invalidate D / U line - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy +__dma_clean_area: + dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__dma_flush_range) +ENDPIPROC(__clean_dcache_area_poc) +ENDPROC(__dma_clean_area) + +/* + * __dma_flush_area(start, size) + * + * clean & invalidate D / U line + * + * - start - virtual start address of region + * - size - size in question + */ +ENTRY(__dma_flush_area) + dcache_by_line_op civac, sy, x0, x1, x2, x3 + ret +ENDPIPROC(__dma_flush_area) /* * __dma_map_area(start, size, dir) @@ -184,10 +182,9 @@ ENDPIPROC(__dma_flush_range) * - dir - DMA direction */ ENTRY(__dma_map_area) - add x1, x1, x0 cmp w2, #DMA_FROM_DEVICE - b.eq __dma_inv_range - b __dma_clean_range + b.eq __dma_inv_area + b __dma_clean_area ENDPIPROC(__dma_map_area) /* @@ -197,8 +194,7 @@ ENDPIPROC(__dma_map_area) * - dir - DMA direction */ ENTRY(__dma_unmap_area) - add x1, x1, x0 cmp w2, #DMA_TO_DEVICE - b.ne __dma_inv_range + b.ne __dma_inv_area ret ENDPIPROC(__dma_unmap_area) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index c4284c432ae8..f3953decb171 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -168,7 +168,7 @@ static void *__dma_alloc(struct device *dev, size_t size, return ptr; /* remove any 
dirty cache lines on the kernel alias */ - __dma_flush_range(ptr, ptr + size); + __dma_flush_area(ptr, size); /* create a coherent mapping */ page = virt_to_page(ptr); @@ -387,7 +387,7 @@ static int __init atomic_pool_init(void) void *page_addr = page_address(page); memset(page_addr, 0, atomic_pool_size); - __dma_flush_range(page_addr, page_addr + atomic_pool_size); + __dma_flush_area(page_addr, atomic_pool_size); atomic_pool = gen_pool_create(PAGE_SHIFT, -1); if (!atomic_pool) @@ -548,7 +548,7 @@ fs_initcall(dma_debug_do_init); /* Thankfully, all cache ops are by VA so we can ignore phys here */ static void flush_page(struct device *dev, const void *virt, phys_addr_t phys) { - __dma_flush_range(virt, virt + PAGE_SIZE); + __dma_flush_area(virt, PAGE_SIZE); } static void *__iommu_alloc_attrs(struct device *dev, size_t size, From 826d05623f3e2b316bbad6204418a0ff1b49a720 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 10 Aug 2016 20:59:15 +0800 Subject: [PATCH 004/100] arm64: perf: Use the builtin_platform_driver Use the builtin_platform_driver() to simplify code. 
Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 838ccf123307..ea64f0173d61 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1057,8 +1057,4 @@ static struct platform_driver armv8_pmu_driver = { .probe = armv8_pmu_device_probe, }; -static int __init register_armv8_pmu_driver(void) -{ - return platform_driver_register(&armv8_pmu_driver); -} -device_initcall(register_armv8_pmu_driver); +builtin_platform_driver(armv8_pmu_driver); From da752563e7fff00885c93e9d5c06bbbfaf34e4da Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 11 Aug 2016 17:59:46 +0100 Subject: [PATCH 005/100] arm64: remove traces of perf_ops_bp Even though perf_ops_bp was removed/renamed back in commit b0a873ebbf87bf38 ("perf: Register PMU implementations"), as part of v2.6.37, its definition still lives on in some arch headers. This patch removes the vestigial definition from arm64. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/hw_breakpoint.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 115ea2a64520..2487891dec46 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -141,8 +141,6 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task) } #endif -extern struct pmu perf_ops_bp; - /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { From 67060ed148cca4f6f3cde6d6941d3b4a86ad698c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 16 Aug 2016 18:19:22 +0900 Subject: [PATCH 006/100] arm64: remove redundant "select HAVE_CLK" HAVE_CLK is select'ed by CLKDEV_LOOKUP, which is select'ed by COMMON_CLK, which is select'ed by ARM64. 
No sub-architecture needs to select HAVE_CLK explicitly. Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- arch/arm64/Kconfig.platforms | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index be5d824ebdba..4a4318bc219a 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -159,7 +159,6 @@ config ARCH_TEGRA select CLKSRC_MMIO select CLKSRC_OF select GENERIC_CLOCKEVENTS - select HAVE_CLK select PINCTRL select RESET_CONTROLLER help From 1aed28f94ce6c1f6c24bcbbd5fcd749b55f65e9e Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 15 Aug 2016 14:45:44 +0800 Subject: [PATCH 007/100] arm64: vdso: add __init section marker to alloc_vectors_page It is not needed after booting, this patch moves the alloc_vectors_page function to the __init section. Acked-by: Mark Rutland Signed-off-by: Jisheng Zhang Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 076312b17d4f..e320e8f96de4 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -55,7 +55,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data; */ static struct page *vectors_page[1]; -static int alloc_vectors_page(void) +static int __init alloc_vectors_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; From b6d081bddf397026575a437b603b118dff2606ff Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 15 Aug 2016 14:45:45 +0800 Subject: [PATCH 008/100] arm64: vdso: constify vm_special_mapping used for aarch32 vectors page The vm_special_mapping spec which is used for aarch32 vectors page is never modified, so mark it as const. 
Acked-by: Mark Rutland Signed-off-by: Jisheng Zhang Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index e320e8f96de4..10ad8abe7ec5 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -88,7 +88,7 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr = AARCH32_VECTORS_BASE; - static struct vm_special_mapping spec = { + static const struct vm_special_mapping spec = { .name = "[vectors]", .pages = vectors_page, From 5a9e3e156ec1ab26ba70b4c44157858c92bbeee0 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 15 Aug 2016 14:45:46 +0800 Subject: [PATCH 009/100] arm64: apply __ro_after_init to some objects These objects are set during initialization, thereafter are read only. Previously I only want to mark vdso_pages, vdso_spec, vectors_page and cpu_ops as __read_mostly from performance point of view. Then inspired by Kees's patch[1] to apply more __ro_after_init for arm, I think it's better to mark them as __ro_after_init. What's more, I find some more objects are also read only after init. So apply __ro_after_init to all of them. This patch also removes global vdso_pagelist and tries to clean up vdso_spec[] assignment code. 
[1] http://www.spinics.net/lists/arm-kernel/msg523188.html Acked-by: Mark Rutland Signed-off-by: Jisheng Zhang Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_ops.c | 3 ++- arch/arm64/kernel/kaslr.c | 3 ++- arch/arm64/kernel/vdso.c | 30 +++++++++++++++--------------- arch/arm64/mm/dma-mapping.c | 3 ++- arch/arm64/mm/init.c | 5 +++-- arch/arm64/mm/mmu.c | 3 ++- 6 files changed, 26 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index c7cfb8fe06f9..e137ceaf5016 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include @@ -28,7 +29,7 @@ extern const struct cpu_operations smp_spin_table_ops; extern const struct cpu_operations acpi_parking_protocol_ops; extern const struct cpu_operations cpu_psci_ops; -const struct cpu_operations *cpu_ops[NR_CPUS]; +const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index b05469173ba5..769f24ef628c 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. 
*/ +#include #include #include #include @@ -20,7 +21,7 @@ #include #include -u64 __read_mostly module_alloc_base; +u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; static __init u64 get_kaslr_seed(void *fdt) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 10ad8abe7ec5..6225612f2464 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -18,12 +18,13 @@ * Author: Will Deacon */ -#include +#include #include #include #include #include #include +#include #include #include #include @@ -37,8 +38,7 @@ #include extern char vdso_start, vdso_end; -static unsigned long vdso_pages; -static struct page **vdso_pagelist; +static unsigned long vdso_pages __ro_after_init; /* * The vDSO data page. @@ -53,7 +53,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data; /* * Create and map the vectors page for AArch32 tasks. */ -static struct page *vectors_page[1]; +static struct page *vectors_page[1] __ro_after_init; static int __init alloc_vectors_page(void) { @@ -110,11 +110,19 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) } #endif /* CONFIG_COMPAT */ -static struct vm_special_mapping vdso_spec[2]; +static struct vm_special_mapping vdso_spec[2] __ro_after_init = { + { + .name = "[vvar]", + }, + { + .name = "[vdso]", + }, +}; static int __init vdso_init(void) { int i; + struct page **vdso_pagelist; if (memcmp(&vdso_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); @@ -138,16 +146,8 @@ static int __init vdso_init(void) for (i = 0; i < vdso_pages; i++) vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i); - /* Populate the special mapping structures */ - vdso_spec[0] = (struct vm_special_mapping) { - .name = "[vvar]", - .pages = vdso_pagelist, - }; - - vdso_spec[1] = (struct vm_special_mapping) { - .name = "[vdso]", - .pages = &vdso_pagelist[1], - }; + vdso_spec[0].pages = &vdso_pagelist[0]; + vdso_spec[1].pages = &vdso_pagelist[1]; return 0; } diff --git 
a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index f3953decb171..bdacead5b802 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,7 @@ #include -static int swiotlb __read_mostly; +static int swiotlb __ro_after_init; static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, bool coherent) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index bbb7ee76e319..251e0824cd82 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -55,8 +56,8 @@ * executes, which assigns it its actual value. So use a default value * that cannot be mistaken for a real physical address. */ -s64 memstart_addr __read_mostly = -1; -phys_addr_t arm64_dma_phys_limit __read_mostly; +s64 memstart_addr __ro_after_init = -1; +phys_addr_t arm64_dma_phys_limit __ro_after_init; #ifdef CONFIG_BLK_DEV_INITRD static int __init early_initrd(char *p) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4989948d1feb..e634a0f6d62b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -17,6 +17,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -46,7 +47,7 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); -u64 kimage_voffset __read_mostly; +u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); /* From aea73abb90265110ed59281e370289316fd689f3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Aug 2016 21:02:32 +0200 Subject: [PATCH 010/100] arm64: head.S: get rid of x25 and x26 with 'global' scope Currently, x25 and x26 hold the physical addresses of idmap_pg_dir and swapper_pg_dir, respectively, when running early boot code. 
But having registers with 'global' scope in files that contain different sections with different lifetimes, and that are called by different CPUs at different times is a bit messy, especially since stashing the values does not buy us anything in terms of code size or clarity. So simply replace each reference to x25 or x26 with an adrp instruction referring to idmap_pg_dir or swapper_pg_dir directly. Acked-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 28 +++++++++++++--------------- arch/arm64/kernel/sleep.S | 2 -- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b77f58355da1..219676253dbc 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -214,7 +214,7 @@ ENTRY(stext) adrp x24, __PHYS_OFFSET and x23, x24, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 bl set_cpu_boot_mode_flag - bl __create_page_tables // x25=TTBR0, x26=TTBR1 + bl __create_page_tables /* * The following calls CPU setup code, see arch/arm64/mm/proc.S for * details. @@ -311,23 +311,21 @@ ENDPROC(preserve_boot_args) * been enabled */ __create_page_tables: - adrp x25, idmap_pg_dir - adrp x26, swapper_pg_dir mov x28, lr /* * Invalidate the idmap and swapper page tables to avoid potential * dirty cache lines being evicted. */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + adrp x0, idmap_pg_dir + adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE bl __inval_cache_range /* * Clear the idmap and swapper page tables. */ - mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE + adrp x0, idmap_pg_dir + adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -340,7 +338,7 @@ __create_page_tables: /* * Create the identity mapping. 
*/ - mov x0, x25 // idmap_pg_dir + adrp x0, idmap_pg_dir adrp x3, __idmap_text_start // __pa(__idmap_text_start) #ifndef CONFIG_ARM64_VA_BITS_48 @@ -390,7 +388,7 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - mov x0, x26 // swapper_pg_dir + adrp x0, swapper_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 @@ -405,8 +403,8 @@ __create_page_tables: * accesses (MMU disabled), invalidate the idmap and swapper page * tables again to remove any speculatively loaded cache lines. */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + adrp x0, idmap_pg_dir + adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE dmb sy bl __inval_cache_range @@ -666,8 +664,6 @@ secondary_startup: /* * Common entry point for secondary CPUs. */ - adrp x25, idmap_pg_dir - adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor adr_l x27, __secondary_switch // address to jump to after enabling the MMU @@ -731,8 +727,10 @@ ENTRY(__enable_mmu) cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support update_early_cpu_boot_status 0, x1, x2 - msr ttbr0_el1, x25 // load TTBR0 - msr ttbr1_el1, x26 // load TTBR1 + adrp x1, idmap_pg_dir + adrp x2, swapper_pg_dir + msr ttbr0_el1, x1 // load TTBR0 + msr ttbr1_el1, x2 // load TTBR1 isb msr sctlr_el1, x0 isb diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index ccf79d849e0a..182129b60fdf 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -102,8 +102,6 @@ ENTRY(cpu_resume) /* enable the MMU early - so we can access sleep_save_stash by va */ adr_l lr, __enable_mmu /* __cpu_setup will return here */ adr_l x27, _resume_switched /* __enable_mmu will branch here */ - adrp x25, idmap_pg_dir - adrp x26, swapper_pg_dir b __cpu_setup ENDPROC(cpu_resume) From 7419333fa15ec958d12845fcc79b7bdd16de06ec Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Mon, 22 Aug 2016 12:16:00 +0530 Subject: 
[PATCH 011/100] arm64: kprobe: Always clear pstate.D in breakpoint exception handler Whenever we are hitting a kprobe from a non-kprobe debug exception handler, we hit infinite occurrences of "Unexpected kernel single-step exception at EL1". PSTATE.D is the debug exception mask bit. It is set whenever we enter into an exception mode. When it is set then Watchpoint, Breakpoint, and Software Step exceptions are masked. However, software Breakpoint Instruction exceptions can never be masked. Therefore, if we ever execute a BRK instruction, irrespective of D-bit setting, we will be receiving a corresponding breakpoint exception. For example: - We are executing kprobe pre/post handler, and kprobe has been inserted in one of the instructions of a function called by handler. So, it executes BRK instruction and we land into the case of KPROBE_REENTER. (This case is already handled by current code) - We are executing uprobe handler or any other BRK handler such as in WARN_ON (BRK BUG_BRK_IMM), and we trace that path using kprobe. So, we enter into kprobe breakpoint handler, from another BRK handler. (This case is not being handled currently) In all such cases kprobe breakpoint exception will be raised when we were already in debug exception mode. SPSR's D bit (bit 9) shows the value of PSTATE.D immediately before the exception was taken. So, in above example cases we would find it set in kprobe breakpoint handler. A kprobe breakpoint exception will always be followed by a single step exception. However, it will only be raised gracefully if we clear D bit while returning from breakpoint exception. If D bit is set, then it results in an undefined exception, and when its handler enables dbg a single step exception is generated; however, it will never be handled (because the address does not match and it is therefore treated as unexpected).
This patch clears D-flag unconditionally in setup_singlestep, so that we can always get single step exception correctly after returning from breakpoint exception. Additionally, it also removes D-flag set statement for KPROBE_REENTER return path, because debug exception for KPROBE_REENTER will always take place in a debug exception state. So, D-flag will already be set in this case. Acked-by: Sandeepa Prabhu Acked-by: Masami Hiramatsu Signed-off-by: Pratyush Anand Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c6b0f40620d8..0354ffeb2ed5 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -166,13 +166,18 @@ static void __kprobes set_current_kprobe(struct kprobe *p) } /* - * The D-flag (Debug mask) is set (masked) upon debug exception entry. - * Kprobes needs to clear (unmask) D-flag -ONLY- in case of recursive - * probe i.e. when probe hit from kprobe handler context upon - * executing the pre/post handlers. In this case we return with - * D-flag clear so that single-stepping can be carried-out. - * - * Leave D-flag set in all other cases. + * When PSTATE.D is set (masked), then software step exceptions can not be + * generated. + * SPSR's D bit shows the value of PSTATE.D immediately before the + * exception was taken. PSTATE.D is set while entering into any exception + * mode, however software clears it for any normal (none-debug-exception) + * mode in the exception entry. Therefore, when we are entering into kprobe + * breakpoint handler from any normal mode then SPSR.D bit is already + * cleared, however it is set when we are entering from any debug exception + * mode. 
+ * Since we always need to generate single step exception after a kprobe + * breakpoint exception therefore we need to clear it unconditionally, when + * we become sure that the current breakpoint exception is for kprobe. */ static void __kprobes spsr_set_debug_flag(struct pt_regs *regs, int mask) @@ -245,10 +250,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, set_ss_context(kcb, slot); /* mark pending ss */ - if (kcb->kprobe_status == KPROBE_REENTER) - spsr_set_debug_flag(regs, 0); - else - WARN_ON(regs->pstate & PSR_D_BIT); + spsr_set_debug_flag(regs, 0); /* IRQs and single stepping do not mix well. */ kprobes_save_local_irqflag(kcb, regs); @@ -333,8 +335,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) BUG(); kernel_disable_single_step(); - if (kcb->kprobe_status == KPROBE_REENTER) - spsr_set_debug_flag(regs, 1); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); @@ -457,9 +457,6 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) kprobes_restore_local_irqflag(kcb, regs); kernel_disable_single_step(); - if (kcb->kprobe_status == KPROBE_REENTER) - spsr_set_debug_flag(regs, 1); - post_kprobe_handler(kcb, regs); } From cab15ce604e550020bb7115b779013b91bcdbc21 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 11 Aug 2016 18:44:50 +0100 Subject: [PATCH 012/100] arm64: Introduce execute-only page access permissions The ARMv8 architecture allows execute-only user permissions by clearing the PTE_UXN and PTE_USER bits. However, the kernel running on a CPU implementation without User Access Override (ARMv8.2 onwards) can still access such page, so execute-only page permission does not protect against read(2)/write(2) etc. accesses. Systems requiring such protection must enable features like SECCOMP. This patch changes the arm64 __P100 and __S100 protection_map[] macros to the new __PAGE_EXECONLY attributes. 
A side effect is that pte_user() no longer triggers for __PAGE_EXECONLY since PTE_USER isn't set. To work around this, the check is done on the PTE_NG bit via the pte_ng() macro. VM_READ is also checked now for page faults. Reviewed-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 5 +++-- arch/arm64/include/asm/pgtable.h | 10 +++++----- arch/arm64/mm/fault.c | 5 ++--- mm/mmap.c | 5 +++++ 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 39f5252673f7..2142c7726e76 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -70,12 +70,13 @@ #define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_COPY #define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_EXEC +#define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_COPY_EXEC #define __P111 PAGE_COPY_EXEC @@ -84,7 +85,7 @@ #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC +#define __S100 PAGE_EXECONLY #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e20bd431184a..7ba1cebb64d9 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -73,7 +73,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define 
pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) -#define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) +#define pte_ng(pte) (!!(pte_val(pte) & PTE_NG)) #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) @@ -84,8 +84,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -#define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) +#define pte_valid_global(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_NG)) == PTE_VALID) #define pte_valid_young(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) @@ -168,7 +168,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte) * Only if the new pte is valid and kernel, otherwise TLB maintenance * or update_mmu_cache() have the necessary barriers. */ - if (pte_valid_not_user(pte)) { + if (pte_valid_global(pte)) { dsb(ishst); isb(); } @@ -202,7 +202,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_val(pte) &= ~PTE_RDONLY; else pte_val(pte) |= PTE_RDONLY; - if (pte_user(pte) && pte_exec(pte) && !pte_special(pte)) + if (pte_ng(pte) && pte_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte, addr); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 05d2bd776c69..a5f098a5f602 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -251,8 +251,7 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr, good_area: /* * Check that the permissions on the VMA allow for the fault which - * occurred. If we encountered a write or exec fault, we must have - * appropriate permissions, otherwise we allow any permission. + * occurred. 
*/ if (!(vma->vm_flags & vm_flags)) { fault = VM_FAULT_BADACCESS; @@ -288,7 +287,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (notify_page_fault(regs, esr)) diff --git a/mm/mmap.c b/mm/mmap.c index ca9d91bca0d6..69cad562cd00 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -88,6 +88,11 @@ static void unmap_region(struct mm_struct *mm, * w: (no) no w: (no) no w: (copy) copy w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes * + * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and + * MAP_PRIVATE: + * r: (no) no + * w: (no) no + * x: (yes) yes */ pgprot_t protection_map[16] = { __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, From ee78fdc71db1ce9a437b9ca17e31063996b71ec1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 24 Aug 2016 18:27:28 +0100 Subject: [PATCH 013/100] arm64: Create sections.h Each time new section markers are added, kernel/vmlinux.ld.S is updated, and new extern char __start_foo[] definitions are scattered through the tree. Create asm/include/sections.h to collect these definitions (and include the existing asm-generic version). 
Signed-off-by: James Morse Reviewed-by: Mark Rutland Tested-by: Mark Rutland Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/Kbuild | 1 - arch/arm64/include/asm/sections.h | 29 +++++++++++++++++++++++++++++ arch/arm64/include/asm/traps.h | 6 +----- arch/arm64/include/asm/virt.h | 9 +-------- arch/arm64/kernel/alternative.c | 7 +++---- arch/arm64/kernel/hibernate.c | 6 ------ arch/arm64/kernel/probes/kprobes.c | 5 +---- 7 files changed, 35 insertions(+), 28 deletions(-) create mode 100644 arch/arm64/include/asm/sections.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index f43d2c44c765..2b3d2d24acba 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -32,7 +32,6 @@ generic-y += poll.h generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h -generic-y += sections.h generic-y += segment.h generic-y += sembuf.h generic-y += serial.h diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h new file mode 100644 index 000000000000..237fcdd13445 --- /dev/null +++ b/arch/arm64/include/asm/sections.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2016 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#ifndef __ASM_SECTIONS_H +#define __ASM_SECTIONS_H + +#include + +extern char __alt_instructions[], __alt_instructions_end[]; +extern char __exception_text_start[], __exception_text_end[]; +extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; +extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; +extern char __hyp_text_start[], __hyp_text_end[]; +extern char __idmap_text_start[], __idmap_text_end[]; +extern char __irqentry_text_start[], __irqentry_text_end[]; + +#endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 9cd03f3e812f..02e9035b0685 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -19,6 +19,7 @@ #define __ASM_TRAP_H #include +#include struct pt_regs; @@ -39,9 +40,6 @@ void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr); #ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { - extern char __irqentry_text_start[]; - extern char __irqentry_text_end[]; - return ptr >= (unsigned long)&__irqentry_text_start && ptr < (unsigned long)&__irqentry_text_end; } @@ -54,8 +52,6 @@ static inline int __in_irqentry_text(unsigned long ptr) static inline int in_exception_text(unsigned long ptr) { - extern char __exception_text_start[]; - extern char __exception_text_end[]; int in; in = ptr >= (unsigned long)&__exception_text_start && diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 1788545f25bc..db5739413677 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -45,6 +45,7 @@ #ifndef __ASSEMBLY__ #include +#include /* * __boot_cpu_mode records what mode CPUs were booted in. 
@@ -87,14 +88,6 @@ extern void verify_cpu_run_el(void); static inline void verify_cpu_run_el(void) {} #endif -/* The section containing the hypervisor idmap text */ -extern char __hyp_idmap_text_start[]; -extern char __hyp_idmap_text_end[]; - -/* The section containing the hypervisor text */ -extern char __hyp_text_start[]; -extern char __hyp_text_end[]; - #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index d2ee1b21a10d..4434dabde898 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -25,14 +25,13 @@ #include #include #include +#include #include #define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f) #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - struct alt_region { struct alt_instr *begin; struct alt_instr *end; @@ -124,8 +123,8 @@ static int __apply_alternatives_multi_stop(void *unused) { static int patched = 0; struct alt_region region = { - .begin = __alt_instructions, - .end = __alt_instructions_end, + .begin = (struct alt_instr *)__alt_instructions, + .end = (struct alt_instr *)__alt_instructions_end, }; /* We always have a CPU 0 at this point (__init) */ diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 65d81f965e74..b4082017c4cb 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -54,12 +54,6 @@ extern int in_suspend; /* Do we need to reset el2? */ #define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) -/* - * Start/end of the hibernate exit code, this must be copied to a 'safe' - * location in memory, and executed from there. - */ -extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; - /* temporary el2 vectors in the __hibernate_exit_text section. 
*/ extern char hibernate_el2_vectors[]; diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 0354ffeb2ed5..f97a58111e10 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "decode-insn.h" @@ -540,9 +540,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) bool arch_within_kprobe_blacklist(unsigned long addr) { - extern char __idmap_text_start[], __idmap_text_end[]; - extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; - if ((addr >= (unsigned long)__kprobes_text_start && addr < (unsigned long)__kprobes_text_end) || (addr >= (unsigned long)__entry_text_start && From b61130381120398876b86282082ad9f24976dfcf Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 24 Aug 2016 18:27:29 +0100 Subject: [PATCH 014/100] arm64: vmlinux.ld: Add mmuoff data sections and move mmuoff text into idmap Resume from hibernate needs to clean any text executed by the kernel with the MMU off to the PoC. Collect these functions together into the .idmap.text section as all this code is tightly coupled and also needs the same cleaning after resume. Data is more complicated, secondary_holding_pen_release is written with the MMU on, clean and invalidated, then read with the MMU off. In contrast __boot_cpu_mode is written with the MMU off, the corresponding cache line is invalidated, so when we read it with the MMU on we don't get stale data. These cache maintenance operations conflict with each other if the values are within a Cache Writeback Granule (CWG) of each other. Collect the data into two sections .mmuoff.data.read and .mmuoff.data.write, the linker script ensures mmuoff.data.write section is aligned to the architectural maximum CWG of 2KB. 
Signed-off-by: James Morse Cc: Ard Biesheuvel Cc: Mark Rutland Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/sections.h | 1 + arch/arm64/kernel/head.S | 25 +++++++++++++++---------- arch/arm64/kernel/sleep.S | 2 +- arch/arm64/kernel/smp_spin_table.c | 3 ++- arch/arm64/kernel/vmlinux.lds.S | 19 +++++++++++++++++++ arch/arm64/mm/proc.S | 4 ++++ 6 files changed, 42 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 237fcdd13445..4e7e7067afdb 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -25,5 +25,6 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; extern char __idmap_text_start[], __idmap_text_end[]; extern char __irqentry_text_start[], __irqentry_text_end[]; +extern char __mmuoff_data_start[], __mmuoff_data_end[]; #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 219676253dbc..9ee9666c2e34 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -463,7 +463,7 @@ ENDPROC(__primary_switched) * end early head section, begin head code that is also used for * hotplug and needs to have the same protections as the text region */ - .section ".text","ax" + .section ".idmap.text","ax" ENTRY(kimage_vaddr) .quad _text - TEXT_OFFSET @@ -618,6 +618,13 @@ set_cpu_boot_mode_flag: ret ENDPROC(set_cpu_boot_mode_flag) +/* + * These values are written with the MMU off, but read with the MMU on. + * Writers will invalidate the corresponding address, discarding up to a + * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures + * sufficient alignment that the CWG doesn't overlap another section. + */ + .pushsection ".mmuoff.data.write", "aw" /* * We need to find out the CPU boot mode long after boot, so we need to * store it in a writable variable. 
@@ -625,11 +632,16 @@ ENDPROC(set_cpu_boot_mode_flag) * This is not in .bss, because we set it sufficiently early that the boot-time * zeroing of .bss would clobber it. */ - .pushsection .data..cacheline_aligned - .align L1_CACHE_SHIFT ENTRY(__boot_cpu_mode) .long BOOT_CPU_MODE_EL2 .long BOOT_CPU_MODE_EL1 +/* + * The booting CPU updates the failed status @__early_cpu_boot_status, + * with MMU turned off. + */ +ENTRY(__early_cpu_boot_status) + .long 0 + .popsection /* @@ -702,12 +714,6 @@ ENDPROC(__secondary_switched) dc ivac, \tmp1 // Invalidate potentially stale cache line .endm - .pushsection .data..cacheline_aligned - .align L1_CACHE_SHIFT -ENTRY(__early_cpu_boot_status) - .long 0 - .popsection - /* * Enable the MMU. * @@ -719,7 +725,6 @@ ENTRY(__early_cpu_boot_status) * Checks if the selected granule size is supported by the CPU. * If it isn't, park the CPU */ - .section ".idmap.text", "ax" ENTRY(__enable_mmu) mrs x22, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 182129b60fdf..1fac020761da 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -97,6 +97,7 @@ ENTRY(__cpu_suspend_enter) ENDPROC(__cpu_suspend_enter) .ltorg + .pushsection ".idmap.text", "ax" ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly /* enable the MMU early - so we can access sleep_save_stash by va */ @@ -105,7 +106,6 @@ ENTRY(cpu_resume) b __cpu_setup ENDPROC(cpu_resume) - .pushsection ".idmap.text", "ax" _resume_switched: ldr x8, =_cpu_resume br x8 diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 18a71bcd26ee..9a00eee9acc8 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -29,7 +29,8 @@ #include extern void secondary_holding_pen(void); -volatile unsigned long secondary_holding_pen_release = INVALID_HWID; +volatile unsigned long __section(".mmuoff.data.read") 
+secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 659963d40bb4..5ce9b2929e0d 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -185,6 +185,25 @@ SECTIONS _data = .; _sdata = .; RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + + /* + * Data written with the MMU off but read with the MMU on requires + * cache lines to be invalidated, discarding up to a Cache Writeback + * Granule (CWG) of data from the cache. Keep the section that + * requires this type of maintenance to be in its own Cache Writeback + * Granule (CWG) area so the cache maintenance operations don't + * interfere with adjacent data. + */ + .mmuoff.data.write : ALIGN(SZ_2K) { + __mmuoff_data_start = .; + *(.mmuoff.data.write) + } + . = ALIGN(SZ_2K); + .mmuoff.data.read : { + *(.mmuoff.data.read) + __mmuoff_data_end = .; + } + PECOFF_EDATA_PADDING _edata = .; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 5bb61de23201..5eb35964ab8e 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -83,6 +83,7 @@ ENDPROC(cpu_do_suspend) * * x0: Address of context pointer */ + .pushsection ".idmap.text", "ax" ENTRY(cpu_do_resume) ldp x2, x3, [x0] ldp x4, x5, [x0, #16] @@ -111,6 +112,7 @@ ENTRY(cpu_do_resume) isb ret ENDPROC(cpu_do_resume) + .popsection #endif /* @@ -172,6 +174,7 @@ ENDPROC(idmap_cpu_replace_ttbr1) * Initialise the processor for turning the MMU on. Return in x0 the * value of the SCTLR_EL1 register. 
*/ + .pushsection ".idmap.text", "ax" ENTRY(__cpu_setup) tlbi vmalle1 // Invalidate local TLB dsb nsh @@ -257,3 +260,4 @@ ENDPROC(__cpu_setup) crval: .word 0xfcffffff // clear .word 0x34d5d91d // set + .popsection From 5ebe3a44cc744d11cb60d8438106a9322b7c04dc Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 24 Aug 2016 18:27:30 +0100 Subject: [PATCH 015/100] arm64: hibernate: Support DEBUG_PAGEALLOC DEBUG_PAGEALLOC removes the valid bit of page table entries to prevent any access to unallocated memory. Hibernate uses this as a hint that those pages don't need to be saved/restored. This patch adds the kernel_page_present() function it uses. hibernate.c copies the resume kernel's linear map for use during restore. Add _copy_pte() to fill-in the holes made by DEBUG_PAGEALLOC in the resume kernel, so we can restore data the original kernel had at these addresses. Finally, DEBUG_PAGEALLOC means the linear-map alias of KERNEL_START to KERNEL_END may have holes in it, so we can't lazily clean this whole area to the PoC. Only clean the new mmuoff region, and the kernel/kvm idmaps. This reverts commit da24eb1f3f9e2c7b75c5f8c40d8e48e2c4789596. 
Reported-by: Will Deacon Signed-off-by: James Morse Cc: Catalin Marinas Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 - arch/arm64/include/asm/pgtable.h | 10 +++++++ arch/arm64/kernel/hibernate.c | 45 +++++++++++++++++++++++++------- arch/arm64/mm/pageattr.c | 41 ++++++++++++++++++++++++++++- 4 files changed, 86 insertions(+), 11 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bc3f00f586f1..9be0c164df4e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -607,7 +607,6 @@ source kernel/Kconfig.preempt source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC - depends on !HIBERNATION def_bool y config ARCH_HAS_HOLES_MEMORYMODEL diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7ba1cebb64d9..ffbb9a520563 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -155,6 +155,16 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } +static inline pte_t pte_clear_rdonly(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_RDONLY)); +} + +static inline pte_t pte_mkpresent(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_VALID)); +} + static inline pmd_t pmd_mkcont(pmd_t pmd) { return __pmd(pmd_val(pmd) | PMD_SECT_CONT); diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index b4082017c4cb..71d82cfb62f5 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -235,6 +235,7 @@ out: return rc; } +#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) int swsusp_arch_suspend(void) { @@ -252,8 +253,13 @@ int swsusp_arch_suspend(void) if (__cpu_suspend_enter(&state)) { ret = swsusp_save(); } else { - /* Clean kernel to PoC for secondary core startup */ - __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START); + /* Clean kernel core startup/idle code to PoC*/ + dcache_clean_range(__mmuoff_data_start, 
__mmuoff_data_end); + dcache_clean_range(__idmap_text_start, __idmap_text_end); + + /* Clean kvm setup code to PoC? */ + if (el2_reset_needed()) + dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); /* * Tell the hibernation core that we've just restored @@ -269,6 +275,33 @@ int swsusp_arch_suspend(void) return ret; } +static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) +{ + pte_t pte = *src_pte; + + if (pte_valid(pte)) { + /* + * Resume will overwrite areas that may be marked + * read only (code, rodata). Clear the RDONLY bit from + * the temporary mappings we use during restore. + */ + set_pte(dst_pte, pte_clear_rdonly(pte)); + } else if (debug_pagealloc_enabled() && !pte_none(pte)) { + /* + * debug_pagealloc will removed the PTE_VALID bit if + * the page isn't in use by the resume kernel. It may have + * been in use by the original kernel, in which case we need + * to put it back in our copy to do the restore. + * + * Before marking this entry valid, check the pfn should + * be mapped. + */ + BUG_ON(!pfn_valid(pte_pfn(pte))); + + set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte))); + } +} + static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, unsigned long end) { @@ -284,13 +317,7 @@ static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, src_pte = pte_offset_kernel(src_pmd, start); do { - if (!pte_none(*src_pte)) - /* - * Resume will overwrite areas that may be marked - * read only (code, rodata). Clear the RDONLY bit from - * the temporary mappings we use during restore. 
- */ - set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY)); + _copy_pte(dst_pte, src_pte, addr); } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); return 0; diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index ca6d268e3313..8def55e7249b 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -139,4 +139,43 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) __pgprot(0), __pgprot(PTE_VALID)); } -#endif +#ifdef CONFIG_HIBERNATION +/* + * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function + * is used to determine if a linear map page has been marked as not-valid by + * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit. + * This is based on kern_addr_valid(), which almost does what we need. + * + * Because this is only called on the kernel linear map, p?d_sect() implies + * p?d_present(). When debug_pagealloc is enabled, sections mappings are + * disabled. + */ +bool kernel_page_present(struct page *page) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long addr = (unsigned long)page_address(page); + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return false; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return false; + if (pud_sect(*pud)) + return true; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return false; + if (pmd_sect(*pmd)) + return true; + + pte = pte_offset_kernel(pmd, addr); + return pte_valid(*pte); +} +#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_DEBUG_PAGEALLOC */ From e7cd190385d17790cc3eb3821b1094b00aacf325 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Mon, 22 Aug 2016 15:55:24 +0900 Subject: [PATCH 016/100] arm64: mark reserved memblock regions explicitly in iomem Kdump(kexec-tools) parses /proc/iomem to identify all the memory regions on the system. 
Since the current kernel names "nomap" regions, like UEFI runtime services code/data, as "System RAM," kexec-tools sets up elf core header to include them in a crash dump file (/proc/vmcore). Then crash dump kernel parses UEFI memory map again, re-marks those regions as "nomap" and does not create a memory mapping for them unlike the other areas of System RAM. In this case, copying /proc/vmcore through copy_oldmem_page() on crash dump kernel will end up with a kernel abort, as reported in [1]. This patch names all the "nomap" regions explicitly as "reserved" so that we can exclude them from a crash dump file. acpi_os_ioremap() must also be modified because those regions have WB attributes [2]. Apart from kdump, this change also matches x86's use of acpi (and /proc/iomem). [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-August/448186.html [2] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-August/450089.html Reviewed-by: Catalin Marinas Tested-by: James Morse Reviewed-by: James Morse Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon --- arch/arm64/include/asm/acpi.h | 8 ++++++-- arch/arm64/kernel/setup.c | 9 +++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 5420cb0fcb3e..e517088d635f 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,7 +12,7 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H -#include +#include #include #include @@ -32,7 +32,11 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { - if (!page_is_ram(phys >> PAGE_SHIFT)) + /* + * EFI's reserve_regions() call adds memory with the WB attribute + * to memblock via early_init_dt_add_memory_arch(). 
+ */ + if (!memblock_is_memory(phys)) return ioremap(phys, size); return ioremap_cache(phys, size); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 536dce22fe76..514b4e3ba029 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -206,10 +206,15 @@ static void __init request_standard_resources(void) for_each_memblock(memory, region) { res = alloc_bootmem_low(sizeof(*res)); - res->name = "System RAM"; + if (memblock_is_nomap(region)) { + res->name = "reserved"; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + } else { + res->name = "System RAM"; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + } res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; - res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); From 40982fd6b975de4a51ce5147bc1d698c3b075634 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 25 Aug 2016 17:23:23 +0100 Subject: [PATCH 017/100] arm64: always enable DEBUG_RODATA and remove the Kconfig option Follow the example set by x86 in commit 9ccaf77cf05915f5 ("x86/mm: Always enable CONFIG_DEBUG_RODATA and remove the Kconfig option"), and make these protections a fundamental security feature rather than an opt-in. This also results in a minor code simplification. For those rare cases when users wish to disable this protection (e.g. for debugging), this can be done by passing 'rodata=off' on the command line. As DEBUG_RODATA_ALIGN is only intended to address a performance/memory tradeoff, and does not affect correctness, this is left user-selectable. DEBUG_MODULE_RONX is also left user-selectable until the core code provides a boot-time option to disable the protection for debugging use-cases. 
Cc: Catalin Marinas Acked-by: Ard Biesheuvel Acked-by: Kees Cook Acked-by: Laura Abbott Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 3 +++ arch/arm64/Kconfig.debug | 10 ---------- arch/arm64/kernel/insn.c | 2 +- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9be0c164df4e..1e49fbc4b075 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -122,6 +122,9 @@ config ARCH_PHYS_ADDR_T_64BIT config MMU def_bool y +config DEBUG_RODATA + def_bool y + config ARM64_PAGE_SHIFT int default 16 if ARM64_64K_PAGES diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 0cc758cdd0dc..b661fe742615 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -49,16 +49,6 @@ config DEBUG_SET_MODULE_RONX If in doubt, say Y. -config DEBUG_RODATA - bool "Make kernel text and rodata read-only" - default y - help - If this is set, kernel text and rodata will be made read-only. This - is to help catch accidental or malicious attempts to change the - kernel's executable code. - - If in doubt, say Y. 
- config DEBUG_ALIGN_RODATA depends on DEBUG_RODATA bool "Align linker sections up to SECTION_SIZE" diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 63f9432d05e8..178488fc775f 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -96,7 +96,7 @@ static void __kprobes *patch_map(void *addr, int fixmap) if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) page = vmalloc_to_page(addr); - else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA)) + else if (!module) page = pfn_to_page(PHYS_PFN(__pa(addr))); else return addr; From d391e552293399396c131544f5b1c2f9b1fb0baa Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 17 Aug 2016 13:50:25 +0100 Subject: [PATCH 018/100] cpu/hotplug: Allow suspend/resume CPU to be specified disable_nonboot_cpus() assumes that the lowest numbered online CPU is the boot CPU, and that this is the correct CPU to run any power management code on. On x86 this is always correct, as CPU0 cannot (easily) be taken offline. On arm64 CPU0 can be taken offline. For hibernate/resume this means we may hibernate on a CPU other than CPU0. If the system is rebooted with kexec 'CPU0' will be assigned to a different physical CPU. This complicates hibernate/resume as now we can't trust the CPU numbers. Arch code can find the correct physical CPU, and ensure it is online before resume from hibernate begins, but also needs to influence disable_nonboot_cpus()'s choice of CPU. Rename disable_nonboot_cpus() as freeze_secondary_cpus() and add an argument indicating which CPU should be left standing. Follow the logic in migrate_to_reboot_cpu() to use the lowest numbered online CPU if the requested CPU is not online. Add disable_nonboot_cpus() as an inline function that has the existing behaviour. Cc: Rafael J.
Wysocki Reviewed-by: Thomas Gleixner Signed-off-by: James Morse Signed-off-by: Will Deacon --- include/linux/cpu.h | 6 +++++- kernel/cpu.c | 9 +++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 797d9c8e9a1b..ad4f1f33a74e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -228,7 +228,11 @@ static inline void cpu_hotplug_done(void) {} #endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM_SLEEP_SMP -extern int disable_nonboot_cpus(void); +extern int freeze_secondary_cpus(int primary); +static inline int disable_nonboot_cpus(void) +{ + return freeze_secondary_cpus(0); +} extern void enable_nonboot_cpus(void); #else /* !CONFIG_PM_SLEEP_SMP */ static inline int disable_nonboot_cpus(void) { return 0; } diff --git a/kernel/cpu.c b/kernel/cpu.c index 341bf80f80bd..ebbf027dd4a1 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1024,12 +1024,13 @@ EXPORT_SYMBOL_GPL(cpu_up); #ifdef CONFIG_PM_SLEEP_SMP static cpumask_var_t frozen_cpus; -int disable_nonboot_cpus(void) +int freeze_secondary_cpus(int primary) { - int cpu, first_cpu, error = 0; + int cpu, error = 0; cpu_maps_update_begin(); - first_cpu = cpumask_first(cpu_online_mask); + if (!cpu_online(primary)) + primary = cpumask_first(cpu_online_mask); /* * We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time @@ -1038,7 +1039,7 @@ int disable_nonboot_cpus(void) pr_info("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { - if (cpu == first_cpu) + if (cpu == primary) continue; trace_suspend_resume(TPS("CPU_OFF"), cpu, true); error = _cpu_down(cpu, 1, CPUHP_OFFLINE); From 8ec058fd2710da1df463c19a4e0ee55ac4530f09 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 17 Aug 2016 13:50:26 +0100 Subject: [PATCH 019/100] arm64: hibernate: Resume when hibernate image created on non-boot CPU disable_nonboot_cpus() assumes that the lowest numbered online CPU is the boot CPU, 
and that this is the correct CPU to run any power management code on. On arm64 CPU0 can be taken offline. For hibernate/resume this means we may hibernate on a CPU other than CPU0. If the system is rebooted with kexec 'CPU0' will be assigned to a different CPU. This complicates hibernate/resume as now we can't trust the CPU numbers. We currently forbid hibernate if CPU0 has been hotplugged out to avoid this situation without kexec. Save the MPIDR of the CPU we hibernated on in the hibernate arch-header, use hibernate_resume_nonboot_cpu_disable() to direct which CPU we should resume on based on the MPIDR of the CPU we hibernated on. This allows us to hibernate/resume on any CPU, even if the logical numbers have been shuffled by kexec. Signed-off-by: James Morse Cc: Mark Rutland Cc: Lorenzo Pieralisi Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/suspend.h | 3 ++ arch/arm64/kernel/hibernate.c | 51 ++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 024d623f662e..b8a313fd7a09 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -47,4 +47,7 @@ int swsusp_arch_resume(void); int arch_hibernation_header_save(void *addr, unsigned int max_size); int arch_hibernation_header_restore(void *addr); +/* Used to resume on the CPU we hibernated on */ +int hibernate_resume_nonboot_cpu_disable(void); + #endif diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 71d82cfb62f5..3c74b9fa760c 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -15,6 +15,7 @@ * License terms: GNU General Public License (GPL) version 2 */ #define pr_fmt(x) "hibernate: " x +#include #include #include #include @@ -26,6 +27,7 @@ #include #include +#include #include #include #include @@ -34,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +63,12 @@ 
extern char hibernate_el2_vectors[]; /* hyp-stub vectors, used to restore el2 during resume from hibernate. */ extern char __hyp_stub_vectors[]; +/* + * The logical cpu number we should resume on, initialised to a non-cpu + * number. + */ +static int sleep_cpu = -EINVAL; + /* * Values that may not change over hibernate/resume. We put the build number * and date in here so that we guarantee not to resume with a different @@ -82,6 +91,8 @@ static struct arch_hibernate_hdr { * re-configure el2. */ phys_addr_t __hyp_stub_vectors; + + u64 sleep_cpu_mpidr; } resume_hdr; static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) @@ -124,12 +135,22 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) else hdr->__hyp_stub_vectors = 0; + /* Save the mpidr of the cpu we called cpu_suspend() on... */ + if (sleep_cpu < 0) { + pr_err("Failing to hibernate on an unkown CPU.\n"); + return -ENODEV; + } + hdr->sleep_cpu_mpidr = cpu_logical_map(sleep_cpu); + pr_info("Hibernating on CPU %d [mpidr:0x%llx]\n", sleep_cpu, + hdr->sleep_cpu_mpidr); + return 0; } EXPORT_SYMBOL(arch_hibernation_header_save); int arch_hibernation_header_restore(void *addr) { + int ret; struct arch_hibernate_hdr_invariants invariants; struct arch_hibernate_hdr *hdr = addr; @@ -139,6 +160,24 @@ int arch_hibernation_header_restore(void *addr) return -EINVAL; } + sleep_cpu = get_logical_index(hdr->sleep_cpu_mpidr); + pr_info("Hibernated on CPU %d [mpidr:0x%llx]\n", sleep_cpu, + hdr->sleep_cpu_mpidr); + if (sleep_cpu < 0) { + pr_crit("Hibernated on a CPU not known to this kernel!\n"); + sleep_cpu = -EINVAL; + return -EINVAL; + } + if (!cpu_online(sleep_cpu)) { + pr_info("Hibernated on a CPU that is offline! 
Bringing CPU up.\n"); + ret = cpu_up(sleep_cpu); + if (ret) { + pr_err("Failed to bring hibernate-CPU up!\n"); + sleep_cpu = -EINVAL; + return ret; + } + } + resume_hdr = *hdr; return 0; @@ -251,6 +290,7 @@ int swsusp_arch_suspend(void) local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { + sleep_cpu = smp_processor_id(); ret = swsusp_save(); } else { /* Clean kernel core startup/idle code to PoC*/ @@ -267,6 +307,7 @@ int swsusp_arch_suspend(void) */ in_suspend = 0; + sleep_cpu = -EINVAL; __cpu_suspend_exit(); } @@ -528,3 +569,13 @@ static int __init check_boot_cpu_online_init(void) return 0; } core_initcall(check_boot_cpu_online_init); + +int hibernate_resume_nonboot_cpu_disable(void) +{ + if (sleep_cpu < 0) { + pr_err("Failing to resume from hibernate on an unkown CPU.\n"); + return -ENODEV; + } + + return freeze_secondary_cpus(sleep_cpu); +} From b2d8b0cb6ca9cb81dd71626642f764ac70d10813 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 17 Aug 2016 13:50:27 +0100 Subject: [PATCH 020/100] Revert "arm64: hibernate: Refuse to hibernate if the boot cpu is offline" Now that we use the MPIDR to resume on the same CPU that we hibernated on, we no longer need to refuse to hibernate if the boot cpu is offline. (Which we can't possibly know if kexec causes logical CPUs to be renumbered). This reverts commit 1fe492ce6482b77807b25d29690a48c46456beee. 
Signed-off-by: James Morse Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 3c74b9fa760c..d55a7b09959b 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -545,31 +544,6 @@ out: return rc; } -static int check_boot_cpu_online_pm_callback(struct notifier_block *nb, - unsigned long action, void *ptr) -{ - if (action == PM_HIBERNATION_PREPARE && - cpumask_first(cpu_online_mask) != 0) { - pr_warn("CPU0 is offline.\n"); - return notifier_from_errno(-ENODEV); - } - - return NOTIFY_OK; -} - -static int __init check_boot_cpu_online_init(void) -{ - /* - * Set this pm_notifier callback with a lower priority than - * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be - * called earlier to disable cpu hotplug before the cpu online check. - */ - pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX); - - return 0; -} -core_initcall(check_boot_cpu_online_init); - int hibernate_resume_nonboot_cpu_disable(void) { if (sleep_cpu < 0) { From 6ffe9923f2350c19b95a2c9ebf1b4f5f275986f2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Aug 2016 11:58:36 +0100 Subject: [PATCH 021/100] arm64: errata: Pass --fix-cortex-a53-843419 to ld if workaround enabled Cortex-A53 erratum 843419 is worked around by the linker, although it is a configure-time option to GCC as to whether ld is actually asked to apply the workaround or not. This patch ensures that we pass --fix-cortex-a53-843419 to the linker when both CONFIG_ARM64_ERRATUM_843419=y and the linker supports the option. 
Acked-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 15 +++++---------- arch/arm64/Makefile | 8 ++++++++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1e49fbc4b075..0579f6e0a5fd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -418,18 +418,13 @@ config ARM64_ERRATUM_845719 config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" - depends on MODULES default y - select ARM64_MODULE_CMODEL_LARGE + select ARM64_MODULE_CMODEL_LARGE if MODULES help - This option builds kernel modules using the large memory model in - order to avoid the use of the ADRP instruction, which can cause - a subsequent memory access to use an incorrect address on Cortex-A53 - parts up to r0p4. - - Note that the kernel itself must be linked with a version of ld - which fixes potentially affected ADRP instructions through the - use of veneers. + This option links the kernel with '--fix-cortex-a53-843419' and + builds modules using the large memory model in order to avoid the use + of the ADRP instruction, which can cause a subsequent memory access + to use an incorrect address on Cortex-A53 parts up to r0p4. If unsure, say Y. 
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 5b54f8c021d8..0b53c752f23f 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -18,6 +18,14 @@ ifneq ($(CONFIG_RELOCATABLE),) LDFLAGS_vmlinux += -pie -Bsymbolic endif +ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) + ifeq ($(call ld-option, --fix-cortex-a53-843419),) +$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum) + else +LDFLAGS_vmlinux += --fix-cortex-a53-843419 + endif +endif + KBUILD_DEFCONFIG := defconfig # Check for binutils support for specific extensions From cfa88c79462d15098db29edebe623428c3620a4b Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Tue, 30 Aug 2016 10:31:35 +0200 Subject: [PATCH 022/100] arm64: Set UTS_MACHINE in the Makefile The make rpm target depends on proper UTS_MACHINE definition. Also, use the variable in arch/arm64/kernel/setup.c, so that it's not accidentally removed in the future. Reported-and-tested-by: Fabian Vogt Signed-off-by: Michal Marek Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 ++ arch/arm64/kernel/Makefile | 2 ++ arch/arm64/kernel/setup.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 0b53c752f23f..ab51aed6b6c1 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -46,10 +46,12 @@ ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian AS += -EB LD += -EB +UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian AS += -EL LD += -EL +UTS_MACHINE := aarch64 endif CHECKFLAGS += -D__aarch64__ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 14f7b651c787..7d66bbaafc0c 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -10,6 +10,8 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_return_address.o = -pg +CFLAGS_setup.o = -DUTS_MACHINE='"$(UTS_MACHINE)"' + # Object file lists. 
arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 514b4e3ba029..f534f492a268 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -233,7 +233,7 @@ void __init setup_arch(char **cmdline_p) { pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); - sprintf(init_utsname()->machine, ELF_PLATFORM); + sprintf(init_utsname()->machine, UTS_MACHINE); init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; From b4a4485ed4c78520f8fe8e63784903b11bedbbfd Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 30 Aug 2016 14:08:39 -0500 Subject: [PATCH 023/100] arm64: don't select PERF_USE_VMALLOC by default Any arm64 based parts that have cache aliasing issues can set it manually. Apparently dragged in from ARM(32) defaults in commit 8c2c3df "arm64: Build infrastructure". Signed-off-by: Kim Phillips Cc: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0579f6e0a5fd..e072033b27b2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -105,7 +105,6 @@ config ARM64 select OF_NUMA if NUMA && OF select OF_RESERVED_MEM select PCI_ECAM if ACPI - select PERF_USE_VMALLOC select POWER_RESET select POWER_SUPPLY select SPARSE_IRQ From 6a6a4f158037334cbad39817975a87333748c0c2 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 31 Aug 2016 20:38:50 +0800 Subject: [PATCH 024/100] arm64: cleanup unused UDBG_* define The UDBG_UNDEFINED/SYSCALL/BADABORT/SEGV are only used to show verbose user fault messages in arm, not arm64, drop them. 
Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/include/asm/system_misc.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 57f110bea6a8..bc812435bc76 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -56,12 +56,6 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); __show_ratelimited; \ }) -#define UDBG_UNDEFINED (1 << 0) -#define UDBG_SYSCALL (1 << 1) -#define UDBG_BADABORT (1 << 2) -#define UDBG_SEGV (1 << 3) -#define UDBG_BUS (1 << 4) - #endif /* __ASSEMBLY__ */ #endif /* __ASM_SYSTEM_MISC_H */ From 5e49d73c1d87de50353844d263c1c7664aefeec8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 31 Aug 2016 11:31:08 +0100 Subject: [PATCH 025/100] arm64: cpufeature: constify arm64_ftr_bits structures The arm64_ftr_bits structures are never modified, so make them read-only. Reviewed-by: Suzuki K Poulose Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 14 ++++----- arch/arm64/kernel/cpufeature.c | 46 +++++++++++++++-------------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 7099f26e3702..7c0b7cff17df 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -72,11 +72,11 @@ struct arm64_ftr_bits { * @sys_val Safe value across the CPUs (system view) */ struct arm64_ftr_reg { - u32 sys_id; - const char *name; - u64 strict_mask; - u64 sys_val; - struct arm64_ftr_bits *ftr_bits; + u32 sys_id; + const char *name; + u64 strict_mask; + u64 sys_val; + const struct arm64_ftr_bits *ftr_bits; }; /* scope of capability check */ @@ -157,7 +157,7 @@ cpuid_feature_extract_unsigned_field(u64 features, int field) return cpuid_feature_extract_unsigned_field_width(features, field, 4); } -static inline u64 
arm64_ftr_mask(struct arm64_ftr_bits *ftrp) +static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp) { return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); } @@ -170,7 +170,7 @@ cpuid_feature_extract_field(u64 features, int field, bool sign) cpuid_feature_extract_unsigned_field(features, field); } -static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val) +static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val) { return (s64)cpuid_feature_extract_field(val, ftrp->shift, ftrp->sign); } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 62272eac1352..eac76cb3a206 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -74,7 +74,7 @@ static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); -static struct arm64_ftr_bits ftr_id_aa64isar0[] = { +static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), @@ -87,7 +87,7 @@ static struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { +static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), @@ -101,7 +101,7 @@ static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { +static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -119,7 +119,7 @@ static struct 
arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { +static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), @@ -130,7 +130,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { +static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0), @@ -139,7 +139,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_ctr[] = { +static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ @@ -155,7 +155,7 @@ static struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_mmfr0[] = { +static const struct arm64_ftr_bits ftr_id_mmfr0[] = { S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ @@ -167,7 +167,7 @@ static struct arm64_ftr_bits ftr_id_mmfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { +static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), @@ -178,14 +178,14 @@ static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_END, }; 
-static struct arm64_ftr_bits ftr_mvfr2[] = { +static const struct arm64_ftr_bits ftr_mvfr2[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_dczid[] = { +static const struct arm64_ftr_bits ftr_dczid[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ @@ -193,7 +193,7 @@ static struct arm64_ftr_bits ftr_dczid[] = { }; -static struct arm64_ftr_bits ftr_id_isar5[] = { +static const struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), @@ -204,14 +204,14 @@ static struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_mmfr4[] = { +static const struct arm64_ftr_bits ftr_id_mmfr4[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */ ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_pfr0[] = { +static const struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ @@ -220,7 +220,7 @@ static struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_id_dfr0[] = { +static const struct arm64_ftr_bits ftr_id_dfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), @@ -238,7 +238,7 @@ 
static struct arm64_ftr_bits ftr_id_dfr0[] = { * 0. Covers the following 32bit registers: * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] */ -static struct arm64_ftr_bits ftr_generic_32bits[] = { +static const struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), @@ -250,17 +250,17 @@ static struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_generic[] = { +static const struct arm64_ftr_bits ftr_generic[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_generic32[] = { +static const struct arm64_ftr_bits ftr_generic32[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0), ARM64_FTR_END, }; -static struct arm64_ftr_bits ftr_aa64raz[] = { +static const struct arm64_ftr_bits ftr_aa64raz[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), ARM64_FTR_END, }; @@ -346,7 +346,8 @@ static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) search_cmp_ftr_reg); } -static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val) +static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg, + s64 ftr_val) { u64 mask = arm64_ftr_mask(ftrp); @@ -355,7 +356,8 @@ static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val return reg; } -static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur) +static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, + s64 cur) { s64 ret = 0; @@ -407,7 +409,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) { u64 val = 0; u64 strict_mask = ~0x0ULL; - struct arm64_ftr_bits *ftrp; + const struct arm64_ftr_bits *ftrp; struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); BUG_ON(!reg); @@ -464,7 +466,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) static void 
update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) { - struct arm64_ftr_bits *ftrp; + const struct arm64_ftr_bits *ftrp; for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val); From 6f2b7eeff9dbadeb7366d44086aa34792a996fc9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 31 Aug 2016 11:31:09 +0100 Subject: [PATCH 026/100] arm64: cpufeature: constify arm64_ftr_regs array Constify the arm64_ftr_regs array, by moving the mutable arm64_ftr_reg fields out of the array itself. This also streamlines the bsearch, since the entire array can be covered by fewer cachelines. Moving the payload out of the array also allows us to have special explicitly defined struct instance in case other code needs to refer to it directly. Note that this replaces the runtime sorting of the array with a runtime BUG() check whether the array is sorted correctly in the code. Reviewed-by: Suzuki K Poulose Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 1 - arch/arm64/kernel/cpufeature.c | 46 +++++++++++++---------------- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 7c0b7cff17df..8bb4f1527b26 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -72,7 +72,6 @@ struct arm64_ftr_bits { * @sys_val Safe value across the CPUs (system view) */ struct arm64_ftr_reg { - u32 sys_id; const char *name; u64 strict_mask; u64 sys_val; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index eac76cb3a206..cc7451a27d94 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -265,14 +265,17 @@ static const struct arm64_ftr_bits ftr_aa64raz[] = { ARM64_FTR_END, }; -#define ARM64_FTR_REG(id, table) \ - { \ - .sys_id = id, \ +#define ARM64_FTR_REG(id, table) { \ + .sys_id = id, \ + .reg = &(struct arm64_ftr_reg){ \ .name 
= #id, \ .ftr_bits = &((table)[0]), \ - } + }} -static struct arm64_ftr_reg arm64_ftr_regs[] = { +static const struct __ftr_reg_entry { + u32 sys_id; + struct arm64_ftr_reg *reg; +} arm64_ftr_regs[] = { /* Op1 = 0, CRn = 0, CRm = 1 */ ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), @@ -324,7 +327,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = { static int search_cmp_ftr_reg(const void *id, const void *regp) { - return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id; + return (int)(unsigned long)id - (int)((const struct __ftr_reg_entry *)regp)->sys_id; } /* @@ -339,11 +342,16 @@ static int search_cmp_ftr_reg(const void *id, const void *regp) */ static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) { - return bsearch((const void *)(unsigned long)sys_id, + const struct __ftr_reg_entry *ret; + + ret = bsearch((const void *)(unsigned long)sys_id, arm64_ftr_regs, ARRAY_SIZE(arm64_ftr_regs), sizeof(arm64_ftr_regs[0]), search_cmp_ftr_reg); + if (ret) + return ret->reg; + return NULL; } static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg, @@ -378,27 +386,13 @@ static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, return ret; } -static int __init sort_cmp_ftr_regs(const void *a, const void *b) -{ - return ((const struct arm64_ftr_reg *)a)->sys_id - - ((const struct arm64_ftr_reg *)b)->sys_id; -} - -static void __init swap_ftr_regs(void *a, void *b, int size) -{ - struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a; - *(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b; - *(struct arm64_ftr_reg *)b = tmp; -} - static void __init sort_ftr_regs(void) { - /* Keep the array sorted so that we can do the binary search */ - sort(arm64_ftr_regs, - ARRAY_SIZE(arm64_ftr_regs), - sizeof(arm64_ftr_regs[0]), - sort_cmp_ftr_regs, - swap_ftr_regs); + int i; + + /* Check that the array is sorted so that we can do the binary search */ + for (i = 1; i < ARRAY_SIZE(arm64_ftr_regs); i++) + 
BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id); } /* From 675b0563d6b26aa97bb8fe5bbde0ab9dc358433b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 31 Aug 2016 11:31:10 +0100 Subject: [PATCH 027/100] arm64: cpufeature: expose arm64_ftr_reg struct for CTR_EL0 Expose the arm64_ftr_reg struct covering CTR_EL0 outside of cpufeature.o so that other code can refer to it directly (i.e., without performing the binary search) Reviewed-by: Suzuki K Poulose Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 ++ arch/arm64/kernel/cpufeature.c | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8bb4f1527b26..c07c5d1cd04a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -78,6 +78,8 @@ struct arm64_ftr_reg { const struct arm64_ftr_bits *ftr_bits; }; +extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; + /* scope of capability check */ enum { SCOPE_SYSTEM, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index cc7451a27d94..c3d7ae48f92d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -155,6 +155,11 @@ static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_END, }; +struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { + .name = "SYS_CTR_EL0", + .ftr_bits = ftr_ctr +}; + static const struct arm64_ftr_bits ftr_id_mmfr0[] = { S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ @@ -318,7 +323,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), /* Op1 = 3, CRn = 0, CRm = 0 */ - ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr), + { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), /* Op1 = 3, CRn = 14, CRm = 0 */ From 3a402a709500c5a3faca2111668c33d96555e35a Mon Sep 17 00:00:00 
2001 From: Will Deacon Date: Fri, 26 Aug 2016 11:36:39 +0100 Subject: [PATCH 028/100] arm64: debug: avoid resetting stepping state machine when TIF_SINGLESTEP When TIF_SINGLESTEP is set for a task, the single-step state machine is enabled and we must take care not to reset it to the active-not-pending state if it is already in the active-pending state. Unfortunately, that's exactly what user_enable_single_step does, by unconditionally setting the SS bit in the SPSR for the current task. This causes failures in the GDB testsuite, where GDB ends up missing expected step traps if the instruction being stepped generates another trap, e.g. PTRACE_EVENT_FORK from an SVC instruction. This patch fixes the problem by preserving the current state of the stepping state machine when TIF_SINGLESTEP is set on the current thread. Cc: Reported-by: Yao Qi Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 91fff48d0f57..2751ff9c0934 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -435,8 +435,10 @@ NOKPROBE_SYMBOL(kernel_active_single_step); /* ptrace API */ void user_enable_single_step(struct task_struct *task) { - set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); - set_regs_spsr_ss(task_pt_regs(task)); + struct thread_info *ti = task_thread_info(task); + + if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP)) + set_regs_spsr_ss(task_pt_regs(task)); } NOKPROBE_SYMBOL(user_enable_single_step); From a842789837c0e3734357c6b4c54d39d60a1d24b1 Mon Sep 17 00:00:00 2001 From: zijun_hu Date: Thu, 1 Sep 2016 18:51:19 +0800 Subject: [PATCH 029/100] arm64: remove duplicate macro __KERNEL__ check remove duplicate macro __KERNEL__ check Signed-off-by: zijun_hu Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ace0a96e7d6e..df2e53d3a969 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -37,7 +37,6 @@ #include #include -#ifdef __KERNEL__ #define STACK_TOP_MAX TASK_SIZE_64 #ifdef CONFIG_COMPAT #define AARCH32_VECTORS_BASE 0xffff0000 @@ -49,7 +48,6 @@ extern phys_addr_t arm64_dma_phys_limit; #define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) -#endif /* __KERNEL__ */ struct debug_info { /* Have we suspended stepping by a debugger? */ From d7a83d127a64fd91ef1ad39b7e2d78db36cf388b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 Aug 2016 18:55:11 +0100 Subject: [PATCH 030/100] arm64: hw_breakpoint: convert CPU hotplug notifier to new infrastructure The arm64 hw_breakpoint implementation uses a CPU hotplug notifier to reset the {break,watch}point registers when CPUs come online. This patch converts the code to the new hotplug mechanism, whilst moving the invocation earlier to remove the need to disable IRQs explicitly in the driver (which could cause havoc if we trip a watchpoint in an IRQ handler whilst restoring the debug register state). Cc: Sebastian Andrzej Siewior Reviewed-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- arch/arm64/kernel/hw_breakpoint.c | 48 +++++++++++-------------------- arch/arm64/kernel/suspend.c | 10 ++++--- include/linux/cpuhotplug.h | 1 + 3 files changed, 23 insertions(+), 36 deletions(-) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 26a6bf77d272..948b73148d56 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -857,7 +857,7 @@ void hw_breakpoint_thread_switch(struct task_struct *next) /* * CPU initialisation. 
*/ -static void hw_breakpoint_reset(void *unused) +static int hw_breakpoint_reset(unsigned int cpu) { int i; struct perf_event **slots; @@ -888,28 +888,14 @@ static void hw_breakpoint_reset(void *unused) write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); } } -} -static int hw_breakpoint_reset_notify(struct notifier_block *self, - unsigned long action, - void *hcpu) -{ - if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) { - local_irq_disable(); - hw_breakpoint_reset(NULL); - local_irq_enable(); - } - return NOTIFY_OK; + return 0; } -static struct notifier_block hw_breakpoint_reset_nb = { - .notifier_call = hw_breakpoint_reset_notify, -}; - #ifdef CONFIG_CPU_PM -extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)); +extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)); #else -static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) { } #endif @@ -919,36 +905,34 @@ static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ static int __init arch_hw_breakpoint_init(void) { + int ret; + core_num_brps = get_num_brps(); core_num_wrps = get_num_wrps(); pr_info("found %d breakpoint and %d watchpoint registers.\n", core_num_brps, core_num_wrps); - cpu_notifier_register_begin(); - - /* - * Reset the breakpoint resources. We assume that a halting - * debugger will leave the world in a nice state for us. - */ - smp_call_function(hw_breakpoint_reset, NULL, 1); - hw_breakpoint_reset(NULL); - /* Register debug fault handlers. */ hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, TRAP_HWBKPT, "hw-breakpoint handler"); hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP, TRAP_HWBKPT, "hw-watchpoint handler"); - /* Register hotplug notifier. */ - __register_cpu_notifier(&hw_breakpoint_reset_nb); - - cpu_notifier_register_done(); + /* + * Reset the breakpoint resources. 
We assume that a halting + * debugger will leave the world in a nice state for us. + */ + ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, + "CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING", + hw_breakpoint_reset, NULL); + if (ret) + pr_err("failed to register CPU hotplug notifier: %d\n", ret); /* Register cpu_suspend hw breakpoint restore hook */ cpu_suspend_set_dbg_restorer(hw_breakpoint_reset); - return 0; + return ret; } arch_initcall(arch_hw_breakpoint_init); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index b616e365cee3..ad734142070d 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -23,8 +23,8 @@ unsigned long *sleep_save_stash; * time the notifier runs debug exceptions might have been enabled already, * with HW breakpoints registers content still in an unknown state. */ -static void (*hw_breakpoint_restore)(void *); -void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +static int (*hw_breakpoint_restore)(unsigned int); +void __init cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) { /* Prevent multiple restore hook initializations */ if (WARN_ON(hw_breakpoint_restore)) @@ -34,6 +34,8 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) void notrace __cpu_suspend_exit(void) { + unsigned int cpu = smp_processor_id(); + /* * We are resuming from reset with the idmap active in TTBR0_EL1. * We must uninstall the idmap and restore the expected MMU @@ -45,7 +47,7 @@ void notrace __cpu_suspend_exit(void) * Restore per-cpu offset before any kernel * subsystem relying on it has a chance to run. */ - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + set_my_cpu_offset(per_cpu_offset(cpu)); /* * Restore HW breakpoint registers to sane values @@ -53,7 +55,7 @@ void notrace __cpu_suspend_exit(void) * through local_dbg_restore. 
*/ if (hw_breakpoint_restore) - hw_breakpoint_restore(NULL); + hw_breakpoint_restore(cpu); } /* diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 242bf530edfc..3758fe6d5968 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -45,6 +45,7 @@ enum cpuhp_state { CPUHP_AP_PERF_METAG_STARTING, CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, CPUHP_AP_ARM_VFP_STARTING, + CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, CPUHP_AP_PERF_ARM_STARTING, CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, From e937dd5782688928d8c4050237b93b0a51faebee Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Aug 2016 11:29:17 +0100 Subject: [PATCH 031/100] arm64: debug: convert OS lock CPU hotplug notifier to new infrastructure The arm64 debug monitor initialisation code uses a CPU hotplug notifier to clear the OS lock when CPUs come online. This patch converts the code to the new hotplug mechanism. Cc: Sebastian Andrzej Siewior Reviewed-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 30 ++++++------------------------ include/linux/cpuhotplug.h | 1 + 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 2751ff9c0934..30821928106f 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -132,36 +132,18 @@ NOKPROBE_SYMBOL(disable_debug_monitors); /* * OS lock clearing. 
*/ -static void clear_os_lock(void *unused) +static int clear_os_lock(unsigned int cpu) { asm volatile("msr oslar_el1, %0" : : "r" (0)); + isb(); + return 0; } -static int os_lock_notify(struct notifier_block *self, - unsigned long action, void *data) -{ - if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) - clear_os_lock(NULL); - return NOTIFY_OK; -} - -static struct notifier_block os_lock_nb = { - .notifier_call = os_lock_notify, -}; - static int debug_monitors_init(void) { - cpu_notifier_register_begin(); - - /* Clear the OS lock. */ - on_each_cpu(clear_os_lock, NULL, 1); - isb(); - - /* Register hotplug handler. */ - __register_cpu_notifier(&os_lock_nb); - - cpu_notifier_register_done(); - return 0; + return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, + "CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING", + clear_os_lock, NULL); } postcore_initcall(debug_monitors_init); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 3758fe6d5968..8c999a202452 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -45,6 +45,7 @@ enum cpuhp_state { CPUHP_AP_PERF_METAG_STARTING, CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, CPUHP_AP_ARM_VFP_STARTING, + CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, CPUHP_AP_PERF_ARM_STARTING, CPUHP_AP_ARM_L2X0_STARTING, From 563cada03db9cb5df19b20290b65c4e5e1d21358 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 1 Sep 2016 14:35:59 +0100 Subject: [PATCH 032/100] arm64: kernel: do not need to reset UAO on exception entry Commit e19a6ee2460b ("arm64: kernel: Save and restore UAO and addr_limit on exception entry") states that exception handler inherits the original PSTATE.UAO value, so UAO needs to be reset explicitly. However, ARM 8.2 Extension documentation says: PSTATE.UAO is copied to SPSR_ELx.UAO and is then set to 0 on an exception taken from AArch64 to AArch64 so hardware already does the right thing.
Signed-off-by: Vladimir Murzin Acked-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6a64182822e5..6880dcc3b465 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -104,7 +104,7 @@ str x20, [sp, #S_ORIG_ADDR_LIMIT] mov x20, #TASK_SIZE_64 str x20, [tsk, #TI_ADDR_LIMIT] - ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) + /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 From b5fe242972ef3faaa6bcfe66cbacc7a0014faf89 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 31 Aug 2016 12:05:11 +0100 Subject: [PATCH 033/100] arm64: kernel: fix style issues in sleep.S This fixes a number of style issues in sleep.S. No functional changes are intended: - replace absolute literal references with relative references in __cpu_suspend_enter(), which executes from its virtual address - replace explicit lr assignment plus branch with bl in cpu_resume(), which aligns it with stext() and secondary_startup() - don't export _cpu_resume() - use adr_l for mpidr_hash reference, and fix the incorrect accompanying comment, which has been out of date since commit cabe1c81ea5be983 ("arm64: Change cpu_resume() to enable mmu early then access sleep_sp by va") - replace leading spaces with tabs, and add a bit of whitespace for readability Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/sleep.S | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 1fac020761da..6adc76bf8f91 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -73,10 +73,9 @@ ENTRY(__cpu_suspend_enter) str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] /* find the mpidr_hash */ - 
ldr x1, =sleep_save_stash - ldr x1, [x1] + ldr_l x1, sleep_save_stash mrs x7, mpidr_el1 - ldr x9, =mpidr_hash + adr_l x9, mpidr_hash ldr x10, [x9, #MPIDR_HASH_MASK] /* * Following code relies on the struct mpidr_hash @@ -95,15 +94,14 @@ ENTRY(__cpu_suspend_enter) mov x0, #1 ret ENDPROC(__cpu_suspend_enter) - .ltorg .pushsection ".idmap.text", "ax" ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly + bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ - adr_l lr, __enable_mmu /* __cpu_setup will return here */ adr_l x27, _resume_switched /* __enable_mmu will branch here */ - b __cpu_setup + b __enable_mmu ENDPROC(cpu_resume) _resume_switched: @@ -113,16 +111,17 @@ ENDPROC(_resume_switched) .ltorg .popsection -ENTRY(_cpu_resume) +_cpu_resume: mrs x1, mpidr_el1 - adrp x8, mpidr_hash - add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address - /* retrieve mpidr_hash members to compute the hash */ + adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address + + /* retrieve mpidr_hash members to compute the hash */ ldr x2, [x8, #MPIDR_HASH_MASK] ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 - /* x7 contains hash index, let's use it to grab context pointer */ + + /* x7 contains hash index, let's use it to grab context pointer */ ldr_l x0, sleep_save_stash ldr x0, [x0, x7, lsl #3] add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS From 23c8a500c24d02dd2de1bff968d4467b441717bb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 31 Aug 2016 12:05:12 +0100 Subject: [PATCH 034/100] arm64: kernel: use ordinary return/argument register for el2_setup() The function el2_setup() passes its return value in register w20, and in the two cases where the caller actually cares about this return value, it is passed into set_cpu_boot_mode_flag() [almost] directly, which expects its input in w20 as well. 
So there is no reason to use a 'special' callee saved register here, but we can simply follow the PCS for return value and first argument, respectively. Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 9ee9666c2e34..be02c5593ba6 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -210,7 +210,7 @@ efi_header_end: ENTRY(stext) bl preserve_boot_args - bl el2_setup // Drop to EL1, w20=cpu_boot_mode + bl el2_setup // Drop to EL1, w0=cpu_boot_mode adrp x24, __PHYS_OFFSET and x23, x24, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 bl set_cpu_boot_mode_flag @@ -488,7 +488,7 @@ CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 msr sctlr_el1, x0 - mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 + mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 isb ret @@ -584,7 +584,7 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems cbz x2, install_el2_stub - mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 + mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 isb ret @@ -599,7 +599,7 @@ install_el2_stub: PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr - mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 + mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret ENDPROC(el2_setup) @@ -609,10 +609,10 @@ ENDPROC(el2_setup) */ set_cpu_boot_mode_flag: adr_l x1, __boot_cpu_mode - cmp w20, #BOOT_CPU_MODE_EL2 + cmp w0, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 -1: str w20, [x1] // This CPU has booted in EL1 +1: str w0, [x1] // This CPU has booted in EL1 dmb sy dc ivac, x1 // Invalidate potentially stale cache line ret @@ -649,7 +649,7 @@ ENTRY(__early_cpu_boot_status) * cores are held 
until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) - bl el2_setup // Drop to EL1, w20=cpu_boot_mode + bl el2_setup // Drop to EL1, w0=cpu_boot_mode bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 mov_q x1, MPIDR_HWID_BITMASK From 3c5e9f238bc475b0712419eaebc643c07c73cb94 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 31 Aug 2016 12:05:13 +0100 Subject: [PATCH 035/100] arm64: head.S: move KASLR processing out of __enable_mmu() The KASLR processing is only used by the primary boot path, and complements the processing that takes place in __primary_switch(). Move the two parts together, to make the code easier to understand. Also, fix up a minor whitespace issue. Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel [will: fixed conflict with -rc3 due to lack of fd363bd417dd] Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 69 ++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index be02c5593ba6..5543068da3ae 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -222,9 +222,7 @@ ENTRY(stext) * the TCR will have been set. */ bl __cpu_setup // initialise processor - adr_l x27, __primary_switch // address to jump to after - // MMU has been enabled - b __enable_mmu + b __primary_switch ENDPROC(stext) /* @@ -453,7 +451,7 @@ __primary_switched: cbz x0, 0f // KASLR disabled? 
just proceed orr x23, x23, x0 // record KASLR offset ret x28 // we must enable KASLR, return - // to __enable_mmu() + // to __primary_switch() 0: #endif b start_kernel @@ -726,7 +724,6 @@ ENDPROC(__secondary_switched) * If it isn't, park the CPU */ ENTRY(__enable_mmu) - mrs x22, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED @@ -747,25 +744,6 @@ ENTRY(__enable_mmu) ic iallu dsb nsh isb -#ifdef CONFIG_RANDOMIZE_BASE - mov x19, x0 // preserve new SCTLR_EL1 value - blr x27 - - /* - * If we return here, we have a KASLR displacement in x23 which we need - * to take into account by discarding the current kernel mapping and - * creating a new one. - */ - msr sctlr_el1, x22 // disable the MMU - isb - bl __create_page_tables // recreate kernel mapping - - msr sctlr_el1, x19 // re-enable the MMU - isb - ic iallu // flush instructions fetched - dsb nsh // via old mapping - isb -#endif br x27 ENDPROC(__enable_mmu) @@ -775,11 +753,11 @@ __no_granule_support: 1: wfe wfi - b 1b + b 1b ENDPROC(__no_granule_support) -__primary_switch: #ifdef CONFIG_RELOCATABLE +__relocate_kernel: /* * Iterate over each entry in the relocation table, and apply the * relocations in place. @@ -801,8 +779,45 @@ __primary_switch: add x13, x13, x23 // relocate str x13, [x11, x23] b 0b +1: ret +ENDPROC(__relocate_kernel) +#endif -1: +__primary_switch: +#ifdef CONFIG_RANDOMIZE_BASE + mov x19, x0 // preserve new SCTLR_EL1 value + mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value +#endif + + adr x27, 0f + b __enable_mmu +0: +#ifdef CONFIG_RELOCATABLE + bl __relocate_kernel +#ifdef CONFIG_RANDOMIZE_BASE + ldr x8, =__primary_switched + blr x8 + + /* + * If we return here, we have a KASLR displacement in x23 which we need + * to take into account by discarding the current kernel mapping and + * creating a new one. 
+ */ + msr sctlr_el1, x20 // disable the MMU + isb + bl __create_page_tables // recreate kernel mapping + + tlbi vmalle1 // Remove any stale TLB entries + dsb nsh + + msr sctlr_el1, x19 // re-enable the MMU + isb + ic iallu // flush instructions fetched + dsb nsh // via old mapping + isb + + bl __relocate_kernel +#endif #endif ldr x8, =__primary_switched br x8 From 9dcf7914ae238619ae019dcf82a91c817ff8628e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 31 Aug 2016 12:05:14 +0100 Subject: [PATCH 036/100] arm64: kernel: use x30 for __enable_mmu return address Using x27 for passing to __enable_mmu what is essentially the return address makes the code look more complicated than it needs to be. So switch to x30/lr, and update the secondary and cpu_resume call sites to simply call __enable_mmu as an ordinary function, with a bl instruction. This requires the callers to be covered by .idmap.text. Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 21 +++++++-------------- arch/arm64/kernel/sleep.S | 8 ++------ 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 5543068da3ae..45b865e022cc 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -675,9 +675,9 @@ secondary_startup: * Common entry point for secondary CPUs. */ bl __cpu_setup // initialise processor - - adr_l x27, __secondary_switch // address to jump to after enabling the MMU - b __enable_mmu + bl __enable_mmu + ldr x8, =__secondary_switched + br x8 ENDPROC(secondary_startup) __secondary_switched: @@ -716,9 +716,9 @@ ENDPROC(__secondary_switched) * Enable the MMU. * * x0 = SCTLR_EL1 value for turning on the MMU. - * x27 = *virtual* address to jump to upon completion * - * Other registers depend on the function called upon completion. + * Returns to the caller via x30/lr. This requires the caller to be covered + * by the .idmap.text section. 
* * Checks if the selected granule size is supported by the CPU. * If it isn't, park the CPU @@ -744,7 +744,7 @@ ENTRY(__enable_mmu) ic iallu dsb nsh isb - br x27 + ret ENDPROC(__enable_mmu) __no_granule_support: @@ -789,9 +789,7 @@ __primary_switch: mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif - adr x27, 0f - b __enable_mmu -0: + bl __enable_mmu #ifdef CONFIG_RELOCATABLE bl __relocate_kernel #ifdef CONFIG_RANDOMIZE_BASE @@ -822,8 +820,3 @@ __primary_switch: ldr x8, =__primary_switched br x8 ENDPROC(__primary_switch) - -__secondary_switch: - ldr x8, =__secondary_switched - br x8 -ENDPROC(__secondary_switch) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 6adc76bf8f91..0f7e0b2ac64c 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -100,14 +100,10 @@ ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ - adr_l x27, _resume_switched /* __enable_mmu will branch here */ - b __enable_mmu -ENDPROC(cpu_resume) - -_resume_switched: + bl __enable_mmu ldr x8, =_cpu_resume br x8 -ENDPROC(_resume_switched) +ENDPROC(cpu_resume) .ltorg .popsection From b929fe320e5f3c91c76cca81be80f2dde2ac54a6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 31 Aug 2016 12:05:15 +0100 Subject: [PATCH 037/100] arm64: kernel: drop use of x24 from primary boot path Keeping __PHYS_OFFSET in x24 is actually less clear than simply taking the value of __PHYS_OFFSET using an adrp instruction in the three places that we need it. So change that. 
Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 45b865e022cc..4dee51045e79 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -211,8 +211,8 @@ efi_header_end: ENTRY(stext) bl preserve_boot_args bl el2_setup // Drop to EL1, w0=cpu_boot_mode - adrp x24, __PHYS_OFFSET - and x23, x24, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 + adrp x23, __PHYS_OFFSET + and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 bl set_cpu_boot_mode_flag bl __create_page_tables /* @@ -412,6 +412,8 @@ ENDPROC(__create_page_tables) /* * The following fragment of code is executed with the MMU enabled. + * + * x0 = __PHYS_OFFSET */ .set initial_sp, init_thread_union + THREAD_START_SP __primary_switched: @@ -420,6 +422,12 @@ __primary_switched: msr vbar_el1, x8 // vector table address isb + str_l x21, __fdt_pointer, x5 // Save FDT pointer + + ldr_l x4, kimage_vaddr // Save the offset between + sub x4, x4, x0 // the kernel virtual and + str_l x4, kimage_voffset, x5 // physical mappings + // Clear BSS adr_l x0, __bss_start mov x1, xzr @@ -432,12 +440,6 @@ __primary_switched: mov x4, sp and x4, x4, #~(THREAD_SIZE - 1) msr sp_el0, x4 // Save thread_info - str_l x21, __fdt_pointer, x5 // Save FDT pointer - - ldr_l x4, kimage_vaddr // Save the offset between - sub x4, x4, x24 // the kernel virtual and - str_l x4, kimage_voffset, x5 // physical mappings - mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init @@ -794,6 +796,7 @@ __primary_switch: bl __relocate_kernel #ifdef CONFIG_RANDOMIZE_BASE ldr x8, =__primary_switched + adrp x0, __PHYS_OFFSET blr x8 /* @@ -818,5 +821,6 @@ __primary_switch: #endif #endif ldr x8, =__primary_switched + adrp x0, __PHYS_OFFSET br x8 ENDPROC(__primary_switch) From 60699ba18b69ff210ed0304bc23f6c9d11d27a72 Mon Sep 17 00:00:00 2001 
From: Ard Biesheuvel Date: Wed, 31 Aug 2016 12:05:16 +0100 Subject: [PATCH 038/100] arm64: head.S: use ordinary stack frame for __primary_switched() Instead of stashing the value of the link register in x28 before setting up the stack and calling into C code, create an ordinary PCS compatible stack frame so that we can push the return address onto the stack. Since exception handlers require a stack as well, assign the stack pointer register before installing the vector table. Note that this accounts for the difference between THREAD_START_SP and THREAD_SIZE, given that the stack pointer is always decremented before calling into any C code. Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4dee51045e79..29a734ee0770 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -415,13 +415,18 @@ ENDPROC(__create_page_tables) * * x0 = __PHYS_OFFSET */ - .set initial_sp, init_thread_union + THREAD_START_SP __primary_switched: - mov x28, lr // preserve LR + adrp x4, init_thread_union + add sp, x4, #THREAD_SIZE + msr sp_el0, x4 // Save thread_info + adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb + stp xzr, x30, [sp, #-16]! + mov x29, sp + str_l x21, __fdt_pointer, x5 // Save FDT pointer ldr_l x4, kimage_vaddr // Save the offset between @@ -436,11 +441,6 @@ __primary_switched: bl __pi_memset dsb ishst // Make zero page visible to PTW - adr_l sp, initial_sp, x4 - mov x4, sp - and x4, x4, #~(THREAD_SIZE - 1) - msr sp_el0, x4 // Save thread_info - mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init #endif @@ -452,8 +452,8 @@ __primary_switched: bl kaslr_early_init // parse FDT for KASLR options cbz x0, 0f // KASLR disabled? 
just proceed orr x23, x23, x0 // record KASLR offset - ret x28 // we must enable KASLR, return - // to __primary_switch() + ldp x29, x30, [sp], #16 // we must enable KASLR, return + ret // to __primary_switch() 0: #endif b start_kernel From a9be2ee09385387819ca22bf6522b2437334489e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 31 Aug 2016 12:05:17 +0100 Subject: [PATCH 039/100] arm64: head.S: document the use of callee saved registers Now that the only remaining occurrences of the use of callee saved registers are on the primary boot path, add a comment to the code which register is used for what. Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 29a734ee0770..427f6d3f084c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -208,6 +208,16 @@ efi_header_end: __INIT + /* + * The following callee saved general purpose registers are used on the + * primary lowlevel boot path: + * + * Register Scope Purpose + * x21 stext() .. start_kernel() FDT pointer passed at boot in x0 + * x23 stext() .. start_kernel() physical misalignment/KASLR offset + * x28 __create_page_tables() callee preserved temp register + * x19/x20 __primary_switch() callee preserved temp registers + */ ENTRY(stext) bl preserve_boot_args bl el2_setup // Drop to EL1, w0=cpu_boot_mode From adeb68ef85235f952b77e01c4dadfd60851890d9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 1 Sep 2016 13:35:02 +0100 Subject: [PATCH 040/100] arm64: debug: report TRAP_TRACE instead of TRAP_HWBRPT for singlestep Single-step traps to userspace (e.g. via ptrace) are expected to use the TRAP_TRACE for the si_code field of the siginfo, as opposed to TRAP_HWBRPT that we report currently. Fix the reported value, which has no effect on existing and legacy builds of GDB. 
Reported-by: Yao Qi Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 30821928106f..d97fdc1f6a38 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -236,7 +236,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; if (user_mode(regs)) { - send_user_sigtrap(TRAP_HWBKPT); + send_user_sigtrap(TRAP_TRACE); /* * ptrace will disable single step unless explicitly @@ -364,7 +364,7 @@ NOKPROBE_SYMBOL(aarch32_break_handler); static int __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, - TRAP_HWBKPT, "single-step handler"); + TRAP_TRACE, "single-step handler"); hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, TRAP_BRKPT, "ptrace BRK handler"); return 0; From f928c16dbfaff447c637d8c22c29a642be0044f1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 5 Sep 2016 09:43:04 +0100 Subject: [PATCH 041/100] arm64: Drop generic xlate_dev_mem_{k,}ptr() The code that provides /dev/mem uses xlate_dev_mem_{k,}ptr() to avoid making a cachable mapping of a non-cachable area on ia64. On arm64 we do this via phys_mem_access_prot() instead, but provide dummy versions of xlate_dev_mem_{k,}ptr(). 
These are the same as those in asm-generic/io.h, which we include from asm/io.h Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/include/asm/io.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 9b6e408cfa51..ce20741b2cb5 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -184,17 +184,6 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); }) #define iowrite64be(v,p) ({ __iowmb(); __raw_writeq((__force __u64)cpu_to_be64(v), p); }) -/* - * Convert a physical pointer to a virtual kernel pointer for /dev/mem - * access - */ -#define xlate_dev_mem_ptr(p) __va(p) - -/* - * Convert a virtual cached pointer to an uncached pointer - */ -#define xlate_dev_kmem_ptr(p) p - #include /* From dc00247576fdb97211e1959b4dfd2a7893cf9d0b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 5 Sep 2016 10:23:17 +0100 Subject: [PATCH 042/100] arm64: kernel: re-export _cpu_resume() from sleep.S Commit b5fe242972ef ("arm64: kernel: fix style issues in sleep.S") changed the linkage of _cpu_resume() to local, even though the symbol is also referenced from hibernate.c. So revert this change. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/sleep.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 0f7e0b2ac64c..b8799e7c79de 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -107,7 +107,7 @@ ENDPROC(cpu_resume) .ltorg .popsection -_cpu_resume: +ENTRY(_cpu_resume) mrs x1, mpidr_el1 adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address From 98ab10e9770e3ce9fbd263689644be9d81a06885 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Mon, 5 Sep 2016 08:03:16 +0530 Subject: [PATCH 043/100] arm64: ftrace: add save_stack_trace_regs() Currently, enabling stacktrace of a kprobe events generates warning: echo stacktrace > /sys/kernel/debug/tracing/trace_options echo "p xhci_irq" > /sys/kernel/debug/tracing/kprobe_events echo 1 > /sys/kernel/debug/tracing/events/kprobes/enable save_stack_trace_regs() not implemented yet. ------------[ cut here ]------------ WARNING: CPU: 1 PID: 0 at ../kernel/stacktrace.c:74 save_stack_trace_regs+0x3c/0x48 Modules linked in: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.8.0-rc4-dirty #5128 Hardware name: ARM Juno development board (r1) (DT) task: ffff800975dd1900 task.stack: ffff800975ddc000 PC is at save_stack_trace_regs+0x3c/0x48 LR is at save_stack_trace_regs+0x3c/0x48 pc : [] lr : [] pstate: 600003c5 sp : ffff80097ef52c00 Call trace: save_stack_trace_regs+0x3c/0x48 __ftrace_trace_stack+0x168/0x208 trace_buffer_unlock_commit_regs+0x5c/0x7c kprobe_trace_func+0x308/0x3d8 kprobe_dispatcher+0x58/0x60 kprobe_breakpoint_handler+0xbc/0x18c brk_handler+0x50/0x90 do_debug_exception+0x50/0xbc This patch implements save_stack_trace_regs(), so that stacktrace of a kprobe events can be obtained. After this patch, there is no warning and we can see the stacktrace for kprobe events in trace buffer. more /sys/kernel/debug/tracing/trace -0 [004] d.h. 1356.000496: p_xhci_irq_0:(xhci_irq+0x0/0x9ac) -0 [004] d.h. 
1356.000497: => xhci_irq => __handle_irq_event_percpu => handle_irq_event_percpu => handle_irq_event => handle_fasteoi_irq => generic_handle_irq => __handle_domain_irq => gic_handle_irq => el1_irq => arch_cpu_idle => default_idle_call => cpu_startup_entry => secondary_start_kernel => Tested-by: David A. Long Reviewed-by: James Morse Signed-off-by: Pratyush Anand Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d9751a4769e7..ca01addf8c4c 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -152,6 +152,27 @@ static int save_trace(struct stackframe *frame, void *d) return trace->nr_entries >= trace->max_entries; } +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + struct stack_trace_data data; + struct stackframe frame; + + data.trace = trace; + data.skip = trace->skip; + data.no_sched_functions = 0; + + frame.fp = regs->regs[29]; + frame.sp = regs->sp; + frame.pc = regs->pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = current->curr_ret_stack; +#endif + + walk_stackframe(current, &frame, save_trace, &data); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { struct stack_trace_data data; From 282b87963556a971f9acbe3d430991b80480541a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Sep 2016 15:34:44 +0100 Subject: [PATCH 044/100] drivers/perf: arm_pmu: Always consider IRQ0 as an error As declared by the chief penguin, and enforced by the NO_IRQ brigade, IRQ0 doesn't exist, and is considered as an error (no irq). Unfortunately, the arm_pmu driver still considers it as valid in a large number of cases. Let's fix this. 
Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index c494613c1909..193a68cc2af4 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -602,7 +602,7 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) irqs = min(pmu_device->num_resources, num_possible_cpus()); irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { + if (irq > 0 && irq_is_percpu(irq)) { on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu_disable_percpu_irq, &irq, 1); free_percpu_irq(irq, &hw_events->percpu_pmu); @@ -616,7 +616,7 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) continue; irq = platform_get_irq(pmu_device, i); - if (irq >= 0) + if (irq > 0) free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); } } @@ -638,7 +638,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) } irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { + if (irq > 0 && irq_is_percpu(irq)) { err = request_percpu_irq(irq, handler, "arm-pmu", &hw_events->percpu_pmu); if (err) { @@ -919,7 +919,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) /* Check the IRQ type and prohibit a mix of PPIs and SPIs */ irq = platform_get_irq(pdev, i); - if (irq >= 0) { + if (irq > 0) { bool spi = !irq_is_percpu(irq); if (i > 0 && spi != using_spi) { @@ -969,8 +969,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) if (cpumask_weight(&pmu->supported_cpus) == 0) { int irq = platform_get_irq(pdev, 0); - if (irq_is_percpu(irq)) { - /* If using PPIs, check the affinity of the partition */ + if (irq > 0 && irq_is_percpu(irq)) { int ret; ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus); From dae8c235d9a21a564793ea9fe716233e11d30e21 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 5 Sep 2016 19:30:22 
+0800 Subject: [PATCH 045/100] arm64: mm: drop fixup_init() and mm.h There is only fixup_init() in mm.h , and it is only called in free_initmem(), so move the codes from fixup_init() into free_initmem(), then drop fixup_init() and mm.h. Acked-by: Mark Rutland Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/mm/flush.c | 2 -- arch/arm64/mm/init.c | 10 +++++++--- arch/arm64/mm/mm.h | 2 -- arch/arm64/mm/mmu.c | 12 ------------ arch/arm64/mm/pgd.c | 2 -- 5 files changed, 7 insertions(+), 21 deletions(-) delete mode 100644 arch/arm64/mm/mm.h diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 43a76b07eb32..8377329d8c97 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -25,8 +25,6 @@ #include #include -#include "mm.h" - void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 251e0824cd82..21c489bdeb4e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -48,8 +49,6 @@ #include #include -#include "mm.h" - /* * We need to be able to catch inadvertent references to memstart_addr * that occur (potentially in generic code) before arm64_memblock_init() @@ -486,7 +485,12 @@ void free_initmem(void) { free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)), 0, "unused kernel"); - fixup_init(); + /* + * Unmap the __init region but leave the VM area in place. This + * prevents the region from being reused for kernel modules, which + * is not supported by kallsyms. 
+ */ + unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h deleted file mode 100644 index 71fe98985455..000000000000 --- a/arch/arm64/mm/mm.h +++ /dev/null @@ -1,2 +0,0 @@ - -void fixup_init(void); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e634a0f6d62b..05615a3fdc6f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -43,8 +43,6 @@ #include #include -#include "mm.h" - u64 idmap_t0sz = TCR_T0SZ(VA_BITS); u64 kimage_voffset __ro_after_init; @@ -400,16 +398,6 @@ void mark_rodata_ro(void) section_size, PAGE_KERNEL_RO); } -void fixup_init(void) -{ - /* - * Unmap the __init region but leave the VM area in place. This - * prevents the region from being reused for kernel modules, which - * is not supported by kallsyms. - */ - unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); -} - static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, pgprot_t prot, struct vm_struct *vma) { diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index ae11d4e03d0e..371c5f03a170 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -26,8 +26,6 @@ #include #include -#include "mm.h" - static struct kmem_cache *pgd_cache; pgd_t *pgd_alloc(struct mm_struct *mm) From ef0da55a84a345f323ceddda3b6c78b25de90435 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 5 Sep 2016 18:25:47 +0100 Subject: [PATCH 046/100] jump_labels: Allow array initialisers The static key API is currently designed around single variable definitions. There are cases where an array of static keys is desirable, so extend the API to allow this rather than using the internal static key implementation directly. 
Cc: Jason Baron Cc: Jonathan Corbet Acked-by: Peter Zijlstra (Intel) Suggested-by: Dave P Martin Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- Documentation/static-keys.txt | 9 +++++++++ include/linux/jump_label.h | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index 477927becacb..ea8d7b4e53f0 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -15,6 +15,8 @@ The updated API replacements are: DEFINE_STATIC_KEY_TRUE(key); DEFINE_STATIC_KEY_FALSE(key); +DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); +DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); static_branch_likely() static_branch_unlikely() @@ -140,6 +142,13 @@ static_branch_inc(), will change the branch back to true. Likewise, if the key is initialized false, a 'static_branch_inc()', will change the branch to true. And then a 'static_branch_dec()', will again make the branch false. +Where an array of keys is required, it can be defined as: + + DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); + +or: + + DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); 4) Architecture level code patching interface, 'jump labels' diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 661af564fae8..a534c7f15a61 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -21,6 +21,8 @@ * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); + * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); + * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); * static_branch_likely() * static_branch_unlikely() * @@ -270,6 +272,16 @@ struct static_key_false { #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT +#define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count) \ + struct static_key_true name[count] = { \ + [0 ... (count) - 1] = STATIC_KEY_TRUE_INIT, \ + } + +#define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count) \ + struct static_key_false name[count] = { \ + [0 ... 
(count) - 1] = STATIC_KEY_FALSE_INIT, \ + } + extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ From efd9e03facd075f5b76bf82e6c785bd45d5cbf4f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 5 Sep 2016 18:25:48 +0100 Subject: [PATCH 047/100] arm64: Use static keys for CPU features This patch adds static keys transparently for all the cpu_hwcaps features by implementing an array of default-false static keys and enabling them when detected. The cpus_have_cap() check uses the static keys if the feature being checked is a constant, otherwise the compiler generates the bitmap test. Because of the early call to static_branch_enable() via check_local_cpu_errata() -> update_cpu_capabilities(), the jump labels are initialised in cpuinfo_store_boot_cpu(). Cc: Will Deacon Cc: Suzuki K. Poulose Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 14 +++++++++++--- arch/arm64/kernel/cpufeature.c | 3 +++ arch/arm64/kernel/smp.c | 5 +++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index c07c5d1cd04a..23a76dc5a6cf 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -9,6 +9,8 @@ #ifndef __ASM_CPUFEATURE_H #define __ASM_CPUFEATURE_H +#include + #include #include @@ -110,6 +112,7 @@ struct arm64_cpu_capabilities { }; extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; bool this_cpu_has_cap(unsigned int cap); @@ -122,16 +125,21 @@ static inline bool cpus_have_cap(unsigned int num) { if (num >= ARM64_NCAPS) return false; - return test_bit(num, cpu_hwcaps); + if (__builtin_constant_p(num)) + return static_branch_unlikely(&cpu_hwcap_keys[num]); + else + return test_bit(num, cpu_hwcaps); } static inline void cpus_set_cap(unsigned int num) { - if (num >= ARM64_NCAPS) + if (num >= ARM64_NCAPS) { pr_warn("Attempt to set an 
illegal CPU capability (%d >= %d)\n", num, ARM64_NCAPS); - else + } else { __set_bit(num, cpu_hwcaps); + static_branch_enable(&cpu_hwcap_keys[num]); + } } static inline int __attribute_const__ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c3d7ae48f92d..9128ced913e7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -46,6 +46,9 @@ unsigned int compat_elf_hwcap2 __read_mostly; DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); +EXPORT_SYMBOL(cpu_hwcap_keys); + #define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ { \ .sign = SIGNED, \ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d93d43352504..c3c08368a685 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -437,6 +437,11 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + /* + * Initialise the static keys early as they may be enabled by the + * cpufeature code. + */ + jump_label_init(); cpuinfo_store_boot_cpu(); save_boot_cpu_run_el(); } From ee5e41b5f21a5438664effce1ba5bdd11e03ee24 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 8 Sep 2016 11:02:20 +0100 Subject: [PATCH 048/100] arm64/io: Allow I/O writes to use {W,X}ZR When zeroing an I/O location, the current accessors are forced to allocate a temporary register to store the zero for the write. By tweaking the assembly constraints, we can allow the compiler to use the zero register directly in such cases, and save some juggling. Compiling a representative kernel configuration with GCC 6 shows that 2.3KB worth of code can be wasted just on that! 
text data bss dec hex filename 13316776 3248256 18176769 34741801 2121e29 vmlinux.o.new 13319140 3248256 18176769 34744165 2122765 vmlinux.o.old Acked-by: Mark Rutland Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- arch/arm64/include/asm/io.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index ce20741b2cb5..0bba427bb4c2 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -40,25 +40,25 @@ #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 val, volatile void __iomem *addr) { - asm volatile("strb %w0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_writew __raw_writew static inline void __raw_writew(u16 val, volatile void __iomem *addr) { - asm volatile("strh %w0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_writel __raw_writel static inline void __raw_writel(u32 val, volatile void __iomem *addr) { - asm volatile("str %w0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_writeq __raw_writeq static inline void __raw_writeq(u64 val, volatile void __iomem *addr) { - asm volatile("str %0, [%1]" : : "r" (val), "r" (addr)); + asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_readb __raw_readb From 7aff4a2dd3db4e519c636f43aa863078e3b30d0f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Sep 2016 13:55:34 +0100 Subject: [PATCH 049/100] arm64: sysreg: allow write_sysreg to use XZR Currently write_sysreg has to allocate a temporary register to write zero to a system register, which is unfortunate given that the MSR instruction accepts XZR as an operand. Allow XZR to be used when appropriate by fiddling with the assembly constraints. 
Cc: Catalin Marinas Cc: Marc Zyngier Cc: Suzuki K Poulose Reviewed-by: Robin Murphy Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cc06794b7346..39fed2e56e98 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -273,10 +273,14 @@ static inline void config_sctlr_el1(u32 clear, u32 set) __val; \ }) +/* + * The "Z" constraint normally means a zero immediate, but when combined with + * the "%x0" template means XZR. + */ #define write_sysreg(v, r) do { \ u64 __val = (u64)v; \ - asm volatile("msr " __stringify(r) ", %0" \ - : : "r" (__val)); \ + asm volatile("msr " __stringify(r) ", %x0" \ + : : "rZ" (__val)); \ } while (0) #endif From cd5f22d7967f613c49288256ce95b07eb910e2a9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Sep 2016 13:55:35 +0100 Subject: [PATCH 050/100] arm64: arch_timer: simplify accessors A while back we added {read,write}_sysreg accessors to handle accesses to system registers, without the usual boilerplate asm volatile, temporary variable, etc. This patch makes use of these in the arm64 arch timer accessors to make the code shorter and clearer. 
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_timer.h | 41 ++++++++++------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index fbe0ca31a99c..7ff386c15539 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -20,6 +20,7 @@ #define __ASM_ARCH_TIMER_H #include +#include #include #include @@ -38,19 +39,19 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); + write_sysreg(val, cntp_ctl_el0); break; case ARCH_TIMER_REG_TVAL: - asm volatile("msr cntp_tval_el0, %0" : : "r" (val)); + write_sysreg(val, cntp_tval_el0); break; } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("msr cntv_ctl_el0, %0" : : "r" (val)); + write_sysreg(val, cntv_ctl_el0); break; case ARCH_TIMER_REG_TVAL: - asm volatile("msr cntv_tval_el0, %0" : : "r" (val)); + write_sysreg(val, cntv_tval_el0); break; } } @@ -61,48 +62,38 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) static __always_inline u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) { - u32 val; - if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val)); - break; + return read_sysreg(cntp_ctl_el0); case ARCH_TIMER_REG_TVAL: - asm volatile("mrs %0, cntp_tval_el0" : "=r" (val)); - break; + return read_sysreg(cntp_tval_el0); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: - asm volatile("mrs %0, cntv_ctl_el0" : "=r" (val)); - break; + return read_sysreg(cntv_ctl_el0); case ARCH_TIMER_REG_TVAL: - asm volatile("mrs %0, cntv_tval_el0" : 
"=r" (val)); - break; + return read_sysreg(cntv_tval_el0); } } - return val; + BUG(); } static inline u32 arch_timer_get_cntfrq(void) { - u32 val; - asm volatile("mrs %0, cntfrq_el0" : "=r" (val)); - return val; + return read_sysreg(cntfrq_el0); } static inline u32 arch_timer_get_cntkctl(void) { - u32 cntkctl; - asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl)); - return cntkctl; + return read_sysreg(cntkctl_el1); } static inline void arch_timer_set_cntkctl(u32 cntkctl) { - asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); + write_sysreg(cntkctl, cntkctl_el1); } static inline u64 arch_counter_get_cntpct(void) @@ -116,12 +107,8 @@ static inline u64 arch_counter_get_cntpct(void) static inline u64 arch_counter_get_cntvct(void) { - u64 cval; - isb(); - asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); - - return cval; + return read_sysreg(cntvct_el0); } static inline int arch_timer_arch_init(void) From d0a69d9f388dcd37e9bf2d8d7d4a83b87822ffa1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Sep 2016 13:55:36 +0100 Subject: [PATCH 051/100] arm64: dcc: simplify accessors A while back we added {read,write}_sysreg accessors to handle accesses to system registers, without the usual boilerplate asm volatile, temporary variable, etc. This patch makes use of these in the arm64 DCC accessors to make the code shorter and clearer. 
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/dcc.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/dcc.h b/arch/arm64/include/asm/dcc.h index 65e0190e97c8..836b05630003 100644 --- a/arch/arm64/include/asm/dcc.h +++ b/arch/arm64/include/asm/dcc.h @@ -21,21 +21,16 @@ #define __ASM_DCC_H #include +#include static inline u32 __dcc_getstatus(void) { - u32 ret; - - asm volatile("mrs %0, mdccsr_el0" : "=r" (ret)); - - return ret; + return read_sysreg(mdccsr_el0); } static inline char __dcc_getchar(void) { - char c; - - asm volatile("mrs %0, dbgdtrrx_el0" : "=r" (c)); + char c = read_sysreg(dbgdtrrx_el0); isb(); return c; @@ -47,8 +42,7 @@ static inline void __dcc_putchar(char c) * The typecast is to make absolutely certain that 'c' is * zero-extended. */ - asm volatile("msr dbgdtrtx_el0, %0" - : : "r" ((unsigned long)(unsigned char)c)); + write_sysreg((unsigned char)c, dbgdtrtx_el0); isb(); } From 1f3d8699be82583c713e2a1099c597a740ebaf4d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Sep 2016 13:55:37 +0100 Subject: [PATCH 052/100] arm64/kvm: use {read,write}_sysreg() A while back we added {read,write}_sysreg accessors to handle accesses to system registers, without the usual boilerplate asm volatile, temporary variable, etc. This patch makes use of these in the arm64 KVM code to make the code shorter and clearer. At the same time, a comment style violation next to a system register access is fixed up in reset_pmcr, and comments describing whether operations are reads or writes are removed as this is now painfully obvious. 
Cc: Catalin Marinas Cc: Marc Zyngier Cc: Will Deacon Acked-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/virt.h | 6 ++---- arch/arm64/kvm/sys_regs.c | 31 ++++++++++------------------ arch/arm64/kvm/sys_regs_generic_v8.c | 6 ++---- 3 files changed, 15 insertions(+), 28 deletions(-) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index db5739413677..fea10736b11f 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -46,6 +46,7 @@ #include #include +#include /* * __boot_cpu_mode records what mode CPUs were booted in. @@ -76,10 +77,7 @@ static inline bool is_hyp_mode_mismatched(void) static inline bool is_kernel_in_hyp_mode(void) { - u64 el; - - asm("mrs %0, CurrentEL" : "=r" (el)); - return el == CurrentEL_EL2; + return read_sysreg(CurrentEL) == CurrentEL_EL2; } #ifdef CONFIG_ARM64_VHE diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b0b225ceca18..295a3e536a75 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -67,11 +68,9 @@ static u32 get_ccsidr(u32 csselr) /* Make sure noone else changes CSSELR during this! 
*/ local_irq_disable(); - /* Put value into CSSELR */ - asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + write_sysreg(csselr, csselr_el1); isb(); - /* Read result out of CCSIDR */ - asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr)); + ccsidr = read_sysreg(ccsidr_el1); local_irq_enable(); return ccsidr; @@ -174,9 +173,7 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, if (p->is_write) { return ignore_write(vcpu, p); } else { - u32 val; - asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val)); - p->regval = val; + p->regval = read_sysreg(dbgauthstatus_el1); return true; } } @@ -429,10 +426,7 @@ static void reset_wcr(struct kvm_vcpu *vcpu, static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 amair; - - asm volatile("mrs %0, amair_el1\n" : "=r" (amair)); - vcpu_sys_reg(vcpu, AMAIR_EL1) = amair; + vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1); } static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) @@ -456,8 +450,9 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; - asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr)); - /* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) is reset to UNKNOWN + pmcr = read_sysreg(pmcr_el0); + /* + * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN * except PMCR.E resetting to zero. 
*/ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) @@ -557,9 +552,9 @@ static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (!(p->Op2 & 1)) - asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid)); + pmceid = read_sysreg(pmceid0_el0); else - asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid)); + pmceid = read_sysreg(pmceid1_el0); p->regval = pmceid; @@ -1841,11 +1836,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, static void get_##reg(struct kvm_vcpu *v, \ const struct sys_reg_desc *r) \ { \ - u64 val; \ - \ - asm volatile("mrs %0, " __stringify(reg) "\n" \ - : "=r" (val)); \ - ((struct sys_reg_desc *)r)->val = val; \ + ((struct sys_reg_desc *)r)->val = read_sysreg(reg); \ } FUNCTION_INVARIANT(midr_el1) diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index ed90578fa120..46af7186bca6 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "sys_regs.h" @@ -43,10 +44,7 @@ static bool access_actlr(struct kvm_vcpu *vcpu, static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 actlr; - - asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr)); - vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr; + vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); } /* From adf7589997927b1d84a5d003027b866bbef61ef2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Sep 2016 13:55:38 +0100 Subject: [PATCH 053/100] arm64: simplify sysreg manipulation A while back we added {read,write}_sysreg accessors to handle accesses to system registers, without the usual boilerplate asm volatile, temporary variable, etc. This patch makes use of these across arm64 to make code shorter and clearer. For sequences with a trailing ISB, the existing isb() macro is also used so that asm blocks can be removed entirely. A few uses of inline assembly for msr/mrs are left as-is. 
Those manipulating sp_el0 for the current thread_info value have special clobber requirements. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/hw_breakpoint.h | 13 +++++++------ arch/arm64/include/asm/mmu_context.h | 27 ++++++++++---------------- arch/arm64/include/asm/pgtable-hwdef.h | 1 + arch/arm64/include/asm/sysreg.h | 20 +++++++++---------- arch/arm64/include/asm/thread_info.h | 3 +++ arch/arm64/kernel/cacheinfo.c | 8 +++----- arch/arm64/kernel/debug-monitors.c | 8 +++----- arch/arm64/kernel/process.c | 14 ++++++------- arch/arm64/kernel/sys_compat.c | 2 +- 9 files changed, 44 insertions(+), 52 deletions(-) diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 2487891dec46..9510ace570e2 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -18,6 +18,7 @@ #include #include +#include #include #ifdef __KERNEL__ @@ -98,18 +99,18 @@ static inline void decode_ctrl_reg(u32 reg, #define AARCH64_DBG_REG_WCR (AARCH64_DBG_REG_WVR + ARM_MAX_WRP) /* Debug register names. */ -#define AARCH64_DBG_REG_NAME_BVR "bvr" -#define AARCH64_DBG_REG_NAME_BCR "bcr" -#define AARCH64_DBG_REG_NAME_WVR "wvr" -#define AARCH64_DBG_REG_NAME_WCR "wcr" +#define AARCH64_DBG_REG_NAME_BVR bvr +#define AARCH64_DBG_REG_NAME_BCR bcr +#define AARCH64_DBG_REG_NAME_WVR wvr +#define AARCH64_DBG_REG_NAME_WCR wcr /* Accessor macros for the debug registers.
*/ #define AARCH64_DBG_READ(N, REG, VAL) do {\ - asm volatile("mrs %0, dbg" REG #N "_el1" : "=r" (VAL));\ + VAL = read_sysreg(dbg##REG##N##_el1);\ } while (0) #define AARCH64_DBG_WRITE(N, REG, VAL) do {\ - asm volatile("msr dbg" REG #N "_el1, %0" :: "r" (VAL));\ + write_sysreg(VAL, dbg##REG##N##_el1);\ } while (0) struct task_struct; diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index b1892a0dbcb0..e5c24b47dba5 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -27,16 +27,14 @@ #include #include #include +#include #include #ifdef CONFIG_PID_IN_CONTEXTIDR static inline void contextidr_thread_switch(struct task_struct *next) { - asm( - " msr contextidr_el1, %0\n" - " isb" - : - : "r" (task_pid_nr(next))); + write_sysreg(task_pid_nr(next), contextidr_el1); + isb(); } #else static inline void contextidr_thread_switch(struct task_struct *next) @@ -51,11 +49,8 @@ static inline void cpu_set_reserved_ttbr0(void) { unsigned long ttbr = virt_to_phys(empty_zero_page); - asm( - " msr ttbr0_el1, %0 // set TTBR0\n" - " isb" - : - : "r" (ttbr)); + write_sysreg(ttbr, ttbr0_el1); + isb(); } /* @@ -81,13 +76,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) if (!__cpu_uses_extended_idmap()) return; - asm volatile ( - " mrs %0, tcr_el1 ;" - " bfi %0, %1, %2, %3 ;" - " msr tcr_el1, %0 ;" - " isb" - : "=&r" (tcr) - : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); + tcr = read_sysreg(tcr_el1); + tcr &= ~TCR_T0SZ_MASK; + tcr |= t0sz << TCR_T0SZ_OFFSET; + write_sysreg(tcr, tcr_el1); + isb(); } #define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index c3ae239db3ee..eb0c2bd90de9 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -208,6 +208,7 @@ #define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) #define TCR_TxSZ(x) 
(TCR_T0SZ(x) | TCR_T1SZ(x)) #define TCR_TxSZ_WIDTH 6 +#define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET) #define TCR_IRGN0_SHIFT 8 #define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 39fed2e56e98..e91aef2bb33d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -253,16 +253,6 @@ asm( " .endm\n" ); -static inline void config_sctlr_el1(u32 clear, u32 set) -{ - u32 val; - - asm volatile("mrs %0, sctlr_el1" : "=r" (val)); - val &= ~clear; - val |= set; - asm volatile("msr sctlr_el1, %0" : : "r" (val)); -} - /* * Unlike read_cpuid, calls to read_sysreg are never expected to be * optimized away or replaced with synthetic values. @@ -283,6 +273,16 @@ static inline void config_sctlr_el1(u32 clear, u32 set) : : "rZ" (__val)); \ } while (0) +static inline void config_sctlr_el1(u32 clear, u32 set) +{ + u32 val; + + val = read_sysreg(sctlr_el1); + val &= ~clear; + val |= set; + write_sysreg(val, sctlr_el1); +} + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index abd64bd1f6d9..e9ea5a6bd449 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -75,6 +75,9 @@ static inline struct thread_info *current_thread_info(void) __attribute_const__; /* * struct thread_info can be accessed directly via sp_el0. + * + * We don't use read_sysreg() as we want the compiler to cache the value where + * possible. 
*/ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index b8629d52fba9..9617301f76b5 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -39,7 +39,7 @@ static inline enum cache_type get_cache_type(int level) if (level > MAX_CACHE_LEVEL) return CACHE_TYPE_NOCACHE; - asm volatile ("mrs %x0, clidr_el1" : "=r" (clidr)); + clidr = read_sysreg(clidr_el1); return CLIDR_CTYPE(clidr, level); } @@ -55,11 +55,9 @@ u64 __attribute_const__ cache_get_ccsidr(u64 csselr) WARN_ON(preemptible()); - /* Put value into CSSELR */ - asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + write_sysreg(csselr, csselr_el1); isb(); - /* Read result out of CCSIDR */ - asm volatile("mrs %x0, ccsidr_el1" : "=r" (ccsidr)); + ccsidr = read_sysreg(ccsidr_el1); return ccsidr; } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index d97fdc1f6a38..73ae90ef434c 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -46,16 +46,14 @@ static void mdscr_write(u32 mdscr) { unsigned long flags; local_dbg_save(flags); - asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); + write_sysreg(mdscr, mdscr_el1); local_dbg_restore(flags); } NOKPROBE_SYMBOL(mdscr_write); static u32 mdscr_read(void) { - u32 mdscr; - asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); - return mdscr; + return read_sysreg(mdscr_el1); } NOKPROBE_SYMBOL(mdscr_read); @@ -134,7 +132,7 @@ NOKPROBE_SYMBOL(disable_debug_monitors); */ static int clear_os_lock(unsigned int cpu) { - asm volatile("msr oslar_el1, %0" : : "r" (0)); + write_sysreg(0, oslar_el1); isb(); return 0; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 6cd2612236dc..a4f5f766af08 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -202,7 +202,7 @@ void show_regs(struct pt_regs * regs) static void tls_thread_flush(void) { - asm ("msr 
tpidr_el0, xzr"); + write_sysreg(0, tpidr_el0); if (is_compat_task()) { current->thread.tp_value = 0; @@ -213,7 +213,7 @@ static void tls_thread_flush(void) * with a stale shadow state during context switch. */ barrier(); - asm ("msr tpidrro_el0, xzr"); + write_sysreg(0, tpidrro_el0); } } @@ -253,7 +253,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, * Read the current TLS pointer from tpidr_el0 as it may be * out-of-sync with the saved value. */ - asm("mrs %0, tpidr_el0" : "=r" (*task_user_tls(p))); + *task_user_tls(p) = read_sysreg(tpidr_el0); if (stack_start) { if (is_compat_thread(task_thread_info(p))) @@ -289,17 +289,15 @@ static void tls_thread_switch(struct task_struct *next) { unsigned long tpidr, tpidrro; - asm("mrs %0, tpidr_el0" : "=r" (tpidr)); + tpidr = read_sysreg(tpidr_el0); *task_user_tls(current) = tpidr; tpidr = *task_user_tls(next); tpidrro = is_compat_thread(task_thread_info(next)) ? next->thread.tp_value : 0; - asm( - " msr tpidr_el0, %0\n" - " msr tpidrro_el0, %1" - : : "r" (tpidr), "r" (tpidrro)); + write_sysreg(tpidr, tpidr_el0); + write_sysreg(tpidrro, tpidrro_el0); } /* Restore the UAO state depending on next's addr_limit */ diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 28c511b06edf..abaf582fc7a8 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -94,7 +94,7 @@ long compat_arm_syscall(struct pt_regs *regs) * See comment in tls_thread_flush. */ barrier(); - asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0])); + write_sysreg(regs->regs[0], tpidrro_el0); return 0; default: From d3ea42aad584493b99c109e59ced77db145a68e1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Sep 2016 13:55:39 +0100 Subject: [PATCH 054/100] arm64: simplify contextidr_thread_switch When CONFIG_PID_IN_CONTEXTIDR is not selected, we use an empty stub definition of contextidr_thread_switch(). 
As everything we rely upon exists regardless of CONFIG_PID_IN_CONTEXTIDR, we don't strictly require an empty stub. By using IS_ENABLED() rather than ifdeffery, we avoid duplication, and get compiler coverage on all the code even when CONFIG_PID_IN_CONTEXTIDR is not selected and the code is optimised away. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu_context.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index e5c24b47dba5..a50185375f09 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -30,17 +30,14 @@ #include #include -#ifdef CONFIG_PID_IN_CONTEXTIDR static inline void contextidr_thread_switch(struct task_struct *next) { + if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR)) + return; + write_sysreg(task_pid_nr(next), contextidr_el1); isb(); } -#else -static inline void contextidr_thread_switch(struct task_struct *next) -{ -} -#endif /* * Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0. From 86cdd72af936860503f392825410d1b60a3e474e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Sep 2016 14:08:26 +0100 Subject: [PATCH 055/100] drivers/perf: arm_pmu: add common attr group fields In preparation for adding common attribute groups, add an array of attribute group pointers to arm_pmu, which will be used if the backend hasn't already set pmu::attr_groups. Subsequent patches will move backends over to using these, before adding common fields. 
Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 3 +++ include/linux/perf/arm_pmu.h | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 193a68cc2af4..1a39899d1392 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -1037,6 +1037,9 @@ int arm_pmu_device_probe(struct platform_device *pdev, goto out_free; } + if (!pmu->pmu.attr_groups) + pmu->pmu.attr_groups = pmu->attr_groups; + ret = cpu_pmu_init(pmu); if (ret) goto out_free; diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index e18843809eec..268bc63f1358 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -14,7 +14,7 @@ #include #include - +#include #include /* @@ -77,6 +77,12 @@ struct pmu_hw_events { struct arm_pmu *percpu_pmu; }; +enum armpmu_attr_groups { + ARMPMU_ATTR_GROUP_EVENTS, + ARMPMU_ATTR_GROUP_FORMATS, + ARMPMU_NR_ATTR_GROUPS +}; + struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; @@ -111,6 +117,8 @@ struct arm_pmu { struct pmu_hw_events __percpu *hw_events; struct list_head entry; struct notifier_block cpu_pm_nb; + /* the attr_groups array must be NULL-terminated */ + const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; }; #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) From 569de9026c2904dab9b335437fe48e318e824c96 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Sep 2016 14:08:27 +0100 Subject: [PATCH 056/100] arm64: perf: move to common attr_group fields By using a common attr_groups array, the common arm_pmu code can set up common files (e.g. cpumask) for us in subsequent patches. 
Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 36 ++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index ea64f0173d61..03a13661bfce 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -523,12 +523,6 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { .attrs = armv8_pmuv3_format_attrs, }; -static const struct attribute_group *armv8_pmuv3_attr_groups[] = { - &armv8_pmuv3_events_attr_group, - &armv8_pmuv3_format_attr_group, - NULL, -}; - /* * Perf Events' indices */ @@ -985,7 +979,10 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_pmuv3"; cpu_pmu->map_event = armv8_pmuv3_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -994,7 +991,10 @@ static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cortex_a53"; cpu_pmu->map_event = armv8_a53_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1003,7 +1003,10 @@ static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cortex_a57"; cpu_pmu->map_event = armv8_a57_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return 
armv8pmu_probe_pmu(cpu_pmu); } @@ -1012,7 +1015,10 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cortex_a72"; cpu_pmu->map_event = armv8_a57_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1021,7 +1027,10 @@ static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_cavium_thunder"; cpu_pmu->map_event = armv8_thunder_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } @@ -1030,7 +1039,10 @@ static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_brcm_vulcan"; cpu_pmu->map_event = armv8_vulcan_map_event; - cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; return armv8pmu_probe_pmu(cpu_pmu); } From 9268c5dafae486de68fbbf3ae079edd00f386724 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Sep 2016 14:08:28 +0100 Subject: [PATCH 057/100] arm: perf: move to common attr_group fields By using a common attr_groups array, the common arm_pmu code can set up common files (e.g. cpumask) for us in subsequent patches. 
Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v7.c | 47 ++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 15063851cd10..b9423491b9d7 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -596,12 +596,6 @@ static struct attribute_group armv7_pmuv1_events_attr_group = { .attrs = armv7_pmuv1_event_attrs, }; -static const struct attribute_group *armv7_pmuv1_attr_groups[] = { - &armv7_pmuv1_events_attr_group, - &armv7_pmu_format_attr_group, - NULL, -}; - ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS); ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS); ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB); @@ -653,12 +647,6 @@ static struct attribute_group armv7_pmuv2_events_attr_group = { .attrs = armv7_pmuv2_event_attrs, }; -static const struct attribute_group *armv7_pmuv2_attr_groups[] = { - &armv7_pmuv2_events_attr_group, - &armv7_pmu_format_attr_group, - NULL, -}; - /* * Perf Events' indices */ @@ -1208,7 +1196,10 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a8"; cpu_pmu->map_event = armv7_a8_map_event; - cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1217,7 +1208,10 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a9"; cpu_pmu->map_event = armv7_a9_map_event; - cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return 
armv7_probe_num_events(cpu_pmu); } @@ -1226,7 +1220,10 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a5"; cpu_pmu->map_event = armv7_a5_map_event; - cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv1_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1236,7 +1233,10 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a15"; cpu_pmu->map_event = armv7_a15_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1246,7 +1246,10 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a7"; cpu_pmu->map_event = armv7_a7_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1256,7 +1259,10 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a12"; cpu_pmu->map_event = armv7_a12_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return armv7_probe_num_events(cpu_pmu); } @@ -1264,7 +1270,10 @@ static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) { int ret = 
armv7_a12_pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a17"; - cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv7_pmuv2_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv7_pmu_format_attr_group; return ret; } From 1589680da6f7df30d8a592eebee16478f3e34a2c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Sep 2016 14:08:29 +0100 Subject: [PATCH 058/100] drivers/perf: arm_pmu: only use common attr_groups Now that the 32-bit and 64-bit perf backends use the common groups directly, remove the fallback and no longer allow the groups array to be overridden. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 1a39899d1392..60c065eb638d 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -549,6 +549,7 @@ static void armpmu_init(struct arm_pmu *armpmu) .stop = armpmu_stop, .read = armpmu_read, .filter_match = armpmu_filter_match, + .attr_groups = armpmu->attr_groups, }; } @@ -1037,8 +1038,6 @@ int arm_pmu_device_probe(struct platform_device *pdev, goto out_free; } - if (!pmu->pmu.attr_groups) - pmu->pmu.attr_groups = pmu->attr_groups; ret = cpu_pmu_init(pmu); if (ret) From 48538b5863d8e8f8d567fc9a1d27a68623e0a0ff Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Sep 2016 14:08:30 +0100 Subject: [PATCH 059/100] drivers/perf: arm_pmu: expose a cpumask in sysfs In systems with heterogeneous CPUs, there are multiple logical CPU PMUs, each of which covers a subset of CPUs in the system. In some cases userspace needs to know which CPUs a given logical PMU covers, so we'd like to expose a cpumask under sysfs, similar to what is done for uncore PMUs. 
Unfortunately, prior to commit 00e727bb389359c8 ("perf stat: Balance opening and reading events"), perf stat only correctly handled a cpumask holding a single CPU, and only when profiling in system-wide mode. In other cases, the presence of a cpumask file could cause perf stat to behave erratically. Thus, exposing a cpumask file would break older perf binaries in cases where they would otherwise work. To avoid this issue while still providing userspace with the information it needs, this patch exposes a differently-named file (cpus) under sysfs. New tools can look for this and operate correctly, while older tools will not be adversely affected by its presence. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 20 ++++++++++++++++++++ include/linux/perf/arm_pmu.h | 1 + 2 files changed, 21 insertions(+) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 60c065eb638d..c36913ad3a09 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -534,6 +534,24 @@ static int armpmu_filter_match(struct perf_event *event) return cpumask_test_cpu(cpu, &armpmu->supported_cpus); } +static ssize_t armpmu_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev)); + return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus); +} + +static DEVICE_ATTR(cpus, S_IRUGO, armpmu_cpumask_show, NULL); + +static struct attribute *armpmu_common_attrs[] = { + &dev_attr_cpus.attr, + NULL, +}; + +static struct attribute_group armpmu_common_attr_group = { + .attrs = armpmu_common_attrs, +}; + static void armpmu_init(struct arm_pmu *armpmu) { atomic_set(&armpmu->active_events, 0); @@ -551,6 +569,8 @@ static void armpmu_init(struct arm_pmu *armpmu) .filter_match = armpmu_filter_match, .attr_groups = armpmu->attr_groups, }; + armpmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] = + &armpmu_common_attr_group; } /* Set at runtime when we know what 
CPU type we are. */ diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 268bc63f1358..dc1f2f30c961 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -78,6 +78,7 @@ struct pmu_hw_events { }; enum armpmu_attr_groups { + ARMPMU_ATTR_GROUP_COMMON, ARMPMU_ATTR_GROUP_EVENTS, ARMPMU_ATTR_GROUP_FORMATS, ARMPMU_NR_ATTR_GROUPS From 16a82f06c40301045e4c05297ea93b85595dbbc4 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 1 Sep 2016 14:54:52 +0800 Subject: [PATCH 060/100] of/numa: remove a duplicated pr_debug information This information will be printed in the subfunction numa_add_memblk. They are not the same, but very similar. Signed-off-by: Zhen Lei Acked-by: Rob Herring Signed-off-by: Will Deacon --- drivers/of/of_numa.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index ed5a097f0801..fb71b4ef1eeb 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -88,10 +88,6 @@ static int __init of_numa_parse_memory_nodes(void) break; } - pr_debug("NUMA: base = %llx len = %llx, node = %u\n", - rsrc.start, rsrc.end - rsrc.start + 1, nid); - - r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); if (r) break; From 84b14256c18c967afd3cf4ee2df09535587154e0 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 1 Sep 2016 14:54:53 +0800 Subject: [PATCH 061/100] of/numa: fix a memory@ node can only contains one memory block For a normal memory@ devicetree node, its reg property can contain more than one memory block. Because we don't know how many memory blocks may be contained, we try from index=0 and increment by 1 until an error is returned (the end).
Signed-off-by: Zhen Lei Acked-by: Rob Herring Signed-off-by: Will Deacon --- drivers/of/of_numa.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index fb71b4ef1eeb..7b3fbdc0b278 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -63,13 +63,9 @@ static int __init of_numa_parse_memory_nodes(void) struct device_node *np = NULL; struct resource rsrc; u32 nid; - int r = 0; - - for (;;) { - np = of_find_node_by_type(np, "memory"); - if (!np) - break; + int i, r; + for_each_node_by_type(np, "memory") { r = of_property_read_u32(np, "numa-node-id", &nid); if (r == -EINVAL) /* @@ -78,23 +74,18 @@ static int __init of_numa_parse_memory_nodes(void) * "numa-node-id" property */ continue; - else if (r) - /* some other error */ - break; - r = of_address_to_resource(np, 0, &rsrc); - if (r) { - pr_err("NUMA: bad reg property in memory node\n"); - break; + for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) + r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); + + if (!i || r) { + of_node_put(np); + pr_err("NUMA: bad property in memory node\n"); + return r ? : -EINVAL; } - - r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); - if (r) - break; } - of_node_put(np); - return r; + return 0; } static int __init of_numa_parse_distance_map_v1(struct device_node *map) From 571a588fec2de4efd6043805ab0b017c67b63a4d Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 1 Sep 2016 14:54:54 +0800 Subject: [PATCH 062/100] of/numa: add nid check for memory block If the numa-node-id configured in a memory@ devicetree node is greater than or equal to MAX_NUMNODES, we should report a warning. We already do this for the cpus and distance-map dt nodes; this patch makes the memory nodes consistent with them.
Acked-by: Rob Herring Signed-off-by: Zhen Lei Signed-off-by: Will Deacon --- drivers/of/of_numa.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 7b3fbdc0b278..c1bd62ce3144 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -75,6 +75,11 @@ static int __init of_numa_parse_memory_nodes(void) */ continue; + if (nid >= MAX_NUMNODES) { + pr_warn("NUMA: Node id %u exceeds maximum value\n", nid); + r = -EINVAL; + } + for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); From 9787ed6e5cee7a62320f3014eb5e7b373502c292 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 1 Sep 2016 14:54:55 +0800 Subject: [PATCH 063/100] of/numa: remove a duplicated warning This warning has been printed in of_numa_parse_cpu_nodes before. Signed-off-by: Zhen Lei Acked-by: Rob Herring Signed-off-by: Will Deacon --- drivers/of/of_numa.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index c1bd62ce3144..625b0573e5f8 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -179,13 +179,8 @@ int of_node_to_nid(struct device_node *device) np->name); of_node_put(np); - if (!r) { - if (nid >= MAX_NUMNODES) - pr_warn("NUMA: Node id %u exceeds maximum value\n", - nid); - else - return nid; - } + if (!r) + return nid; return NUMA_NO_NODE; } From 794224ea565c439ca624a3760ac220928463ea17 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 1 Sep 2016 14:54:56 +0800 Subject: [PATCH 064/100] arm64/numa: avoid inconsistent information to be printed numa_init may return error because of numa configuration error. So "No NUMA configuration found" is inaccurate. In fact, specific configuration error information should be immediately printed by the testing branch. 
Signed-off-by: Zhen Lei Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi_numa.c | 4 +++- arch/arm64/mm/numa.c | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index f85149cc7c71..f01fab637dab 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -105,8 +105,10 @@ int __init arm64_acpi_numa_init(void) int ret; ret = acpi_numa_init(); - if (ret) + if (ret) { + pr_info("Failed to initialise from firmware\n"); return ret; + } return srat_disabled() ? -EINVAL : 0; } diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 5bb15eab6f00..d97c6e246fb2 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -335,8 +335,10 @@ static int __init numa_init(int (*init_func)(void)) if (ret < 0) return ret; - if (nodes_empty(numa_nodes_parsed)) + if (nodes_empty(numa_nodes_parsed)) { + pr_info("No NUMA configuration found\n"); return -EINVAL; + } ret = numa_register_nodes(); if (ret < 0) @@ -367,8 +369,6 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ - else - pr_info("No NUMA configuration found\n"); pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n", 0LLU, PFN_PHYS(max_pfn) - 1); From 837dae1b4308f2dfc9c5bc76c367553670198c63 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 1 Sep 2016 14:54:57 +0800 Subject: [PATCH 065/100] of_numa: Use of_get_next_parent to simplify code Use of_get_next_parent() instead of open-code. 
Signed-off-by: Kefeng Wang Acked-by: Rob Herring Signed-off-by: Will Deacon --- drivers/of/of_numa.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 625b0573e5f8..0d7459bd31f0 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -158,8 +158,6 @@ int of_node_to_nid(struct device_node *device) np = of_node_get(device); while (np) { - struct device_node *parent; - r = of_property_read_u32(np, "numa-node-id", &nid); /* * -EINVAL indicates the property was not found, and @@ -170,9 +168,7 @@ int of_node_to_nid(struct device_node *device) if (r != -EINVAL) break; - parent = of_get_parent(np); - of_node_put(np); - np = parent; + np = of_get_next_parent(np); } if (np && r) pr_warn("NUMA: Invalid \"numa-node-id\" property in node %s\n", From ad02180515d4856702bc656f754e9df83ab0345b Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 1 Sep 2016 14:54:58 +0800 Subject: [PATCH 066/100] of_numa: Use pr_fmt() Use pr_fmt to prefix kernel output. Signed-off-by: Kefeng Wang Acked-by: Rob Herring Signed-off-by: Will Deacon --- drivers/of/of_numa.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 0d7459bd31f0..f63d4b0deff0 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -16,6 +16,8 @@ * along with this program. If not, see . 
*/ +#define pr_fmt(fmt) "OF: NUMA: " fmt + #include #include #include @@ -49,10 +51,9 @@ static void __init of_numa_parse_cpu_nodes(void) if (r) continue; - pr_debug("NUMA: CPU on %u\n", nid); + pr_debug("CPU on %u\n", nid); if (nid >= MAX_NUMNODES) - pr_warn("NUMA: Node id %u exceeds maximum value\n", - nid); + pr_warn("Node id %u exceeds maximum value\n", nid); else node_set(nid, numa_nodes_parsed); } @@ -76,7 +77,7 @@ static int __init of_numa_parse_memory_nodes(void) continue; if (nid >= MAX_NUMNODES) { - pr_warn("NUMA: Node id %u exceeds maximum value\n", nid); + pr_warn("Node id %u exceeds maximum value\n", nid); r = -EINVAL; } @@ -85,7 +86,7 @@ static int __init of_numa_parse_memory_nodes(void) if (!i || r) { of_node_put(np); - pr_err("NUMA: bad property in memory node\n"); + pr_err("bad property in memory node\n"); return r ? : -EINVAL; } } @@ -99,17 +100,17 @@ static int __init of_numa_parse_distance_map_v1(struct device_node *map) int entry_count; int i; - pr_info("NUMA: parsing numa-distance-map-v1\n"); + pr_info("parsing numa-distance-map-v1\n"); matrix = of_get_property(map, "distance-matrix", NULL); if (!matrix) { - pr_err("NUMA: No distance-matrix property in distance-map\n"); + pr_err("No distance-matrix property in distance-map\n"); return -EINVAL; } entry_count = of_property_count_u32_elems(map, "distance-matrix"); if (entry_count <= 0) { - pr_err("NUMA: Invalid distance-matrix\n"); + pr_err("Invalid distance-matrix\n"); return -EINVAL; } @@ -124,7 +125,7 @@ static int __init of_numa_parse_distance_map_v1(struct device_node *map) matrix++; numa_set_distance(nodea, nodeb, distance); - pr_debug("NUMA: distance[node%d -> node%d] = %d\n", + pr_debug("distance[node%d -> node%d] = %d\n", nodea, nodeb, distance); /* Set default distance of node B->A same as A->B */ @@ -171,7 +172,7 @@ int of_node_to_nid(struct device_node *device) np = of_get_next_parent(np); } if (np && r) - pr_warn("NUMA: Invalid \"numa-node-id\" property in node %s\n", + 
pr_warn("Invalid \"numa-node-id\" property in node %s\n", np->name); of_node_put(np); From f11c7bacd5941fcfc5e9dd3bb0362e8a2eec4722 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 1 Sep 2016 14:54:59 +0800 Subject: [PATCH 067/100] arm64: numa: Use pr_fmt() Use pr_fmt to prefix kernel output, and remove duplicated msg of NUMA turned off. Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/mm/numa.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index d97c6e246fb2..0e75b537de0c 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -17,6 +17,8 @@ * along with this program. If not, see . */ +#define pr_fmt(fmt) "NUMA: " fmt + #include #include #include @@ -38,10 +40,9 @@ static __init int numa_parse_early_param(char *opt) { if (!opt) return -EINVAL; - if (!strncmp(opt, "off", 3)) { - pr_info("%s\n", "NUMA turned off"); + if (!strncmp(opt, "off", 3)) numa_off = true; - } + return 0; } early_param("numa", numa_parse_early_param); @@ -110,7 +111,7 @@ static void __init setup_node_to_cpumask_map(void) set_cpu_numa_node(cpu, NUMA_NO_NODE); /* cpumask_of_node() will now work */ - pr_debug("NUMA: Node to cpumask map for %d nodes\n", nr_node_ids); + pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); } /* @@ -145,13 +146,13 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) ret = memblock_set_node(start, (end - start), &memblock.memory, nid); if (ret < 0) { - pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n", + pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n", start, (end - 1), nid); return ret; } node_set(nid, numa_nodes_parsed); - pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n", + pr_info("Adding memblock [0x%llx - 0x%llx] on node %d\n", start, (end - 1), nid); return ret; } @@ -166,19 +167,18 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) void *nd; 
int tnid; - pr_info("NUMA: Initmem setup node %d [mem %#010Lx-%#010Lx]\n", - nid, start_pfn << PAGE_SHIFT, - (end_pfn << PAGE_SHIFT) - 1); + pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", + nid, start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); nd = __va(nd_pa); /* report and initialize */ - pr_info("NUMA: NODE_DATA [mem %#010Lx-%#010Lx]\n", + pr_info("NODE_DATA [mem %#010Lx-%#010Lx]\n", nd_pa, nd_pa + nd_size - 1); tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); if (tnid != nid) - pr_info("NUMA: NODE_DATA(%d) on node %d\n", nid, tnid); + pr_info("NODE_DATA(%d) on node %d\n", nid, tnid); node_data[nid] = nd; memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); @@ -235,8 +235,7 @@ static int __init numa_alloc_distance(void) numa_distance[i * numa_distance_cnt + j] = i == j ? LOCAL_DISTANCE : REMOTE_DISTANCE; - pr_debug("NUMA: Initialized distance table, cnt=%d\n", - numa_distance_cnt); + pr_debug("Initialized distance table, cnt=%d\n", numa_distance_cnt); return 0; } @@ -257,20 +256,20 @@ static int __init numa_alloc_distance(void) void __init numa_set_distance(int from, int to, int distance) { if (!numa_distance) { - pr_warn_once("NUMA: Warning: distance table not allocated yet\n"); + pr_warn_once("Warning: distance table not allocated yet\n"); return; } if (from >= numa_distance_cnt || to >= numa_distance_cnt || from < 0 || to < 0) { - pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n", + pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n", from, to, distance); return; } if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { - pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", from, to, distance); return; } @@ -297,7 +296,7 @@ static int __init numa_register_nodes(void) /* Check that valid nid is 
set to memblks */ for_each_memblock(memory, mblk) if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) { - pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", + pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", mblk->nid, mblk->base, mblk->base + mblk->size - 1); return -EINVAL; @@ -369,8 +368,8 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ - pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n", - 0LLU, PFN_PHYS(max_pfn) - 1); + pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", + 0LLU, PFN_PHYS(max_pfn) - 1); for_each_memblock(memory, mblk) { ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); From 7af3a0a992524ffddc342cd1481cc4dcb3f1da71 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 1 Sep 2016 14:55:00 +0800 Subject: [PATCH 068/100] arm64/numa: support HAVE_SETUP_PER_CPU_AREA To make each percpu area allocated from its local numa node. Without this patch, all percpu areas will be allocated from the node which cpu0 belongs to. 
Signed-off-by: Zhen Lei Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 8 +++++++ arch/arm64/mm/numa.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e072033b27b2..0e11c8a2aec1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -600,6 +600,14 @@ config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA +config HAVE_SETUP_PER_CPU_AREA + def_bool y + depends on NUMA + +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + depends on NUMA + source kernel/Kconfig.preempt source kernel/Kconfig.hz diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 0e75b537de0c..087064d5dcc1 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -26,6 +26,7 @@ #include #include +#include struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); @@ -131,6 +132,57 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid) cpu_to_node_map[cpu] = nid; } +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +static int __init early_cpu_to_node(int cpu) +{ + return cpu_to_node_map[cpu]; +} + +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + return node_distance(from, to); +} + +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + int nid = early_cpu_to_node(cpu); + + return memblock_virt_alloc_try_nid(size, align, + __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + memblock_free_early(__pa(ptr), size); +} + +void __init setup_per_cpu_areas(void) +{ + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. 
+ */ + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, + pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); + if (rc < 0) + panic("Failed to initialize percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +} +#endif + /** * numa_add_memblk - Set node id to memblk * @nid: NUMA node ID of the new memblk From df7ffa34cc0c06bfa7206732df78725ff34633ee Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 1 Sep 2016 14:55:03 +0800 Subject: [PATCH 069/100] arm64/numa: remove some useless code When the deleted code is executed, only the bit for cpu0 is set in cpu_possible_mask, so only set_cpu_numa_node(0, NUMA_NO_NODE); is executed, and map_cpu_to_node(0, 0) is called soon afterwards. So this code can be safely removed. Signed-off-by: Zhen Lei Signed-off-by: Will Deacon --- arch/arm64/mm/numa.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 087064d5dcc1..0a15f010b64a 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -95,7 +95,6 @@ void numa_clear_node(unsigned int cpu) */ static void __init setup_node_to_cpumask_map(void) { - unsigned int cpu; int node; /* setup nr_node_ids if not done yet */ @@ -108,9 +107,6 @@ static void __init setup_node_to_cpumask_map(void) cpumask_clear(node_to_cpumask_map[node]); } - for_each_possible_cpu(cpu) - set_cpu_numa_node(cpu, NUMA_NO_NODE); - /* cpumask_of_node() will now work */ pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); } From 7ba5f605f3a0d9495aad539eeb8346d726dfc183 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 1 Sep 2016 14:55:04 +0800 Subject: [PATCH 070/100] arm64/numa: remove the limitation that cpu0 must bind to node0 1. Remove the old binding code. 2. Read the nid of cpu0 from dts. 3. Fall back to node 0 for cpu0 when numa=off is set in bootargs.
Signed-off-by: Zhen Lei Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 1 + arch/arm64/mm/numa.c | 16 ++++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index c3c08368a685..8b048e6ec34a 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -624,6 +624,7 @@ static void __init of_parse_and_init_cpus(void) } bootcpu_valid = true; + early_map_cpu_to_node(0, of_node_to_nid(dn)); /* * cpu_logical_map has already been diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 0a15f010b64a..778a985c8a70 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -116,16 +116,24 @@ static void __init setup_node_to_cpumask_map(void) */ void numa_store_cpu_info(unsigned int cpu) { - map_cpu_to_node(cpu, numa_off ? 0 : cpu_to_node_map[cpu]); + map_cpu_to_node(cpu, cpu_to_node_map[cpu]); } void __init early_map_cpu_to_node(unsigned int cpu, int nid) { /* fallback to node 0 */ - if (nid < 0 || nid >= MAX_NUMNODES) + if (nid < 0 || nid >= MAX_NUMNODES || numa_off) nid = 0; cpu_to_node_map[cpu] = nid; + + /* + * We should set the numa node of cpu0 as soon as possible, because it + * has already been set up online before. cpu_to_node(0) will soon be + * called. + */ + if (!cpu) + set_cpu_numa_node(cpu, nid); } #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA @@ -393,10 +401,6 @@ static int __init numa_init(int (*init_func)(void)) setup_node_to_cpumask_map(); - /* init boot processor */ - cpu_to_node_map[0] = 0; - map_cpu_to_node(0, 0); - return 0; } From ee7bc638f140e0586941002ffb82765743dabb97 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 9 Sep 2016 14:07:08 +0100 Subject: [PATCH 071/100] arm64: Set the safe value for L1 icache policy Right now we use 0 as the safe value for CTR_EL0:L1Ip, which is not defined at the moment. The safer value for the L1Ip should be the weakest of the policies, which happens to be AIVIVT. While at it, fix the comment about safe_val. 
Cc: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/kernel/cpufeature.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 23a76dc5a6cf..bd950b00a575 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -65,7 +65,7 @@ struct arm64_ftr_bits { enum ftr_type type; u8 shift; u8 width; - s64 safe_val; /* safe value for discrete features */ + s64 safe_val; /* safe value for FTR_EXACT features */ }; /* diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9128ced913e7..acd374e2d5d8 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -150,9 +150,10 @@ static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ /* * Linux can handle differing I-cache policies. Userspace JITs will - * make use of *minLine + * make use of *minLine. + * If we have differing I-cache policies, report it as the weakest - AIVIVT. */ - ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, From 89ba26458b72422e0a1d85eb729a15220b204458 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 9 Sep 2016 14:07:09 +0100 Subject: [PATCH 072/100] arm64: Use consistent naming for errata handling This is a cosmetic change to rename the functions dealing with the errata work arounds to be more consistent with their naming. 1) check_local_cpu_errata() => update_cpu_errata_workarounds() check_local_cpu_errata() actually updates the system's errata work arounds. So rename it to reflect the same. 
2) verify_local_cpu_errata() => verify_local_cpu_errata_workarounds() Use errata_workarounds instead of _errata. Cc: Mark Rutland Cc: Catalin Marinas Acked-by: Andre Przywara Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 4 ++-- arch/arm64/kernel/cpu_errata.c | 4 ++-- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/kernel/cpuinfo.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index bd950b00a575..ddea66642ce1 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -202,10 +202,10 @@ void __init setup_cpu_features(void); void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps); -void check_local_cpu_errata(void); +void update_cpu_errata_workarounds(void); void __init enable_errata_workarounds(void); -void verify_local_cpu_errata(void); +void verify_local_cpu_errata_workarounds(void); void verify_local_cpu_capabilities(void); u64 read_system_reg(u32 id); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 82b0fc2e637b..5836b3df0094 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -116,7 +116,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { * and the related information is freed soon after. If the new CPU requires * an errata not detected at boot, fail this CPU. 
*/ -void verify_local_cpu_errata(void) +void verify_local_cpu_errata_workarounds(void) { const struct arm64_cpu_capabilities *caps = arm64_errata; @@ -131,7 +131,7 @@ void verify_local_cpu_errata(void) } } -void check_local_cpu_errata(void) +void update_cpu_errata_workarounds(void) { update_cpu_capabilities(arm64_errata, "enabling workaround for"); } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index acd374e2d5d8..c74b8215991f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1021,7 +1021,7 @@ void verify_local_cpu_capabilities(void) if (!sys_caps_initialised) return; - verify_local_cpu_errata(); + verify_local_cpu_errata_workarounds(); verify_local_cpu_features(arm64_features); verify_local_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index ed1b84fe6925..4fa7b73e27e4 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -364,7 +364,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) cpuinfo_detect_icache_policy(info); - check_local_cpu_errata(); + update_cpu_errata_workarounds(); } void cpuinfo_store_cpu(void) From c47a1900ad710fd2c97127e2ba19da1df79cf733 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 9 Sep 2016 14:07:10 +0100 Subject: [PATCH 073/100] arm64: Rearrange CPU errata workaround checks Right now we run through the work around checks on a CPU from __cpuinfo_store_cpu. There are some problems with that: 1) We initialise the system wide CPU feature registers only after the Boot CPU updates its cpuinfo. Now, if a work around depends on the variance of a CPU ID feature (e.g, check for Cache Line size mismatch), we have no way of performing it cleanly for the boot CPU. 2) It is out of place, invoked from __cpuinfo_store_cpu() in cpuinfo.c. It is not an obvious place for that. This patch rearranges the CPU specific capability(aka work around) checks. 
1) At the moment we use verify_local_cpu_capabilities() to check if a new CPU has all the system advertised features. Use this for the secondary CPUs to perform the work around check. For that we rename verify_local_cpu_capabilities() => check_local_cpu_capabilities() which: If the system wide capabilities haven't been initialised (i.e, the CPU is activated at the boot), update the system wide detected work arounds. Otherwise (i.e a CPU hotplugged in later) verify that this CPU conforms to the system wide capabilities. 2) Boot CPU updates the work arounds from smp_prepare_boot_cpu() after we have initialised the system wide CPU feature values. Cc: Mark Rutland Cc: Andre Przywara Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 4 ++-- arch/arm64/kernel/cpufeature.c | 32 +++++++++++++++++++---------- arch/arm64/kernel/cpuinfo.c | 2 -- arch/arm64/kernel/smp.c | 8 +++++++- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ddea66642ce1..6806b86ab791 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -202,11 +202,11 @@ void __init setup_cpu_features(void); void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps); +void check_local_cpu_capabilities(void); + void update_cpu_errata_workarounds(void); void __init enable_errata_workarounds(void); - void verify_local_cpu_errata_workarounds(void); -void verify_local_cpu_capabilities(void); u64 read_system_reg(u32 id); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c74b8215991f..d577f263cc4a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1009,18 +1009,8 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) * 
cannot do anything to fix it up and could cause unexpected failures. So * we park the CPU. */ -void verify_local_cpu_capabilities(void) +static void verify_local_cpu_capabilities(void) { - - check_early_cpu_features(); - - /* - * If we haven't computed the system capabilities, there is nothing - * to verify. - */ - if (!sys_caps_initialised) - return; - verify_local_cpu_errata_workarounds(); verify_local_cpu_features(arm64_features); verify_local_elf_hwcaps(arm64_elf_hwcaps); @@ -1028,6 +1018,26 @@ void verify_local_cpu_capabilities(void) verify_local_elf_hwcaps(compat_elf_hwcaps); } +void check_local_cpu_capabilities(void) +{ + /* + * All secondary CPUs should conform to the early CPU features + * in use by the kernel based on boot CPU. + */ + check_early_cpu_features(); + + /* + * If we haven't finalised the system capabilities, this CPU gets + * a chance to update the errata work arounds. + * Otherwise, this CPU should verify that it has all the system + * advertised capabilities. + */ + if (!sys_caps_initialised) + update_cpu_errata_workarounds(); + else + verify_local_cpu_capabilities(); +} + static void __init setup_feature_capabilities(void) { update_cpu_capabilities(arm64_features, "detected feature:"); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 4fa7b73e27e4..b3d5b3e8fbcb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -363,8 +363,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) } cpuinfo_detect_icache_policy(info); - - update_cpu_errata_workarounds(); } void cpuinfo_store_cpu(void) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 8b048e6ec34a..a8e64095cd69 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -239,7 +239,7 @@ asmlinkage void secondary_start_kernel(void) * this CPU ticks all of those. If it doesn't, the CPU will * fail to come online. 
*/ - verify_local_cpu_capabilities(); + check_local_cpu_capabilities(); if (cpu_ops[cpu]->cpu_postboot) cpu_ops[cpu]->cpu_postboot(); @@ -444,6 +444,12 @@ void __init smp_prepare_boot_cpu(void) jump_label_init(); cpuinfo_store_boot_cpu(); save_boot_cpu_run_el(); + /* + * Run the errata work around checks on the boot CPU, once we have + * initialised the cpu feature infrastructure from + * cpuinfo_store_boot_cpu() above. + */ + update_cpu_errata_workarounds(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) From baa763b565386b1383b87b6fb76e72db84a0ff16 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 9 Sep 2016 14:07:11 +0100 Subject: [PATCH 074/100] arm64: alternative: Disallow patching instructions using literals The alternative code patching doesn't check if the replaced instruction uses a pc relative literal. This could cause silent corruption in the instruction stream as the instruction will be executed from a different address than what it was compiled for. Catch all such cases. 
Cc: Marc Zyngier Cc: Andre Przywara Cc: Mark Rutland Cc: Catalin Marinas Suggested-by: Will Deacon Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/alternative.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 4434dabde898..992918dfbe2d 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -79,6 +79,12 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) offset = target - (unsigned long)insnptr; insn = aarch64_set_branch_offset(insn, offset); } + } else if (aarch64_insn_uses_literal(insn)) { + /* + * Disallow patching unhandled instructions using PC relative + * literal addresses + */ + BUG(); } return insn; From 46084bc253e1acdd6c47846e91fc20f4ab4f1fec Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 9 Sep 2016 14:07:12 +0100 Subject: [PATCH 075/100] arm64: insn: Add helpers for adrp offsets Adds helpers for decoding/encoding the PC relative addresses for adrp. This will be used for handling dynamic patching of 'adrp' instructions in alternative code patching. 
Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Cc: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/insn.h | 11 ++++++++++- arch/arm64/kernel/insn.c | 13 +++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 1dbaa901d7e5..bc853663dd51 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -246,7 +246,8 @@ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \ { return (val); } -__AARCH64_INSN_FUNCS(adr_adrp, 0x1F000000, 0x10000000) +__AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000) +__AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) @@ -318,6 +319,11 @@ __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) bool aarch64_insn_is_nop(u32 insn); bool aarch64_insn_is_branch_imm(u32 insn); +static inline bool aarch64_insn_is_adr_adrp(u32 insn) +{ + return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn); +} + int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); @@ -398,6 +404,9 @@ int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); +s32 aarch64_insn_adrp_get_offset(u32 insn); +u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset); + bool aarch32_insn_is_wide(u32 insn); #define A32_RN_OFFSET 16 diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 178488fc775f..6f2ac4fc66ca 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -1202,6 +1202,19 @@ u32 aarch64_set_branch_offset(u32 
insn, s32 offset) BUG(); } +s32 aarch64_insn_adrp_get_offset(u32 insn) +{ + BUG_ON(!aarch64_insn_is_adrp(insn)); + return aarch64_insn_decode_immediate(AARCH64_INSN_IMM_ADR, insn) << 12; +} + +u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset) +{ + BUG_ON(!aarch64_insn_is_adrp(insn)); + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn, + offset >> 12); +} + /* * Extract the Op/CR data from a msr/mrs instruction. */ From c831b2ae257853ecd36ea4f7d788bf0665e4cf89 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 9 Sep 2016 14:07:13 +0100 Subject: [PATCH 076/100] arm64: alternative: Add support for patching adrp instructions adrp uses PC-relative address offset to a page (of 4K size) of a symbol. If it appears in an alternative code patched in, we should adjust the offset to reflect the address where it will be run from. This patch adds support for fixing the offset for adrp instructions. Cc: Will Deacon Cc: Marc Zyngier Cc: Andre Przywara Cc: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/alternative.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 992918dfbe2d..06d650f61da7 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -58,6 +58,8 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) BUG(); } +#define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) + static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) { u32 insn; @@ -79,6 +81,19 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) offset = target - (unsigned long)insnptr; insn = aarch64_set_branch_offset(insn, offset); } + } else if (aarch64_insn_is_adrp(insn)) { + s32 orig_offset, new_offset; + unsigned long target; + + /* + * If we're replacing an adrp instruction, which uses PC-relative + * immediate 
addressing, adjust the offset to reflect the new + * PC. adrp operates on 4K aligned addresses. + */ + orig_offset = aarch64_insn_adrp_get_offset(insn); + target = align_down(altinsnptr, SZ_4K) + orig_offset; + new_offset = target - align_down(insnptr, SZ_4K); + insn = aarch64_insn_adrp_set_offset(insn, new_offset); } else if (aarch64_insn_uses_literal(insn)) { /* * Disallow patching unhandled instructions using PC relative From 072f0a633838aca13b5a8b211eb64f5c445cfd7c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 9 Sep 2016 14:07:14 +0100 Subject: [PATCH 077/100] arm64: Introduce raw_{d,i}cache_line_size On systems with mismatched i/d cache min line sizes, we need to use the smallest size possible across all CPUs. This will be done by fetching the system wide safe value from CPU feature infrastructure. However, some special users (e.g. kexec, hibernate) need the line size of the current CPU (rather than the system wide value), either when the system wide feature may not be accessible or when it is guaranteed that the caller executes without migration. Provide another helper which will fetch the cache line size on the current CPU. Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Acked-by: James Morse Reviewed-by: Geoff Levand Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 24 ++++++++++++++++++++---- arch/arm64/kernel/hibernate-asm.S | 2 +- arch/arm64/kernel/relocate_kernel.S | 2 +- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index d5025c69ca81..a4bb3f52d9ef 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -218,9 +218,10 @@ lr .req x30 // link register .endm /* - * dcache_line_size - get the minimum D-cache line size from the CTR register. + * raw_dcache_line_size - get the minimum D-cache line size on this CPU + * from the CTR register.
*/ - .macro dcache_line_size, reg, tmp + .macro raw_dcache_line_size, reg, tmp mrs \tmp, ctr_el0 // read CTR ubfm \tmp, \tmp, #16, #19 // cache line size encoding mov \reg, #4 // bytes per word @@ -228,15 +229,30 @@ lr .req x30 // link register .endm /* - * icache_line_size - get the minimum I-cache line size from the CTR register. + * dcache_line_size - get the safe D-cache line size across all CPUs */ - .macro icache_line_size, reg, tmp + .macro dcache_line_size, reg, tmp + raw_dcache_line_size \reg, \tmp + .endm + +/* + * raw_icache_line_size - get the minimum I-cache line size on this CPU + * from the CTR register. + */ + .macro raw_icache_line_size, reg, tmp mrs \tmp, ctr_el0 // read CTR and \tmp, \tmp, #0xf // cache line size encoding mov \reg, #4 // bytes per word lsl \reg, \reg, \tmp // actual cache line size .endm +/* + * icache_line_size - get the safe I-cache line size across all CPUs + */ + .macro icache_line_size, reg, tmp + raw_icache_line_size \reg, \tmp + .endm + /* * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map */ diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 7734f3e7a1be..e56d848b6466 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -96,7 +96,7 @@ ENTRY(swsusp_arch_suspend_exit) add x1, x10, #PAGE_SIZE /* Clean the copied page to PoU - based on flush_icache_range() */ - dcache_line_size x2, x3 + raw_dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x10, x3 2: dc cvau, x4 /* clean D line / unified line */ diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 51b73cdde287..ce704a4aeadd 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -34,7 +34,7 @@ ENTRY(arm64_relocate_new_kernel) /* Setup the list loop variables. 
*/ mov x17, x1 /* x17 = kimage_start */ mov x16, x0 /* x16 = kimage_head */ - dcache_line_size x15, x0 /* x15 = dcache line size */ + raw_dcache_line_size x15, x0 /* x15 = dcache line size */ mov x14, xzr /* x14 = entry ptr */ mov x13, xzr /* x13 = copy dest */ From 9dbd5bb25c56e35e6b4c34d968689a1ded850924 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 9 Sep 2016 14:07:15 +0100 Subject: [PATCH 078/100] arm64: Refactor sysinstr exception handling Right now we trap some of the user space data cache operations based on a few Errata (ARM 819472, 826319, 827319 and 824069). We need to trap userspace access to CTR_EL0, if we detect mismatched cache line size. Since both these traps share the EC, refactor the handler a little bit to make it a bit more reader friendly. Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Acked-by: Andre Przywara Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/esr.h | 76 +++++++++++++++++++++++++++++++----- arch/arm64/kernel/traps.c | 73 ++++++++++++++++++++++------------ 2 files changed, 114 insertions(+), 35 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index f772e15c4766..9875b326a73e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -78,6 +78,23 @@ #define ESR_ELx_IL (UL(1) << 25) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) + +/* ISS field definitions shared by different classes */ +#define ESR_ELx_WNR (UL(1) << 6) + +/* Shared ISS field definitions for Data/Instruction aborts */ +#define ESR_ELx_EA (UL(1) << 9) +#define ESR_ELx_S1PTW (UL(1) << 7) + +/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */ +#define ESR_ELx_FSC (0x3F) +#define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_ACCESS (0x08) +#define ESR_ELx_FSC_FAULT (0x04) +#define ESR_ELx_FSC_PERM (0x0C) + +/* ISS field definitions for Data Aborts */ #define ESR_ELx_ISV (UL(1) << 24) #define 
ESR_ELx_SAS_SHIFT (22) #define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT) @@ -86,16 +103,9 @@ #define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT) #define ESR_ELx_SF (UL(1) << 15) #define ESR_ELx_AR (UL(1) << 14) -#define ESR_ELx_EA (UL(1) << 9) #define ESR_ELx_CM (UL(1) << 8) -#define ESR_ELx_S1PTW (UL(1) << 7) -#define ESR_ELx_WNR (UL(1) << 6) -#define ESR_ELx_FSC (0x3F) -#define ESR_ELx_FSC_TYPE (0x3C) -#define ESR_ELx_FSC_EXTABT (0x10) -#define ESR_ELx_FSC_ACCESS (0x08) -#define ESR_ELx_FSC_FAULT (0x04) -#define ESR_ELx_FSC_PERM (0x0C) + +/* ISS field definitions for exceptions taken in to Hyp */ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) @@ -109,6 +119,54 @@ ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \ ((imm) & 0xffff)) +/* ISS field definitions for System instruction traps */ +#define ESR_ELx_SYS64_ISS_RES0_SHIFT 22 +#define ESR_ELx_SYS64_ISS_RES0_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_RES0_SHIFT) +#define ESR_ELx_SYS64_ISS_DIR_MASK 0x1 +#define ESR_ELx_SYS64_ISS_DIR_READ 0x1 +#define ESR_ELx_SYS64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_SYS64_ISS_RT_SHIFT 5 +#define ESR_ELx_SYS64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_SYS64_ISS_RT_SHIFT) +#define ESR_ELx_SYS64_ISS_CRM_SHIFT 1 +#define ESR_ELx_SYS64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRM_SHIFT) +#define ESR_ELx_SYS64_ISS_CRN_SHIFT 10 +#define ESR_ELx_SYS64_ISS_CRN_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRN_SHIFT) +#define ESR_ELx_SYS64_ISS_OP1_SHIFT 14 +#define ESR_ELx_SYS64_ISS_OP1_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP1_SHIFT) +#define ESR_ELx_SYS64_ISS_OP2_SHIFT 17 +#define ESR_ELx_SYS64_ISS_OP2_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP2_SHIFT) +#define ESR_ELx_SYS64_ISS_OP0_SHIFT 20 +#define ESR_ELx_SYS64_ISS_OP0_MASK (UL(0x3) << ESR_ELx_SYS64_ISS_OP0_SHIFT) +#define ESR_ELx_SYS64_ISS_SYS_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + 
ESR_ELx_SYS64_ISS_CRM_MASK) +#define ESR_ELx_SYS64_ISS_SYS_VAL(op0, op1, op2, crn, crm) \ + (((op0) << ESR_ELx_SYS64_ISS_OP0_SHIFT) | \ + ((op1) << ESR_ELx_SYS64_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT)) +/* + * User space cache operations have the following sysreg encoding + * in System instructions. + * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0) + */ +#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10 +#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5 + +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ + ESR_ELx_SYS64_ISS_DIR_WRITE) #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index e04f83873af7..224f64eddd93 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -447,36 +447,29 @@ void cpu_enable_cache_maint_trap(void *__unused) : "=r" (res) \ : "r" (address), "i" (-EFAULT) ) -asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { unsigned long address; - int ret; + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; + int ret = 0; - /* if this is a write with: Op0=1, Op2=1, Op1=3, CRn=7 */ - if ((esr & 0x01fffc01) == 0x0012dc00) { - int rt = (esr >> 5) & 0x1f; - int crm = (esr >> 1) & 0x0f; + address = (rt == 31) ? 0 : regs->regs[rt]; - address = (rt == 31) ? 
0 : regs->regs[rt]; - - switch (crm) { - case 11: /* DC CVAU, gets promoted */ - __user_cache_maint("dc civac", address, ret); - break; - case 10: /* DC CVAC, gets promoted */ - __user_cache_maint("dc civac", address, ret); - break; - case 14: /* DC CIVAC */ - __user_cache_maint("dc civac", address, ret); - break; - case 5: /* IC IVAU */ - __user_cache_maint("ic ivau", address, ret); - break; - default: - force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); - return; - } - } else { + switch (crm) { + case ESR_ELx_SYS64_ISS_CRM_DC_CVAU: /* DC CVAU, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC: /* DC CIVAC */ + __user_cache_maint("dc civac", address, ret); + break; + case ESR_ELx_SYS64_ISS_CRM_IC_IVAU: /* IC IVAU */ + __user_cache_maint("ic ivau", address, ret); + break; + default: force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); return; } @@ -487,6 +480,34 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) regs->pc += 4; } +struct sys64_hook { + unsigned int esr_mask; + unsigned int esr_val; + void (*handler)(unsigned int esr, struct pt_regs *regs); +}; + +static struct sys64_hook sys64_hooks[] = { + { + .esr_mask = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL, + .handler = user_cache_maint_handler, + }, + {}, +}; + +asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +{ + struct sys64_hook *hook; + + for (hook = sys64_hooks; hook->handler; hook++) + if ((hook->esr_mask & esr) == hook->esr_val) { + hook->handler(esr, regs); + return; + } + + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); +} + long compat_arm_syscall(struct pt_regs *regs); asmlinkage long do_ni_syscall(struct pt_regs *regs) From 116c81f427ff6c5380850963e3fb8798cc821d2b Mon Sep 17 00:00:00 2001 From: Suzuki K 
Poulose Date: Fri, 9 Sep 2016 14:07:16 +0100 Subject: [PATCH 079/100] arm64: Work around systems with mismatched cache line sizes Systems with differing CPU i-cache/d-cache line sizes can cause problems with the cache management by software when the execution is migrated from one to another. Usually, the application reads the cache size on a CPU and then uses that length to perform cache operations. However, if it gets migrated to another CPU with a smaller cache line size, things could go completely wrong. To prevent such cases, always use the smallest cache line size among the CPUs. The kernel CPU feature infrastructure already keeps track of the safe value for all CPUID registers including CTR. This patch works around the problem by: For kernel, dynamically patch the kernel to read the cache size from the system wide copy of CTR_EL0. For applications, trap read accesses to CTR_EL0 (by clearing the SCTLR.UCT) and emulate the mrs instruction to return the system wide safe value of CTR_EL0. For faster access (i.e., avoiding a lookup of the system wide value of CTR_EL0 via read_system_reg), we keep track of the pointer to the table entry for CTR_EL0 in the CPU feature infrastructure.
Cc: Mark Rutland Cc: Andre Przywara Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 24 ++++++++++++++++++++++-- arch/arm64/include/asm/cpufeature.h | 3 ++- arch/arm64/include/asm/esr.h | 8 ++++++++ arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/asm-offsets.c | 2 ++ arch/arm64/kernel/cpu_errata.c | 22 ++++++++++++++++++++++ arch/arm64/kernel/traps.c | 14 ++++++++++++++ 7 files changed, 71 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index a4bb3f52d9ef..f09a5ae48a44 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -216,6 +216,20 @@ lr .req x30 // link register .macro mmid, rd, rn ldr \rd, [\rn, #MM_CONTEXT_ID] .endm +/* + * read_ctr - read CTR_EL0. If the system has mismatched + * cache line sizes, provide the system wide safe value + * from arm64_ftr_reg_ctrel0.sys_val + */ + .macro read_ctr, reg +alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE + mrs \reg, ctr_el0 // read CTR + nop +alternative_else + ldr_l \reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL +alternative_endif + .endm + /* * raw_dcache_line_size - get the minimum D-cache line size on this CPU @@ -232,7 +246,10 @@ lr .req x30 // link register * dcache_line_size - get the safe D-cache line size across all CPUs */ .macro dcache_line_size, reg, tmp - raw_dcache_line_size \reg, \tmp + read_ctr \tmp + ubfm \tmp, \tmp, #16, #19 // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size .endm /* @@ -250,7 +267,10 @@ lr .req x30 // link register * icache_line_size - get the safe I-cache line size across all CPUs */ .macro icache_line_size, reg, tmp - raw_icache_line_size \reg, \tmp + read_ctr \tmp + and \tmp, \tmp, #0xf // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size .endm /* diff --git 
a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 6806b86ab791..758d74fedfad 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -39,8 +39,9 @@ #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 #define ARM64_HYP_OFFSET_LOW 14 +#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 -#define ARM64_NCAPS 15 +#define ARM64_NCAPS 16 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 9875b326a73e..d14c478976d0 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -149,6 +149,9 @@ ((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \ ((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \ ((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT)) + +#define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) /* * User space cache operations have the following sysreg encoding * in System instructions. @@ -167,6 +170,11 @@ #define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ ESR_ELx_SYS64_ISS_DIR_WRITE) + +#define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) +#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ + ESR_ELx_SYS64_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e91aef2bb33d..7e4ecd1d2ac9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -100,6 +100,7 @@ /* SCTLR_EL1 specific flags. 
*/ #define SCTLR_EL1_UCI (1 << 26) #define SCTLR_EL1_SPAN (1 << 23) +#define SCTLR_EL1_UCT (1 << 15) #define SCTLR_EL1_SED (1 << 8) #define SCTLR_EL1_CP15BEN (1 << 5) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 05070b72fc28..4a2f0f0fef32 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -145,5 +146,6 @@ int main(void) DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); + DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val)); return 0; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 5836b3df0094..0150394f4cab 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -30,6 +30,21 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) entry->midr_range_max); } +static bool +has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) != + (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask); +} + +static void cpu_enable_trap_ctr_access(void *__unused) +{ + /* Clear SCTLR_EL1.UCT */ + config_sctlr_el1(SCTLR_EL1_UCT, 0); +} + #define MIDR_RANGE(model, min, max) \ .def_scope = SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ @@ -107,6 +122,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), }, #endif + { + .desc = "Mismatched cache line size", + .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, + .matches = has_mismatched_cache_line_size, + .def_scope = SCOPE_LOCAL_CPU, + .enable = cpu_enable_trap_ctr_access, + }, { } }; diff --git a/arch/arm64/kernel/traps.c 
b/arch/arm64/kernel/traps.c index 224f64eddd93..93445f8b530c 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -480,6 +480,14 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) regs->pc += 4; } +static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + + regs->regs[rt] = arm64_ftr_reg_ctrel0.sys_val; + regs->pc += 4; +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -492,6 +500,12 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL, .handler = user_cache_maint_handler, }, + { + /* Trap read access to CTR_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CTR_READ, + .handler = ctr_read_handler, + }, {}, }; From 0e27a7fce60f8c334ef59de0fbf5df8744e752e0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 7 Sep 2016 16:02:31 +0100 Subject: [PATCH 080/100] arm64: Remove shadowed asm-generic headers We've grown our own versions of bug.h, ftrace.h, pci.h and topology.h, so generating the generic ones as well is unnecessary and a potential source of build hiccups. At the very least, having them present has confused my source-indexing tool, and that simply will not do. 
Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- arch/arm64/include/asm/Kbuild | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 2b3d2d24acba..44e1d7f10add 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,4 +1,3 @@ -generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h generic-y += cputime.h @@ -10,7 +9,6 @@ generic-y += dma-contiguous.h generic-y += early_ioremap.h generic-y += emergency-restart.h generic-y += errno.h -generic-y += ftrace.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h @@ -27,7 +25,6 @@ generic-y += mman.h generic-y += msgbuf.h generic-y += msi.h generic-y += mutex.h -generic-y += pci.h generic-y += poll.h generic-y += preempt.h generic-y += resource.h @@ -44,7 +41,6 @@ generic-y += swab.h generic-y += switch_to.h generic-y += termbits.h generic-y += termios.h -generic-y += topology.h generic-y += trace_clock.h generic-y += types.h generic-y += unaligned.h From 8a71f0c656e0521867931eecff54eb3a35ca65a7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Sep 2016 14:04:45 +0100 Subject: [PATCH 081/100] arm64: sysreg: replace open-coded mrs_s/msr_s with {read,write}_sysreg_s Similar to our {read,write}_sysreg accessors for architected, named system registers, this patch introduces {read,write}_sysreg_s variants that can take arbitrary sys_reg output and therefore access IMPDEF registers or registers that are unsupported by binutils.
Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 6 +----- arch/arm64/include/asm/sysreg.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 9d9fd4b9a72e..26a68ddb11c1 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -93,11 +93,7 @@ #include -#define read_cpuid(reg) ({ \ - u64 __val; \ - asm("mrs_s %0, " __stringify(SYS_ ## reg) : "=r" (__val)); \ - __val; \ -}) +#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg) /* * The CPU ID never changes at run time, so we might as well tell the diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7e4ecd1d2ac9..e8d46e8e6079 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -274,6 +274,21 @@ asm( : : "rZ" (__val)); \ } while (0) +/* + * For registers without architectural names, or simply unsupported by + * GAS. + */ +#define read_sysreg_s(r) ({ \ + u64 __val; \ + asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg_s(v, r) do { \ + u64 __val = (u64)v; \ + asm volatile("msr_s " __stringify(r) ", %0" : : "rZ" (__val)); \ +} while (0) + static inline void config_sctlr_el1(u32 clear, u32 set) { u32 val; From f99a250cb6a3b301b101b4c0f5fcb80593bba6dc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Sep 2016 16:40:23 +0100 Subject: [PATCH 082/100] arm64: barriers: introduce nops and __nops macros for NOP sequences NOP sequences tend to get used for padding out alternative sections and uarch-specific pipeline flushes in errata workarounds. This patch adds macros for generating these sequences both as inline asm blocks and as strings suitable for embedding in other asm blocks directly.
Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 9 +++++++++ arch/arm64/include/asm/barrier.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index f09a5ae48a44..28bfe6132eb6 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -86,6 +86,15 @@ dmb \opt .endm +/* + * NOP sequence + */ + .macro nops, num + .rept \num + nop + .endr + .endm + /* * Emit an entry into the exception table */ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 4eea7f618dce..4e0497f581a0 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -20,6 +20,9 @@ #ifndef __ASSEMBLY__ +#define __nops(n) ".rept " #n "\nnop\n.endr\n" +#define nops(n) asm volatile(__nops(n)) + #define sev() asm volatile("sev" : : : "memory") #define wfe() asm volatile("wfe" : : : "memory") #define wfi() asm volatile("wfi" : : : "memory") From 05492f2fd87d0a2e6a626bf8fe002c9a11941950 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Sep 2016 16:42:58 +0100 Subject: [PATCH 083/100] arm64: lse: convert lse alternatives NOP padding to use __nops The LSE atomics are implemented using alternative code sequences of different lengths, and explicit NOP padding is used to ensure the patching works correctly. This patch converts the bulk of the LSE code over to using the __nops macro, which makes it slightly clearer as to what is going on and also consolidates all of the padding at the end of the various sequences. 
Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_lse.h | 64 ++++++++++++----------------- arch/arm64/include/asm/cmpxchg.h | 4 +- arch/arm64/include/asm/spinlock.h | 27 +++++------- 3 files changed, 39 insertions(+), 56 deletions(-) diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index b5890be8f257..7457ce082b5f 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -86,8 +86,8 @@ static inline int atomic_add_return##name(int i, atomic_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC(add_return##name), \ + __LL_SC_ATOMIC(add_return##name) \ + __nops(1), \ /* LSE atomics */ \ " ldadd" #mb " %w[i], w30, %[v]\n" \ " add %w[i], %w[i], w30") \ @@ -112,8 +112,8 @@ static inline void atomic_and(int i, atomic_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC(and), + __LL_SC_ATOMIC(and) + __nops(1), /* LSE atomics */ " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]") @@ -130,8 +130,8 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC(fetch_and##name), \ + __LL_SC_ATOMIC(fetch_and##name) \ + __nops(1), \ /* LSE atomics */ \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]") \ @@ -156,8 +156,8 @@ static inline void atomic_sub(int i, atomic_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC(sub), + __LL_SC_ATOMIC(sub) + __nops(1), /* LSE atomics */ " neg %w[i], %w[i]\n" " stadd %w[i], %[v]") @@ -174,9 +174,8 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ __LL_SC_ATOMIC(sub_return##name) \ - " nop", \ + __nops(2), \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], w30, %[v]\n" \ @@ -203,8 +202,8 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ \ asm 
volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC(fetch_sub##name), \ + __LL_SC_ATOMIC(fetch_sub##name) \ + __nops(1), \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]") \ @@ -284,8 +283,8 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC64(add_return##name), \ + __LL_SC_ATOMIC64(add_return##name) \ + __nops(1), \ /* LSE atomics */ \ " ldadd" #mb " %[i], x30, %[v]\n" \ " add %[i], %[i], x30") \ @@ -310,8 +309,8 @@ static inline void atomic64_and(long i, atomic64_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC64(and), + __LL_SC_ATOMIC64(and) + __nops(1), /* LSE atomics */ " mvn %[i], %[i]\n" " stclr %[i], %[v]") @@ -328,8 +327,8 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC64(fetch_and##name), \ + __LL_SC_ATOMIC64(fetch_and##name) \ + __nops(1), \ /* LSE atomics */ \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]") \ @@ -354,8 +353,8 @@ static inline void atomic64_sub(long i, atomic64_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC64(sub), + __LL_SC_ATOMIC64(sub) + __nops(1), /* LSE atomics */ " neg %[i], %[i]\n" " stadd %[i], %[v]") @@ -372,9 +371,8 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ __LL_SC_ATOMIC64(sub_return##name) \ - " nop", \ + __nops(2), \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], x30, %[v]\n" \ @@ -401,8 +399,8 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_ATOMIC64(fetch_sub##name), \ + __LL_SC_ATOMIC64(fetch_sub##name) \ + __nops(1), \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" 
#mb " %[i], %[i], %[v]") \ @@ -426,13 +424,8 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " nop\n" __LL_SC_ATOMIC64(dec_if_positive) - " nop\n" - " nop\n" - " nop\n" - " nop\n" - " nop", + __nops(6), /* LSE atomics */ "1: ldr x30, %[v]\n" " subs %[ret], x30, #1\n" @@ -464,9 +457,8 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - __LL_SC_CMPXCHG(name) \ - " nop", \ + __LL_SC_CMPXCHG(name) \ + __nops(2), \ /* LSE atomics */ \ " mov " #w "30, %" #w "[old]\n" \ " cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n" \ @@ -517,10 +509,8 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - " nop\n" \ - " nop\n" \ - " nop\n" \ - __LL_SC_CMPXCHG_DBL(name), \ + __LL_SC_CMPXCHG_DBL(name) \ + __nops(3), \ /* LSE atomics */ \ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ " eor %[old1], %[old1], %[oldval1]\n" \ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index bd86a79491bc..91b26d26af8a 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -43,10 +43,8 @@ static inline unsigned long __xchg_case_##name(unsigned long x, \ " cbnz %w1, 1b\n" \ " " #mb, \ /* LSE atomics */ \ - " nop\n" \ - " nop\n" \ " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \ - " nop\n" \ + __nops(3) \ " " #nop_lse) \ : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \ : "r" (x) \ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index e875a5a551d7..28f8c2174f8a 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -66,8 +66,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " stxr %w1, %w0, %2\n" -" nop\n" -" nop\n", + __nops(2), /* LSE atomics */ " mov %w1, %w0\n" " cas %w0, %w0, 
%2\n" @@ -99,9 +98,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) /* LSE atomics */ " mov %w2, %w5\n" " ldadda %w2, %w0, %3\n" -" nop\n" -" nop\n" -" nop\n" + __nops(3) ) /* Did we get the lock? */ @@ -165,8 +162,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) " stlrh %w1, %0", /* LSE atomics */ " mov %w1, #1\n" - " nop\n" - " staddlh %w1, %0") + " staddlh %w1, %0\n" + __nops(1)) : "=Q" (lock->owner), "=&r" (tmp) : : "memory"); @@ -212,7 +209,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) " cbnz %w0, 1b\n" " stxr %w0, %w2, %1\n" " cbnz %w0, 2b\n" - " nop", + __nops(1), /* LSE atomics */ "1: mov %w0, wzr\n" "2: casa %w0, %w2, %1\n" @@ -241,8 +238,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) /* LSE atomics */ " mov %w0, wzr\n" " casa %w0, %w2, %1\n" - " nop\n" - " nop") + __nops(2)) : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) : "memory"); @@ -290,8 +286,8 @@ static inline void arch_read_lock(arch_rwlock_t *rw) " add %w0, %w0, #1\n" " tbnz %w0, #31, 1b\n" " stxr %w1, %w0, %2\n" - " nop\n" - " cbnz %w1, 2b", + " cbnz %w1, 2b\n" + __nops(1), /* LSE atomics */ "1: wfe\n" "2: ldxr %w0, %2\n" @@ -317,9 +313,8 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) " cbnz %w1, 1b", /* LSE atomics */ " movn %w0, #0\n" - " nop\n" - " nop\n" - " staddl %w0, %2") + " staddl %w0, %2\n" + __nops(2)) : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : : "memory"); @@ -344,7 +339,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) " tbnz %w1, #31, 1f\n" " casa %w0, %w1, %2\n" " sbc %w1, %w1, %w0\n" - " nop\n" + __nops(1) "1:") : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : From 792d47379f4d4c76692f1795f33d38582f8907fa Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Sep 2016 11:07:08 +0100 Subject: [PATCH 084/100] arm64: alternative: add auto-nop infrastructure In some cases, one side of an alternative sequence is simply a number of NOPs used to balance the other side. 
Keeping track of this manually is tedious, and the presence of large chains of NOPs makes the code more painful to read than necessary. To ameliorate matters, this patch adds a new alternative_else_nop_endif, which automatically balances an alternative sequence with a trivial NOP sled. In many cases, we would like a NOP-sled in the default case, and instructions patched in in the presence of a feature. To enable the NOPs to be generated automatically for this case, this patch also adds a new alternative_if, and updates alternative_else and alternative_endif to work with either alternative_if or alternative_if_not. Cc: Andre Przywara Cc: Catalin Marinas Cc: Dave Martin Cc: James Morse Signed-off-by: Mark Rutland [will: use new nops macro to generate nop sequences] Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative.h | 72 +++++++++++++++++++++------- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 8746ff6abd77..55101bd86b98 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -2,6 +2,7 @@ #define __ASM_ALTERNATIVE_H #include +#include #ifndef __ASSEMBLY__ @@ -90,24 +91,15 @@ void apply_alternatives(void *start, size_t length); .endm /* - * Begin an alternative code sequence. + * Alternative sequences * - * The code that follows this macro will be assembled and linked as - * normal. There are no restrictions on this code. - */ -.macro alternative_if_not cap - .pushsection .altinstructions, "a" - altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f - .popsection -661: -.endm - -/* - * Provide the alternative code sequence. + * The code for the case where the capability is not present will be + * assembled and linked as normal. There are no restrictions on this + * code. * - * The code that follows this macro is assembled into a special - * section to be used for dynamic patching.
Code that follows this - * macro must: + * The code for the case where the capability is present will be + * assembled into a special section to be used for dynamic patching. + * Code for that case must: * * 1. Be exactly the same length (in bytes) as the default code * sequence. @@ -116,8 +108,38 @@ void apply_alternatives(void *start, size_t length); * alternative sequence it is defined in (branches into an * alternative sequence are not fixed up). */ + +/* + * Begin an alternative code sequence. + */ +.macro alternative_if_not cap + .set .Lasm_alt_mode, 0 + .pushsection .altinstructions, "a" + altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f + .popsection +661: +.endm + +.macro alternative_if cap + .set .Lasm_alt_mode, 1 + .pushsection .altinstructions, "a" + altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f + .popsection + .pushsection .altinstr_replacement, "ax" + .align 2 /* So GAS knows label 661 is suitably aligned */ +661: +.endm + +/* + * Provide the other half of the alternative code sequence. + */ .macro alternative_else -662: .pushsection .altinstr_replacement, "ax" +662: + .if .Lasm_alt_mode==0 + .pushsection .altinstr_replacement, "ax" + .else + .popsection + .endif 663: .endm @@ -125,11 +147,25 @@ void apply_alternatives(void *start, size_t length); * Complete an alternative code sequence. */ .macro alternative_endif -664: .popsection +664: + .if .Lasm_alt_mode==0 + .popsection + .endif .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) .endm +/* + * Provides a trivial alternative or default sequence consisting solely + * of NOPs. The number of NOPs is chosen automatically to match the + * previous case. + */ +.macro alternative_else_nop_endif +alternative_else + nops (662b-661b) / AARCH64_INSN_SIZE +alternative_endif +.endm + #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) 
\ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) From 6ba3b554f5b9b53cb99c0edb93f0ea855fbc712a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Sep 2016 11:07:09 +0100 Subject: [PATCH 085/100] arm64: use alternative auto-nop Make use of the new alternative_if and alternative_else_nop_endif and get rid of our homebrew NOP sleds, making the code simpler to read. Note that for cpu_do_switch_mm the ret has been moved out of the alternative sequence, and in the default case there will be three additional NOPs executed. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 10 ++-------- arch/arm64/lib/copy_page.S | 13 ++++--------- arch/arm64/mm/proc.S | 9 ++------- 3 files changed, 8 insertions(+), 24 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6880dcc3b465..223d54a4d66b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -150,13 +150,7 @@ ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 #ifdef CONFIG_ARM64_ERRATUM_845719 -alternative_if_not ARM64_WORKAROUND_845719 - nop - nop -#ifdef CONFIG_PID_IN_CONTEXTIDR - nop -#endif -alternative_else +alternative_if ARM64_WORKAROUND_845719 tbz x22, #4, 1f #ifdef CONFIG_PID_IN_CONTEXTIDR mrs x29, contextidr_el1 @@ -165,7 +159,7 @@ alternative_else msr contextidr_el1, xzr #endif 1: -alternative_endif +alternative_else_nop_endif #endif .endif msr elr_el1, x21 // set up the return data diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 4c1e700840b6..c3cd65e31814 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -29,14 +29,11 @@ * x1 - src */ ENTRY(copy_page) -alternative_if_not ARM64_HAS_NO_HW_PREFETCH - nop - nop -alternative_else +alternative_if ARM64_HAS_NO_HW_PREFETCH # Prefetch two cache lines ahead.
prfm pldl1strm, [x1, #128] prfm pldl1strm, [x1, #256] -alternative_endif +alternative_else_nop_endif ldp x2, x3, [x1] ldp x4, x5, [x1, #16] @@ -52,11 +49,9 @@ alternative_endif 1: subs x18, x18, #128 -alternative_if_not ARM64_HAS_NO_HW_PREFETCH - nop -alternative_else +alternative_if ARM64_HAS_NO_HW_PREFETCH prfm pldl1strm, [x1, #384] -alternative_endif +alternative_else_nop_endif stnp x2, x3, [x0] ldp x2, x3, [x1] diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 5eb35964ab8e..1b11dcd7e851 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -127,17 +127,12 @@ ENTRY(cpu_do_switch_mm) bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 isb -alternative_if_not ARM64_WORKAROUND_CAVIUM_27456 - ret - nop - nop - nop -alternative_else +alternative_if ARM64_WORKAROUND_CAVIUM_27456 ic iallu dsb nsh isb +alternative_else_nop_endif ret -alternative_endif ENDPROC(cpu_do_switch_mm) .pushsection ".idmap.text", "ax" From e506236a7b8140d73b35fee80f7e38c794dd931d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Sep 2016 11:07:10 +0100 Subject: [PATCH 086/100] arm64/kvm: use alternative auto-nop Make use of the new alternative_if and alternative_else_nop_endif and get rid of our open-coded NOP sleds, making the code simpler to read. Note that for __kvm_call_hyp the branch to __vhe_hyp_call has been moved out of the alternative sequence, and in the default case there will be four additional NOPs executed. 
Cc: Marc Zyngier Cc: kvmarm@lists.cs.columbia.edu Acked-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_mmu.h | 10 +++------- arch/arm64/kvm/hyp.S | 6 +----- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b6bb83400cd8..dff109871f2a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -99,14 +99,10 @@ .macro kern_hyp_va reg alternative_if_not ARM64_HAS_VIRT_HOST_EXTN and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK -alternative_else - nop -alternative_endif -alternative_if_not ARM64_HYP_OFFSET_LOW - nop -alternative_else +alternative_else_nop_endif +alternative_if ARM64_HYP_OFFSET_LOW and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK -alternative_endif +alternative_else_nop_endif .endm #else diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 7ce931565151..2726635dceba 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -46,10 +46,6 @@ alternative_if_not ARM64_HAS_VIRT_HOST_EXTN hvc #0 ldr lr, [sp], #16 ret -alternative_else +alternative_else_nop_endif b __vhe_hyp_call - nop - nop - nop -alternative_endif ENDPROC(__kvm_call_hyp) From 3e593f66754def77fa3433c595f941f1defe4af1 Mon Sep 17 00:00:00 2001 From: "David A. Long" Date: Mon, 12 Sep 2016 14:21:27 -0400 Subject: [PATCH 087/100] arm64: Improve kprobes test for atomic sequence Kprobes searches backwards a finite number of instructions to determine if there is an attempt to probe a load/store exclusive sequence. It stops when it hits the maximum number of instructions or a load or store exclusive. However this means it can run up past the beginning of the function and start looking at literal constants. This has been shown to cause a false positive and blocks insertion of the probe. To fix this, further limit the backwards search to stop if it hits a symbol address from kallsyms. 
The presumption is that this is the entry point to this code (particularly for the common case of placing probes at the beginning of functions). This also improves efficiency by not searching code that is not part of the function. There may be some possibility that the label might not denote the entry path to the probed instruction but the likelihood seems low and this is just another example of how the kprobes user really needs to be careful about what they are doing. Acked-by: Masami Hiramatsu Signed-off-by: David A. Long Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/decode-insn.c | 46 ++++++++++++-------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index 37e47a9d617e..d1731bf977ef 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -122,7 +123,7 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) static bool __kprobes is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end) { - while (scan_start > scan_end) { + while (scan_start >= scan_end) { /* * atomic region starts from exclusive load and ends with * exclusive store. 
@@ -142,33 +143,30 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) { enum kprobe_insn decoded; kprobe_opcode_t insn = le32_to_cpu(*addr); - kprobe_opcode_t *scan_start = addr - 1; - kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; -#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) - struct module *mod; -#endif + kprobe_opcode_t *scan_end = NULL; + unsigned long size = 0, offset = 0; - if (addr >= (kprobe_opcode_t *)_text && - scan_end < (kprobe_opcode_t *)_text) - scan_end = (kprobe_opcode_t *)_text; -#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) - else { - preempt_disable(); - mod = __module_address((unsigned long)addr); - if (mod && within_module_init((unsigned long)addr, mod) && - !within_module_init((unsigned long)scan_end, mod)) - scan_end = (kprobe_opcode_t *)mod->init_layout.base; - else if (mod && within_module_core((unsigned long)addr, mod) && - !within_module_core((unsigned long)scan_end, mod)) - scan_end = (kprobe_opcode_t *)mod->core_layout.base; - preempt_enable(); + /* + * If there's a symbol defined in front of and near enough to + * the probe address assume it is the entry point to this + * code and use it to further limit how far back we search + * when determining if we're in an atomic sequence. If we could + * not find any symbol skip the atomic test altogether as we + * could otherwise end up searching irrelevant text/literals. + * KPROBES depends on KALLSYMS so this last case should never + * happen. 
+ */ + if (kallsyms_lookup_size_offset((unsigned long) addr, &size, &offset)) { + if (offset < (MAX_ATOMIC_CONTEXT_SIZE*sizeof(kprobe_opcode_t))) + scan_end = addr - (offset / sizeof(kprobe_opcode_t)); + else + scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; } -#endif decoded = arm_probe_decode_insn(insn, asi); - if (decoded == INSN_REJECTED || - is_probed_address_atomic(scan_start, scan_end)) - return INSN_REJECTED; + if (decoded != INSN_REJECTED && scan_end) + if (is_probed_address_atomic(addr - 1, scan_end)) + return INSN_REJECTED; return decoded; } From 55d5c4ab06e0c984aa38eae391f654f59b7d24db Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 15 Sep 2016 10:14:41 +0100 Subject: [PATCH 088/100] MAINTAINERS: Update ARM PMU PROFILING AND DEBUGGING entry There are an increasing number of ARM SoC PMU drivers appearing for things like interconnects, memory controllers and cache controllers. Rather than have these handled on an ad-hoc basis, where SoC maintainers each send their PMU drivers directly to arm-soc, let's take these into drivers/perf/ and send a single pull request to arm-soc instead, much like other subsystems. This patch amends the ARM PMU MAINTAINERS entry to include all of drivers/perf/ (currently just the ARM CPU PMU), changes Mark Rutland from Reviewer to Maintainer, so that he can help with the new tree and adds the device-tree binding to the list of maintained files. 
Acked-by: Mark Rutland Acked-by: Arnd Bergmann Signed-off-by: Will Deacon --- MAINTAINERS | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0bbe4b105c34..32bf8a3d69a8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -903,15 +903,17 @@ F: arch/arm/include/asm/floppy.h ARM PMU PROFILING AND DEBUGGING M: Will Deacon -R: Mark Rutland +M: Mark Rutland S: Maintained +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) F: arch/arm*/kernel/perf_* F: arch/arm/oprofile/common.c F: arch/arm*/kernel/hw_breakpoint.c F: arch/arm*/include/asm/hw_breakpoint.h F: arch/arm*/include/asm/perf_event.h -F: drivers/perf/arm_pmu.c +F: drivers/perf/* F: include/linux/perf/arm_pmu.h +F: Documentation/devicetree/bindings/arm/pmu.txt ARM PORT M: Russell King From dbee3a74ef2c73acc4eb31cee7a60d5e46767a41 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Wed, 14 Sep 2016 17:32:29 -0500 Subject: [PATCH 089/100] arm64: pmu: add fallback probe table In preparation for ACPI support, add a pmu_probe_info table to the arm_pmu_device_probe() call. This table gets used when probing in the absence of a devicetree node for PMU. Signed-off-by: Mark Salter Signed-off-by: Jeremy Linton Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 13 ++++++++++++- drivers/perf/arm_pmu.c | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 03a13661bfce..0bcf2fb7d26e 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -1056,9 +1057,19 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {}, }; +static const struct pmu_probe_info armv8_pmu_probe_table[] = { + PMU_PROBE(0, 0, armv8_pmuv3_init), /* if all else fails... 
*/ + { /* sentinel value */ } +}; + static int armv8_pmu_device_probe(struct platform_device *pdev) { - return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); + if (acpi_disabled) + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, + NULL); + + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, + armv8_pmu_probe_table); } static struct platform_driver armv8_pmu_driver = { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index c36913ad3a09..77ac1ccb39ed 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -1048,7 +1048,7 @@ int arm_pmu_device_probe(struct platform_device *pdev, ret = of_pmu_irq_cfg(pmu); if (!ret) ret = init_fn(pmu); - } else { + } else if (probe_table) { cpumask_setall(&pmu->supported_cpus); ret = probe_current_pmu(pmu, probe_table); } From 236b9b91cd1255a9cae9d7ef6b755efe37013c1f Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 14 Sep 2016 17:32:30 -0500 Subject: [PATCH 090/100] arm64: pmu: Probe default hw/cache counters ARMv8 machines can identify the micro/arch defined counters that are available on a machine. Add all these counters to the default armv8 perf map. At run-time disable the counters which are not available on the given PMU. Signed-off-by: Jeremy Linton Acked-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 45 +++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 0bcf2fb7d26e..92dbe886cdf0 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -191,13 +191,23 @@ #define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED /* PMUv3 HW events mapping. */ + +/* + * ARMv8 Architectural defined events, not all of these may + * be supported on any given implementation. Undefined events will + * be disabled at run-time. 
+ */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, }; /* ARM Cortex-A53 HW events mapping. */ @@ -259,6 +269,15 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, + [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, @@ -900,9 +919,22 @@ static void armv8pmu_reset(void *info) static int armv8_pmuv3_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_pmuv3_perf_map, - &armv8_pmuv3_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + int hw_event_id; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + + 
hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, + &armv8_pmuv3_perf_cache_map, + ARMV8_PMU_EVTYPE_EVENT); + if (hw_event_id < 0) + return hw_event_id; + + /* disable micro/arch events not supported by this PMU */ + if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && + !test_bit(hw_event_id, armpmu->pmceid_bitmap)) { + return -EOPNOTSUPP; + } + + return hw_event_id; } static int armv8_a53_map_event(struct perf_event *event) @@ -1057,8 +1089,13 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {}, }; +/* + * Non DT systems have their micro/arch events probed at run-time. + * A fairly complete list of generic events are provided and ones that + * aren't supported by the current PMU are disabled. + */ static const struct pmu_probe_info armv8_pmu_probe_table[] = { - PMU_PROBE(0, 0, armv8_pmuv3_init), /* if all else fails... */ + PMU_PROBE(0, 0, armv8_pmuv3_init), /* enable all defined counters */ { /* sentinel value */ } }; From 85023b2e1325826edf5d226a9cb4d809ed1e2024 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 14 Sep 2016 17:32:31 -0500 Subject: [PATCH 091/100] arm64: pmu: Hoist pmu platform device name Move the PMU name into a common header file so it may be referenced by other users. 
Signed-off-by: Jeremy Linton Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 2 +- include/linux/perf/arm_pmu.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 92dbe886cdf0..a9310a69fffd 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1111,7 +1111,7 @@ static int armv8_pmu_device_probe(struct platform_device *pdev) static struct platform_driver armv8_pmu_driver = { .driver = { - .name = "armv8-pmu", + .name = ARMV8_PMU_PDEV_NAME, .of_match_table = armv8_pmu_of_device_ids, }, .probe = armv8_pmu_device_probe, diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index dc1f2f30c961..9ff07d3fc8de 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -160,6 +160,8 @@ int arm_pmu_device_probe(struct platform_device *pdev, const struct of_device_id *of_table, const struct pmu_probe_info *probe_table); +#define ARMV8_PMU_PDEV_NAME "armv8-pmu" + #endif /* CONFIG_ARM_PMU */ #endif /* __ARM_PMU_H__ */ From 0edfa8391664a4f795c67d0e07480fbe801a0e1d Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 19 Sep 2016 17:38:55 -0400 Subject: [PATCH 092/100] arm64: migrate exception table users off module.h and onto extable.h These files were only including module.h for exception table related functions. We've now separated that content out into its own file "extable.h" so now move over to that and avoid all the extra header content in module.h that we don't really need to compile these files. 
Cc: Catalin Marinas Acked-by: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Paul Gortmaker Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 2 +- arch/arm64/mm/extable.c | 2 +- arch/arm64/mm/fault.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index f97a58111e10..f5077ea7af6d 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 81acd4706878..c9f118cd812b 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -2,7 +2,7 @@ * Based on arch/arm/mm/extable.c */ -#include +#include #include int fixup_exception(struct pt_regs *regs) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index a5f098a5f602..53d9159662fe 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -18,7 +18,7 @@ * along with this program. If not, see . */ -#include +#include #include #include #include From ca219452c6b8a6cd1369b6a78b1cf069d0386865 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 21 Sep 2016 15:25:04 -0700 Subject: [PATCH 093/100] arm64: Correctly bounds check virt_addr_valid virt_addr_valid is supposed to return true if and only if virt_to_page returns a valid page structure. The current macro does math on whatever address is given and passes that to pfn_valid to verify. vmalloc and module addresses can happen to generate a pfn that 'happens' to be valid. Fix this by only performing the pfn_valid check on addresses that have the potential to be valid. 
Acked-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 31b73227b41f..ba62df8c6e35 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -214,7 +214,7 @@ static inline void *phys_to_virt(phys_addr_t x) #ifndef CONFIG_SPARSEMEM_VMEMMAP #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #else #define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) #define __page_to_voff(kaddr) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) @@ -222,11 +222,15 @@ static inline void *phys_to_virt(phys_addr_t x) #define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET)) #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) -#define virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \ +#define _virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \ + PHYS_OFFSET) >> PAGE_SHIFT) #endif #endif +#define _virt_addr_is_linear(kaddr) (((u64)(kaddr)) >= PAGE_OFFSET) +#define virt_addr_valid(kaddr) (_virt_addr_is_linear(kaddr) && \ + _virt_addr_valid(kaddr)) + #include #endif From 22e43390456152f6e72ad2632e2b3fb363e94146 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 22 Sep 2016 03:35:15 -0500 Subject: [PATCH 094/100] arm64: arch_timer: Add device tree binding for A-008585 erratum This erratum describes a bug in logic outside the core, so MIDR can't be used to identify its presence, and reading an SoC-specific revision register from common arch timer code would be awkward. So, describe it in the device tree. 
Signed-off-by: Scott Wood Acked-by: Rob Herring Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/arch_timer.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt index e774128935d5..ef5fbe9a77c7 100644 --- a/Documentation/devicetree/bindings/arm/arch_timer.txt +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt @@ -25,6 +25,12 @@ to deliver its interrupts via SPIs. - always-on : a boolean property. If present, the timer is powered through an always-on power domain, therefore it never loses context. +- fsl,erratum-a008585 : A boolean property. Indicates the presence of + QorIQ erratum A-008585, which says that reading the counter is + unreliable unless the same value is returned by back-to-back reads. + This also affects writes to the tval register, due to the implicit + counter read. + ** Optional properties: - arm,cpu-registers-not-fw-configured : Firmware does not initialize From f6dc1576cd517440313c9551b6ffa3d7e389c7c7 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 22 Sep 2016 03:35:17 -0500 Subject: [PATCH 095/100] arm64: arch_timer: Work around QorIQ Erratum A-008585 Erratum A-008585 says that the ARM generic timer counter "has the potential to contain an erroneous value for a small number of core clock cycles every time the timer value changes". Accesses to TVAL (both read and write) are also affected due to the implicit counter read. Accesses to CVAL are not affected. The workaround is to reread TVAL and count registers until successive reads return the same value. Writes to TVAL are replaced with an equivalent write to CVAL.
The workaround is enabled if the fsl,erratum-a008585 property is found in the timer node in the device tree. This can be overridden with the clocksource.arm_arch_timer.fsl-a008585 boot parameter, which allows KVM users to enable the workaround until a mechanism is implemented to automatically communicate this information. This erratum can be found on LS1043A and LS2080A. Acked-by: Marc Zyngier Signed-off-by: Scott Wood [will: renamed read macro to reflect that it's not usually unstable] Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 2 + Documentation/kernel-parameters.txt | 9 +++ arch/arm64/include/asm/arch_timer.h | 47 ++++++++++- drivers/clocksource/Kconfig | 10 +++ drivers/clocksource/arm_arch_timer.c | 104 +++++++++++++++++++++++++ 5 files changed, 169 insertions(+), 3 deletions(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 4da60b463995..041e3a943784 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -60,3 +60,5 @@ stable kernels. | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | | Cavium | ThunderX SMMUv2 | #27704 | N/A | +| | | | | +| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 46c030a49186..fb4de4daba1f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -698,6 +698,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. loops can be debugged more effectively on production systems. + clocksource.arm_arch_timer.fsl-a008585= + [ARM64] + Format: + Enable/disable the workaround of Freescale/NXP + erratum A-008585. This can be useful for KVM + guests, if the guest device tree doesn't show the + erratum. If unspecified, the workaround is + enabled based on the device tree. 
+ clearcpuid=BITNUM [X86] Disable CPUID feature X for the kernel. See arch/x86/include/asm/cpufeatures.h for the valid bit diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 7ff386c15539..eaa5bbe3fa87 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -24,10 +24,51 @@ #include #include +#include #include #include +#if IS_ENABLED(CONFIG_FSL_ERRATUM_A008585) +extern struct static_key_false arch_timer_read_ool_enabled; +#define needs_fsl_a008585_workaround() \ + static_branch_unlikely(&arch_timer_read_ool_enabled) +#else +#define needs_fsl_a008585_workaround() false +#endif + +u32 __fsl_a008585_read_cntp_tval_el0(void); +u32 __fsl_a008585_read_cntv_tval_el0(void); +u64 __fsl_a008585_read_cntvct_el0(void); + +/* + * The number of retries is an arbitrary value well beyond the highest number + * of iterations the loop has been observed to take. + */ +#define __fsl_a008585_read_reg(reg) ({ \ + u64 _old, _new; \ + int _retries = 200; \ + \ + do { \ + _old = read_sysreg(reg); \ + _new = read_sysreg(reg); \ + _retries--; \ + } while (unlikely(_old != _new) && _retries); \ + \ + WARN_ON_ONCE(!_retries); \ + _new; \ +}) + +#define arch_timer_reg_read_stable(reg) \ +({ \ + u64 _val; \ + if (needs_fsl_a008585_workaround()) \ + _val = __fsl_a008585_read_##reg(); \ + else \ + _val = read_sysreg(reg); \ + _val; \ +}) + /* * These register accessors are marked inline so the compiler can * nicely work out which register we want, and chuck away the rest of @@ -67,14 +108,14 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) case ARCH_TIMER_REG_CTRL: return read_sysreg(cntp_ctl_el0); case ARCH_TIMER_REG_TVAL: - return read_sysreg(cntp_tval_el0); + return arch_timer_reg_read_stable(cntp_tval_el0); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: return read_sysreg(cntv_ctl_el0); case ARCH_TIMER_REG_TVAL: - return read_sysreg(cntv_tval_el0); + 
return arch_timer_reg_read_stable(cntv_tval_el0); } } @@ -108,7 +149,7 @@ static inline u64 arch_counter_get_cntpct(void) static inline u64 arch_counter_get_cntvct(void) { isb(); - return read_sysreg(cntvct_el0); + return arch_timer_reg_read_stable(cntvct_el0); } static inline int arch_timer_arch_init(void) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 567788664723..8a753fd5b79d 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -305,6 +305,16 @@ config ARM_ARCH_TIMER_EVTSTREAM This must be disabled for hardware validation purposes to detect any hardware anomalies of missing events. +config FSL_ERRATUM_A008585 + bool "Workaround for Freescale/NXP Erratum A-008585" + default y + depends on ARM_ARCH_TIMER && ARM64 + help + This option enables a workaround for Freescale/NXP Erratum + A-008585 ("ARM generic timer may contain an erroneous + value"). The workaround will only be active if the + fsl,erratum-a008585 property is found in the timer node. + config ARM_GLOBAL_TIMER bool "Support for the ARM global timer" if COMPILE_TEST select CLKSRC_OF if OF diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 57700541f951..eb5fb4121ac8 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -94,6 +94,43 @@ early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg); * Architected system timer support. 
*/ +#ifdef CONFIG_FSL_ERRATUM_A008585 +DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled); +EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled); + +static int fsl_a008585_enable = -1; + +static int __init early_fsl_a008585_cfg(char *buf) +{ + int ret; + bool val; + + ret = strtobool(buf, &val); + if (ret) + return ret; + + fsl_a008585_enable = val; + return 0; +} +early_param("clocksource.arm_arch_timer.fsl-a008585", early_fsl_a008585_cfg); + +u32 __fsl_a008585_read_cntp_tval_el0(void) +{ + return __fsl_a008585_read_reg(cntp_tval_el0); +} + +u32 __fsl_a008585_read_cntv_tval_el0(void) +{ + return __fsl_a008585_read_reg(cntv_tval_el0); +} + +u64 __fsl_a008585_read_cntvct_el0(void) +{ + return __fsl_a008585_read_reg(cntvct_el0); +} +EXPORT_SYMBOL(__fsl_a008585_read_cntvct_el0); +#endif /* CONFIG_FSL_ERRATUM_A008585 */ + static __always_inline void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val, struct clock_event_device *clk) @@ -243,6 +280,40 @@ static __always_inline void set_next_event(const int access, unsigned long evt, arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); } +#ifdef CONFIG_FSL_ERRATUM_A008585 +static __always_inline void fsl_a008585_set_next_event(const int access, + unsigned long evt, struct clock_event_device *clk) +{ + unsigned long ctrl; + u64 cval = evt + arch_counter_get_cntvct(); + + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); + ctrl |= ARCH_TIMER_CTRL_ENABLE; + ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; + + if (access == ARCH_TIMER_PHYS_ACCESS) + write_sysreg(cval, cntp_cval_el0); + else if (access == ARCH_TIMER_VIRT_ACCESS) + write_sysreg(cval, cntv_cval_el0); + + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); +} + +static int fsl_a008585_set_next_event_virt(unsigned long evt, + struct clock_event_device *clk) +{ + fsl_a008585_set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk); + return 0; +} + +static int fsl_a008585_set_next_event_phys(unsigned long evt, + struct clock_event_device *clk) +{ 
+ fsl_a008585_set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk); + return 0; +} +#endif /* CONFIG_FSL_ERRATUM_A008585 */ + static int arch_timer_set_next_event_virt(unsigned long evt, struct clock_event_device *clk) { @@ -271,6 +342,19 @@ static int arch_timer_set_next_event_phys_mem(unsigned long evt, return 0; } +static void fsl_a008585_set_sne(struct clock_event_device *clk) +{ +#ifdef CONFIG_FSL_ERRATUM_A008585 + if (!static_branch_unlikely(&arch_timer_read_ool_enabled)) + return; + + if (arch_timer_uses_ppi == VIRT_PPI) + clk->set_next_event = fsl_a008585_set_next_event_virt; + else + clk->set_next_event = fsl_a008585_set_next_event_phys; +#endif +} + static void __arch_timer_setup(unsigned type, struct clock_event_device *clk) { @@ -299,6 +383,8 @@ static void __arch_timer_setup(unsigned type, default: BUG(); } + + fsl_a008585_set_sne(clk); } else { clk->features |= CLOCK_EVT_FEAT_DYNIRQ; clk->name = "arch_mem_timer"; @@ -515,6 +601,15 @@ static void __init arch_counter_register(unsigned type) arch_timer_read_counter = arch_counter_get_cntvct; else arch_timer_read_counter = arch_counter_get_cntpct; + +#ifdef CONFIG_FSL_ERRATUM_A008585 + /* + * Don't use the vdso fastpath if errata require using + * the out-of-line counter accessor. 
+ */ + if (static_branch_unlikely(&arch_timer_read_ool_enabled)) + clocksource_counter.name = "arch_sys_counter_ool"; +#endif } else { arch_timer_read_counter = arch_counter_get_cntvct_mem; @@ -800,6 +895,15 @@ static int __init arch_timer_of_init(struct device_node *np) arch_timer_c3stop = !of_property_read_bool(np, "always-on"); +#ifdef CONFIG_FSL_ERRATUM_A008585 + if (fsl_a008585_enable < 0) + fsl_a008585_enable = of_property_read_bool(np, "fsl,erratum-a008585"); + if (fsl_a008585_enable) { + static_branch_enable(&arch_timer_read_ool_enabled); + pr_info("Enabling workaround for FSL erratum A-008585\n"); + } +#endif + /* * If we cannot rely on firmware initializing the timer registers then * we should use the physical timers instead. From 1d8f51d41fc7116f3753fe9f9a5dd93e0b550a2c Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 22 Sep 2016 03:35:18 -0500 Subject: [PATCH 096/100] arm/arm64: arch_timer: Use archdata to indicate vdso suitability Instead of comparing the name to a magic string, use archdata to explicitly communicate whether the arch timer is suitable for direct vdso access. 
Acked-by: Will Deacon Acked-by: Russell King Acked-by: Marc Zyngier Signed-off-by: Scott Wood Signed-off-by: Will Deacon --- arch/arm/Kconfig | 1 + arch/arm/include/asm/clocksource.h | 8 ++++++++ arch/arm/kernel/vdso.c | 2 +- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/clocksource.h | 8 ++++++++ arch/arm64/kernel/vdso.c | 2 +- drivers/clocksource/arm_arch_timer.c | 11 +++-------- 7 files changed, 23 insertions(+), 10 deletions(-) create mode 100644 arch/arm/include/asm/clocksource.h create mode 100644 arch/arm64/include/asm/clocksource.h diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a9c4e48bb7ec..b2113c24850c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1,6 +1,7 @@ config ARM bool default y + select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE diff --git a/arch/arm/include/asm/clocksource.h b/arch/arm/include/asm/clocksource.h new file mode 100644 index 000000000000..0b350a7e26f3 --- /dev/null +++ b/arch/arm/include/asm/clocksource.h @@ -0,0 +1,8 @@ +#ifndef _ASM_CLOCKSOURCE_H +#define _ASM_CLOCKSOURCE_H + +struct arch_clocksource_data { + bool vdso_direct; /* Usable for direct VDSO access? 
*/ +}; + +#endif diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 994e971a8538..a0affd14086a 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -270,7 +270,7 @@ static bool tk_is_cntvct(const struct timekeeper *tk) if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) return false; - if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0) + if (!tk->tkr_mono.clock->archdata.vdso_direct) return false; return true; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0e11c8a2aec1..8f868ec4e27f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -4,6 +4,7 @@ config ARM64 select ACPI_GENERIC_GSI if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if ACPI + select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE diff --git a/arch/arm64/include/asm/clocksource.h b/arch/arm64/include/asm/clocksource.h new file mode 100644 index 000000000000..0b350a7e26f3 --- /dev/null +++ b/arch/arm64/include/asm/clocksource.h @@ -0,0 +1,8 @@ +#ifndef _ASM_CLOCKSOURCE_H +#define _ASM_CLOCKSOURCE_H + +struct arch_clocksource_data { + bool vdso_direct; /* Usable for direct VDSO access? 
*/ +}; + +#endif diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 6225612f2464..a2c2478e7d78 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -201,7 +201,7 @@ up_fail: */ void update_vsyscall(struct timekeeper *tk) { - u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter"); + u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; ++vdso_data->tb_seq_count; smp_wmb(); diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index eb5fb4121ac8..73c487da6d2a 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -602,23 +602,18 @@ static void __init arch_counter_register(unsigned type) else arch_timer_read_counter = arch_counter_get_cntpct; + clocksource_counter.archdata.vdso_direct = true; + #ifdef CONFIG_FSL_ERRATUM_A008585 /* * Don't use the vdso fastpath if errata require using * the out-of-line counter accessor. */ if (static_branch_unlikely(&arch_timer_read_ool_enabled)) - clocksource_counter.name = "arch_sys_counter_ool"; + clocksource_counter.archdata.vdso_direct = false; #endif } else { arch_timer_read_counter = arch_counter_get_cntvct_mem; - - /* If the clocksource name is "arch_sys_counter" the - * VDSO will attempt to read the CP15-based counter. - * Ensure this does not happen when CP15-based - * counter is not available. - */ - clocksource_counter.name = "arch_mem_counter"; } start_count = arch_timer_read_counter(); From b5e7307d9d5a340d2c9fabbe1cee137d4c682c71 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 23 Sep 2016 17:55:05 +0100 Subject: [PATCH 097/100] arm64: fix dump_backtrace/unwind_frame with NULL tsk In some places, dump_backtrace() is called with a NULL tsk parameter, e.g. in bug_handler() in arch/arm64, or indirectly via show_stack() in core code. The expectation is that this is treated as if current were passed instead of NULL. Similar is true of unwind_frame(). 
Commit a80a0eb70c358f8c ("arm64: make irq_stack_ptr more robust") didn't take this into account. In dump_backtrace() it compares tsk against current *before* we check if tsk is NULL, and in unwind_frame() we never set tsk if it is NULL. Due to this, we won't initialise irq_stack_ptr in either function. In dump_backtrace() this results in calling dump_mem() for memory immediately above the IRQ stack range, rather than for the relevant range on the task stack. In unwind_frame we'll reject unwinding frames on the IRQ stack. In either case this results in incomplete or misleading backtrace information, but is not otherwise problematic. The initial percpu areas (including the IRQ stacks) are allocated in the linear map, and dump_mem uses __get_user(), so we shouldn't access anything with side-effects, and will handle holes safely. This patch fixes the issue by having both functions handle the NULL tsk case before doing anything else with tsk. Signed-off-by: Mark Rutland Fixes: a80a0eb70c358f8c ("arm64: make irq_stack_ptr more robust") Acked-by: James Morse Cc: Catalin Marinas Cc: Will Deacon Cc: Yang Shi Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 5 ++++- arch/arm64/kernel/traps.c | 10 +++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ca01addf8c4c..c2efddfca18c 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -43,6 +43,9 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) unsigned long fp = frame->fp; unsigned long irq_stack_ptr; + if (!tsk) + tsk = current; + /* * Switching between stacks is valid when tracing current and in * non-preemptible context. 
@@ -67,7 +70,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (tsk && tsk->ret_stack && + if (tsk->ret_stack && (frame->pc == (unsigned long)return_to_handler)) { /* * This is a case where function graph tracer has diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 93445f8b530c..5ff020f8fb7f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -142,6 +142,11 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) unsigned long irq_stack_ptr; int skip; + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + + if (!tsk) + tsk = current; + /* * Switching between stacks is valid when tracing current and in * non-preemptible context. @@ -151,11 +156,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) else irq_stack_ptr = 0; - pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); - - if (!tsk) - tsk = current; - if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; From 0c2a6cce1776ed041b71a305edcb7d89923743e1 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 26 Sep 2016 15:36:50 +0800 Subject: [PATCH 098/100] arm64: Kconfig: select OF/ACPI_NUMA under NUMA config Move OF_NUMA select under NUMA config, and select ACPI_NUMA when ACPI enabled. 
Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8f868ec4e27f..4572f00e64f5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -103,7 +103,6 @@ config ARM64 select NO_BOOTMEM select OF select OF_EARLY_FLATTREE - select OF_NUMA if NUMA && OF select OF_RESERVED_MEM select PCI_ECAM if ACPI select POWER_RESET @@ -581,6 +580,8 @@ config HOTPLUG_CPU config NUMA bool "Numa Memory Allocation and Scheduler Support" depends on SMP + select ACPI_NUMA if ACPI + select OF_NUMA help Enable NUMA (Non Uniform Memory Access) support. From b4b9551e2f467d1a96c324b8a45f9b5b87d2f0fc Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 27 Sep 2016 11:57:18 +0800 Subject: [PATCH 099/100] arm64: Kconfig: remove SMP dependence for NUMA arm64 has forced CONFIG_SMP=y since commit 4b3dc9679cf7, so NUMA does not need an explicit dependency on SMP. Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4572f00e64f5..17c14a1d9112 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -579,7 +579,6 @@ config HOTPLUG_CPU # Common NUMA Features config NUMA bool "Numa Memory Allocation and Scheduler Support" - depends on SMP select ACPI_NUMA if ACPI select OF_NUMA help From db68f3e7594aca77632d56c449bd36c6c931d59a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 13 Sep 2016 11:16:06 +0100 Subject: [PATCH 100/100] arm64: tlbflush.h: add __tlbi() macro As with dsb() and isb(), add a __tlbi() helper so that we can avoid distracting asm boilerplate every time we want a TLBI. As some TLBI operations take an argument while others do not, some pre-processor magic is used to handle these two cases with different assembly blocks. The existing tlbflush.h code is moved over to use the helper.
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier [ rename helper to __tlbi, update comment and commit log ] Signed-off-by: Punit Agrawal Reviewed-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlbflush.h | 34 +++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index b460ae28e346..deab52374119 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -24,6 +24,24 @@ #include #include +/* + * Raw TLBI operations. + * + * Where necessary, use the __tlbi() macro to avoid asm() + * boilerplate. Drivers and most kernel code should use the TLB + * management routines in preference to the macro below. + * + * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending + * on whether a particular TLBI operation takes an argument or + * not. The macro handles invoking the asm with or without the + * register argument as appropriate. + */ +#define __TLBI_0(op, arg) asm ("tlbi " #op) +#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0" : : "r" (arg)) +#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) + +#define __tlbi(op, ...)
__TLBI_N(op, ##__VA_ARGS__, 1, 0) + /* * TLB Management * ============== @@ -66,7 +84,7 @@ static inline void local_flush_tlb_all(void) { dsb(nshst); - asm("tlbi vmalle1"); + __tlbi(vmalle1); dsb(nsh); isb(); } @@ -74,7 +92,7 @@ static inline void local_flush_tlb_all(void) static inline void flush_tlb_all(void) { dsb(ishst); - asm("tlbi vmalle1is"); + __tlbi(vmalle1is); dsb(ish); isb(); } @@ -84,7 +102,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) unsigned long asid = ASID(mm) << 48; dsb(ishst); - asm("tlbi aside1is, %0" : : "r" (asid)); + __tlbi(aside1is, asid); dsb(ish); } @@ -94,7 +112,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48); dsb(ishst); - asm("tlbi vale1is, %0" : : "r" (addr)); + __tlbi(vale1is, addr); dsb(ish); } @@ -122,9 +140,9 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { if (last_level) - asm("tlbi vale1is, %0" : : "r"(addr)); + __tlbi(vale1is, addr); else - asm("tlbi vae1is, %0" : : "r"(addr)); + __tlbi(vae1is, addr); } dsb(ish); } @@ -149,7 +167,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - asm("tlbi vaae1is, %0" : : "r"(addr)); + __tlbi(vaae1is, addr); dsb(ish); isb(); } @@ -163,7 +181,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, { unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); - asm("tlbi vae1is, %0" : : "r" (addr)); + __tlbi(vae1is, addr); dsb(ish); }