From 55f0fb6adb83a5883589e945cbce37e90615ea09 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 8 Aug 2014 14:12:17 +0100 Subject: [PATCH 1/4] ARM: 8127/1: module: add support for R_ARM_TARGET1 relocations Kernel module build with GCOV profiling fails to load with the following error: $ insmod test_module.ko test_module: unknown relocation: 38 insmod: can't insert 'test_module.ko': invalid module format This happens because constructor pointers in the .init_array section have not supported R_ARM_TARGET1 relocation type. Documentation (ELF for the ARM Architecture) says: "The relocation must be processed either in the same way as R_ARM_REL32 or as R_ARM_ABS32: a virtual platform must specify which method is used." Since kernel expects to see absolute addresses in .init_array R_ARM_TARGET1 relocation type should be treated the same way as R_ARM_ABS32. Signed-off-by: Andrey Ryabinin Signed-off-by: Russell King --- arch/arm/include/asm/elf.h | 1 + arch/arm/kernel/module.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index f4b46d39b9cf..afb9cafd3786 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -50,6 +50,7 @@ typedef struct user_fp elf_fpregset_t; #define R_ARM_ABS32 2 #define R_ARM_CALL 28 #define R_ARM_JUMP24 29 +#define R_ARM_TARGET1 38 #define R_ARM_V4BX 40 #define R_ARM_PREL31 42 #define R_ARM_MOVW_ABS_NC 43 diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 45e478157278..6a4dffefd357 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -91,6 +91,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, break; case R_ARM_ABS32: + case R_ARM_TARGET1: *(u32 *)loc += sym->st_value; break; From 85868313177700d20644263a782351262d2aff84 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 15 Aug 2014 12:11:49 +0100 Subject: [PATCH 2/4] ARM: 8128/1: abort: don't clear the exclusive monitors The ARMv6 and ARMv7 early abort handlers clear the exclusive monitors upon entry to the kernel, but this is redundant: - We clear the monitors on every exception return since commit 200b812d0084 ("Clear the exclusive monitor when returning from an exception"), so this is not necessary to ensure the monitors are cleared before returning from a fault handler. - Any dummy STREX will target a temporary scratch area in memory, and may succeed or fail without corrupting useful data. Its status value will not be used. - Any other STREX in the kernel must be preceded by an LDREX, which will initialise the monitors consistently and will not depend on the earlier state of the monitors. Therefore we have no reason to care about the initial state of the exclusive monitors when a data abort is taken, and clearing the monitors prior to exception return (as we already do) is sufficient. This patch removes the redundant clearing of the exclusive monitors from the early abort handlers. Signed-off-by: Mark Rutland Acked-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Russell King --- arch/arm/mm/abort-ev6.S | 6 ------ arch/arm/mm/abort-ev7.S | 6 ------ 2 files changed, 12 deletions(-) diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 3815a8262af0..8c48c5c22a33 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -17,12 +17,6 @@ */ .align 5 ENTRY(v6_early_abort) -#ifdef CONFIG_CPU_V6 - sub r1, sp, #4 @ Get unused stack location - strex r0, r1, [r1] @ Clear the exclusive monitor -#elif defined(CONFIG_CPU_32v6K) - clrex -#endif mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR /* diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S index 703375277ba6..4812ad054214 100644 --- a/arch/arm/mm/abort-ev7.S +++ b/arch/arm/mm/abort-ev7.S @@ -13,12 +13,6 @@ */ .align 5 ENTRY(v7_early_abort) - /* - * The effect of data aborts on on the exclusive access monitor are - * UNPREDICTABLE. Do a CLREX to clear the state - */ - clrex - mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR From 2c32c65e3726c773760038910be30cce1b4d4149 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 15 Aug 2014 12:11:50 +0100 Subject: [PATCH 3/4] ARM: 8129/1: errata: work around Cortex-A15 erratum 830321 using dummy strex On revisions of Cortex-A15 prior to r3p3, a CLREX instruction at PL1 may falsely trigger a watchpoint exception, leading to potential data aborts during exception return and/or livelock. This patch resolves the issue in the following ways: - Replacing our uses of CLREX with a dummy STREX sequence instead (as we did for v6 CPUs). - Removing the clrex code from v7_exit_coherency_flush and derivatives, since this only exists as a minor performance improvement when non-cached exclusives are in use (Linux doesn't use these). Benchmarking on a variety of ARM cores revealed no measurable performance difference with this change applied, so the change is performed unconditionally and no new Kconfig entry is added. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 1 - arch/arm/kernel/entry-header.S | 29 +++++++++++++++-------------- arch/arm/mach-exynos/mcpm-exynos.c | 1 - 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index fd43f7f55b70..79ecb4f34ffb 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -472,7 +472,6 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size) "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR \n\t" \ "isb \n\t" \ "bl v7_flush_dcache_"__stringify(level)" \n\t" \ - "clrex \n\t" \ "mrc p15, 0, r0, c1, c0, 1 @ get ACTLR \n\t" \ "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" \ "mcr p15, 0, r0, c1, c0, 1 @ set ACTLR \n\t" \ diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 8db307d0954b..2fdf8679b46e 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -208,26 +208,21 @@ #endif .endif msr spsr_cxsf, \rpsr -#if defined(CONFIG_CPU_V6) - ldr r0, [sp] - strex r1, r2, [sp] @ clear the exclusive monitor - ldmib sp, {r1 - pc}^ @ load r1 - pc, cpsr -#elif defined(CONFIG_CPU_32v6K) - clrex @ clear the exclusive monitor - ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr -#else - ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr +#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) + @ We must avoid clrex due to Cortex-A15 erratum #830321 + sub r0, sp, #4 @ uninhabited address + strex r1, r2, [r0] @ clear the exclusive monitor #endif + ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr .endm .macro restore_user_regs, fast = 0, offset = 0 ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC]! @ get pc msr spsr_cxsf, r1 @ save in spsr_svc -#if defined(CONFIG_CPU_V6) +#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) + @ We must avoid clrex due to Cortex-A15 erratum #830321 strex r1, r2, [sp] @ clear the exclusive monitor -#elif defined(CONFIG_CPU_32v6K) - clrex @ clear the exclusive monitor #endif .if \fast ldmdb sp, {r1 - lr}^ @ get calling r1 - lr @@ -261,7 +256,10 @@ .endif ldr lr, [sp, #S_SP] @ top of the stack ldrd r0, r1, [sp, #S_LR] @ calling lr and pc - clrex @ clear the exclusive monitor + + @ We must avoid clrex due to Cortex-A15 erratum #830321 + strex r2, r1, [sp, #S_LR] @ clear the exclusive monitor + stmdb lr!, {r0, r1, \rpsr} @ calling lr and rfe context ldmia sp, {r0 - r12} mov sp, lr @@ -282,13 +280,16 @@ .endm #else /* ifdef CONFIG_CPU_V7M */ .macro restore_user_regs, fast = 0, offset = 0 - clrex @ clear the exclusive monitor mov r2, sp load_user_sp_lr r2, r3, \offset + S_SP @ calling sp, lr ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC] @ get pc add sp, sp, #\offset + S_SP msr spsr_cxsf, r1 @ save in spsr_svc + + @ We must avoid clrex due to Cortex-A15 erratum #830321 + strex r1, r2, [sp] @ clear the exclusive monitor + .if \fast ldmdb sp, {r1 - r12} @ get calling r1 - r12 .else diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index b2f8b60cf0e9..dc9a764a7c37 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -43,7 +43,6 @@ "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR\n\t" \ "isb\n\t"\ "bl v7_flush_dcache_"__stringify(level)"\n\t" \ - "clrex\n\t"\ "mrc p15, 0, r0, c1, c0, 1 @ get ACTLR\n\t" \ "bic r0, r0, #(1 << 6) @ disable local coherency\n\t" \ /* Dummy Load of a device register to avoid Erratum 799270 */ \ From eba1c71819d210f5e0d522571f9b8abce94fe9c5 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Fri, 15 Aug 2014 15:53:14 +0100 Subject: [PATCH 4/4] ARM: 8130/1: cpuidle/cpuidle-big_little: fix reading cpu id part number Commit af040ffc9ba1 ("ARM: make it easier to check the CPU part number correctly") changed ARM_CPU_PART_X masks, and the way they are returned and checked against. Usage of read_cpuid_part_number() is now deprecated, and calling places updated accordingly. This actually broke cpuidle-big_little initialization, as bl_idle_driver_init() performs a check using an hardcoded mask on cpu_id. Create an interface to perform the check (that is now even easier to read). Define also a proper mask (ARM_CPU_PART_MASK) that makes this kind of checks cleaner and helps preventing bugs in the future. Update usage accordingly. Signed-off-by: Juri Lelli Signed-off-by: Lorenzo Pieralisi Signed-off-by: Russell King --- arch/arm/include/asm/cputype.h | 3 ++- arch/arm/include/asm/smp_plat.h | 15 +++++++++++++++ drivers/cpuidle/cpuidle-big_little.c | 13 +++---------- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 963a2515906d..819777d0e91f 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -74,6 +74,7 @@ #define ARM_CPU_PART_CORTEX_A12 0x4100c0d0 #define ARM_CPU_PART_CORTEX_A17 0x4100c0e0 #define ARM_CPU_PART_CORTEX_A15 0x4100c0f0 +#define ARM_CPU_PART_MASK 0xff00fff0 #define ARM_CPU_XSCALE_ARCH_MASK 0xe000 #define ARM_CPU_XSCALE_ARCH_V1 0x2000 @@ -179,7 +180,7 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void) */ static inline unsigned int __attribute_const__ read_cpuid_part(void) { - return read_cpuid_id() & 0xff00fff0; + return read_cpuid_id() & ARM_CPU_PART_MASK; } static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void) diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index a252c0bfacf5..0ad7d490ee6f 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -8,6 +8,7 @@ #include #include +#include #include /* @@ -25,6 +26,20 @@ static inline bool is_smp(void) #endif } +/** + * smp_cpuid_part() - return part id for a given cpu + * @cpu: logical cpu id. + * + * Return: part id of logical cpu passed as argument. + */ +static inline unsigned int smp_cpuid_part(int cpu) +{ + struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpu); + + return is_smp() ? cpu_info->cpuid & ARM_CPU_PART_MASK : + read_cpuid_part(); +} + /* all SMP configurations have the extended CPUID registers */ #ifndef CONFIG_MMU #define tlb_ops_need_broadcast() 0 diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c index 344d79fa3407..ef94c3b81f18 100644 --- a/drivers/cpuidle/cpuidle-big_little.c +++ b/drivers/cpuidle/cpuidle-big_little.c @@ -138,25 +138,18 @@ static int bl_enter_powerdown(struct cpuidle_device *dev, return idx; } -static int __init bl_idle_driver_init(struct cpuidle_driver *drv, int cpu_id) +static int __init bl_idle_driver_init(struct cpuidle_driver *drv, int part_id) { - struct cpuinfo_arm *cpu_info; struct cpumask *cpumask; - unsigned long cpuid; int cpu; cpumask = kzalloc(cpumask_size(), GFP_KERNEL); if (!cpumask) return -ENOMEM; - for_each_possible_cpu(cpu) { - cpu_info = &per_cpu(cpu_data, cpu); - cpuid = is_smp() ? cpu_info->cpuid : read_cpuid_id(); - - /* read cpu id part number */ - if ((cpuid & 0xFFF0) == cpu_id) + for_each_possible_cpu(cpu) + if (smp_cpuid_part(cpu) == part_id) cpumask_set_cpu(cpu, cpumask); - } drv->cpumask = cpumask;