From 81cddd65b5c82758ea5571a25e31ff6f1f89ff02 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 3 May 2017 16:37:45 +0100 Subject: [PATCH 01/13] arm64: traps: fix userspace cache maintenance emulation on a tagged pointer When we emulate userspace cache maintenance in the kernel, we can currently send the task a SIGSEGV even though the maintenance was done on a valid address. This happens if the address has a non-zero address tag, and happens to not be mapped in. When we get the address from a user register, we don't currently remove the address tag before performing cache maintenance on it. If the maintenance faults, we end up in either __do_page_fault, where find_vma can't find the VMA if the address has a tag, or in do_translation_fault, where the tagged address will appear to be above TASK_SIZE. In both cases, the address is not mapped in, and the task is sent a SIGSEGV. This patch removes the tag from the address before using it. With this patch, the fault is handled correctly, the address gets mapped in, and the cache maintenance succeeds. As a second bug, if cache maintenance (correctly) fails on an invalid tagged address, the address gets passed into arm64_notify_segfault, where find_vma fails to find the VMA due to the tag, and the wrong si_code may be sent as part of the siginfo_t of the segfault. With this patch, the correct si_code is sent. Fixes: 7dd01aef0557 ("arm64: trap userspace "dc cvau" cache operation on errata-affected core") Cc: # 4.8.x- Acked-by: Will Deacon Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d4d6ae02cd55..0805b44f986a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -443,7 +443,7 @@ int cpu_enable_cache_maint_trap(void *__unused) } #define __user_cache_maint(insn, address, res) \ - if (untagged_addr(address) >= user_addr_max()) { \ + if (address >= user_addr_max()) { \ res = -EFAULT; \ } else { \ uaccess_ttbr0_enable(); \ @@ -469,7 +469,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; int ret = 0; - address = pt_regs_read_reg(regs, rt); + address = untagged_addr(pt_regs_read_reg(regs, rt)); switch (crm) { case ESR_ELx_SYS64_ISS_CRM_DC_CVAU: /* DC CVAU, gets promoted */ From 7dcd9dd8cebe9fa626af7e2358d03a37041a70fb Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 3 May 2017 16:37:46 +0100 Subject: [PATCH 02/13] arm64: hw_breakpoint: fix watchpoint matching for tagged pointers When we take a watchpoint exception, the address that triggered the watchpoint is found in FAR_EL1. We compare it to the address of each configured watchpoint to see which one was hit. The configured watchpoint addresses are untagged, while the address in FAR_EL1 will have an address tag if the data access was done using a tagged address. The tag needs to be removed to compare the address to the watchpoints. Currently we don't remove it, and as a result can report the wrong watchpoint as being hit (specifically, always either the highest TTBR0 watchpoint or lowest TTBR1 watchpoint). This patch removes the tag. 
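For illustration, the tag removal comes down to untagged_addr(), which the kernel defines as sign_extend64(addr, 55): sign-extending from bit 55 clears the tag byte of a TTBR0 address while leaving TTBR1 (kernel) addresses intact. A minimal standalone sketch of the same operation (the untag() helper name is ours, not a kernel symbol):

/* Hypothetical helper mirroring sign_extend64(addr, 55), as used by
 * untagged_addr(): shift the tag byte out, then shift back in with
 * the value of bit 55. */
static inline unsigned long untag(unsigned long addr)
{
	return (unsigned long)((long)(addr << 8) >> 8);
}

/* untag(0x1200ffffabcd1000) == 0x0000ffffabcd1000  (TTBR0, tag 0x12 cleared)
 * untag(0xffffffc000001000) == 0xffffffc000001000  (TTBR1, unchanged) */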
Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0") Cc: # 3.12.x- Acked-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 6 +++--- arch/arm64/kernel/hw_breakpoint.c | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 5308d696311b..0221029e27ff 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -106,9 +106,9 @@ static inline void set_fs(mm_segment_t fs) }) /* - * When dealing with data aborts or instruction traps we may end up with - * a tagged userland pointer. Clear the tag to get a sane pointer to pass - * on to access_ok(), for instance. + * When dealing with data aborts, watchpoints, or instruction traps we may end + * up with a tagged userland pointer. Clear the tag to get a sane pointer to + * pass on to access_ok(), for instance. */ #define untagged_addr(addr) sign_extend64(addr, 55) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 0296e7924240..749f81779420 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -36,6 +36,7 @@ #include #include #include +#include /* Breakpoint currently in use for each BRP. */ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); @@ -721,6 +722,8 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, u64 wp_low, wp_high; u32 lens, lene; + addr = untagged_addr(addr); + lens = __ffs(ctrl->len); lene = __fls(ctrl->len); From 276e93279a630657fff4b086ba14c95955912dfa Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 3 May 2017 16:37:47 +0100 Subject: [PATCH 03/13] arm64: entry: improve data abort handling of tagged pointers When handling a data abort from EL0, we currently zero the top byte of the faulting address, as we assume the address is a TTBR0 address, which may contain a non-zero address tag. However, the address may be a TTBR1 address, in which case we should not zero the top byte. This patch fixes that. The effect is that the full TTBR1 address is passed to the task's signal handler (or printed out in the kernel log). When handling a data abort from EL1, we leave the faulting address intact, as we assume it's either a TTBR1 address or a TTBR0 address with tag 0x00. This is true as far as I'm aware, we don't seem to access a tagged TTBR0 address anywhere in the kernel. Regardless, it's easy to forget about address tags, and code added in the future may not always remember to remove tags from addresses before accessing them. So add tag handling to the EL1 data abort handler as well. This also makes it consistent with the EL0 data abort handler. Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0") Cc: # 3.12.x- Reviewed-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/asm-uaccess.h | 9 +++++++++ arch/arm64/kernel/entry.S | 5 +++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index df411f3e083c..ecd9788cd298 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -62,4 +62,13 @@ alternative_if ARM64_ALT_PAN_NOT_UAO alternative_else_nop_endif .endm +/* + * Remove the address tag from a virtual address, if present. 
+ */ + .macro clear_address_tag, dst, addr + tst \addr, #(1 << 55) + bic \dst, \addr, #(0xff << 56) + csel \dst, \dst, \addr, eq + .endm + #endif diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 43512d4d7df2..b738880350f9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -428,12 +428,13 @@ el1_da: /* * Data abort handling */ - mrs x0, far_el1 + mrs x3, far_el1 enable_dbg // re-enable interrupts if they were enabled in the aborted context tbnz x23, #7, 1f // PSR_I_BIT enable_irq 1: + clear_address_tag x0, x3 mov x2, sp // struct pt_regs bl do_mem_abort @@ -594,7 +595,7 @@ el0_da: // enable interrupts before calling the main handler enable_dbg_and_irq ct_user_exit - bic x0, x26, #(0xff << 56) + clear_address_tag x0, x26 mov x1, x25 mov x2, sp bl do_mem_abort From f0e421b1bf7af97f026e1bb8bfe4c5a7a8c08f42 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 3 May 2017 16:37:48 +0100 Subject: [PATCH 04/13] arm64: documentation: document tagged pointer stack constraints Some kernel features don't currently work if a task puts a non-zero address tag in its stack pointer, frame pointer, or frame record entries (FP, LR). For example, with a tagged stack pointer, the kernel can't deliver signals to the process, and the task is killed instead. As another example, with a tagged frame pointer or frame records, perf fails to generate call graphs or resolve symbols. For now, just document these limitations, instead of finding and fixing everything that doesn't work, as it's not known if anyone needs to use tags in these places anyway. In addition, as requested by Dave Martin, generalize the limitations into a general kernel address tag policy, and refactor tagged-pointers.txt to include it. Fixes: d50240a5f6ce ("arm64: mm: permit use of tagged pointers at EL0") Cc: # 3.12.x- Reviewed-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas --- Documentation/arm64/tagged-pointers.txt | 62 +++++++++++++++++++------ 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt index d9995f1f51b3..a25a99e82bb1 100644 --- a/Documentation/arm64/tagged-pointers.txt +++ b/Documentation/arm64/tagged-pointers.txt @@ -11,24 +11,56 @@ in AArch64 Linux. The kernel configures the translation tables so that translations made via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of the virtual address ignored by the translation hardware. This frees up -this byte for application use, with the following caveats: +this byte for application use. - (1) The kernel requires that all user addresses passed to EL1 - are tagged with tag 0x00. This means that any syscall - parameters containing user virtual addresses *must* have - their top byte cleared before trapping to the kernel. - (2) Non-zero tags are not preserved when delivering signals. - This means that signal handlers in applications making use - of tags cannot rely on the tag information for user virtual - addresses being maintained for fields inside siginfo_t. - One exception to this rule is for signals raised in response - to watchpoint debug exceptions, where the tag information - will be preserved. +Passing tagged addresses to the kernel +-------------------------------------- - (3) Special care should be taken when using tagged pointers, - since it is likely that C compilers will not hazard two - virtual addresses differing only in the upper byte. 
+All interpretation of userspace memory addresses by the kernel assumes +an address tag of 0x00. + +This includes, but is not limited to, addresses found in: + + - pointer arguments to system calls, including pointers in structures + passed to system calls, + + - the stack pointer (sp), e.g. when interpreting it to deliver a + signal, + + - the frame pointer (x29) and frame records, e.g. when interpreting + them to generate a backtrace or call graph. + +Using non-zero address tags in any of these locations may result in an +error code being returned, a (fatal) signal being raised, or other modes +of failure. + +For these reasons, passing non-zero address tags to the kernel via +system calls is forbidden, and using a non-zero address tag for sp is +strongly discouraged. + +Programs maintaining a frame pointer and frame records that use non-zero +address tags may suffer impaired or inaccurate debug and profiling +visibility. + + +Preserving tags +--------------- + +Non-zero tags are not preserved when delivering signals. This means that +signal handlers in applications making use of tags cannot rely on the +tag information for user virtual addresses being maintained for fields +inside siginfo_t. One exception to this rule is for signals raised in +response to watchpoint debug exceptions, where the tag information will +be preserved. The architecture prevents the use of a tagged PC, so the upper byte will be set to a sign-extension of bit 55 on exception return. + + +Other considerations +-------------------- + +Special care should be taken when using tagged pointers, since it is +likely that C compilers will not hazard two virtual addresses differing +only in the upper byte. From fee960bed5e857eb126c4e56dd9ff85938356579 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 3 May 2017 16:09:33 +0100 Subject: [PATCH 05/13] arm64: xchg: hazard against entire exchange variable The inline assembly in __XCHG_CASE() uses a +Q constraint to hazard against other accesses to the memory location being exchanged. However, the pointer passed to the constraint is a u8 pointer, and thus the hazard only applies to the first byte of the location. GCC can take advantage of this, assuming that other portions of the location are unchanged, as demonstrated with the following test case: union u { unsigned long l; unsigned int i[2]; }; unsigned long update_char_hazard(union u *u) { unsigned int a, b; a = u->i[1]; asm ("str %1, %0" : "+Q" (*(char *)&u->l) : "r" (0UL)); b = u->i[1]; return a ^ b; } unsigned long update_long_hazard(union u *u) { unsigned int a, b; a = u->i[1]; asm ("str %1, %0" : "+Q" (*(long *)&u->l) : "r" (0UL)); b = u->i[1]; return a ^ b; } The linaro 15.08 GCC 5.1.1 toolchain compiles the above as follows when using -O2 or above: 0000000000000000 : 0: d2800001 mov x1, #0x0 // #0 4: f9000001 str x1, [x0] 8: d2800000 mov x0, #0x0 // #0 c: d65f03c0 ret 0000000000000010 : 10: b9400401 ldr w1, [x0,#4] 14: d2800002 mov x2, #0x0 // #0 18: f9000002 str x2, [x0] 1c: b9400400 ldr w0, [x0,#4] 20: 4a000020 eor w0, w1, w0 24: d65f03c0 ret This patch fixes the issue by passing an unsigned long pointer into the +Q constraint, as we do for our cmpxchg code. This may hazard against more than is necessary, but this is better than missing a necessary hazard. 
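To show how the narrow hazard could bite at an xchg() call site, here is a hypothetical caller (illustrative only, reusing the union u above; this is not code from the patch):

unsigned long stale_upper_half(union u *u)
{
	unsigned int before = u->i[1];

	/* xchg() may rewrite all 8 bytes of u->l, but with the u8-wide
	 * "+Q" operand the compiler was allowed to assume bytes 1..7
	 * were unchanged across the exchange... */
	(void)xchg(&u->l, 0UL);

	/* ...and could fold this to 0, as update_char_hazard shows. */
	return before ^ u->i[1];
}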
Fixes: 305d454aaa29 ("arm64: atomics: implement native {relaxed, acquire, release} atomics") Cc: # 4.4.x- Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cmpxchg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 91b26d26af8a..ae852add053d 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -46,7 +46,7 @@ static inline unsigned long __xchg_case_##name(unsigned long x, \ " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \ __nops(3) \ " " #nop_lse) \ - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \ + : "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr) \ : "r" (x) \ : cl); \ \ From 994870bead4ab19087a79492400a5478e2906196 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 3 May 2017 16:09:34 +0100 Subject: [PATCH 06/13] arm64: ensure extension of smp_store_release value When an inline assembly operand's type is narrower than the register it is allocated to, the least significant bits of the register (up to the operand type's width) are valid, and any other bits are permitted to contain any arbitrary value. This aligns with the AAPCS64 parameter passing rules. Our __smp_store_release() implementation does not account for this, and implicitly assumes that operands have been zero-extended to the width of the type being stored to. Thus, we may store unknown values to memory when the value type is narrower than the pointer type (e.g. when storing a char to a long). This patch fixes the issue by casting the value operand to the same width as the pointer operand in all cases, which ensures that the value is zero-extended as we expect. We use the same union trickery as __smp_load_acquire and {READ,WRITE}_ONCE() to avoid GCC complaining that pointers are potentially cast to narrower width integers in unreachable paths. A whitespace issue at the top of __smp_store_release() is also corrected. No changes are necessary for __smp_load_acquire(). Load instructions implicitly clear any upper bits of the register, and the compiler will only consider the least significant bits of the register as valid regardless. 
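As a hedged illustration of the affected case (the publish()/published names are made up, not from this patch), consider releasing a char value into a long location:

long published;

void publish(char v)
{
	/*
	 * Before this change, 'v' could reach the "stlr" in an x register
	 * with arbitrary upper bits, all of which would be stored. The
	 * union cast now converts the value to typeof(*p) first, so it is
	 * extended as expected before the store.
	 */
	smp_store_release(&published, v);
}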
Fixes: 47933ad41a86 ("arch: Introduce smp_load_acquire(), smp_store_release()") Fixes: 878a84d5a8a1 ("arm64: add missing data types in smp_load_acquire/smp_store_release") Cc: # 3.14.x- Acked-by: Will Deacon Signed-off-by: Mark Rutland Cc: Matthias Kaehlcke Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/barrier.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 4e0497f581a0..0fe7e43b7fbc 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -42,25 +42,35 @@ #define __smp_rmb() dmb(ishld) #define __smp_wmb() dmb(ishst) -#define __smp_store_release(p, v) \ +#define __smp_store_release(p, v) \ do { \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(*p)) (v) }; \ compiletime_assert_atomic_type(*p); \ switch (sizeof(*p)) { \ case 1: \ asm volatile ("stlrb %w1, %0" \ - : "=Q" (*p) : "r" (v) : "memory"); \ + : "=Q" (*p) \ + : "r" (*(__u8 *)__u.__c) \ + : "memory"); \ break; \ case 2: \ asm volatile ("stlrh %w1, %0" \ - : "=Q" (*p) : "r" (v) : "memory"); \ + : "=Q" (*p) \ + : "r" (*(__u16 *)__u.__c) \ + : "memory"); \ break; \ case 4: \ asm volatile ("stlr %w1, %0" \ - : "=Q" (*p) : "r" (v) : "memory"); \ + : "=Q" (*p) \ + : "r" (*(__u32 *)__u.__c) \ + : "memory"); \ break; \ case 8: \ asm volatile ("stlr %1, %0" \ - : "=Q" (*p) : "r" (v) : "memory"); \ + : "=Q" (*p) \ + : "r" (*(__u64 *)__u.__c) \ + : "memory"); \ break; \ } \ } while (0) From a06040d7a791a9177581dcf7293941bd92400856 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 3 May 2017 16:09:35 +0100 Subject: [PATCH 07/13] arm64: uaccess: ensure extension of access_ok() addr Our access_ok() simply hands its arguments over to __range_ok(), which implicitly assumes that the addr parameter is 64 bits wide. This isn't necessarily true for compat code, which might pass down a 32-bit address parameter. In these cases, we don't have a guarantee that the address has been zero extended to 64 bits, and the upper bits of the register may contain unknown values, potentially resulting in a spurious failure. Avoid this by explicitly casting the addr parameter to an unsigned long (as is done on other architectures), ensuring that the parameter is widened appropriately. Fixes: 0aea86a2176c ("arm64: User access library functions") Cc: # 3.7.x- Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 0221029e27ff..e6540471dcda 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -95,11 +95,12 @@ static inline void set_fs(mm_segment_t fs) */ #define __range_ok(addr, size) \ ({ \ + unsigned long __addr = (unsigned long __force)(addr); \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \ : "=&r" (flag), "=&r" (roksum) \ - : "1" (addr), "Ir" (size), \ + : "1" (__addr), "Ir" (size), \ "r" (current_thread_info()->addr_limit) \ : "cc"); \ flag; \ From 55de49f9aa17b0b2b144dd2af587177b9aadf429 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 3 May 2017 16:09:36 +0100 Subject: [PATCH 08/13] arm64: armv8_deprecated: ensure extension of addr Our compat swp emulation holds the compat user address in an unsigned int, which it passes to __user_swpX_asm().
When a 32-bit value is passed in a register, the upper 32 bits of the register are unknown, and we must extend the value to 64 bits before we can use it as a base address. This patch casts the address to unsigned long to ensure it has been suitably extended, avoiding the potential issue, and silencing a related warning from clang. Fixes: bd35a4adc413 ("arm64: Port SWP/SWPB emulation support from arm") Cc: # 3.19.x- Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/armv8_deprecated.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 657977e77ec8..f0e6d717885b 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -306,7 +306,8 @@ do { \ _ASM_EXTABLE(0b, 4b) \ _ASM_EXTABLE(1b, 4b) \ : "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \ - : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT), \ + : "r" ((unsigned long)addr), "i" (-EAGAIN), \ + "i" (-EFAULT), \ "i" (__SWP_LL_SC_LOOPS) \ : "memory"); \ uaccess_disable(); \ From 8997c93452d16aac11d3b0cc53940c94330273a4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 3 May 2017 16:09:37 +0100 Subject: [PATCH 09/13] arm64: atomic_lse: match asm register sizes The LSE atomic code uses asm register variables to ensure that parameters are allocated in specific registers. In the majority of cases we specifically ask for an x register when using 64-bit values, but in a couple of cases we use a w register for a 64-bit value. For asm register variables, the compiler only cares about the register index, with wN and xN having the same meaning. The compiler determines the register size to use based on the type of the variable. Thus, this inconsistency is merely confusing, and not harmful to code generation. For consistency, this patch updates those cases to use the x register alias. There should be no functional change as a result of this patch. Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic_lse.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 7457ce082b5f..99fa69c9c3cf 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -322,7 +322,7 @@ static inline void atomic64_and(long i, atomic64_t *v) #define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ { \ - register long x0 asm ("w0") = i; \ + register long x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ @@ -394,7 +394,7 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") #define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ { \ - register long x0 asm ("w0") = i; \ + register long x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ From d135b8b5060ea91dd751ff172d179eb4eab1e966 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 3 May 2017 16:09:38 +0100 Subject: [PATCH 10/13] arm64: uaccess: suppress spurious clang warning Clang tries to warn when there's a mismatch between an operand's size, and the size of the register it is held in, as this may indicate a bug. Specifically, clang warns when the operand's type is less than 64 bits wide, and the register is used unqualified (i.e.
%N rather than %xN or %wN). Unfortunately clang can generate these warnings for unreachable code. For example, for code like: do { \ typeof(*(ptr)) __v = (v); \ switch(sizeof(*(ptr))) { \ case 1: \ // assume __v is 1 byte wide \ asm ("{op}b %w0" : : "r" (v)); \ break; \ case 8: \ // assume __v is 8 bytes wide \ asm ("{op} %0" : : "r" (v)); \ break; \ } while (0) ... if op() were passed a char value and pointer to char, clang may produce a warning for the unreachable case where sizeof(*(ptr)) is 8. For the same reasons, clang produces warnings when __put_user_err() is used for types that are less than 64 bits wide. We could avoid this with a cast to a fixed-width type in each of the cases. However, GCC will then warn that pointer types are being cast to mismatched integer sizes (in unreachable paths). Another option would be to use the same union trickery as we do for __smp_store_release() and __smp_load_acquire(), but this is fairly invasive. Instead, this patch suppresses the clang warning by using an x modifier in the assembly for the 8 byte case of __put_user_err(). No additional work is necessary as the value has been cast to typeof(*(ptr)), so the compiler will have performed any necessary extension for the reachable case. For consistency, __get_user_err() is also updated to use the x modifier for its 8 byte case. Acked-by: Will Deacon Signed-off-by: Mark Rutland Reported-by: Matthias Kaehlcke Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index e6540471dcda..90b5755c5003 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -257,7 +257,7 @@ do { \ (err), ARM64_HAS_UAO); \ break; \ case 8: \ - __get_user_asm("ldr", "ldtr", "%", __gu_val, (ptr), \ + __get_user_asm("ldr", "ldtr", "%x", __gu_val, (ptr), \ (err), ARM64_HAS_UAO); \ break; \ default: \ @@ -324,7 +324,7 @@ do { \ (err), ARM64_HAS_UAO); \ break; \ case 8: \ - __put_user_asm("str", "sttr", "%", __pu_val, (ptr), \ + __put_user_asm("str", "sttr", "%x", __pu_val, (ptr), \ (err), ARM64_HAS_UAO); \ break; \ default: \ From 03497d761c55438144fd63534d4223418fdfd345 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 27 Apr 2017 11:19:00 -0700 Subject: [PATCH 11/13] mm: Silence vmap() allocation failures based on caller gfp_flags If the caller has set __GFP_NOWARN don't print the following message: vmap allocation for size 15736832 failed: use vmalloc= to increase size. This can happen with the ARM/Linux or ARM64/Linux module loader built with CONFIG_ARM{,64}_MODULE_PLTS=y which does a first attempt at loading a large module from module space, then falls back to vmalloc space. 
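A hedged sketch of the caller pattern this serves (the function name and region bounds below are invented; the real users are the module_alloc() changes in the following patches): an allocation that is expected to fail sometimes can now opt out of the warning and fall back quietly:

void *opportunistic_alloc(unsigned long size)
{
	void *p;

	/* Try a constrained region first; a failure here is expected
	 * and stays silent because of __GFP_NOWARN. */
	p = __vmalloc_node_range(size, 1, SMALL_AREA_START, SMALL_AREA_END,
				 GFP_KERNEL | __GFP_NOWARN, PAGE_KERNEL,
				 0, NUMA_NO_NODE,
				 __builtin_return_address(0));
	if (p)
		return p;

	/* Fall back to the generic vmalloc area, warning as usual. */
	return vmalloc(size);
}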
Acked-by: Michal Hocko Signed-off-by: Florian Fainelli Signed-off-by: Catalin Marinas --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0b057628a7ba..b74f1d01ef76 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -521,7 +521,7 @@ overflow: } } - if (printk_ratelimit()) + if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) pr_warn("vmap allocation for size %lu failed: use vmalloc= to increase size\n", size); kfree(va); From 75d24d968af8913f641c612930c96acc5399e427 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 27 Apr 2017 11:19:01 -0700 Subject: [PATCH 12/13] ARM: Silence first allocation with CONFIG_ARM_MODULE_PLTS=y When CONFIG_ARM_MODULE_PLTS is enabled, the first allocation using the module space fails, because the module is too big, and then the module allocation is attempted from vmalloc space. Silence the first allocation failure in that case by setting __GFP_NOWARN. Acked-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: Catalin Marinas --- arch/arm/kernel/module.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 80254b47dc34..3ff571c2c71c 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -40,8 +40,15 @@ #ifdef CONFIG_MMU void *module_alloc(unsigned long size) { - void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + gfp_t gfp_mask = GFP_KERNEL; + void *p; + + /* Silence the initial allocation */ + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) + gfp_mask |= __GFP_NOWARN; + + p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p) return p; From 0c2cf6d9487cb90be6ad7fac66044dfa8e8e5243 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 27 Apr 2017 11:19:02 -0700 Subject: [PATCH 13/13] arm64: Silence first allocation with CONFIG_ARM64_MODULE_PLTS=y When CONFIG_ARM64_MODULE_PLTS is enabled, the first allocation using the module space fails, because the module is too big, and then the module allocation is attempted from vmalloc space. Silence the first allocation failure in that case by setting __GFP_NOWARN. Reviewed-by: Ard Biesheuvel Acked-by: Will Deacon Signed-off-by: Florian Fainelli Signed-off-by: Catalin Marinas --- arch/arm64/kernel/module.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index c9a2ab446dc6..f035ff6fb223 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -32,11 +32,16 @@ void *module_alloc(unsigned long size) { + gfp_t gfp_mask = GFP_KERNEL; void *p; + /* Silence the initial allocation */ + if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + gfp_mask |= __GFP_NOWARN; + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, module_alloc_base + MODULES_VSIZE, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&