diff --git a/ChangeLog.arm b/ChangeLog.arm
index 52480aca41..fd520bcd9c 100644
--- a/ChangeLog.arm
+++ b/ChangeLog.arm
@@ -1,3 +1,25 @@
+2009-02-05  Paul Brook  <paul@codesourcery.com>
+	    Joseph Myers  <joseph@codesourcery.com>
+
+	* sysdeps/arm/dl-machine.h (elf_machine_dynamic): Add Thumb-2 implementation.
+	(elf_machine_load_address): Clear T bit of PLT entry contents.
+	(RTLD_START): Mark function symbols as such.  Tweak pc-relative
+	addressing to avoid depending on pc read pipeline offset.
+	* sysdeps/arm/machine-gmon.h (MCOUNT): Add Thumb-2 implementation.
+	* sysdeps/arm/tls-macros.h: Add alignment for Thumb-2.
+	(ARM_PC_OFFSET): Define.
+	(TLS_IE): Define differently for Thumb-2.
+	(TLS_LE, TLS_LD, TLS_GD): Use ARM_PC_OFFSET.
+	* sysdeps/arm/elf/start.S: Switch to thumb mode for Thumb-2.
+	* sysdeps/unix/sysv/linux/arm/eabi/sysdep.h (INTERNAL_SYSCALL_RAW):
+	Add Thumb implementation.
+	* sysdeps/unix/sysv/linux/arm/eabi/nptl/aio_misc.h: New.
+	* sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c: Enforce
+	alignment for Thumb-2.  Adjust offset from PC for Thumb-2.
+	* sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c: Ditto.
+	* sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h (atomic_full_barrier,
+	__arch_compare_and_exchange_val_32_acq): Add Thumb-2 implementation.
+
 2009-02-02  Joseph Myers  <joseph@codesourcery.com>
 
 	* sysdeps/unix/sysv/linux/arm/bits/shm.h (SHM_EXEC): Define.
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index 1a45a2613c..f839d97b18 100644
--- a/sysdeps/arm/dl-machine.h
+++ b/sysdeps/arm/dl-machine.h
@@ -53,11 +53,22 @@ static inline Elf32_Addr __attribute__ ((unused))
 elf_machine_dynamic (void)
 {
   Elf32_Addr dynamic;
+#ifdef __thumb2__
+  long tmp;
+  asm ("ldr\t%0, 1f\n\t"
+       "adr\t%1, 1f\n\t"
+       "ldr\t%0, [%0, %1]\n\t"
+       "b 2f\n"
+       ".align 2\n"
+       "1: .word _GLOBAL_OFFSET_TABLE_ - 1b\n"
+       "2:" : "=r" (dynamic), "=r" (tmp));
+#else
   asm ("ldr %0, 2f\n"
        "1: ldr %0, [pc, %0]\n"
        "b 3f\n"
        "2: .word _GLOBAL_OFFSET_TABLE_ - (1b+8)\n"
        "3:" : "=r" (dynamic));
+#endif
   return dynamic;
 }
 
@@ -69,6 +80,10 @@ elf_machine_load_address (void)
   extern void __dl_start asm ("_dl_start");
   Elf32_Addr got_addr = (Elf32_Addr) &__dl_start;
   Elf32_Addr pcrel_addr;
+#ifdef __thumb__
+  /* Clear the low bit of the function address.  */
+  got_addr &= ~(Elf32_Addr) 1;
+#endif
   asm ("adr %0, _dl_start" : "=r" (pcrel_addr));
   return pcrel_addr - got_addr;
 }
@@ -140,7 +155,9 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 #define RTLD_START asm ("\
 .text\n\
 .globl _start\n\
+.type _start, %function\n\
 .globl _dl_start_user\n\
+.type _dl_start_user, %function\n\
 _start:\n\
 	@ we are PIC code, so get global offset table\n\
 	ldr	sl, .L_GET_GOT\n\
@@ -152,8 +169,8 @@ _start:\n\
 	bl	_dl_start\n\
 	@ returns user entry point in r0\n\
 _dl_start_user:\n\
-	add	sl, pc, sl\n\
-.L_GOT_GOT:\n\
+	adr	r6, .L_GET_GOT\n\
+	add	sl, sl, r6\n\
 	ldr	r4, [sl, r4]\n\
 	@ save the entry point in another register\n\
 	mov	r6, r0\n\
@@ -210,7 +227,7 @@ _dl_start_user:\n\
 	b	.L_done_fixup\n\
 \n\
 .L_GET_GOT:\n\
-	.word	_GLOBAL_OFFSET_TABLE_ - .L_GOT_GOT - 4\n\
+	.word	_GLOBAL_OFFSET_TABLE_ - .L_GET_GOT\n\
 .L_SKIP_ARGS:\n\
 	.word	_dl_skip_args(GOTOFF)\n\
 .L_FINI_PROC:\n\
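On ARM, bit 0 of a code address records the instruction set of the branch target, so a function symbol's address taken in Thumb state carries a stray low bit; the new #ifdef __thumb__ block above masks it off before the load address is computed. A standalone sketch of the same masking, assuming a 32-bit ARM target (the helper name code_address is illustrative, not part of the patch):

    #include <stdint.h>

    /* Return the raw address of FN's first instruction.  In Thumb
       state the symbolic address has the "T" bit (bit 0) set for
       interworking; clear it before doing address arithmetic, just as
       elf_machine_load_address does with _dl_start above.  */
    static inline uint32_t
    code_address (void (*fn) (void))
    {
      uint32_t addr = (uint32_t) fn;	/* Assumes 32-bit ARM.  */
    #ifdef __thumb__
      addr &= ~(uint32_t) 1;
    #endif
      return addr;
    }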
 		NULL
 */
 
+#if defined(__thumb2__)
+	.thumb
+	.syntax unified
+#endif
 	.text
 	.globl _start
diff --git a/sysdeps/arm/machine-gmon.h b/sysdeps/arm/machine-gmon.h
index fa3f65237d..dbda0ddeba 100644
--- a/sysdeps/arm/machine-gmon.h
+++ b/sysdeps/arm/machine-gmon.h
@@ -50,6 +50,28 @@ static void mcount_internal (u_long frompc, u_long selfpc)
 }
 */
 
+#ifdef __thumb2__
+
+#define MCOUNT \
+void _mcount (void) \
+{ \
+  __asm__("push {r0, r1, r2, r3};" \
+	  "movs fp, fp;" \
+	  "it eq;" \
+	  "moveq r1, #0;" \
+	  "itttt ne;" \
+	  "ldrne r1, [fp, $-4];" \
+	  "ldrne r0, [fp, $-12];" \
+	  "movnes r0, r0;" \
+	  "ldrne r0, [r0, $-4];" \
+	  "movs r0, r0;" \
+	  "it ne;" \
+	  "blne mcount_internal;" \
+	  "pop {r0, r1, r2, r3}"); \
+}
+
+#else
+
 #define MCOUNT \
 void _mcount (void) \
 { \
@@ -65,3 +87,4 @@ void _mcount (void) \
 	  "ldmia sp!, {r0, r1, r2, r3}"); \
 }
 
+#endif
diff --git a/sysdeps/arm/tls-macros.h b/sysdeps/arm/tls-macros.h
index 94aa3a83b1..e41d3bc5aa 100644
--- a/sysdeps/arm/tls-macros.h
+++ b/sysdeps/arm/tls-macros.h
@@ -1,14 +1,36 @@
+#ifdef __thumb2__
+#define ARM_PC_OFFSET "4"
+#else
+#define ARM_PC_OFFSET "8"
+#endif
+
 #define TLS_LE(x) \
   ({ int *__result; \
      void *tp = __builtin_thread_pointer (); \
      asm ("ldr %0, 1f; " \
 	  "add %0, %1, %0; " \
 	  "b 2f; " \
+	  ".align 2; " \
 	  "1: .word " #x "(tpoff); " \
 	  "2: " \
 	  : "=&r" (__result) : "r" (tp)); \
      __result; })
 
+#ifdef __thumb2__
+#define TLS_IE(x) \
+  ({ int *__result; \
+     void *tp = __builtin_thread_pointer (); \
+     asm ("ldr %0, 1f; " \
+	  "3: add %0, pc, %0;" \
+	  "ldr %0, [%0];" \
+	  "add %0, %1, %0; " \
+	  "b 2f; " \
+	  ".align 2; " \
+	  "1: .word " #x "(gottpoff) + (. - 3b - 4); " \
+	  "2: " \
+	  : "=&r" (__result) : "r" (tp)); \
+     __result; })
+#else
 #define TLS_IE(x) \
   ({ int *__result; \
      void *tp = __builtin_thread_pointer (); \
@@ -16,10 +38,12 @@
 	  "3: ldr %0, [pc, %0];" \
 	  "add %0, %1, %0; " \
 	  "b 2f; " \
+	  ".align 2; " \
 	  "1: .word " #x "(gottpoff) + (. - 3b - 8); " \
 	  "2: " \
 	  : "=&r" (__result) : "r" (tp)); \
      __result; })
+#endif
 
 #define TLS_LD(x) \
   ({ char *__result; \
@@ -28,12 +52,14 @@
      asm ("ldr %0, 2f; " \
 	  "1: add %0, pc, %0; " \
 	  "b 3f; " \
-	  "2: .word " #x "(tlsldm) + (. - 1b - 8); " \
+	  ".align 2; " \
+	  "2: .word " #x "(tlsldm) + (. - 1b - "ARM_PC_OFFSET"); " \
 	  "3: " \
 	  : "=r" (__result)); \
      __result = (char *)__tls_get_addr (__result); \
      asm ("ldr %0, 1f; " \
 	  "b 2f; " \
+	  ".align 2; " \
 	  "1: .word " #x "(tlsldo); " \
 	  "2: " \
 	  : "=r" (__offset)); \
@@ -45,7 +71,8 @@
      asm ("ldr %0, 2f; " \
 	  "1: add %0, pc, %0; " \
 	  "b 3f; " \
-	  "2: .word " #x "(tlsgd) + (. - 1b - 8); " \
+	  ".align 2; " \
+	  "2: .word " #x "(tlsgd) + (. - 1b - "ARM_PC_OFFSET"); " \
 	  "3: " \
 	  : "=r" (__result)); \
      (int *)__tls_get_addr (__result); })
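ARM_PC_OFFSET captures how far ahead of the current instruction the pc reads: 8 bytes in ARM state, 4 in Thumb-2, which is why each literal is written as symbol + (. - label - ARM_PC_OFFSET). A hedged usage sketch of the macros above (the variable tls_var and function get_tls_var are hypothetical, and the snippet assumes a toolchain that accepts the ARM TLS relocation operators):

    __thread int tls_var;

    int *
    get_tls_var (void)
    {
      /* Expands to a literal-pool load of tls_var(gottpoff), a
         pc-relative GOT access corrected by the pc read offset, then
         an add of the thread pointer.  */
      return TLS_IE (tls_var);
    }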
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include_next <aio_misc.h>
+
+#ifdef __thumb2__
+
+#include <sys/cdefs.h>
+
+/* The Thumb-2 definition of INTERNAL_SYSCALL_RAW has to hide the use
+   of r7 from the compiler because it cannot handle asm clobbering the
+   hard frame pointer.  In aio_suspend, GCC does not eliminate the
+   hard frame pointer because the function uses variable-length
+   arrays, so it generates unwind information using r7 as virtual
+   stack pointer.  During system calls, when r7 has been saved on the
+   stack, this means the unwind information is invalid.  Without extra
+   unwind directives, which would need to cause unwind information for
+   the asm to be generated separately from that for the parts of the
+   function before and after the asm (with three index table entries),
+   it is not possible to represent any temporary change to the virtual
+   stack pointer.  Instead, we move the problematic system calls out
+   of line into a function that does not require a frame pointer.  */
+
+static __attribute_noinline__ void
+aio_misc_wait (int *resultp,
+	       volatile int *futexp,
+	       const struct timespec *timeout,
+	       int cancel)
+{
+  AIO_MISC_WAIT (*resultp, *futexp, timeout, cancel);
+}
+
+#undef AIO_MISC_WAIT
+#define AIO_MISC_WAIT(result, futex, timeout, cancel) \
+  aio_misc_wait (&result, &futex, timeout, cancel)
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c b/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c
index 24ce61ba19..71ab77c6fa 100644
--- a/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c
+++ b/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c
@@ -89,7 +89,12 @@ asm (
 "4:	bl	pthread_cancel_init\n"
 "	ldr	r3, [r4, r5]\n"
 "	b	5b\n"
+"	.align 2\n"
+#ifdef __thumb2__
+"1:	.word	_GLOBAL_OFFSET_TABLE_ - 3b - 4\n"
+#else
 "1:	.word	_GLOBAL_OFFSET_TABLE_ - 3b - 8\n"
+#endif
 "2:	.word	libgcc_s_resume(GOTOFF)\n"
 "	.size	_Unwind_Resume, .-_Unwind_Resume\n"
 );
diff --git a/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c b/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c
index a9c9d18217..3c780b7137 100644
--- a/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c
+++ b/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c
@@ -66,7 +66,12 @@ asm (
 "4:	bl	init\n"
 "	ldr	r3, [r4, r5]\n"
 "	b	5b\n"
+"	.align 2\n"
+#ifdef __thumb2__
+"1:	.word	_GLOBAL_OFFSET_TABLE_ - 3b - 4\n"
+#else
 "1:	.word	_GLOBAL_OFFSET_TABLE_ - 3b - 8\n"
+#endif
 "2:	.word	libgcc_s_resume(GOTOFF)\n"
 "	.size	_Unwind_Resume, .-_Unwind_Resume\n"
 );
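Both unwind files compute the GOT address the same way: the literal at label 1 holds the link-time distance from the pc value that will be read at label 3 to _GLOBAL_OFFSET_TABLE_, and that pc value differs between instruction sets. A worked check of the arithmetic with made-up addresses (an illustrative sketch, not part of the patch):

    #include <assert.h>

    int
    main (void)
    {
      /* Suppose the add at label 3 sits at 0x1000 and the GOT at 0x9000.  */
      unsigned int label3 = 0x1000, got = 0x9000;
      /* ARM state reads pc as the instruction's address + 8; Thumb-2
         reads it as the address + 4.  Each literal subtracts the
         matching offset so the run-time sum is exactly the GOT.  */
      assert ((label3 + 8) + (got - label3 - 8) == got);
      assert ((label3 + 4) + (got - label3 - 4) == got);
      return 0;
    }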
diff --git a/sysdeps/unix/sysv/linux/arm/eabi/sysdep.h b/sysdeps/unix/sysv/linux/arm/eabi/sysdep.h
index 1444f40ebc..a7dd40d741 100644
--- a/sysdeps/unix/sysv/linux/arm/eabi/sysdep.h
+++ b/sysdeps/unix/sysv/linux/arm/eabi/sysdep.h
@@ -44,6 +44,30 @@
    argument; otherwise the (optional) compatibility code for APCS
    binaries may be invoked.  */
 
+#ifdef __thumb__
+/* Hide the use of r7 from the compiler; this would be a lot
+   easier but for the fact that syscall numbers can exceed 255.
+   For the moment LOAD_ARGS_7 is sacrificed.
+   We can't use push/pop inside the asm because that breaks
+   unwinding (i.e. thread cancellation).  */
+#undef LOAD_ARGS_7
+#undef INTERNAL_SYSCALL_RAW
+#define INTERNAL_SYSCALL_RAW(name, err, nr, args...)	\
+  ({							\
+      int _sys_buf[2];					\
+      register int _a1 asm ("a1");			\
+      register int *_r6 asm ("r6") = _sys_buf;		\
+      *_r6 = name;					\
+      LOAD_ARGS_##nr (args)				\
+      asm volatile ("str	r7, [r6, #4]\n\t"	\
+		    "ldr	r7, [r6]\n\t"		\
+		    "swi	0	@ syscall " #name "\n\t" \
+		    "ldr	r7, [r6, #4]"		\
+		    : "=r" (_a1)			\
+		    : "r" (_r6) ASM_ARGS_##nr		\
+		    : "memory");			\
+      _a1; })
+#else /* ARM */
 #undef INTERNAL_SYSCALL_RAW
 #define INTERNAL_SYSCALL_RAW(name, err, nr, args...)	\
   ({							\
@@ -55,6 +79,7 @@
        : "r" (_nr) ASM_ARGS_##nr			\
        : "memory");					\
        _a1; })
+#endif
 
 /* For EABI, non-constant syscalls are actually pretty easy...  */
 #undef INTERNAL_SYSCALL_NCS
diff --git a/sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h b/sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h
index 247ddd389b..b0586ea1ee 100644
--- a/sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h
+++ b/sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h
@@ -37,12 +37,21 @@ typedef uintmax_t uatomic_max_t;
 
 void __arm_link_error (void);
 
+#ifdef __thumb2__
+#define atomic_full_barrier() \
+     __asm__ __volatile__ \
+	     ("movw\tip, #0x0fa0\n\t" \
+	      "movt\tip, #0xffff\n\t" \
+	      "blx\tip" \
+	      : : : "ip", "lr", "cc", "memory");
+#else
 #define atomic_full_barrier() \
      __asm__ __volatile__ \
	     ("mov\tip, #0xffff0fff\n\t" \
	      "mov\tlr, pc\n\t" \
	      "add\tpc, ip, #(0xffff0fa0 - 0xffff0fff)" \
	      : : : "ip", "lr", "cc", "memory");
+#endif
 
 /* Atomic compare and exchange.  This sequence relies on the kernel
    to provide a compare and exchange operation which is atomic on the
@@ -59,6 +68,32 @@ void __arm_link_error (void);
    specify one to work around GCC PR rtl-optimization/21223.  Otherwise
    it may cause a_oldval or a_tmp to be moved to a different register.  */
 
+#ifdef __thumb2__
+/* Thumb-2 has ldrex/strex.  However it does not have barrier instructions,
+   so we still need to use the kernel helper.  */
+#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+  ({ register __typeof (oldval) a_oldval asm ("r0"); \
+     register __typeof (oldval) a_newval asm ("r1") = (newval); \
+     register __typeof (mem) a_ptr asm ("r2") = (mem); \
+     register __typeof (oldval) a_tmp asm ("r3"); \
+     register __typeof (oldval) a_oldval2 asm ("r4") = (oldval); \
+     __asm__ __volatile__ \
+	     ("0:\tldr\t%[tmp],[%[ptr]]\n\t" \
+	      "cmp\t%[tmp], %[old2]\n\t" \
+	      "bne\t1f\n\t" \
+	      "mov\t%[old], %[old2]\n\t" \
+	      "movw\t%[tmp], #0x0fc0\n\t" \
+	      "movt\t%[tmp], #0xffff\n\t" \
+	      "blx\t%[tmp]\n\t" \
+	      "bcc\t0b\n\t" \
+	      "mov\t%[tmp], %[old2]\n\t" \
+	      "1:" \
+	      : [old] "=&r" (a_oldval), [tmp] "=&r" (a_tmp) \
+	      : [new] "r" (a_newval), [ptr] "r" (a_ptr), \
+		[old2] "r" (a_oldval2) \
+	      : "ip", "lr", "cc", "memory"); \
+     a_tmp; })
+#else
 #define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
   ({ register __typeof (oldval) a_oldval asm ("r0"); \
      register __typeof (oldval) a_newval asm ("r1") = (newval); \
@@ -81,6 +116,7 @@ void __arm_link_error (void);
 	      [old2] "r" (a_oldval2) \
 	      : "ip", "lr", "cc", "memory"); \
      a_tmp; })
+#endif
 
 #define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
   ({ __arm_link_error (); oldval; })
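The magic addresses in both Thumb-2 sequences are Linux ARM kuser helpers, fixed entry points the kernel maps into every process: 0xffff0fa0 is __kuser_memory_barrier and 0xffff0fc0 is __kuser_cmpxchg, which returns zero and sets the C flag when the exchange succeeds (hence the bcc retry loop above). A minimal sketch of invoking the cmpxchg helper from C under those documented ABI assumptions (the cas_int wrapper itself is hypothetical):

    /* Kernel-provided entry point at a fixed address; this is kernel
       ABI on 32-bit ARM Linux, not a libc symbol.  */
    typedef int (*kuser_cmpxchg_t) (int oldval, int newval, volatile int *ptr);
    #define KUSER_CMPXCHG ((kuser_cmpxchg_t) 0xffff0fc0)

    /* Atomically replace *PTR with DESIRED if it equals EXPECTED;
       returns nonzero on success.  */
    static int
    cas_int (volatile int *ptr, int expected, int desired)
    {
      return KUSER_CMPXCHG (expected, desired, ptr) == 0;
    }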