ARM: Use movw/movt more when available

This commit is contained in:
Roland McGrath 2014-10-22 14:20:35 -07:00
parent b5af9297d5
commit 8c2b1ed8bb
9 changed files with 209 additions and 69 deletions

View File

@ -1,3 +1,30 @@
2014-10-22 Roland McGrath <roland@hack.frob.com>
* sysdeps/arm/__longjmp.S [NEED_HWCAP] [IS_IN_rtld]: Use LDST_PCREL
macro to get at the _rtld_local_ro field.
[NEED_HWCAP] [!IS_IN_rtld]: Use LDR_GLOBAL to get at _rtld_global_ro
([PIC] case) or _dl_hwcap ([!PIC] case).
* sysdeps/arm/setjmp.S: Likewise.
* config.h.in (ARM_PCREL_MOVW_OK): New macro.
* sysdeps/arm/configure.ac: New check to define it.
* sysdeps/arm/configure: Regenerated.
* sysdeps/arm/sysdep.h [__ASSEMBLER__]: Include <arm-features.h>.
(LDST_INDEXED_NOINDEX, LDST_INDEXED_INDEX): New macros.
(LDST_INDEXED, LDST_PC_INDEXED): New macros, differing definitions
depending on [ARM_NO_INDEX_REGISTER] and [__thumb2__].
(LDST_PCREL) [!__thumb2__ && ARCH_HAS_T2 && ARM_PCREL_MOVW_OK]:
Use movw/movt pair instead of a load.
(LDST_GLOBAL): Macro removed.
(LDR_GLOBAL): New macro replaces it.
(LDR_HIDDEN): New macro.
(PTR_MANGLE_LOAD): Use LDR_GLOBAL rather than LDST_GLOBAL.
Use LDR_HIDDEN instead for __pointer_chk_guard_local.
* setjmp/tst-setjmp-static.c: New file.
* setjmp/Makefile (tests): Add it.
(tests-static): New variable.
2014-10-22 Maciej W. Rozycki <macro@codesourcery.com> 2014-10-22 Maciej W. Rozycki <macro@codesourcery.com>
[BZ #17485] [BZ #17485]

View File

@ -243,6 +243,9 @@
/* The ARM hard-float ABI is being used. */ /* The ARM hard-float ABI is being used. */
#undef HAVE_ARM_PCS_VFP #undef HAVE_ARM_PCS_VFP
/* The ARM movw/movt instructions using PC-relative relocs work right. */
#define ARM_PCREL_MOVW_OK 0
/* The pt_chown binary is being built and used by grantpt. */ /* The pt_chown binary is being built and used by grantpt. */
#define HAVE_PT_CHOWN 0 #define HAVE_PT_CHOWN 0

View File

@ -28,7 +28,8 @@ routines := setjmp sigjmp bsd-setjmp bsd-_setjmp \
longjmp __longjmp jmp-unwind longjmp __longjmp jmp-unwind
tests := tst-setjmp jmpbug bug269-setjmp tst-setjmp-fp \ tests := tst-setjmp jmpbug bug269-setjmp tst-setjmp-fp \
tst-sigsetjmp tst-sigsetjmp tst-setjmp-static
tests-static := tst-setjmp-static
include ../Rules include ../Rules

View File

@ -0,0 +1 @@
#include "tst-setjmp.c"

View File

@ -77,21 +77,15 @@ ENTRY (__longjmp)
#ifdef NEED_HWCAP #ifdef NEED_HWCAP
# ifdef IS_IN_rtld # ifdef IS_IN_rtld
ldr a4, 1f LDST_PCREL (ldr, a4, a3, \
ldr a3, .Lrtld_local_ro C_SYMBOL_NAME(_rtld_local_ro) \
0: add a4, pc, a4 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
add a4, a4, a3
ldr a4, [a4, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
# else # else
# ifdef PIC # ifdef PIC
ldr a4, 1f LDR_GLOBAL (a4, a3, C_SYMBOL_NAME(_rtld_global_ro), \
ldr a3, .Lrtld_global_ro RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
0: add a4, pc, a4
ldr a4, [a4, a3]
ldr a4, [a4, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
# else # else
ldr a4, .Lhwcap LDR_GLOBAL (a4, a3, C_SYMBOL_NAME(_dl_hwcap), 0)
ldr a4, [a4, #0]
# endif # endif
# endif # endif
#endif #endif
@ -138,21 +132,4 @@ ENTRY (__longjmp)
DO_RET(lr) DO_RET(lr)
#ifdef NEED_HWCAP
# ifdef IS_IN_rtld
1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
.Lrtld_local_ro:
.long C_SYMBOL_NAME(_rtld_local_ro)(GOTOFF)
# else
# ifdef PIC
1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
.Lrtld_global_ro:
.long C_SYMBOL_NAME(_rtld_global_ro)(GOT)
# else
.Lhwcap:
.long C_SYMBOL_NAME(_dl_hwcap)
# endif
# endif
#endif
END (__longjmp) END (__longjmp)

52
sysdeps/arm/configure vendored
View File

@ -150,8 +150,8 @@ else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */ /* end confdefs.h. */
#ifdef __ARM_PCS_VFP #ifdef __ARM_PCS_VFP
yes yes
#endif #endif
_ACEOF _ACEOF
if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
@ -211,6 +211,54 @@ else
have-arm-tls-desc = no" have-arm-tls-desc = no"
fi fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether PC-relative relocs in movw/movt work properly" >&5
$as_echo_n "checking whether PC-relative relocs in movw/movt work properly... " >&6; }
if ${libc_cv_arm_pcrel_movw+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.s <<\EOF
.syntax unified
.arm
.arch armv7-a
.text
.globl foo
.type foo,%function
foo: movw r0, #:lower16:symbol - 1f - 8
movt r0, #:upper16:symbol - 1f - 8
1: add r0, pc
@ And now a case with a local symbol.
movw r0, #:lower16:3f - 2f - 8
movt r0, #:upper16:3f - 2f - 8
2: add r0, pc
bx lr
.data
.globl symbol
.hidden symbol
symbol: .long 23
3: .long 17
EOF
libc_cv_arm_pcrel_movw=no
${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \
-nostartfiles -nostdlib -shared \
-o conftest.so conftest.s 1>&5 2>&5 &&
LC_ALL=C $READELF -dr conftest.so > conftest.dr 2>&5 &&
{
cat conftest.dr 1>&5
fgrep 'TEXTREL
R_ARM_NONE' conftest.dr > /dev/null || libc_cv_arm_pcrel_movw=yes
}
rm -f conftest*
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arm_pcrel_movw" >&5
$as_echo "$libc_cv_arm_pcrel_movw" >&6; }
if test $libc_cv_arm_pcrel_movw = yes; then
$as_echo "#define ARM_PCREL_MOVW_OK 1" >>confdefs.h
fi
libc_cv_gcc_unwind_find_fde=no libc_cv_gcc_unwind_find_fde=no
# Remove -fno-unwind-tables that was added in sysdeps/arm/preconfigure.ac. # Remove -fno-unwind-tables that was added in sysdeps/arm/preconfigure.ac.

View File

@ -17,8 +17,8 @@ dnl it. Until we do, don't define it.
AC_CACHE_CHECK([whether the compiler is using the ARM hard-float ABI], AC_CACHE_CHECK([whether the compiler is using the ARM hard-float ABI],
[libc_cv_arm_pcs_vfp], [libc_cv_arm_pcs_vfp],
[AC_EGREP_CPP(yes,[#ifdef __ARM_PCS_VFP [AC_EGREP_CPP(yes,[#ifdef __ARM_PCS_VFP
yes yes
#endif #endif
], libc_cv_arm_pcs_vfp=yes, libc_cv_arm_pcs_vfp=no)]) ], libc_cv_arm_pcs_vfp=yes, libc_cv_arm_pcs_vfp=no)])
if test $libc_cv_arm_pcs_vfp = yes; then if test $libc_cv_arm_pcs_vfp = yes; then
AC_DEFINE(HAVE_ARM_PCS_VFP) AC_DEFINE(HAVE_ARM_PCS_VFP)
@ -40,6 +40,46 @@ else
LIBC_CONFIG_VAR([have-arm-tls-desc], [no]) LIBC_CONFIG_VAR([have-arm-tls-desc], [no])
fi fi
# Probe whether the toolchain copes with PC-relative expressions used as
# movw/movt immediates.  A small ARM test file that uses them against both
# a hidden global symbol and a local label is linked into a shared object,
# and the readelf dump of its dynamic section and relocations is searched.
# The result is yes only if the link and the readelf run both succeed and
# the dump mentions neither TEXTREL nor R_ARM_NONE; otherwise it stays no.
AC_CACHE_CHECK([whether PC-relative relocs in movw/movt work properly],
libc_cv_arm_pcrel_movw, [
cat > conftest.s <<\EOF
.syntax unified
.arm
.arch armv7-a
.text
.globl foo
.type foo,%function
foo: movw r0, #:lower16:symbol - 1f - 8
movt r0, #:upper16:symbol - 1f - 8
1: add r0, pc
@ And now a case with a local symbol.
movw r0, #:lower16:3f - 2f - 8
movt r0, #:upper16:3f - 2f - 8
2: add r0, pc
bx lr
.data
.globl symbol
.hidden symbol
symbol: .long 23
3: .long 17
EOF
libc_cv_arm_pcrel_movw=no
${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \
-nostartfiles -nostdlib -shared \
-o conftest.so conftest.s 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD &&
LC_ALL=C $READELF -dr conftest.so > conftest.dr 2>&AS_MESSAGE_LOG_FD &&
{
cat conftest.dr 1>&AS_MESSAGE_LOG_FD
fgrep 'TEXTREL
R_ARM_NONE' conftest.dr > /dev/null || libc_cv_arm_pcrel_movw=yes
}
rm -f conftest*])
# Record a successful probe as the ARM_PCREL_MOVW_OK configuration macro.
if test $libc_cv_arm_pcrel_movw = yes; then
AC_DEFINE([ARM_PCREL_MOVW_OK])
fi
libc_cv_gcc_unwind_find_fde=no libc_cv_gcc_unwind_find_fde=no
# Remove -fno-unwind-tables that was added in sysdeps/arm/preconfigure.ac. # Remove -fno-unwind-tables that was added in sysdeps/arm/preconfigure.ac.

View File

@ -58,21 +58,15 @@ ENTRY (__sigsetjmp)
#ifdef NEED_HWCAP #ifdef NEED_HWCAP
/* Check if we have a VFP unit. */ /* Check if we have a VFP unit. */
# ifdef IS_IN_rtld # ifdef IS_IN_rtld
ldr a3, 1f LDST_PCREL (ldr, a3, a4, \
ldr a4, .Lrtld_local_ro C_SYMBOL_NAME(_rtld_local_ro) \
0: add a3, pc, a3 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
add a3, a3, a4
ldr a3, [a3, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
# else # else
# ifdef PIC # ifdef PIC
ldr a3, 1f LDR_GLOBAL (a3, a4, C_SYMBOL_NAME(_rtld_global_ro), \
ldr a4, .Lrtld_global_ro RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
0: add a3, pc, a3
ldr a3, [a3, a4]
ldr a3, [a3, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
# else # else
ldr a3, .Lhwcap LDR_GLOBAL (a3, a4, C_SYMBOL_NAME(_dl_hwcap), 0)
ldr a3, [a3, #0]
# endif # endif
# endif # endif
#endif #endif
@ -114,23 +108,6 @@ ENTRY (__sigsetjmp)
/* Make a tail call to __sigjmp_save; it takes the same args. */ /* Make a tail call to __sigjmp_save; it takes the same args. */
B PLTJMP(C_SYMBOL_NAME(__sigjmp_save)) B PLTJMP(C_SYMBOL_NAME(__sigjmp_save))
#ifdef NEED_HWCAP
# ifdef IS_IN_rtld
1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
.Lrtld_local_ro:
.long C_SYMBOL_NAME(_rtld_local_ro)(GOTOFF)
# else
# ifdef PIC
1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
.Lrtld_global_ro:
.long C_SYMBOL_NAME(_rtld_global_ro)(GOT)
# else
.Lhwcap:
.long C_SYMBOL_NAME(_dl_hwcap)
# endif
# endif
#endif
END (__sigsetjmp) END (__sigsetjmp)
hidden_def (__sigsetjmp) hidden_def (__sigsetjmp)

View File

@ -21,6 +21,8 @@
#ifndef __ASSEMBLER__ #ifndef __ASSEMBLER__
# include <stdint.h> # include <stdint.h>
#else
# include <arm-features.h>
#endif #endif
/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */ /* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */
@ -157,6 +159,32 @@
.arm .arm
# endif # endif
/* Load or store to/from address X + Y into/from R, (maybe) using T.
X or Y can use T freely; T can be R if OP is a load. The first
version eschews the two-register addressing mode, while the
second version uses it. */
/* No-index form: build the address X + Y in T with an explicit add,
then apply OP to R through [T].  The access is wrapped in sfi_breg,
which is defined outside this excerpt.  */
# define LDST_INDEXED_NOINDEX(OP, R, T, X, Y) \
add T, X, Y; \
sfi_breg T, \
OP R, [T]
/* Indexed form: apply OP to R through [X, Y] in a single instruction.
No temporary is needed, so this form takes no T parameter.  */
# define LDST_INDEXED_INDEX(OP, R, X, Y) \
OP R, [X, Y]
/* LDST_INDEXED accesses X + Y; LDST_PC_INDEXED accesses pc + X.  Both
take T so that callers are written the same way whichever form is
selected below.  */
# ifdef ARM_NO_INDEX_REGISTER
/* We're never using the two-register addressing mode, so this
always uses an intermediate add. */
# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_NOINDEX (OP, R, T, X, Y)
# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
# else
/* The two-register addressing mode is OK, except on Thumb with pc. */
# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_INDEX (OP, R, X, Y)
# ifdef __thumb2__
/* Thumb-2: add pc into T first instead of using pc as the base of a
register-indexed access.  */
# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
# else
/* Otherwise pc is used directly as the base register; T is unused.  */
# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_INDEX (OP, R, pc, X)
# endif
# endif
/* Load or store to/from a pc-relative EXPR into/from R, using T. */ /* Load or store to/from a pc-relative EXPR into/from R, using T. */
# ifdef __thumb2__ # ifdef __thumb2__
# define LDST_PCREL(OP, R, T, EXPR) \ # define LDST_PCREL(OP, R, T, EXPR) \
@ -166,6 +194,11 @@
.previous; \ .previous; \
99: add T, T, pc; \ 99: add T, T, pc; \
OP R, [T] OP R, [T]
# elif defined (ARCH_HAS_T2) && ARM_PCREL_MOVW_OK
# define LDST_PCREL(OP, R, T, EXPR) \
movw T, #:lower16:EXPR - 99f - PC_OFS; \
movt T, #:upper16:EXPR - 99f - PC_OFS; \
99: LDST_PC_INDEXED (OP, R, T, T)
# else # else
# define LDST_PCREL(OP, R, T, EXPR) \ # define LDST_PCREL(OP, R, T, EXPR) \
ldr T, 98f; \ ldr T, 98f; \
@ -175,17 +208,50 @@
99: OP R, [pc, T] 99: OP R, [pc, T]
# endif # endif
/* Load or store to/from a global EXPR into/from R, using T. */ /* Load from a global SYMBOL + CONSTANT into R, using T. */
# define LDST_GLOBAL(OP, R, T, EXPR) \ # if defined (ARCH_HAS_T2) && !defined (PIC)
# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
movw T, #:lower16:SYMBOL; \
movt T, #:upper16:SYMBOL; \
ldr R, [T, $CONSTANT]
# elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK
# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
movw R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \
movw T, #:lower16:99f - 98f - PC_OFS; \
movt R, #:upper16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \
movt T, #:upper16:99f - 98f - PC_OFS; \
.pushsection .rodata.cst4, "aM", %progbits, 4; \
.balign 4; \
99: .word SYMBOL##(GOT); \
.popsection; \
97: add R, R, pc; \
98: LDST_PC_INDEXED (ldr, T, T, T); \
LDST_INDEXED (ldr, R, T, R, T); \
ldr R, [R, $CONSTANT]
# else
# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
ldr T, 99f; \ ldr T, 99f; \
ldr R, 100f; \ ldr R, 100f; \
98: add T, T, pc; \ 98: add T, T, pc; \
ldr T, [T, R]; \ ldr T, [T, R]; \
.subsection 2; \ .subsection 2; \
99: .word _GLOBAL_OFFSET_TABLE_ - 98b - PC_OFS; \ 99: .word _GLOBAL_OFFSET_TABLE_ - 98b - PC_OFS; \
100: .word EXPR##(GOT); \ 100: .word SYMBOL##(GOT); \
.previous; \ .previous; \
OP R, [T] ldr R, [T, $CONSTANT]
# endif
/* This is the same as LDR_GLOBAL, but for a SYMBOL that is known to
be in the same linked object (as for one with hidden visibility).
We can avoid the GOT indirection in the PIC case. For the pure
static case, LDR_GLOBAL is already optimal.
R receives the word loaded from SYMBOL + CONSTANT; T is a scratch
register handed on to the underlying macro.  */
# ifdef PIC
/* PIC: reach the symbol pc-relatively, with CONSTANT folded into the
address expression given to LDST_PCREL.  */
# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
LDST_PCREL (ldr, R, T, SYMBOL + CONSTANT)
# else
/* Non-PIC: simply defer to LDR_GLOBAL with the same operands.  */
# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
LDR_GLOBAL (R, T, SYMBOL, CONSTANT)
# endif
/* Cope with negative memory offsets, which thumb can't encode. /* Cope with negative memory offsets, which thumb can't encode.
Use NEGOFF_ADJ_BASE to (conditionally) alter the base register, Use NEGOFF_ADJ_BASE to (conditionally) alter the base register,
@ -296,7 +362,7 @@
(!defined SHARED && (!defined NOT_IN_libc || defined IS_IN_libpthread))) (!defined SHARED && (!defined NOT_IN_libc || defined IS_IN_libpthread)))
# ifdef __ASSEMBLER__ # ifdef __ASSEMBLER__
# define PTR_MANGLE_LOAD(guard, tmp) \ # define PTR_MANGLE_LOAD(guard, tmp) \
LDST_PCREL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local)); LDR_HIDDEN (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local), 0)
# define PTR_MANGLE(dst, src, guard, tmp) \ # define PTR_MANGLE(dst, src, guard, tmp) \
PTR_MANGLE_LOAD(guard, tmp); \ PTR_MANGLE_LOAD(guard, tmp); \
PTR_MANGLE2(dst, src, guard) PTR_MANGLE2(dst, src, guard)
@ -316,7 +382,7 @@ extern uintptr_t __pointer_chk_guard_local attribute_relro attribute_hidden;
#else #else
# ifdef __ASSEMBLER__ # ifdef __ASSEMBLER__
# define PTR_MANGLE_LOAD(guard, tmp) \ # define PTR_MANGLE_LOAD(guard, tmp) \
LDST_GLOBAL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard)); LDR_GLOBAL (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard), 0);
# define PTR_MANGLE(dst, src, guard, tmp) \ # define PTR_MANGLE(dst, src, guard, tmp) \
PTR_MANGLE_LOAD(guard, tmp); \ PTR_MANGLE_LOAD(guard, tmp); \
PTR_MANGLE2(dst, src, guard) PTR_MANGLE2(dst, src, guard)