aarch64: Remove barriers from TLS descriptor functions
Remove ldar synchronization and most lazy TLSDESC initialization related code.

* sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove DT_TLSDESC_GOT initialization.
* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
(_dl_tlsdesc_undefweak): Remove ldar.
(_dl_tlsdesc_dynamic): Likewise.
* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Remove.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Remove.
(_dl_tlsdesc_resolve_hold_fixup): Likewise.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
This commit is contained in:
parent
b7cf203b5c
commit
91c5a366d8
17
ChangeLog
17
ChangeLog
@ -1,3 +1,20 @@
|
||||
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
|
||||
|
||||
* sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove
|
||||
DT_TLSDESC_GOT initialization.
|
||||
* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
|
||||
(_dl_tlsdesc_resolve_rela): Likewise.
|
||||
(_dl_tlsdesc_resolve_hold): Likewise.
|
||||
(_dl_tlsdesc_undefweak): Remove ldar.
|
||||
(_dl_tlsdesc_dynamic): Likewise.
|
||||
* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Remove.
|
||||
(_dl_tlsdesc_resolve_rela): Likewise.
|
||||
(_dl_tlsdesc_resolve_hold): Likewise.
|
||||
* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Remove.
|
||||
(_dl_tlsdesc_resolve_hold_fixup): Likewise.
|
||||
(_dl_tlsdesc_resolve_rela): Likewise.
|
||||
(_dl_tlsdesc_resolve_hold): Likewise.
|
||||
|
||||
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
|
||||
|
||||
* sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol
|
||||
|
@ -102,10 +102,6 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
|
||||
}
|
||||
}
|
||||
|
||||
if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy)
|
||||
*(ElfW(Addr)*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_GOT)]) + l->l_addr)
|
||||
= (ElfW(Addr)) &_dl_tlsdesc_resolve_rela;
|
||||
|
||||
return lazy;
|
||||
}
|
||||
|
||||
|
@ -80,30 +80,6 @@ _dl_tlsdesc_return:
|
||||
cfi_endproc
|
||||
.size _dl_tlsdesc_return, .-_dl_tlsdesc_return
|
||||
|
||||
/* Same as _dl_tlsdesc_return but with synchronization for
|
||||
lazy relocation.
|
||||
Prototype:
|
||||
_dl_tlsdesc_return_lazy (tlsdesc *) ;
|
||||
*/
|
||||
.hidden _dl_tlsdesc_return_lazy
|
||||
.global _dl_tlsdesc_return_lazy
|
||||
.type _dl_tlsdesc_return_lazy,%function
|
||||
cfi_startproc
|
||||
.align 2
|
||||
_dl_tlsdesc_return_lazy:
|
||||
/* The ldar here happens after the load from [x0] at the call site
|
||||
(that is generated by the compiler as part of the TLS access ABI),
|
||||
so it reads the same value (this function is the final value of
|
||||
td->entry) and thus it synchronizes with the release store to
|
||||
td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
|
||||
from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
|
||||
DELOUSE (0)
|
||||
ldar PTR_REG (zr), [x0]
|
||||
ldr PTR_REG (0), [x0, #PTR_SIZE]
|
||||
RET
|
||||
cfi_endproc
|
||||
.size _dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
|
||||
|
||||
/* Handler for undefined weak TLS symbols.
|
||||
Prototype:
|
||||
_dl_tlsdesc_undefweak (tlsdesc *);
|
||||
@ -121,14 +97,7 @@ _dl_tlsdesc_return_lazy:
|
||||
_dl_tlsdesc_undefweak:
|
||||
str x1, [sp, #-16]!
|
||||
cfi_adjust_cfa_offset (16)
|
||||
/* The ldar here happens after the load from [x0] at the call site
|
||||
(that is generated by the compiler as part of the TLS access ABI),
|
||||
so it reads the same value (this function is the final value of
|
||||
td->entry) and thus it synchronizes with the release store to
|
||||
td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
|
||||
from [x0,#8] here happens after the initialization of td->arg. */
|
||||
DELOUSE (0)
|
||||
ldar PTR_REG (zr), [x0]
|
||||
ldr PTR_REG (0), [x0, #PTR_SIZE]
|
||||
mrs x1, tpidr_el0
|
||||
sub PTR_REG (0), PTR_REG (0), PTR_REG (1)
|
||||
@ -192,13 +161,6 @@ _dl_tlsdesc_dynamic:
|
||||
cfi_rel_offset (x4, 32+24)
|
||||
|
||||
mrs x4, tpidr_el0
|
||||
/* The ldar here happens after the load from [x0] at the call site
|
||||
(that is generated by the compiler as part of the TLS access ABI),
|
||||
so it reads the same value (this function is the final value of
|
||||
td->entry) and thus it synchronizes with the release store to
|
||||
td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
|
||||
from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
|
||||
ldar PTR_REG (zr), [x0]
|
||||
ldr PTR_REG (1), [x0,#TLSDESC_ARG]
|
||||
ldr PTR_REG (0), [x4,#TCBHEAD_DTV]
|
||||
ldr PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]
|
||||
@ -276,168 +238,3 @@ _dl_tlsdesc_dynamic:
|
||||
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
|
||||
# undef NSAVEXREGPAIRS
|
||||
#endif
|
||||
|
||||
/* This function is a wrapper for a lazy resolver for TLS_DESC
|
||||
RELA relocations.
|
||||
When the actual resolver returns, it will have adjusted the
|
||||
TLS descriptor such that we can tail-call it for it to return
|
||||
the TP offset of the symbol. */
|
||||
|
||||
.hidden _dl_tlsdesc_resolve_rela
|
||||
.global _dl_tlsdesc_resolve_rela
|
||||
.type _dl_tlsdesc_resolve_rela,%function
|
||||
cfi_startproc
|
||||
.align 2
|
||||
_dl_tlsdesc_resolve_rela:
|
||||
#define NSAVEXREGPAIRS 9
|
||||
/* The tlsdesc PLT entry pushes x2 and x3 to the stack. */
|
||||
cfi_adjust_cfa_offset (16)
|
||||
cfi_rel_offset (x2, 0)
|
||||
cfi_rel_offset (x3, 8)
|
||||
stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
|
||||
cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
|
||||
cfi_rel_offset (x29, 0)
|
||||
cfi_rel_offset (x30, 8)
|
||||
mov x29, sp
|
||||
stp x1, x4, [sp, #32+16*0]
|
||||
stp x5, x6, [sp, #32+16*1]
|
||||
stp x7, x8, [sp, #32+16*2]
|
||||
stp x9, x10, [sp, #32+16*3]
|
||||
stp x11, x12, [sp, #32+16*4]
|
||||
stp x13, x14, [sp, #32+16*5]
|
||||
stp x15, x16, [sp, #32+16*6]
|
||||
stp x17, x18, [sp, #32+16*7]
|
||||
str x0, [sp, #32+16*8]
|
||||
cfi_rel_offset (x1, 32)
|
||||
cfi_rel_offset (x4, 32+8)
|
||||
cfi_rel_offset (x5, 32+16)
|
||||
cfi_rel_offset (x6, 32+16+8)
|
||||
cfi_rel_offset (x7, 32+16*2)
|
||||
cfi_rel_offset (x8, 32+16*2+8)
|
||||
cfi_rel_offset (x9, 32+16*3)
|
||||
cfi_rel_offset (x10, 32+16*3+8)
|
||||
cfi_rel_offset (x11, 32+16*4)
|
||||
cfi_rel_offset (x12, 32+16*4+8)
|
||||
cfi_rel_offset (x13, 32+16*5)
|
||||
cfi_rel_offset (x14, 32+16*5+8)
|
||||
cfi_rel_offset (x15, 32+16*6)
|
||||
cfi_rel_offset (x16, 32+16*6+8)
|
||||
cfi_rel_offset (x17, 32+16*7)
|
||||
cfi_rel_offset (x18, 32+16*7+8)
|
||||
cfi_rel_offset (x0, 32+16*8)
|
||||
|
||||
SAVE_Q_REGISTERS
|
||||
|
||||
DELOUSE (3)
|
||||
ldr PTR_REG (1), [x3, #PTR_SIZE]
|
||||
bl _dl_tlsdesc_resolve_rela_fixup
|
||||
|
||||
RESTORE_Q_REGISTERS
|
||||
|
||||
ldr x0, [sp, #32+16*8]
|
||||
DELOUSE (0)
|
||||
ldr PTR_REG (1), [x0]
|
||||
blr x1
|
||||
|
||||
ldp x1, x4, [sp, #32+16*0]
|
||||
ldp x5, x6, [sp, #32+16*1]
|
||||
ldp x7, x8, [sp, #32+16*2]
|
||||
ldp x9, x10, [sp, #32+16*3]
|
||||
ldp x11, x12, [sp, #32+16*4]
|
||||
ldp x13, x14, [sp, #32+16*5]
|
||||
ldp x15, x16, [sp, #32+16*6]
|
||||
ldp x17, x18, [sp, #32+16*7]
|
||||
ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
|
||||
cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
|
||||
cfi_restore (x29)
|
||||
cfi_restore (x30)
|
||||
ldp x2, x3, [sp], #16
|
||||
cfi_adjust_cfa_offset (-16)
|
||||
RET
|
||||
#undef NSAVEXREGPAIRS
|
||||
cfi_endproc
|
||||
.size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
|
||||
|
||||
/* This function is a placeholder for lazy resolving of TLS
|
||||
relocations. Once some thread starts resolving a TLS
|
||||
relocation, it sets up the TLS descriptor to use this
|
||||
resolver, such that other threads that would attempt to
|
||||
resolve it concurrently may skip the call to the original lazy
|
||||
resolver and go straight to a condition wait.
|
||||
|
||||
When the actual resolver returns, it will have adjusted the
|
||||
TLS descriptor such that we can tail-call it for it to return
|
||||
the TP offset of the symbol. */
|
||||
|
||||
.hidden _dl_tlsdesc_resolve_hold
|
||||
.global _dl_tlsdesc_resolve_hold
|
||||
.type _dl_tlsdesc_resolve_hold,%function
|
||||
cfi_startproc
|
||||
.align 2
|
||||
_dl_tlsdesc_resolve_hold:
|
||||
#define NSAVEXREGPAIRS 10
|
||||
1:
|
||||
stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
|
||||
cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
|
||||
cfi_rel_offset (x29, 0)
|
||||
cfi_rel_offset (x30, 8)
|
||||
mov x29, sp
|
||||
stp x1, x2, [sp, #32+16*0]
|
||||
stp x3, x4, [sp, #32+16*1]
|
||||
stp x5, x6, [sp, #32+16*2]
|
||||
stp x7, x8, [sp, #32+16*3]
|
||||
stp x9, x10, [sp, #32+16*4]
|
||||
stp x11, x12, [sp, #32+16*5]
|
||||
stp x13, x14, [sp, #32+16*6]
|
||||
stp x15, x16, [sp, #32+16*7]
|
||||
stp x17, x18, [sp, #32+16*8]
|
||||
str x0, [sp, #32+16*9]
|
||||
cfi_rel_offset (x1, 32)
|
||||
cfi_rel_offset (x2, 32+8)
|
||||
cfi_rel_offset (x3, 32+16)
|
||||
cfi_rel_offset (x4, 32+16+8)
|
||||
cfi_rel_offset (x5, 32+16*2)
|
||||
cfi_rel_offset (x6, 32+16*2+8)
|
||||
cfi_rel_offset (x7, 32+16*3)
|
||||
cfi_rel_offset (x8, 32+16*3+8)
|
||||
cfi_rel_offset (x9, 32+16*4)
|
||||
cfi_rel_offset (x10, 32+16*4+8)
|
||||
cfi_rel_offset (x11, 32+16*5)
|
||||
cfi_rel_offset (x12, 32+16*5+8)
|
||||
cfi_rel_offset (x13, 32+16*6)
|
||||
cfi_rel_offset (x14, 32+16*6+8)
|
||||
cfi_rel_offset (x15, 32+16*7)
|
||||
cfi_rel_offset (x16, 32+16*7+8)
|
||||
cfi_rel_offset (x17, 32+16*8)
|
||||
cfi_rel_offset (x18, 32+16*8+8)
|
||||
cfi_rel_offset (x0, 32+16*9)
|
||||
|
||||
SAVE_Q_REGISTERS
|
||||
|
||||
adr x1, 1b
|
||||
bl _dl_tlsdesc_resolve_hold_fixup
|
||||
|
||||
RESTORE_Q_REGISTERS
|
||||
|
||||
ldr x0, [sp, #32+16*9]
|
||||
DELOUSE (0)
|
||||
ldr PTR_REG (1), [x0]
|
||||
blr x1
|
||||
|
||||
ldp x1, x2, [sp, #32+16*0]
|
||||
ldp x3, x4, [sp, #32+16*1]
|
||||
ldp x5, x6, [sp, #32+16*2]
|
||||
ldp x7, x8, [sp, #32+16*3]
|
||||
ldp x9, x10, [sp, #32+16*4]
|
||||
ldp x11, x12, [sp, #32+16*5]
|
||||
ldp x13, x14, [sp, #32+16*6]
|
||||
ldp x15, x16, [sp, #32+16*7]
|
||||
ldp x17, x18, [sp, #32+16*8]
|
||||
ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
|
||||
cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
|
||||
cfi_restore (x29)
|
||||
cfi_restore (x30)
|
||||
RET
|
||||
cfi_endproc
|
||||
.size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
|
||||
#undef NSAVEXREGPAIRS
|
||||
|
@ -45,18 +45,9 @@ struct tlsdesc_dynamic_arg
|
||||
extern ptrdiff_t attribute_hidden
|
||||
_dl_tlsdesc_return (struct tlsdesc *);
|
||||
|
||||
extern ptrdiff_t attribute_hidden
|
||||
_dl_tlsdesc_return_lazy (struct tlsdesc *);
|
||||
|
||||
extern ptrdiff_t attribute_hidden
|
||||
_dl_tlsdesc_undefweak (struct tlsdesc *);
|
||||
|
||||
extern ptrdiff_t attribute_hidden
|
||||
_dl_tlsdesc_resolve_rela (struct tlsdesc *);
|
||||
|
||||
extern ptrdiff_t attribute_hidden
|
||||
_dl_tlsdesc_resolve_hold (struct tlsdesc *);
|
||||
|
||||
# ifdef SHARED
|
||||
extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
|
||||
|
||||
|
@ -18,137 +18,12 @@
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <link.h>
|
||||
#include <ldsodefs.h>
|
||||
#include <elf/dynamic-link.h>
|
||||
#include <tls.h>
|
||||
#include <dl-tlsdesc.h>
|
||||
#include <dl-unmap-segments.h>
|
||||
#define _dl_tlsdesc_resolve_hold 0
|
||||
#include <tlsdeschtab.h>
|
||||
#include <atomic.h>
|
||||
|
||||
/* The following functions take an entry_check_offset argument. It's
|
||||
computed by the caller as an offset between its entry point and the
|
||||
call site, such that by adding the built-in return address that is
|
||||
implicitly passed to the function with this offset, we can easily
|
||||
obtain the caller's entry point to compare with the entry point
|
||||
given in the TLS descriptor. If it's changed, we want to return
|
||||
immediately. */
|
||||
|
||||
/* This function is used to lazily resolve TLS_DESC RELA relocations.
|
||||
The argument location is used to hold a pointer to the relocation. */
|
||||
|
||||
void
|
||||
attribute_hidden
|
||||
_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc *td, struct link_map *l)
|
||||
{
|
||||
const ElfW(Rela) *reloc = atomic_load_relaxed (&td->arg);
|
||||
|
||||
/* After GL(dl_load_lock) is grabbed only one caller can see td->entry in
|
||||
initial state in _dl_tlsdesc_resolve_early_return_p, other concurrent
|
||||
callers will return and retry calling td->entry. The updated td->entry
|
||||
synchronizes with the single writer so all read accesses here can use
|
||||
relaxed order. */
|
||||
if (_dl_tlsdesc_resolve_early_return_p
|
||||
(td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
|
||||
return;
|
||||
|
||||
/* The code below was borrowed from _dl_fixup(),
|
||||
except for checking for STB_LOCAL. */
|
||||
const ElfW(Sym) *const symtab
|
||||
= (const void *) D_PTR (l, l_info[DT_SYMTAB]);
|
||||
const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
|
||||
const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
|
||||
lookup_t result;
|
||||
|
||||
/* Look up the target symbol. If the normal lookup rules are not
|
||||
used don't look in the global scope. */
|
||||
if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
|
||||
&& __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
|
||||
{
|
||||
const struct r_found_version *version = NULL;
|
||||
|
||||
if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
|
||||
{
|
||||
const ElfW(Half) *vernum =
|
||||
(const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
|
||||
ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
|
||||
version = &l->l_versions[ndx];
|
||||
if (version->hash == 0)
|
||||
version = NULL;
|
||||
}
|
||||
|
||||
result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
|
||||
l->l_scope, version, ELF_RTYPE_CLASS_PLT,
|
||||
DL_LOOKUP_ADD_DEPENDENCY, NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We already found the symbol. The module (and therefore its load
|
||||
address) is also known. */
|
||||
result = l;
|
||||
}
|
||||
|
||||
if (!sym)
|
||||
{
|
||||
atomic_store_relaxed (&td->arg, (void *) reloc->r_addend);
|
||||
/* This release store synchronizes with the ldar acquire load
|
||||
instruction in _dl_tlsdesc_undefweak. */
|
||||
atomic_store_release (&td->entry, _dl_tlsdesc_undefweak);
|
||||
}
|
||||
else
|
||||
{
|
||||
# ifndef SHARED
|
||||
CHECK_STATIC_TLS (l, result);
|
||||
# else
|
||||
if (!TRY_STATIC_TLS (l, result))
|
||||
{
|
||||
void *p = _dl_make_tlsdesc_dynamic (result, sym->st_value
|
||||
+ reloc->r_addend);
|
||||
atomic_store_relaxed (&td->arg, p);
|
||||
/* This release store synchronizes with the ldar acquire load
|
||||
instruction in _dl_tlsdesc_dynamic. */
|
||||
atomic_store_release (&td->entry, _dl_tlsdesc_dynamic);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
void *p = (void*) (sym->st_value + result->l_tls_offset
|
||||
+ reloc->r_addend);
|
||||
atomic_store_relaxed (&td->arg, p);
|
||||
/* This release store synchronizes with the ldar acquire load
|
||||
instruction in _dl_tlsdesc_return_lazy. */
|
||||
atomic_store_release (&td->entry, _dl_tlsdesc_return_lazy);
|
||||
}
|
||||
}
|
||||
|
||||
_dl_tlsdesc_wake_up_held_fixups ();
|
||||
}
|
||||
|
||||
/* This function is used to avoid busy waiting for other threads to
|
||||
complete the lazy relocation. Once another thread wins the race to
|
||||
relocate a TLS descriptor, it sets the descriptor up such that this
|
||||
function is called to wait until the resolver releases the
|
||||
lock. */
|
||||
|
||||
void
|
||||
attribute_hidden
|
||||
_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc *td, void *caller)
|
||||
{
|
||||
/* Maybe we're lucky and can return early. */
|
||||
if (caller != atomic_load_relaxed (&td->entry))
|
||||
return;
|
||||
|
||||
/* Locking here will stop execution until the running resolver runs
|
||||
_dl_tlsdesc_wake_up_held_fixups(), releasing the lock.
|
||||
|
||||
FIXME: We'd be better off waiting on a condition variable, such
|
||||
that we didn't have to hold the lock throughout the relocation
|
||||
processing. */
|
||||
__rtld_lock_lock_recursive (GL(dl_load_lock));
|
||||
__rtld_lock_unlock_recursive (GL(dl_load_lock));
|
||||
}
|
||||
|
||||
|
||||
/* Unmap the dynamic object, but also release its TLS descriptor table
|
||||
if there is one. */
|
||||
|
Loading…
Reference in New Issue
Block a user