aarch64: Remove barriers from TLS descriptor functions

Remove ldar synchronization and most lazy TLSDESC initialization
related code.

	* sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove
	DT_TLSDESC_GOT initialization.
	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
	(_dl_tlsdesc_resolve_rela): Likewise.
	(_dl_tlsdesc_resolve_hold): Likewise.
	(_dl_tlsdesc_undefweak): Remove ldar.
	(_dl_tlsdesc_dynamic): Likewise.
	* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Remove.
	(_dl_tlsdesc_resolve_rela): Likewise.
	(_dl_tlsdesc_resolve_hold): Likewise.
	* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Remove.
	(_dl_tlsdesc_resolve_hold_fixup): Likewise.
	(_dl_tlsdesc_resolve_rela): Likewise.
	(_dl_tlsdesc_resolve_hold): Likewise.
This commit is contained in:
Szabolcs Nagy 2017-09-27 18:14:21 +01:00
parent b7cf203b5c
commit 91c5a366d8
5 changed files with 18 additions and 342 deletions

View File

@ -1,3 +1,20 @@
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
* sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove
DT_TLSDESC_GOT initialization.
* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
(_dl_tlsdesc_undefweak): Remove ldar.
(_dl_tlsdesc_dynamic): Likewise.
* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Remove.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Remove.
(_dl_tlsdesc_resolve_hold_fixup): Likewise.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
* sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol

View File

@ -102,10 +102,6 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
}
}
if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy)
*(ElfW(Addr)*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_GOT)]) + l->l_addr)
= (ElfW(Addr)) &_dl_tlsdesc_resolve_rela;
return lazy;
}

View File

@ -80,30 +80,6 @@ _dl_tlsdesc_return:
cfi_endproc
.size _dl_tlsdesc_return, .-_dl_tlsdesc_return
/* Same as _dl_tlsdesc_return but with synchronization for
lazy relocation.
Prototype:
_dl_tlsdesc_return_lazy (tlsdesc *) ;
In: x0 = pointer to the TLS descriptor.
Out: x0 = the word loaded from [x0, #PTR_SIZE] (td->arg, per the
comment in the body).
The routine uses no stack.
*/
.hidden _dl_tlsdesc_return_lazy
.global _dl_tlsdesc_return_lazy
.type _dl_tlsdesc_return_lazy,%function
cfi_startproc
.align 2
_dl_tlsdesc_return_lazy:
/* The ldar here happens after the load from [x0] at the call site
(that is generated by the compiler as part of the TLS access ABI),
so it reads the same value (this function is the final value of
td->entry) and thus it synchronizes with the release store to
td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
/* DELOUSE is defined outside this view; presumably it normalizes the
pointer held in register 0 before it is dereferenced -- TODO confirm. */
DELOUSE (0)
/* Acquire load of td->entry. The destination is the zero register, so
the loaded value is discarded; the instruction is kept only for its
ordering effect on the following load. */
ldar PTR_REG (zr), [x0]
/* Return value: the descriptor's second word. */
ldr PTR_REG (0), [x0, #PTR_SIZE]
RET
cfi_endproc
.size _dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
/* Handler for undefined weak TLS symbols.
Prototype:
_dl_tlsdesc_undefweak (tlsdesc *);
@ -121,14 +97,7 @@ _dl_tlsdesc_return_lazy:
_dl_tlsdesc_undefweak:
str x1, [sp, #-16]!
cfi_adjust_cfa_offset (16)
/* The ldar here happens after the load from [x0] at the call site
(that is generated by the compiler as part of the TLS access ABI),
so it reads the same value (this function is the final value of
td->entry) and thus it synchronizes with the release store to
td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
DELOUSE (0)
ldar PTR_REG (zr), [x0]
ldr PTR_REG (0), [x0, #PTR_SIZE]
mrs x1, tpidr_el0
sub PTR_REG (0), PTR_REG (0), PTR_REG (1)
@ -192,13 +161,6 @@ _dl_tlsdesc_dynamic:
cfi_rel_offset (x4, 32+24)
mrs x4, tpidr_el0
/* The ldar here happens after the load from [x0] at the call site
(that is generated by the compiler as part of the TLS access ABI),
so it reads the same value (this function is the final value of
td->entry) and thus it synchronizes with the release store to
td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
ldar PTR_REG (zr), [x0]
ldr PTR_REG (1), [x0,#TLSDESC_ARG]
ldr PTR_REG (0), [x4,#TCBHEAD_DTV]
ldr PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]
@ -276,168 +238,3 @@ _dl_tlsdesc_dynamic:
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
# undef NSAVEXREGPAIRS
#endif
/* This function is a wrapper for a lazy resolver for TLS_DESC
RELA relocations.
When the actual resolver returns, it will have adjusted the
TLS descriptor such that we can tail-call it for it to return
the TP offset of the symbol.
In: x0 = pointer to the TLS descriptor; x3 = base address from which
the second fixup argument is loaded; the caller's x2 and x3 are
already on the stack (see the first comment in the body).
Out: x0 = whatever the updated descriptor entry function returned;
x0 is deliberately not reloaded from the frame after that call.
Every other register saved below, plus x2 and x3, is restored before
returning.
Note that the code as written calls the updated entry with blr and
then restores registers, rather than performing a literal tail call. */
.hidden _dl_tlsdesc_resolve_rela
.global _dl_tlsdesc_resolve_rela
.type _dl_tlsdesc_resolve_rela,%function
cfi_startproc
.align 2
_dl_tlsdesc_resolve_rela:
/* Number of 16-byte slots reserved above the 32-byte frame header:
eight register pairs (x1/x4 ... x17/x18) plus one slot for x0. */
#define NSAVEXREGPAIRS 9
/* The tlsdesc PLT entry pushes x2 and x3 to the stack. */
cfi_adjust_cfa_offset (16)
cfi_rel_offset (x2, 0)
cfi_rel_offset (x3, 8)
/* Allocate the whole frame (32 + 16*9 = 176 bytes) with one
pre-indexed store; x29/x30 live at offset 0. */
stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
cfi_rel_offset (x29, 0)
cfi_rel_offset (x30, 8)
mov x29, sp
/* Save x1 and x4-x18 starting at offset 32, then x0 in the last slot.
x2 and x3 are skipped because they were saved by the PLT entry. */
stp x1, x4, [sp, #32+16*0]
stp x5, x6, [sp, #32+16*1]
stp x7, x8, [sp, #32+16*2]
stp x9, x10, [sp, #32+16*3]
stp x11, x12, [sp, #32+16*4]
stp x13, x14, [sp, #32+16*5]
stp x15, x16, [sp, #32+16*6]
stp x17, x18, [sp, #32+16*7]
str x0, [sp, #32+16*8]
/* Unwind information mirroring the stores above, one entry per
register at the same frame offset. */
cfi_rel_offset (x1, 32)
cfi_rel_offset (x4, 32+8)
cfi_rel_offset (x5, 32+16)
cfi_rel_offset (x6, 32+16+8)
cfi_rel_offset (x7, 32+16*2)
cfi_rel_offset (x8, 32+16*2+8)
cfi_rel_offset (x9, 32+16*3)
cfi_rel_offset (x10, 32+16*3+8)
cfi_rel_offset (x11, 32+16*4)
cfi_rel_offset (x12, 32+16*4+8)
cfi_rel_offset (x13, 32+16*5)
cfi_rel_offset (x14, 32+16*5+8)
cfi_rel_offset (x15, 32+16*6)
cfi_rel_offset (x16, 32+16*6+8)
cfi_rel_offset (x17, 32+16*7)
cfi_rel_offset (x18, 32+16*7+8)
cfi_rel_offset (x0, 32+16*8)
/* SAVE_Q_REGISTERS / RESTORE_Q_REGISTERS are macros defined outside
this view; presumably they spill and reload the SIMD/FP registers
around the C call -- TODO confirm. */
SAVE_Q_REGISTERS
/* Build the second argument: x1 = word at [x3, #PTR_SIZE]. x0 still
holds the descriptor pointer and is passed through as the first
argument. DELOUSE is opaque here; presumably it normalizes the
pointer in register 3 first -- TODO confirm. */
DELOUSE (3)
ldr PTR_REG (1), [x3, #PTR_SIZE]
bl _dl_tlsdesc_resolve_rela_fixup
RESTORE_Q_REGISTERS
/* Reload the descriptor pointer and call whatever entry function the
descriptor holds now; its result is left in x0. */
ldr x0, [sp, #32+16*8]
DELOUSE (0)
ldr PTR_REG (1), [x0]
blr x1
/* Restore every saved register except x0, which carries the result. */
ldp x1, x4, [sp, #32+16*0]
ldp x5, x6, [sp, #32+16*1]
ldp x7, x8, [sp, #32+16*2]
ldp x9, x10, [sp, #32+16*3]
ldp x11, x12, [sp, #32+16*4]
ldp x13, x14, [sp, #32+16*5]
ldp x15, x16, [sp, #32+16*6]
ldp x17, x18, [sp, #32+16*7]
/* Pop the frame and recover x29/x30 in one post-indexed load. */
ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
cfi_restore (x29)
cfi_restore (x30)
/* Pop the x2/x3 pair that the PLT entry pushed. */
ldp x2, x3, [sp], #16
cfi_adjust_cfa_offset (-16)
RET
#undef NSAVEXREGPAIRS
cfi_endproc
.size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
/* This function is a placeholder for lazy resolving of TLS
relocations. Once some thread starts resolving a TLS
relocation, it sets up the TLS descriptor to use this
resolver, such that other threads that would attempt to
resolve it concurrently may skip the call to the original lazy
resolver and go straight to a condition wait.
When the actual resolver returns, it will have adjusted the
TLS descriptor such that we can tail-call it for it to return
the TP offset of the symbol.
In: x0 = pointer to the TLS descriptor.
Out: x0 = whatever the descriptor's current entry function returned;
x0 is deliberately not reloaded from the frame after that call.
x1-x18, x29 and x30 are saved on entry and restored before returning.
Note that the code as written calls the updated entry with blr and
then restores registers, rather than performing a literal tail call. */
.hidden _dl_tlsdesc_resolve_hold
.global _dl_tlsdesc_resolve_hold
.type _dl_tlsdesc_resolve_hold,%function
cfi_startproc
.align 2
_dl_tlsdesc_resolve_hold:
/* Number of 16-byte slots reserved above the 32-byte frame header:
nine register pairs (x1/x2 ... x17/x18) plus one slot for x0. */
#define NSAVEXREGPAIRS 10
/* Local label placed before the first instruction, so its address
equals this function's entry point; it is handed to the fixup as
the CALLER argument further down. */
1:
/* Allocate the whole frame (32 + 16*10 = 192 bytes) with one
pre-indexed store; x29/x30 live at offset 0. */
stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
cfi_rel_offset (x29, 0)
cfi_rel_offset (x30, 8)
mov x29, sp
/* Save x1-x18 starting at offset 32, then x0 in the last slot. */
stp x1, x2, [sp, #32+16*0]
stp x3, x4, [sp, #32+16*1]
stp x5, x6, [sp, #32+16*2]
stp x7, x8, [sp, #32+16*3]
stp x9, x10, [sp, #32+16*4]
stp x11, x12, [sp, #32+16*5]
stp x13, x14, [sp, #32+16*6]
stp x15, x16, [sp, #32+16*7]
stp x17, x18, [sp, #32+16*8]
str x0, [sp, #32+16*9]
/* Unwind information mirroring the stores above, one entry per
register at the same frame offset. */
cfi_rel_offset (x1, 32)
cfi_rel_offset (x2, 32+8)
cfi_rel_offset (x3, 32+16)
cfi_rel_offset (x4, 32+16+8)
cfi_rel_offset (x5, 32+16*2)
cfi_rel_offset (x6, 32+16*2+8)
cfi_rel_offset (x7, 32+16*3)
cfi_rel_offset (x8, 32+16*3+8)
cfi_rel_offset (x9, 32+16*4)
cfi_rel_offset (x10, 32+16*4+8)
cfi_rel_offset (x11, 32+16*5)
cfi_rel_offset (x12, 32+16*5+8)
cfi_rel_offset (x13, 32+16*6)
cfi_rel_offset (x14, 32+16*6+8)
cfi_rel_offset (x15, 32+16*7)
cfi_rel_offset (x16, 32+16*7+8)
cfi_rel_offset (x17, 32+16*8)
cfi_rel_offset (x18, 32+16*8+8)
cfi_rel_offset (x0, 32+16*9)
/* SAVE_Q_REGISTERS / RESTORE_Q_REGISTERS are macros defined outside
this view; presumably they spill and reload the SIMD/FP registers
around the C call -- TODO confirm. */
SAVE_Q_REGISTERS
/* Second argument: x1 = address of label 1 above, i.e. this
function's own entry point. x0 still holds the descriptor pointer
and is passed through as the first argument. */
adr x1, 1b
bl _dl_tlsdesc_resolve_hold_fixup
RESTORE_Q_REGISTERS
/* Reload the descriptor pointer and call whatever entry function the
descriptor holds now; its result is left in x0. DELOUSE is opaque
here; presumably it normalizes the pointer in register 0 -- TODO
confirm. */
ldr x0, [sp, #32+16*9]
DELOUSE (0)
ldr PTR_REG (1), [x0]
blr x1
/* Restore every saved register except x0, which carries the result. */
ldp x1, x2, [sp, #32+16*0]
ldp x3, x4, [sp, #32+16*1]
ldp x5, x6, [sp, #32+16*2]
ldp x7, x8, [sp, #32+16*3]
ldp x9, x10, [sp, #32+16*4]
ldp x11, x12, [sp, #32+16*5]
ldp x13, x14, [sp, #32+16*6]
ldp x15, x16, [sp, #32+16*7]
ldp x17, x18, [sp, #32+16*8]
/* Pop the frame and recover x29/x30 in one post-indexed load. */
ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
cfi_restore (x29)
cfi_restore (x30)
RET
cfi_endproc
.size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
#undef NSAVEXREGPAIRS

View File

@ -45,18 +45,9 @@ struct tlsdesc_dynamic_arg
extern ptrdiff_t attribute_hidden
_dl_tlsdesc_return (struct tlsdesc *);
extern ptrdiff_t attribute_hidden
_dl_tlsdesc_return_lazy (struct tlsdesc *);
extern ptrdiff_t attribute_hidden
_dl_tlsdesc_undefweak (struct tlsdesc *);
extern ptrdiff_t attribute_hidden
_dl_tlsdesc_resolve_rela (struct tlsdesc *);
extern ptrdiff_t attribute_hidden
_dl_tlsdesc_resolve_hold (struct tlsdesc *);
# ifdef SHARED
extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);

View File

@ -18,137 +18,12 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <link.h>
#include <ldsodefs.h>
#include <elf/dynamic-link.h>
#include <tls.h>
#include <dl-tlsdesc.h>
#include <dl-unmap-segments.h>
#define _dl_tlsdesc_resolve_hold 0
#include <tlsdeschtab.h>
#include <atomic.h>
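/* The following functions take an entry_check_offset argument. It's
computed by the caller as an offset between its entry point and the
call site, such that by adding the built-in return address that is
implicitly passed to the function with this offset, we can easily
obtain the caller's entry point to compare with the entry point
given in the TLS descriptor. If it's changed, we want to return
immediately.
NOTE(review): neither fixup function below declares an
entry_check_offset parameter; _dl_tlsdesc_resolve_hold_fixup instead
receives the caller's entry point directly as CALLER. This
description looks stale -- confirm and update. */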
/* This function is used to lazily resolve TLS_DESC RELA relocations.
The argument location is used to hold a pointer to the relocation.
TD is the descriptor being resolved; on entry TD->arg holds the
ElfW(Rela) pointer. L is the link map whose symbol, string and
version tables are consulted.
On completion TD->arg and TD->entry are replaced together with one of
three handler/argument pairs (undefined weak, dynamic TLS, static
TLS), and _dl_tlsdesc_wake_up_held_fixups is called. If
_dl_tlsdesc_resolve_early_return_p reports that no work is needed the
function returns without touching TD. */
void
attribute_hidden
_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc *td, struct link_map *l)
{
/* Read the relocation pointer before anything can overwrite td->arg. */
const ElfW(Rela) *reloc = atomic_load_relaxed (&td->arg);
/* After GL(dl_load_lock) is grabbed only one caller can see td->entry in
initial state in _dl_tlsdesc_resolve_early_return_p, other concurrent
callers will return and retry calling td->entry. The updated td->entry
synchronizes with the single writer so all read accesses here can use
relaxed order. */
/* The second argument is the run-time address of the module's
DT_TLSDESC_PLT entry (dynamic-section value plus load address). */
if (_dl_tlsdesc_resolve_early_return_p
(td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
return;
/* The code below was borrowed from _dl_fixup(),
except for checking for STB_LOCAL. */
const ElfW(Sym) *const symtab
= (const void *) D_PTR (l, l_info[DT_SYMTAB]);
const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
lookup_t result;
/* Look up the target symbol. If the normal lookup rules are not
used don't look in the global scope. */
if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
&& __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
{
/* Pick up the symbol's version record when the module has a
DT_VERSYM table; a record with a zero hash is treated as
"no version". */
const struct r_found_version *version = NULL;
if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
{
const ElfW(Half) *vernum =
(const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
version = &l->l_versions[ndx];
if (version->hash == 0)
version = NULL;
}
/* SYM is passed by address, so the lookup may replace it; the
NULL check further down relies on that. */
result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
l->l_scope, version, ELF_RTYPE_CLASS_PLT,
DL_LOOKUP_ADD_DEPENDENCY, NULL);
}
else
{
/* We already found the symbol. The module (and therefore its load
address) is also known. */
result = l;
}
/* In every branch below td->arg is written first with a relaxed
store and td->entry second with a release store, so a reader that
acquires the new entry also sees the matching argument. */
if (!sym)
{
/* No definition: the argument is just the relocation addend. */
atomic_store_relaxed (&td->arg, (void *) reloc->r_addend);
/* This release store synchronizes with the ldar acquire load
instruction in _dl_tlsdesc_undefweak. */
atomic_store_release (&td->entry, _dl_tlsdesc_undefweak);
}
else
{
/* CHECK_STATIC_TLS and TRY_STATIC_TLS are defined outside this view;
presumably they verify or attempt placing RESULT in the static TLS
block -- TODO confirm. */
# ifndef SHARED
CHECK_STATIC_TLS (l, result);
# else
if (!TRY_STATIC_TLS (l, result))
{
/* Dynamic case: the argument is the object returned by
_dl_make_tlsdesc_dynamic for this module and offset. */
void *p = _dl_make_tlsdesc_dynamic (result, sym->st_value
+ reloc->r_addend);
atomic_store_relaxed (&td->arg, p);
/* This release store synchronizes with the ldar acquire load
instruction in _dl_tlsdesc_dynamic. */
atomic_store_release (&td->entry, _dl_tlsdesc_dynamic);
}
else
# endif
{
/* Static case: the argument is the precomputed sum of symbol value,
module TLS offset and addend. */
void *p = (void*) (sym->st_value + result->l_tls_offset
+ reloc->r_addend);
atomic_store_relaxed (&td->arg, p);
/* This release store synchronizes with the ldar acquire load
instruction in _dl_tlsdesc_return_lazy. */
atomic_store_release (&td->entry, _dl_tlsdesc_return_lazy);
}
}
_dl_tlsdesc_wake_up_held_fixups ();
}
/* This function is used to avoid busy waiting for other threads to
complete the lazy relocation. Once another thread wins the race to
relocate a TLS descriptor, it sets the descriptor up such that this
function is called to wait until the resolver releases the
lock.
TD is the descriptor being waited on. CALLER is the entry point the
calling stub believes is installed in TD->entry; the assembly stub
_dl_tlsdesc_resolve_hold passes its own entry address.
Nothing is returned and TD is never written here. */
void
attribute_hidden
_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc *td, void *caller)
{
/* Maybe we're lucky and can return early. */
/* If td->entry no longer equals CALLER the descriptor has already
been updated, so there is nothing to wait for. */
if (caller != atomic_load_relaxed (&td->entry))
return;
/* Locking here will stop execution until the running resolver runs
_dl_tlsdesc_wake_up_held_fixups(), releasing the lock.
FIXME: We'd be better off waiting on a condition variable, such
that we didn't have to hold the lock throughout the relocation
processing. */
/* The lock is taken and dropped immediately: it protects no data
here and serves purely as a blocking point. */
__rtld_lock_lock_recursive (GL(dl_load_lock));
__rtld_lock_unlock_recursive (GL(dl_load_lock));
}
/* Unmap the dynamic object, but also release its TLS descriptor table
if there is one. */