elf: Do not use memalign for TCB/TLS blocks allocation [BZ #17730]

Instead, call malloc and explicitly align the pointer.

There is no external location to store the original (unaligned)
pointer, and this commit increases the allocation size to store
the pointer at a fixed location relative to the TCB pointer.

The manual alignment means that some space goes unused which
was previously made available for subsequent allocations.
However, in the TLS_DTV_AT_TP case, the manual alignment code
avoids aligning the pre-TCB to the TLS block alignment.  (Even
while using memalign, the allocation had some unused padding
in front.)

This concludes the removal of memalign calls from the TLS code,
and the new tst-tls3-malloc test verifies that only core malloc
routines are used.
This commit is contained in:
Florian Weimer 2016-08-03 16:16:57 +02:00
parent a2ff21f825
commit 6c444ad6e9
4 changed files with 248 additions and 36 deletions

View File

@ -1,3 +1,18 @@
2016-08-03 Florian Weimer <fweimer@redhat.com>
[BZ #17730]
Avoid using memalign for TCB allocations.
* elf/dl-tls.c (tcb_to_pointer_to_free_location): New.
(_dl_allocate_tls_storage): Use malloc and manual alignment.
Avoid alignment gap in the TLS_DTV_AT_TP case.
(_dl_deallocate_tls): Use tcb_to_pointer_to_free_location to
determine the pointer to free.
* nptl/tst-tls3-malloc.c: New test.
* nptl/Makefile (tests): Add it.
(tst-tls3-malloc): Link with libdl, libpthread.
(LDFLAGS-tst-tls3-malloc): Set.
(tst-tls3-malloc.out): Depend on DSO used in test.
2016-08-03 Florian Weimer <fweimer@redhat.com>
[BZ #17730]

View File

@ -347,6 +347,22 @@ _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
*alignp = GL(dl_tls_static_align);
}
/* Derive the location of the pointer to the start of the original
allocation (before alignment) from the pointer to the TCB. */
static inline void **
tcb_to_pointer_to_free_location (void *tcb)
{
#if TLS_TCB_AT_TP
/* The TCB follows the TLS blocks, and the pointer to the front
follows the TCB. */
void **original_pointer_location = tcb + TLS_TCB_SIZE;
#elif TLS_DTV_AT_TP
/* The TCB comes first, preceded by the pre-TCB, and the pointer is
before that. */
void **original_pointer_location = tcb - TLS_PRE_TCB_SIZE - sizeof (void *);
#endif
return original_pointer_location;
}
void *
internal_function
@ -359,39 +375,50 @@ _dl_allocate_tls_storage (void)
/* Memory layout is:
[ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
^ This should be returned. */
size += (TLS_PRE_TCB_SIZE + GL(dl_tls_static_align) - 1)
& ~(GL(dl_tls_static_align) - 1);
size += TLS_PRE_TCB_SIZE;
#endif
/* Allocate a correctly aligned chunk of memory. */
result = __libc_memalign (GL(dl_tls_static_align), size);
if (__builtin_expect (result != NULL, 1))
{
/* Allocate the DTV. */
void *allocated = result;
/* Perform the allocation. Reserve space for the required alignment
and the pointer to the original allocation. */
size_t alignment = GL(dl_tls_static_align);
void *allocated = malloc (size + alignment + sizeof (void *));
if (__glibc_unlikely (allocated == NULL))
return NULL;
/* Perform alignment and allocate the DTV. */
#if TLS_TCB_AT_TP
/* The TCB follows the TLS blocks. */
result = (char *) result + size - TLS_TCB_SIZE;
/* The TCB follows the TLS blocks, which determine the alignment.
(TCB alignment requirements have been taken into account when
calculating GL(dl_tls_static_align).) */
void *aligned = (void *) roundup ((uintptr_t) allocated, alignment);
result = aligned + size - TLS_TCB_SIZE;
/* Clear the TCB data structure. We can't ask the caller (i.e.
libpthread) to do it, because we will initialize the DTV et al. */
memset (result, '\0', TLS_TCB_SIZE);
/* Clear the TCB data structure. We can't ask the caller (i.e.
libpthread) to do it, because we will initialize the DTV et al. */
memset (result, '\0', TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
result = (char *) result + size - GL(dl_tls_static_size);
/* Pre-TCB and TCB come before the TLS blocks. The layout computed
in _dl_determine_tlsoffset assumes that the TCB is aligned to the
TLS block alignment, and not just the TLS blocks after it. This
can leave an unused alignment gap between the TCB and the TLS
blocks. */
result = (void *) roundup
(sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated,
alignment);
/* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before it.
We can't ask the caller (i.e. libpthread) to do it, because we will
initialize the DTV et al. */
memset ((char *) result - TLS_PRE_TCB_SIZE, '\0',
TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
/* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
it. We can't ask the caller (i.e. libpthread) to do it, because
we will initialize the DTV et al. */
memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
#endif
result = allocate_dtv (result);
if (result == NULL)
free (allocated);
}
/* Record the value of the original pointer for later
deallocation. */
*tcb_to_pointer_to_free_location (result) = allocated;
result = allocate_dtv (result);
if (result == NULL)
free (allocated);
return result;
}
@ -558,17 +585,7 @@ _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
free (dtv - 1);
if (dealloc_tcb)
{
#if TLS_TCB_AT_TP
/* The TCB follows the TLS blocks. Back up to free the whole block. */
tcb -= GL(dl_tls_static_size) - TLS_TCB_SIZE;
#elif TLS_DTV_AT_TP
/* Back up the TLS_PRE_TCB_SIZE bytes. */
tcb -= (TLS_PRE_TCB_SIZE + GL(dl_tls_static_align) - 1)
& ~(GL(dl_tls_static_align) - 1);
#endif
free (tcb);
}
free (*tcb_to_pointer_to_free_location (tcb));
}
rtld_hidden_def (_dl_deallocate_tls)

View File

@ -321,8 +321,8 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \
tst-cleanupx0 tst-cleanupx1 tst-cleanupx2 tst-cleanupx3 tst-cleanupx4 \
tst-oncex3 tst-oncex4
ifeq ($(build-shared),yes)
tests += tst-atfork2 tst-tls3 tst-tls4 tst-tls5 tst-_res1 tst-fini1 \
tst-stackguard1
tests += tst-atfork2 tst-tls3 tst-tls3-malloc tst-tls4 tst-tls5 tst-_res1 \
tst-fini1 tst-stackguard1
tests-nolibpthread += tst-fini1
ifeq ($(have-z-execstack),yes)
tests += tst-execstack
@ -529,6 +529,10 @@ LDFLAGS-tst-tls3 = -rdynamic
$(objpfx)tst-tls3.out: $(objpfx)tst-tls3mod.so
$(objpfx)tst-tls3mod.so: $(shared-thread-library)
$(objpfx)tst-tls3-malloc: $(libdl) $(shared-thread-library)
LDFLAGS-tst-tls3-malloc = -rdynamic
$(objpfx)tst-tls3-malloc.out: $(objpfx)tst-tls3mod.so
$(objpfx)tst-tls4: $(libdl) $(shared-thread-library)
$(objpfx)tst-tls4.out: $(objpfx)tst-tls4moda.so $(objpfx)tst-tls4modb.so

176
nptl/tst-tls3-malloc.c Normal file
View File

@ -0,0 +1,176 @@
/* Test TLS allocation with an interposed malloc.
Copyright (C) 2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Reuse the test. */
#include "tst-tls3.c"
#include <sys/mman.h>
/* Interpose a minimal malloc implementation. This implementation
deliberately interposes just a restricted set of symbols, to detect
if the TLS code bypasses the interposed malloc. */
/* Lock to guard malloc internals. */
static pthread_mutex_t malloc_lock = PTHREAD_MUTEX_INITIALIZER;
/* Information about an allocation chunk. */
struct malloc_chunk
{
/* Start of the allocation. */
void *start;
/* Size of the allocation. */
size_t size;
};
enum { malloc_chunk_count = 1000 };
static struct malloc_chunk chunks[malloc_chunk_count];
/* Lock the malloc lock. */
static void
xlock (void)
{
int ret = pthread_mutex_lock (&malloc_lock);
if (ret != 0)
{
errno = ret;
printf ("error: pthread_mutex_lock: %m\n");
_exit (1);
}
}
/* Unlock the malloc lock. */
static void
xunlock (void)
{
int ret = pthread_mutex_unlock (&malloc_lock);
if (ret != 0)
{
errno = ret;
printf ("error: pthread_mutex_unlock: %m\n");
_exit (1);
}
}
/* Internal malloc without locking and registration. */
static void *
malloc_internal (size_t size)
{
void *result = mmap (NULL, size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (result == MAP_FAILED)
{
printf ("error: mmap: %m\n");
_exit (1);
}
return result;
}
void *
malloc (size_t size)
{
if (size == 0)
size = 1;
xlock ();
void *result = malloc_internal (size);
for (int i = 0; i < malloc_chunk_count; ++i)
if (chunks[i].start == NULL)
{
chunks[i].start = result;
chunks[i].size = size;
xunlock ();
return result;
}
xunlock ();
printf ("error: no place to store chunk pointer\n");
_exit (1);
}
void *
calloc (size_t a, size_t b)
{
if (b != 0 && a > SIZE_MAX / b)
return NULL;
/* malloc uses mmap, which provides zeroed memory. */
return malloc (a * b);
}
static void
xunmap (void *ptr, size_t size)
{
int ret = munmap (ptr, size);
if (ret < 0)
{
printf ("error: munmap (%p, %zu) failed: %m\n", ptr, size);
_exit (1);
}
}
void
free (void *ptr)
{
if (ptr == NULL)
return;
xlock ();
for (int i = 0; i < malloc_chunk_count; ++i)
if (chunks[i].start == ptr)
{
xunmap (ptr, chunks[i].size);
chunks[i] = (struct malloc_chunk) {};
xunlock ();
return;
}
xunlock ();
printf ("error: tried to free non-allocated pointer %p\n", ptr);
_exit (1);
}
void *
realloc (void *old, size_t size)
{
if (old != NULL)
{
xlock ();
for (int i = 0; i < malloc_chunk_count; ++i)
if (chunks[i].start == old)
{
size_t old_size = chunks[i].size;
void *result;
if (old_size < size)
{
result = malloc_internal (size);
/* Reuse the slot for the new allocation. */
memcpy (result, old, old_size);
xunmap (old, old_size);
chunks[i].start = result;
chunks[i].size = size;
}
else
/* Old size is not smaller, so reuse the old
allocation. */
result = old;
xunlock ();
return result;
}
xunlock ();
printf ("error: tried to realloc non-allocated pointer %p\n", old);
_exit (1);
}
else
return malloc (size);
}