Fix _dl_profile_fixup data-dependency issue (Bug 23690)

There is a data-dependency between the fields of struct l_reloc_result
and the field used as the initialization guard. Users of the guard
expect writes to the structure to be observable when they also observe
the guard initialized. The solution for this problem is to use an acquire
and release load and store to ensure previous writes to the structure are
observable if the guard is initialized.

The previous implementation used DL_FIXUP_VALUE_ADDR (l_reloc_result->addr)
as the initialization guard, making it impossible for some architectures
to load and store it atomically, i.e. hppa and ia64, due to its larger size.

This commit adds an unsigned int to l_reloc_result to be used as the new
initialization guard of the struct, making it possible to load and store
it atomically in all architectures. The fix ensures that the values
observed in l_reloc_result are consistent and do not lead to crashes.
The algorithm is documented in the code in elf/dl-runtime.c
(_dl_profile_fixup). Not all data races have been eliminated.

Tested with build-many-glibcs and on powerpc, powerpc64, and powerpc64le.

	[BZ #23690]
	* elf/dl-runtime.c (_dl_profile_fixup): Guarantee memory
	modification order when accessing reloc_result->addr.
	* include/link.h (reloc_result): Add field init.
	* nptl/Makefile (tests): Add tst-audit-threads.
	(modules-names): Add tst-audit-threads-mod1 and
	tst-audit-threads-mod2.
	Add rules to build tst-audit-threads.
	* nptl/tst-audit-threads-mod1.c: New file.
	* nptl/tst-audit-threads-mod2.c: Likewise.
	* nptl/tst-audit-threads.c: Likewise.
	* nptl/tst-audit-threads.h: Likewise.

Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
This commit is contained in:
Tulio Magno Quites Machado Filho 2018-11-30 18:05:32 -02:00
parent 7e1d42400c
commit e5d262effe
8 changed files with 359 additions and 7 deletions

View File

@ -1,3 +1,18 @@
2018-11-30 Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
[BZ #23690]
* elf/dl-runtime.c (_dl_profile_fixup): Guarantee memory
modification order when accessing reloc_result->addr.
* include/link.h (reloc_result): Add field init.
* nptl/Makefile (tests): Add tst-audit-threads.
(modules-names): Add tst-audit-threads-mod1 and
tst-audit-threads-mod2.
Add rules to build tst-audit-threads.
* nptl/tst-audit-threads-mod1.c: New file.
* nptl/tst-audit-threads-mod2.c: Likewise.
* nptl/tst-audit-threads.c: Likewise.
* nptl/tst-audit-threads.h: Likewise.
2018-11-30 Joseph Myers <joseph@codesourcery.com>
* scripts/gen-as-const.py: New file.

View File

@ -183,10 +183,36 @@ _dl_profile_fixup (
/* This is the address in the array where we store the result of previous
relocations. */
struct reloc_result *reloc_result = &l->l_reloc_result[reloc_index];
DL_FIXUP_VALUE_TYPE *resultp = &reloc_result->addr;
DL_FIXUP_VALUE_TYPE value = *resultp;
if (DL_FIXUP_VALUE_CODE_ADDR (value) == 0)
/* CONCURRENCY NOTES:
Multiple threads may be calling the same PLT sequence and with
LD_AUDIT enabled they will be calling into _dl_profile_fixup to
update the reloc_result with the result of the lazy resolution.
The reloc_result guard variable is reloc_init, and we use
acquire/release loads and store to it to ensure that the results of
the structure are consistent with the loaded value of the guard.
This does not fix all of the data races that occur when two or more
threads read reloc_result->reloc_init with a value of zero and read
and write to that reloc_result concurrently. The expectation is
generally that while this is a data race it works because the
threads write the same values. Until the data races are fixed
there is a potential for problems to arise from these data races.
The reloc result updates should happen in parallel but there should
be an atomic RMW which does the final update to the real result
entry (see bug 23790).
The following code uses reloc_result->init set to 0 to indicate if it is
the first time this object is being relocated, otherwise 1 which
indicates the object has already been relocated.
Reading/Writing from/to reloc_result->reloc_init must not happen
before previous writes to reloc_result complete as they could
end-up with an incomplete struct. */
DL_FIXUP_VALUE_TYPE value;
unsigned int init = atomic_load_acquire (&reloc_result->init);
if (init == 0)
{
/* This is the first time we have to relocate this object. */
const ElfW(Sym) *const symtab
@ -346,19 +372,31 @@ _dl_profile_fixup (
/* Store the result for later runs. */
if (__glibc_likely (! GLRO(dl_bind_not)))
*resultp = value;
{
reloc_result->addr = value;
/* Guarantee all previous writes complete before
init is updated. See CONCURRENCY NOTES earlier */
atomic_store_release (&reloc_result->init, 1);
}
init = 1;
}
else
value = reloc_result->addr;
/* By default we do not call the pltexit function. */
long int framesize = -1;
#ifdef SHARED
/* Auditing checkpoint: report the PLT entering and allow the
auditors to change the value. */
if (DL_FIXUP_VALUE_CODE_ADDR (value) != 0 && GLRO(dl_naudit) > 0
if (GLRO(dl_naudit) > 0
/* Don't do anything if no auditor wants to intercept this call. */
&& (reloc_result->enterexit & LA_SYMB_NOPLTENTER) == 0)
{
/* Sanity check: DL_FIXUP_VALUE_CODE_ADDR (value) should have been
initialized earlier in this function or in another thread. */
assert (DL_FIXUP_VALUE_CODE_ADDR (value) != 0);
ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound,
l_info[DT_SYMTAB])
+ reloc_result->boundndx);

View File

@ -216,6 +216,10 @@ struct link_map
unsigned int boundndx;
uint32_t enterexit;
unsigned int flags;
/* CONCURRENCY NOTE: This is used to guard the concurrent initialization
of the relocation result across multiple threads. See the more
detailed notes in elf/dl-runtime.c. */
unsigned int init;
} *l_reloc_result;
/* Pointer to the version information if available. */

View File

@ -382,7 +382,8 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \
tst-cleanupx0 tst-cleanupx1 tst-cleanupx2 tst-cleanupx3 tst-cleanupx4 \
tst-oncex3 tst-oncex4
ifeq ($(build-shared),yes)
tests += tst-atfork2 tst-tls4 tst-_res1 tst-fini1 tst-compat-forwarder
tests += tst-atfork2 tst-tls4 tst-_res1 tst-fini1 tst-compat-forwarder \
tst-audit-threads
tests-internal += tst-tls3 tst-tls3-malloc tst-tls5 tst-stackguard1
tests-nolibpthread += tst-fini1
ifeq ($(have-z-execstack),yes)
@ -394,7 +395,8 @@ modules-names = tst-atfork2mod tst-tls3mod tst-tls4moda tst-tls4modb \
tst-tls5mod tst-tls5moda tst-tls5modb tst-tls5modc \
tst-tls5modd tst-tls5mode tst-tls5modf tst-stack4mod \
tst-_res1mod1 tst-_res1mod2 tst-execstack-mod tst-fini1mod \
tst-join7mod tst-compat-forwarder-mod
tst-join7mod tst-compat-forwarder-mod tst-audit-threads-mod1 \
tst-audit-threads-mod2
extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) \
tst-cleanup4aux.o tst-cleanupx4aux.o
test-extras += tst-cleanup4aux tst-cleanupx4aux
@ -712,6 +714,14 @@ $(objpfx)tst-compat-forwarder: $(objpfx)tst-compat-forwarder-mod.so
tst-mutex10-ENV = GLIBC_TUNABLES=glibc.elision.enable=1
# Protect against a build using -Wl,-z,now.
LDFLAGS-tst-audit-threads-mod1.so = -Wl,-z,lazy
LDFLAGS-tst-audit-threads-mod2.so = -Wl,-z,lazy
LDFLAGS-tst-audit-threads = -Wl,-z,lazy
$(objpfx)tst-audit-threads: $(objpfx)tst-audit-threads-mod2.so
$(objpfx)tst-audit-threads.out: $(objpfx)tst-audit-threads-mod1.so
tst-audit-threads-ENV = LD_AUDIT=$(objpfx)tst-audit-threads-mod1.so
# The tests here better do not run in parallel
ifneq ($(filter %tests,$(MAKECMDGOALS)),)
.NOTPARALLEL:

View File

@ -0,0 +1,74 @@
/* Dummy audit library for test-audit-threads.
Copyright (C) 2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <elf.h>
#include <link.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
/* We must use a dummy LD_AUDIT module to force the dynamic loader to
*not* update the real PLT, and instead use a cached value for the
lazy resolution result. It is the update of that cached value that
we are testing for correctness by doing this. */
/* Library to be audited. */
#define LIB "tst-audit-threads-mod2.so"
/* CALLNUM is the number of retNum functions. */
#define CALLNUM 7999
#define CONCATX(a, b) __CONCAT (a, b)
static int previous = 0;
unsigned int
la_version (unsigned int ver)
{
return 1;
}
unsigned int
la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
{
return LA_FLG_BINDTO | LA_FLG_BINDFROM;
}
uintptr_t
CONCATX(la_symbind, __ELF_NATIVE_CLASS) (ElfW(Sym) *sym,
unsigned int ndx,
uintptr_t *refcook,
uintptr_t *defcook,
unsigned int *flags,
const char *symname)
{
const char * retnum = "retNum";
char * num = strstr (symname, retnum);
int n;
/* Validate if the symbols are getting called in the correct order.
This code is here to verify binutils does not optimize out the PLT
entries that require the symbol binding. */
if (num != NULL)
{
n = atoi (num);
assert (n >= previous);
assert (n <= CALLNUM);
previous = n;
}
return sym->st_value;
}

View File

@ -0,0 +1,22 @@
/* Shared object with a huge number of functions for test-audit-threads.
Copyright (C) 2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Define all the retNumN functions in a library. */
#define definenum
#include "tst-audit-threads.h"

97
nptl/tst-audit-threads.c Normal file
View File

@ -0,0 +1,97 @@
/* Test multi-threading using LD_AUDIT.
Copyright (C) 2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* This test uses a dummy LD_AUDIT library (test-audit-threads-mod1) and a
library with a huge number of functions in order to validate lazy symbol
binding with an audit library. We use one thread per CPU to test that
concurrent lazy resolution does not have any defects which would cause
the process to fail. We use an LD_AUDIT library to force the testing of
the relocation resolution caching code in the dynamic loader i.e.
_dl_runtime_profile and _dl_profile_fixup. */
#include <support/xthread.h>
#include <strings.h>
#include <stdlib.h>
#include <sys/sysinfo.h>
static int do_test (void);
/* This test usually takes less than 3s to run. However, there are cases that
take up to 30s. */
#define TIMEOUT 60
#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"
/* Declare the functions we are going to call. */
#define externnum
#include "tst-audit-threads.h"
#undef externnum
int num_threads;
pthread_barrier_t barrier;
void
sync_all (int num)
{
pthread_barrier_wait (&barrier);
}
void
call_all_ret_nums (void)
{
/* Call each function one at a time from all threads. */
#define callnum
#include "tst-audit-threads.h"
#undef callnum
}
void *
thread_main (void *unused)
{
call_all_ret_nums ();
return NULL;
}
#define STR2(X) #X
#define STR(X) STR2(X)
static int
do_test (void)
{
int i;
pthread_t *threads;
num_threads = get_nprocs ();
if (num_threads <= 1)
num_threads = 2;
/* Used to synchronize all the threads after calling each retNumN. */
xpthread_barrier_init (&barrier, NULL, num_threads);
threads = (pthread_t *) xcalloc (num_threads, sizeof(pthread_t));
for (i = 0; i < num_threads; i++)
threads[i] = xpthread_create(NULL, thread_main, NULL);
for (i = 0; i < num_threads; i++)
xpthread_join(threads[i]);
free (threads);
return 0;
}

92
nptl/tst-audit-threads.h Normal file
View File

@ -0,0 +1,92 @@
/* Helper header for test-audit-threads.
Copyright (C) 2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* We use this helper to create a large number of functions, all of
which will be resolved lazily and thus have their PLT updated.
This is done to provide enough functions that we can statistically
observe a thread vs. PLT resolution failure if one exists. */
#define CONCAT(a, b) a ## b
#define NUM(x, y) CONCAT (x, y)
#define FUNC10(x) \
FUNC (NUM (x, 0)); \
FUNC (NUM (x, 1)); \
FUNC (NUM (x, 2)); \
FUNC (NUM (x, 3)); \
FUNC (NUM (x, 4)); \
FUNC (NUM (x, 5)); \
FUNC (NUM (x, 6)); \
FUNC (NUM (x, 7)); \
FUNC (NUM (x, 8)); \
FUNC (NUM (x, 9))
#define FUNC100(x) \
FUNC10 (NUM (x, 0)); \
FUNC10 (NUM (x, 1)); \
FUNC10 (NUM (x, 2)); \
FUNC10 (NUM (x, 3)); \
FUNC10 (NUM (x, 4)); \
FUNC10 (NUM (x, 5)); \
FUNC10 (NUM (x, 6)); \
FUNC10 (NUM (x, 7)); \
FUNC10 (NUM (x, 8)); \
FUNC10 (NUM (x, 9))
#define FUNC1000(x) \
FUNC100 (NUM (x, 0)); \
FUNC100 (NUM (x, 1)); \
FUNC100 (NUM (x, 2)); \
FUNC100 (NUM (x, 3)); \
FUNC100 (NUM (x, 4)); \
FUNC100 (NUM (x, 5)); \
FUNC100 (NUM (x, 6)); \
FUNC100 (NUM (x, 7)); \
FUNC100 (NUM (x, 8)); \
FUNC100 (NUM (x, 9))
#define FUNC7000() \
FUNC1000 (1); \
FUNC1000 (2); \
FUNC1000 (3); \
FUNC1000 (4); \
FUNC1000 (5); \
FUNC1000 (6); \
FUNC1000 (7);
#ifdef FUNC
# undef FUNC
#endif
#ifdef externnum
# define FUNC(x) extern int CONCAT (retNum, x) (void)
#endif
#ifdef definenum
# define FUNC(x) int CONCAT (retNum, x) (void) { return x; }
#endif
#ifdef callnum
# define FUNC(x) CONCAT (retNum, x) (); sync_all (x)
#endif
/* A value of 7000 functions is chosen as an arbitrarily large
number of functions that will allow us enough attempts to
verify lazy resolution operation. */
FUNC7000 ();