diff --git a/ChangeLog b/ChangeLog index f478eeed45..09f716809f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2018-11-30 Tulio Magno Quites Machado Filho + + [BZ #23690] + * elf/dl-runtime.c (_dl_profile_fixup): Guarantee memory + modification order when accessing reloc_result->addr. + * include/link.h (reloc_result): Add field init. + * nptl/Makefile (tests): Add tst-audit-threads. + (modules-names): Add tst-audit-threads-mod1 and + tst-audit-threads-mod2. + Add rules to build tst-audit-threads. + * nptl/tst-audit-threads-mod1.c: New file. + * nptl/tst-audit-threads-mod2.c: Likewise. + * nptl/tst-audit-threads.c: Likewise. + * nptl/tst-audit-threads.h: Likewise. + 2018-11-30 Joseph Myers * scripts/gen-as-const.py: New file. diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c index 63bbc89776..3d2f4a7a76 100644 --- a/elf/dl-runtime.c +++ b/elf/dl-runtime.c @@ -183,10 +183,36 @@ _dl_profile_fixup ( /* This is the address in the array where we store the result of previous relocations. */ struct reloc_result *reloc_result = &l->l_reloc_result[reloc_index]; - DL_FIXUP_VALUE_TYPE *resultp = &reloc_result->addr; - DL_FIXUP_VALUE_TYPE value = *resultp; - if (DL_FIXUP_VALUE_CODE_ADDR (value) == 0) + /* CONCURRENCY NOTES: + + Multiple threads may be calling the same PLT sequence and with + LD_AUDIT enabled they will be calling into _dl_profile_fixup to + update the reloc_result with the result of the lazy resolution. + The reloc_result guard variable is reloc_init, and we use + acquire/release loads and store to it to ensure that the results of + the structure are consistent with the loaded value of the guard. + This does not fix all of the data races that occur when two or more + threads read reloc_result->reloc_init with a value of zero and read + and write to that reloc_result concurrently. The expectation is + generally that while this is a data race it works because the + threads write the same values. Until the data races are fixed + there is a potential for problems to arise from these data races. + The reloc result updates should happen in parallel but there should + be an atomic RMW which does the final update to the real result + entry (see bug 23790). + + The following code uses reloc_result->init set to 0 to indicate if it is + the first time this object is being relocated, otherwise 1 which + indicates the object has already been relocated. + + Reading/Writing from/to reloc_result->reloc_init must not happen + before previous writes to reloc_result complete as they could + end-up with an incomplete struct. */ + DL_FIXUP_VALUE_TYPE value; + unsigned int init = atomic_load_acquire (&reloc_result->init); + + if (init == 0) { /* This is the first time we have to relocate this object. */ const ElfW(Sym) *const symtab @@ -346,19 +372,31 @@ _dl_profile_fixup ( /* Store the result for later runs. */ if (__glibc_likely (! GLRO(dl_bind_not))) - *resultp = value; + { + reloc_result->addr = value; + /* Guarantee all previous writes complete before + init is updated. See CONCURRENCY NOTES earlier */ + atomic_store_release (&reloc_result->init, 1); + } + init = 1; } + else + value = reloc_result->addr; /* By default we do not call the pltexit function. */ long int framesize = -1; + #ifdef SHARED /* Auditing checkpoint: report the PLT entering and allow the auditors to change the value. */ - if (DL_FIXUP_VALUE_CODE_ADDR (value) != 0 && GLRO(dl_naudit) > 0 + if (GLRO(dl_naudit) > 0 /* Don't do anything if no auditor wants to intercept this call. */ && (reloc_result->enterexit & LA_SYMB_NOPLTENTER) == 0) { + /* Sanity check: DL_FIXUP_VALUE_CODE_ADDR (value) should have been + initialized earlier in this function or in another thread. */ + assert (DL_FIXUP_VALUE_CODE_ADDR (value) != 0); ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound, l_info[DT_SYMTAB]) + reloc_result->boundndx); diff --git a/include/link.h b/include/link.h index 5924594548..83b1c34b7b 100644 --- a/include/link.h +++ b/include/link.h @@ -216,6 +216,10 @@ struct link_map unsigned int boundndx; uint32_t enterexit; unsigned int flags; + /* CONCURRENCY NOTE: This is used to guard the concurrent initialization + of the relocation result across multiple threads. See the more + detailed notes in elf/dl-runtime.c. */ + unsigned int init; } *l_reloc_result; /* Pointer to the version information if available. */ diff --git a/nptl/Makefile b/nptl/Makefile index 982e43adfa..98b0aa01c7 100644 --- a/nptl/Makefile +++ b/nptl/Makefile @@ -382,7 +382,8 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \ tst-cleanupx0 tst-cleanupx1 tst-cleanupx2 tst-cleanupx3 tst-cleanupx4 \ tst-oncex3 tst-oncex4 ifeq ($(build-shared),yes) -tests += tst-atfork2 tst-tls4 tst-_res1 tst-fini1 tst-compat-forwarder +tests += tst-atfork2 tst-tls4 tst-_res1 tst-fini1 tst-compat-forwarder \ + tst-audit-threads tests-internal += tst-tls3 tst-tls3-malloc tst-tls5 tst-stackguard1 tests-nolibpthread += tst-fini1 ifeq ($(have-z-execstack),yes) @@ -394,7 +395,8 @@ modules-names = tst-atfork2mod tst-tls3mod tst-tls4moda tst-tls4modb \ tst-tls5mod tst-tls5moda tst-tls5modb tst-tls5modc \ tst-tls5modd tst-tls5mode tst-tls5modf tst-stack4mod \ tst-_res1mod1 tst-_res1mod2 tst-execstack-mod tst-fini1mod \ - tst-join7mod tst-compat-forwarder-mod + tst-join7mod tst-compat-forwarder-mod tst-audit-threads-mod1 \ + tst-audit-threads-mod2 extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) \ tst-cleanup4aux.o tst-cleanupx4aux.o test-extras += tst-cleanup4aux tst-cleanupx4aux @@ -712,6 +714,14 @@ $(objpfx)tst-compat-forwarder: $(objpfx)tst-compat-forwarder-mod.so tst-mutex10-ENV = GLIBC_TUNABLES=glibc.elision.enable=1 +# Protect against a build using -Wl,-z,now. +LDFLAGS-tst-audit-threads-mod1.so = -Wl,-z,lazy +LDFLAGS-tst-audit-threads-mod2.so = -Wl,-z,lazy +LDFLAGS-tst-audit-threads = -Wl,-z,lazy +$(objpfx)tst-audit-threads: $(objpfx)tst-audit-threads-mod2.so +$(objpfx)tst-audit-threads.out: $(objpfx)tst-audit-threads-mod1.so +tst-audit-threads-ENV = LD_AUDIT=$(objpfx)tst-audit-threads-mod1.so + # The tests here better do not run in parallel ifneq ($(filter %tests,$(MAKECMDGOALS)),) .NOTPARALLEL: diff --git a/nptl/tst-audit-threads-mod1.c b/nptl/tst-audit-threads-mod1.c new file mode 100644 index 0000000000..615d5ee512 --- /dev/null +++ b/nptl/tst-audit-threads-mod1.c @@ -0,0 +1,74 @@ +/* Dummy audit library for test-audit-threads. + + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +/* We must use a dummy LD_AUDIT module to force the dynamic loader to + *not* update the real PLT, and instead use a cached value for the + lazy resolution result. It is the update of that cached value that + we are testing for correctness by doing this. */ + +/* Library to be audited. */ +#define LIB "tst-audit-threads-mod2.so" +/* CALLNUM is the number of retNum functions. */ +#define CALLNUM 7999 + +#define CONCATX(a, b) __CONCAT (a, b) + +static int previous = 0; + +unsigned int +la_version (unsigned int ver) +{ + return 1; +} + +unsigned int +la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) +{ + return LA_FLG_BINDTO | LA_FLG_BINDFROM; +} + +uintptr_t +CONCATX(la_symbind, __ELF_NATIVE_CLASS) (ElfW(Sym) *sym, + unsigned int ndx, + uintptr_t *refcook, + uintptr_t *defcook, + unsigned int *flags, + const char *symname) +{ + const char * retnum = "retNum"; + char * num = strstr (symname, retnum); + int n; + /* Validate if the symbols are getting called in the correct order. + This code is here to verify binutils does not optimize out the PLT + entries that require the symbol binding. */ + if (num != NULL) + { + n = atoi (num); + assert (n >= previous); + assert (n <= CALLNUM); + previous = n; + } + return sym->st_value; +} diff --git a/nptl/tst-audit-threads-mod2.c b/nptl/tst-audit-threads-mod2.c new file mode 100644 index 0000000000..f9817dd3dc --- /dev/null +++ b/nptl/tst-audit-threads-mod2.c @@ -0,0 +1,22 @@ +/* Shared object with a huge number of functions for test-audit-threads. + + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define all the retNumN functions in a library. */ +#define definenum +#include "tst-audit-threads.h" diff --git a/nptl/tst-audit-threads.c b/nptl/tst-audit-threads.c new file mode 100644 index 0000000000..e4bf433bd8 --- /dev/null +++ b/nptl/tst-audit-threads.c @@ -0,0 +1,97 @@ +/* Test multi-threading using LD_AUDIT. + + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* This test uses a dummy LD_AUDIT library (test-audit-threads-mod1) and a + library with a huge number of functions in order to validate lazy symbol + binding with an audit library. We use one thread per CPU to test that + concurrent lazy resolution does not have any defects which would cause + the process to fail. We use an LD_AUDIT library to force the testing of + the relocation resolution caching code in the dynamic loader i.e. + _dl_runtime_profile and _dl_profile_fixup. */ + +#include +#include +#include +#include + +static int do_test (void); + +/* This test usually takes less than 3s to run. However, there are cases that + take up to 30s. */ +#define TIMEOUT 60 +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" + +/* Declare the functions we are going to call. */ +#define externnum +#include "tst-audit-threads.h" +#undef externnum + +int num_threads; +pthread_barrier_t barrier; + +void +sync_all (int num) +{ + pthread_barrier_wait (&barrier); +} + +void +call_all_ret_nums (void) +{ + /* Call each function one at a time from all threads. */ +#define callnum +#include "tst-audit-threads.h" +#undef callnum +} + +void * +thread_main (void *unused) +{ + call_all_ret_nums (); + return NULL; +} + +#define STR2(X) #X +#define STR(X) STR2(X) + +static int +do_test (void) +{ + int i; + pthread_t *threads; + + num_threads = get_nprocs (); + if (num_threads <= 1) + num_threads = 2; + + /* Used to synchronize all the threads after calling each retNumN. */ + xpthread_barrier_init (&barrier, NULL, num_threads); + + threads = (pthread_t *) xcalloc (num_threads, sizeof(pthread_t)); + for (i = 0; i < num_threads; i++) + threads[i] = xpthread_create(NULL, thread_main, NULL); + + for (i = 0; i < num_threads; i++) + xpthread_join(threads[i]); + + free (threads); + + return 0; +} diff --git a/nptl/tst-audit-threads.h b/nptl/tst-audit-threads.h new file mode 100644 index 0000000000..1c9ecc08df --- /dev/null +++ b/nptl/tst-audit-threads.h @@ -0,0 +1,92 @@ +/* Helper header for test-audit-threads. + + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* We use this helper to create a large number of functions, all of + which will be resolved lazily and thus have their PLT updated. + This is done to provide enough functions that we can statistically + observe a thread vs. PLT resolution failure if one exists. */ + +#define CONCAT(a, b) a ## b +#define NUM(x, y) CONCAT (x, y) + +#define FUNC10(x) \ + FUNC (NUM (x, 0)); \ + FUNC (NUM (x, 1)); \ + FUNC (NUM (x, 2)); \ + FUNC (NUM (x, 3)); \ + FUNC (NUM (x, 4)); \ + FUNC (NUM (x, 5)); \ + FUNC (NUM (x, 6)); \ + FUNC (NUM (x, 7)); \ + FUNC (NUM (x, 8)); \ + FUNC (NUM (x, 9)) + +#define FUNC100(x) \ + FUNC10 (NUM (x, 0)); \ + FUNC10 (NUM (x, 1)); \ + FUNC10 (NUM (x, 2)); \ + FUNC10 (NUM (x, 3)); \ + FUNC10 (NUM (x, 4)); \ + FUNC10 (NUM (x, 5)); \ + FUNC10 (NUM (x, 6)); \ + FUNC10 (NUM (x, 7)); \ + FUNC10 (NUM (x, 8)); \ + FUNC10 (NUM (x, 9)) + +#define FUNC1000(x) \ + FUNC100 (NUM (x, 0)); \ + FUNC100 (NUM (x, 1)); \ + FUNC100 (NUM (x, 2)); \ + FUNC100 (NUM (x, 3)); \ + FUNC100 (NUM (x, 4)); \ + FUNC100 (NUM (x, 5)); \ + FUNC100 (NUM (x, 6)); \ + FUNC100 (NUM (x, 7)); \ + FUNC100 (NUM (x, 8)); \ + FUNC100 (NUM (x, 9)) + +#define FUNC7000() \ + FUNC1000 (1); \ + FUNC1000 (2); \ + FUNC1000 (3); \ + FUNC1000 (4); \ + FUNC1000 (5); \ + FUNC1000 (6); \ + FUNC1000 (7); + +#ifdef FUNC +# undef FUNC +#endif + +#ifdef externnum +# define FUNC(x) extern int CONCAT (retNum, x) (void) +#endif + +#ifdef definenum +# define FUNC(x) int CONCAT (retNum, x) (void) { return x; } +#endif + +#ifdef callnum +# define FUNC(x) CONCAT (retNum, x) (); sync_all (x) +#endif + +/* A value of 7000 functions is chosen as an arbitrarily large + number of functions that will allow us enough attempts to + verify lazy resolution operation. */ +FUNC7000 ();