diff --git a/gdb/ChangeLog b/gdb/ChangeLog index 44d213e0b8..4886f1c07b 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,3 +1,9 @@ +2018-06-16 Andrew Burgess + Richard Bunt + + * linux-nat.c (stop_wait_callback): Don't discard SIGSTOP if it + was requested by GDB. + 2018-06-15 Tom de Vries * MAINTAINERS (Write After Approval): Add Tom de Vries. diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c index 445b59fa4a..a07f41cf31 100644 --- a/gdb/linux-nat.c +++ b/gdb/linux-nat.c @@ -2527,17 +2527,23 @@ stop_wait_callback (struct lwp_info *lp, void *data) } else { - /* We caught the SIGSTOP that we intended to catch, so - there's no SIGSTOP pending. */ + /* We caught the SIGSTOP that we intended to catch. */ if (debug_linux_nat) fprintf_unfiltered (gdb_stdlog, "SWC: Expected SIGSTOP caught for %s.\n", target_pid_to_str (lp->ptid)); - /* Reset SIGNALLED only after the stop_wait_callback call - above as it does gdb_assert on SIGNALLED. */ lp->signalled = 0; + + /* If we are waiting for this stop so we can report the thread + stopped then we need to record this status. Otherwise, we can + now discard this stop event. */ + if (lp->last_resume_kind == resume_stop) + { + lp->status = status; + save_stop_reason (lp); + } } } diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog index 6d1f5f7af5..c7fee804a5 100644 --- a/gdb/testsuite/ChangeLog +++ b/gdb/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2018-06-16 Andrew Burgess + Richard Bunt + + * gdb.threads/attach-slow-waitpid.c: New file. + * gdb.threads/attach-slow-waitpid.exp: New file. + * gdb.threads/slow-waitpid.c: New file. + 2018-06-14 Pedro Alves * gdb.base/fork-running-state.c: Include . diff --git a/gdb/testsuite/gdb.threads/attach-slow-waitpid.c b/gdb/testsuite/gdb.threads/attach-slow-waitpid.c new file mode 100644 index 0000000000..06e99ab22d --- /dev/null +++ b/gdb/testsuite/gdb.threads/attach-slow-waitpid.c @@ -0,0 +1,77 @@ +/* This testcase is part of GDB, the GNU debugger. 
+ + Copyright 2018 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include +#include +#include +#include +#define NUM_THREADS 4 + +/* Crude spin lock. Threads all spin until this is set to 0. */ +int go = 1; + +/* Thread function, just spin until GO is set to 0. */ +void * +perform_work (void *argument) +{ + /* Cast to volatile to ensure that ARGUMENT is loaded each time around + the loop. */ + while (*((volatile int*) argument)) + { + /* Nothing. */ + } + return NULL; +} + +/* The spin loop for the main thread. */ +void +function (void) +{ + (void) perform_work (&go); + printf ("Finished from function\n"); +} + +/* Main program, create some threads which all spin waiting for GO to be + set to 0. */ +int +main (void) +{ + pthread_t threads[NUM_THREADS]; + int result_code; + unsigned index; + + /* Create some threads. */ + for (index = 0; index < NUM_THREADS; ++index) + { + printf ("In main: creating thread %d\n", index); + result_code = pthread_create (&threads[index], NULL, perform_work, &go); + assert (!result_code); + } + + function (); + + /* Wait for each thread to complete. */ + for (index = 0; index < NUM_THREADS; ++index) + { + /* Block until thread INDEX completes. 
*/ + result_code = pthread_join (threads[index], NULL); + assert (!result_code); + printf ("In main: thread %d has completed\n", index); + } + printf ("In main: All threads completed successfully\n"); + return 0; +} diff --git a/gdb/testsuite/gdb.threads/attach-slow-waitpid.exp b/gdb/testsuite/gdb.threads/attach-slow-waitpid.exp new file mode 100644 index 0000000000..095c1934c2 --- /dev/null +++ b/gdb/testsuite/gdb.threads/attach-slow-waitpid.exp @@ -0,0 +1,100 @@ +# Copyright 2018 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# This test script tries to expose a bug in some of the uses of +# waitpid in the Linux native support within GDB. The problem was +# spotted on systems which were heavily loaded when attaching to +# threaded test programs. What happened was that during the initial +# attach, the loop of waitpid calls that normally received the stop +# events from each of the threads in the inferior was not receiving a +# stop event for some threads (the kernel just hadn't sent the stop +# event yet). +# +# GDB would then trigger a call to stop_all_threads which would +# continue to wait for all of the outstanding threads to stop, when +# the outstanding stop events finally arrived GDB would then +# (incorrectly) discard the stop event, resume the thread, and +# continue to wait for the thread to stop.... which it now never +# would. 
+# +# In order to try and expose this issue reliably, this test preloads a +# library that intercepts waitpid calls. All waitpid calls targeting +# pid -1 with the WNOHANG flag are rate limited so that only 1 per +# second can complete. Additional calls are forced to return 0 +# indicating no event waiting. This is enough to trigger the bug +# during the attach phase. + +# This test only works on Linux +if { ![isnative] || [is_remote host] || [use_gdb_stub] + || ![istarget *-linux*] } { + continue +} + +standard_testfile + +set libfile slow-waitpid +set libsrc "${srcdir}/${subdir}/${libfile}.c" +set libobj [standard_output_file ${libfile}.so] + +with_test_prefix "compile preload library" { + # Compile the preload library. We only get away with this as we + # limit this test to running when ISNATIVE is true. + if { [gdb_compile_shlib_pthreads \ + $libsrc $libobj {debug}] != "" } then { + return -1 + } +} + +with_test_prefix "compile test executable" { + # Compile the test program + if { [gdb_compile_pthreads \ + "${srcdir}/${subdir}/${srcfile}" "${binfile}" \ + executable {debug}] != "" } { + return -1 + } +} + +# Spawn GDB with LIB preloaded with LD_PRELOAD. + +proc gdb_spawn_with_ld_preload {lib} { + global env + + save_vars { env(LD_PRELOAD) } { + if { ![info exists env(LD_PRELOAD) ] + || $env(LD_PRELOAD) == "" } { + set env(LD_PRELOAD) "$lib" + } else { + append env(LD_PRELOAD) ":$lib" + } + + gdb_start + } +} + +# Run test program in the background. +set test_spawn_id [spawn_wait_for_attach $binfile] +set testpid [spawn_id_get_pid $test_spawn_id] + +# Start GDB with preload library in place. +gdb_spawn_with_ld_preload $libobj + +# Load binary, and attach to running program. +gdb_load ${binfile} +gdb_test "attach $testpid" "Attaching to program.*" "attach to target" + +gdb_exit + +# Kill off test program. 
+kill_wait_spawned_process $test_spawn_id diff --git a/gdb/testsuite/gdb.threads/slow-waitpid.c b/gdb/testsuite/gdb.threads/slow-waitpid.c new file mode 100644 index 0000000000..93304efb0e --- /dev/null +++ b/gdb/testsuite/gdb.threads/slow-waitpid.c @@ -0,0 +1,342 @@ +/* This testcase is part of GDB, the GNU debugger. + + Copyright 2018 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* This file contains a library that can be preloaded into GDB on Linux + using the LD_PRELOAD technique. + + The library intercepts calls to WAITPID and SIGSUSPEND in order to + simulate the behaviour of a heavily loaded kernel. + + When GDB wants to stop all threads in an inferior each thread is sent a + SIGSTOP, GDB will then wait for the signal to be received by the thread + with a waitpid call. + + If the kernel is slow in either delivering the signal, or making the + result available to the waitpid call then GDB will enter a sigsuspend + call in order to wait for the inferior threads to change state, this is + signalled to GDB with a SIGCHLD. + + A bug in GDB meant that in some cases we would deadlock during this + process. This was rarely seen as the kernel is usually quick at + delivering signals and making the results available to waitpid, so quick + that GDB would gather the statuses from all inferior threads in the + original pass. 
+ + The idea in this library is to rate limit calls to waitpid (where pid is + -1 and the WNOHANG option is set) so that only 1 per second can return + an answer. Any additional calls will report that no threads are + currently ready. This should match the behaviour we see on a slow + kernel. + + However, given that usually when using this library, the kernel does + have the waitpid result ready this means that the kernel will never send + GDB a SIGCHLD. This means that when GDB enters sigsuspend it will block + forever. Alternatively, if GDB enters its polling loop the lack of + SIGCHLD means that we will never see an event on the child threads. To + resolve these problems the library intercepts calls to sigsuspend and + forces the call to exit if there is a pending waitpid result. Also, + when we know that there's a waitpid result that we've ignored, we create + a new thread which, after a short delay, will send GDB a SIGCHLD. */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Logging. */ + +static void +log_msg (const char *fmt, ...) +{ +#ifdef LOGGING + va_list ap; + + va_start (ap, fmt); + vfprintf (stderr, fmt, ap); + va_end (ap); +#endif /* LOGGING */ +} + +/* Error handling, message and exit. */ + +static void +error (const char *fmt, ...) +{ + va_list ap; + + va_start (ap, fmt); + vfprintf (stderr, fmt, ap); + va_end (ap); + + exit (EXIT_FAILURE); +} + +/* Cache the result of a waitpid call that has not been reported back to + GDB yet. We only ever cache a single result. Once we have a result + cached then later calls to waitpid with the WNOHANG option will return a + result of 0. */ + +static struct +{ + /* Flag to indicate when we have a result cached. */ + int cached_p; + + /* The cached result fields from a waitpid call. */ + pid_t pid; + int wstatus; +} cached_wait_status; + +/* Lock to hold when modifying SIGNAL_THREAD_ACTIVE_P. 
*/ + +static pthread_mutex_t thread_creation_lock_obj = PTHREAD_MUTEX_INITIALIZER; +#define thread_creation_lock (&thread_creation_lock_obj) + +/* This flag is only modified while holding the THREAD_CREATION_LOCK mutex. + When this flag is true then there is a signal thread alive that will be + sending a SIGCHLD at some point in the future. */ + +static int signal_thread_active_p; + +/* When we last allowed a waitpid to complete. */ + +static struct timeval last_waitpid_time = { 0, 0 }; + +/* The number of seconds that must elapse between calls to waitpid where + the pid is -1 and the WNOHANG option is set. If calls occur faster than + this then we force a result of 0 to be returned from waitpid. */ + +#define WAITPID_MIN_TIME (1) + +/* Return true (non-zero) if we should skip this call to waitpid, or false + (zero) if this waitpid call should be handled with a call to the "real" + waitpid function. Allows 1 waitpid call per second. */ + +static int +should_skip_waitpid (void) +{ + struct timeval *tv = &last_waitpid_time; + if (tv->tv_sec == 0) + { + if (gettimeofday (tv, NULL) < 0) + error ("error: gettimeofday failed\n"); + return 0; /* Don't skip. */ + } + else + { + struct timeval new_tv; + + if (gettimeofday (&new_tv, NULL) < 0) + error ("error: gettimeofday failed\n"); + + if ((new_tv.tv_sec - tv->tv_sec) < WAITPID_MIN_TIME) + return 1; /* Skip. */ + + *tv = new_tv; + } + + /* Don't skip. */ + return 0; +} + +/* Perform a real waitpid call. */ + +static pid_t +real_waitpid (pid_t pid, int *wstatus, int options) +{ + typedef pid_t (*fptr_t) (pid_t, int *, int); + static fptr_t real_func = NULL; + + if (real_func == NULL) + { + real_func = dlsym (RTLD_NEXT, "waitpid"); + if (real_func == NULL) + error ("error: failed to find real waitpid\n"); + } + + return (*real_func) (pid, wstatus, options); +} + +/* Thread worker created when we cache a waitpid result. Delays for a + short period of time and then sends SIGCHLD to the GDB process. 
This + should trigger GDB to call waitpid again, at which point we will make + the cached waitpid result available. */ + +static void* +send_sigchld_thread (void *arg) +{ + /* Delay one second longer than WAITPID_MIN_TIME so that there can be no + chance that a call to SHOULD_SKIP_WAITPID will return true once the + SIGCHLD is delivered and handled. */ + sleep (WAITPID_MIN_TIME + 1); + + pthread_mutex_lock (thread_creation_lock); + signal_thread_active_p = 0; + + if (cached_wait_status.cached_p) + { + log_msg ("signal-thread: sending SIGCHLD\n"); + kill (getpid (), SIGCHLD); + } + + pthread_mutex_unlock (thread_creation_lock); + return NULL; +} + +/* The waitpid entry point function. */ + +pid_t +waitpid (pid_t pid, int *wstatus, int options) +{ + log_msg ("waitpid: waitpid (%d, %p, 0x%x)\n", pid, wstatus, options); + + if ((options & WNOHANG) != 0 + && pid == -1 + && should_skip_waitpid ()) + { + if (!cached_wait_status.cached_p) + { + /* Do the waitpid call, but hold the result back. */ + pid_t tmp_pid; + int tmp_wstatus; + + tmp_pid = real_waitpid (-1, &tmp_wstatus, options); + if (tmp_pid > 0) + { + log_msg ("waitpid: delaying waitpid result (pid = %d)\n", + tmp_pid); + + /* Cache the result. */ + cached_wait_status.pid = tmp_pid; + cached_wait_status.wstatus = tmp_wstatus; + cached_wait_status.cached_p = 1; + + /* Is there a thread around that will be sending a signal in + the near future? This prevents us from creating one + thread per call to waitpid when the calls occur in a + sequence. */ + pthread_mutex_lock (thread_creation_lock); + if (!signal_thread_active_p) + { + sigset_t old_ss, new_ss; + pthread_t thread_id; + pthread_attr_t attr; + + /* Create the new signal sending thread in detached + state. This means that the thread doesn't need to be + pthread_join'ed. Which is fine as there's no result + we care about. 
*/ + pthread_attr_init (&attr); + pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); + + /* Ensure the signal sending thread has all signals + blocked. We don't want any signals to GDB to be + handled in that thread. */ + sigfillset (&new_ss); + sigprocmask (SIG_BLOCK, &new_ss, &old_ss); + + log_msg ("waitpid: spawn thread to signal us\n"); + if (pthread_create (&thread_id, &attr, + send_sigchld_thread, NULL) != 0) + error ("error: pthread_create failed\n"); + + signal_thread_active_p = 1; + sigprocmask (SIG_SETMASK, &old_ss, NULL); + pthread_attr_destroy (&attr); + } + + pthread_mutex_unlock (thread_creation_lock); + } + } + + log_msg ("waitpid: skipping\n"); + return 0; + } + + /* If we have a cached result that is a suitable reply for this call to + waitpid then send that cached result back now. */ + if (cached_wait_status.cached_p + && (pid == -1 || pid == cached_wait_status.pid)) + { + pid_t pid; + + pid = cached_wait_status.pid; + log_msg ("waitpid: return cached result (%d)\n", pid); + *wstatus = cached_wait_status.wstatus; + cached_wait_status.cached_p = 0; + return pid; + } + + log_msg ("waitpid: real waitpid call\n"); + return real_waitpid (pid, wstatus, options); +} + +/* Perform a real sigsuspend call. */ + +static int +real_sigsuspend (const sigset_t *mask) +{ + typedef int (*fptr_t) (const sigset_t *); + static fptr_t real_func = NULL; + + if (real_func == NULL) + { + real_func = dlsym (RTLD_NEXT, "sigsuspend"); + if (real_func == NULL) + error ("error: failed to find real sigsuspend\n"); + } + + return (*real_func) (mask); +} + +/* The sigsuspend entry point function. */ + +int +sigsuspend (const sigset_t *mask) +{ + log_msg ("sigsuspend: sigsuspend (0x%p)\n", ((void *) mask)); + + /* If SIGCHLD is _not_ in MASK, and is therefore deliverable, then if we + have a pending wait status pretend that a signal arrived. 
We will + have a thread alive that is going to deliver a signal but doing this + will boost the speed as we don't have to wait for a signal. If the + signal ends up being delivered then it should be harmless, we'll just + perform an additional waitpid call. */ + if (!sigismember (mask, SIGCHLD)) + { + if (cached_wait_status.cached_p) + { + log_msg ("sigsuspend: interrupt for cached waitstatus\n"); + last_waitpid_time.tv_sec = 0; + last_waitpid_time.tv_usec = 0; + errno = EINTR; + return -1; + } + } + + log_msg ("sigsuspend: real sigsuspend call\n"); + return real_sigsuspend (mask); +}