tracing/function-graph-tracer: prevent hangs during self-tests

Impact: detect tracing related hangs

With some configs, the function graph tracer can slow the timer
interrupt down so much that the kernel hangs in an endless loop of
timer interrupt servicing.

As suggested by Ingo, this patch adds a watchdog which stops the
selftest after a defined number of traced functions, permanently
disabling this tracer.

To debug the cause of a function graph tracer hang, pass the
ftrace_dump_on_oops kernel parameter to dump the traces once the
hang has been detected.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1237694675-23509-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Frederic Weisbecker 2009-03-22 05:04:35 +01:00 committed by Ingo Molnar
parent ac199db018
commit cf586b61f8
2 changed files with 60 additions and 4 deletions

View File

@ -4018,11 +4018,12 @@ trace_printk_seq(struct trace_seq *s)
trace_seq_init(s); trace_seq_init(s);
} }
void ftrace_dump(void) static void __ftrace_dump(bool disable_tracing)
{ {
static DEFINE_SPINLOCK(ftrace_dump_lock); static DEFINE_SPINLOCK(ftrace_dump_lock);
/* use static because iter can be a bit big for the stack */ /* use static because iter can be a bit big for the stack */
static struct trace_iterator iter; static struct trace_iterator iter;
unsigned int old_userobj;
static int dump_ran; static int dump_ran;
unsigned long flags; unsigned long flags;
int cnt = 0, cpu; int cnt = 0, cpu;
@ -4034,14 +4035,17 @@ void ftrace_dump(void)
dump_ran = 1; dump_ran = 1;
/* No turning back! */
tracing_off(); tracing_off();
ftrace_kill();
if (disable_tracing)
ftrace_kill();
for_each_tracing_cpu(cpu) { for_each_tracing_cpu(cpu) {
atomic_inc(&global_trace.data[cpu]->disabled); atomic_inc(&global_trace.data[cpu]->disabled);
} }
old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
/* don't look at user memory in panic mode */ /* don't look at user memory in panic mode */
trace_flags &= ~TRACE_ITER_SYM_USEROBJ; trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
@ -4086,10 +4090,26 @@ void ftrace_dump(void)
else else
printk(KERN_TRACE "---------------------------------\n"); printk(KERN_TRACE "---------------------------------\n");
/* Re-enable tracing if requested */
if (!disable_tracing) {
trace_flags |= old_userobj;
for_each_tracing_cpu(cpu) {
atomic_dec(&global_trace.data[cpu]->disabled);
}
tracing_on();
}
out: out:
spin_unlock_irqrestore(&ftrace_dump_lock, flags); spin_unlock_irqrestore(&ftrace_dump_lock, flags);
} }
/*
 * ftrace_dump - dump the trace buffers with the default policy.
 *
 * Thin wrapper around __ftrace_dump() that passes disable_tracing = true,
 * so ftrace_kill() is called and tracing is not re-enabled once the dump
 * has been printed.
 */
void ftrace_dump(void)
{
__ftrace_dump(true);
}
__init static int tracer_alloc_buffers(void) __init static int tracer_alloc_buffers(void)
{ {
struct trace_array_cpu *data; struct trace_array_cpu *data;

View File

@ -248,6 +248,28 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* Maximum number of functions to trace before diagnosing a hang */
#define GRAPH_MAX_FUNC_TEST 100000000
/*
 * Forward declaration: the watchdog below calls __ftrace_dump(), whose
 * definition is not in this part of the file.
 */
static void __ftrace_dump(bool disable_tracing);
/*
 * Number of function entries seen by the selftest watchdog so far.
 * Incremented without locking; compared against GRAPH_MAX_FUNC_TEST.
 */
static unsigned int graph_hang_thresh;
/*
 * Entry probe installed while the function graph selftest runs.
 *
 * It counts every traced function entry. While the count stays within
 * GRAPH_MAX_FUNC_TEST the call is forwarded to the real entry probe,
 * trace_graph_entry(), and its result is returned. Once the count goes
 * past the limit, a hang is assumed: the graph tracer is stopped, a
 * warning is printed, the trace buffers are dumped if ftrace_dump_on_oops
 * is set (without disabling tracing), and 0 is returned.
 */
static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
{
	/*
	 * The increment is not synchronized. That is harmlessly racy: only
	 * an approximate count is needed to detect a hang.
	 */
	unsigned int traced = ++graph_hang_thresh;

	/* Common case: still below the limit, hand over to the real probe */
	if (!unlikely(traced > GRAPH_MAX_FUNC_TEST))
		return trace_graph_entry(trace);

	/* Too many functions traced: treat it as a hang and stop the tracer */
	ftrace_graph_stop();
	printk(KERN_WARNING "BUG: Function graph tracer hang!\n");

	/* Dump the buffers only on request, and leave tracing enabled */
	if (ftrace_dump_on_oops)
		__ftrace_dump(false);

	return 0;
}
/* /*
* Pretty much the same than for the function tracer from which the selftest * Pretty much the same than for the function tracer from which the selftest
* has been borrowed. * has been borrowed.
@ -259,15 +281,29 @@ trace_selftest_startup_function_graph(struct tracer *trace,
int ret; int ret;
unsigned long count; unsigned long count;
ret = tracer_init(trace, tr); /*
* Simulate the init() callback but we attach a watchdog callback
* to detect and recover from possible hangs
*/
tracing_reset_online_cpus(tr);
ret = register_ftrace_graph(&trace_graph_return,
&trace_graph_entry_watchdog);
if (ret) { if (ret) {
warn_failed_init_tracer(trace, ret); warn_failed_init_tracer(trace, ret);
goto out; goto out;
} }
tracing_start_cmdline_record();
/* Sleep for a 1/10 of a second */ /* Sleep for a 1/10 of a second */
msleep(100); msleep(100);
/* Have we just recovered from a hang? */
if (graph_hang_thresh > GRAPH_MAX_FUNC_TEST) {
trace->reset(tr);
ret = -1;
goto out;
}
tracing_stop(); tracing_stop();
/* check the trace buffer */ /* check the trace buffer */