From 921a7acd85ebbab1b3cd99828e6842fd3e78df24 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 16 Jan 2018 17:02:28 +0800 Subject: [PATCH 1/6] tracing: Detect the string nul character when parsing user input string User space can pass in a C nul character '\0' along with its input. The function trace_get_user() will try to process it as a normal character, and that will fail to parse. open("/sys/kernel/debug/tracing//set_ftrace_pid", O_WRONLY|O_TRUNC) = 3 write(3, " \0", 2) = -1 EINVAL (Invalid argument) while parse can handle spaces, so below works. $ echo "" > set_ftrace_pid $ echo " " > set_ftrace_pid $ echo -n " " > set_ftrace_pid Have the parser stop on '\0' and cease any further parsing. Only process the characters up to the nul '\0' character and do not process it. Link: http://lkml.kernel.org/r/1516093350-12045-2-git-send-email-changbin.du@intel.com Acked-by: Namhyung Kim Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8e3f20a18a06..c00a31d18f8a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1237,7 +1237,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, } /* only spaces were written */ - if (isspace(ch)) { + if (isspace(ch) || !ch) { *ppos += read; ret = read; goto out; @@ -1247,7 +1247,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, } /* read the non-space input */ - while (cnt && !isspace(ch)) { + while (cnt && !isspace(ch) && ch) { if (parser->idx < parser->size - 1) parser->buffer[parser->idx++] = ch; else { @@ -1262,7 +1262,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, } /* We either got finished input or we have to wait for another call. */ - if (isspace(ch)) { + if (isspace(ch) || !ch) { parser->buffer[parser->idx] = 0; parser->cont = false; } else if (parser->idx < parser->size - 1) { From 76638d96502744b0d593f2386b75ae5a017c13bb Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 16 Jan 2018 17:02:29 +0800 Subject: [PATCH 2/6] tracing: Clear parser->idx if only spaces are read If only spaces were read while parsing the next string, then parser->idx should be cleared in order to make trace_parser_loaded() return false. Link: http://lkml.kernel.org/r/1516093350-12045-3-git-send-email-changbin.du@intel.com Acked-by: Namhyung Kim Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c00a31d18f8a..cb90435e63da 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1236,14 +1236,14 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, cnt--; } + parser->idx = 0; + /* only spaces were written */ if (isspace(ch) || !ch) { *ppos += read; ret = read; goto out; } - - parser->idx = 0; } /* read the non-space input */ From f4d0706cde27f29ff89e6bf94ded4113f8fe6e80 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 16 Jan 2018 17:02:30 +0800 Subject: [PATCH 3/6] tracing: Make sure the parsed string always terminates with '\0' Always mark the parsed string with a terminated nul '\0' character. This removes the need for the users to have to append the '\0' before using the parsed string. Link: http://lkml.kernel.org/r/1516093350-12045-4-git-send-email-changbin.du@intel.com Acked-by: Namhyung Kim Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 -- kernel/trace/trace.c | 4 ++-- kernel/trace/trace_events.c | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 554b517c61a0..dabd9d167d42 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5015,7 +5015,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file) parser = &iter->parser; if (trace_parser_loaded(parser)) { - parser->buffer[parser->idx] = 0; ftrace_match_records(iter->hash, parser->buffer, parser->idx); } @@ -5329,7 +5328,6 @@ ftrace_graph_release(struct inode *inode, struct file *file) parser = &fgd->parser; if (trace_parser_loaded((parser))) { - parser->buffer[parser->idx] = 0; ret = ftrace_graph_set_hash(fgd->new_hash, parser->buffer); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cb90435e63da..58de825df19c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -530,8 +530,6 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, ubuf += ret; cnt -= ret; - parser.buffer[parser.idx] = 0; - ret = -EINVAL; if (kstrtoul(parser.buffer, 0, &val)) break; @@ -1268,6 +1266,8 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, } else if (parser->idx < parser->size - 1) { parser->cont = true; parser->buffer[parser->idx++] = ch; + /* Make sure the parsed string always terminates with '\0'. */ + parser->buffer[parser->idx] = 0; } else { ret = -EINVAL; goto out; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1b87157edbff..05c7172c6667 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -885,8 +885,6 @@ ftrace_event_write(struct file *file, const char __user *ubuf, if (*parser.buffer == '!') set = 0; - parser.buffer[parser.idx] = 0; - ret = ftrace_set_clr_event(tr, parser.buffer + !set, set); if (ret) goto out_put; From 0e4d819d0893dc043ea7b7cb6baf4be1e310bd96 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Sat, 6 Jan 2018 11:12:46 +0530 Subject: [PATCH 4/6] trace_uprobe: Display correct offset in uprobe_events Recently, how the pointers being printed with %p has been changed by commit ad67b74d2469 ("printk: hash addresses printed with %p"). This is causing a regression while showing offset in the uprobe_events file. Instead of %p, use %px to display offset. Before patch: # perf probe -vv -x /tmp/a.out main Opening /sys/kernel/debug/tracing//uprobe_events write=1 Writing event: p:probe_a/main /tmp/a.out:0x58c # cat /sys/kernel/debug/tracing/uprobe_events p:probe_a/main /tmp/a.out:0x0000000049a0f352 After patch: # cat /sys/kernel/debug/tracing/uprobe_events p:probe_a/main /tmp/a.out:0x000000000000058c Link: http://lkml.kernel.org/r/20180106054246.15375-1-ravi.bangoria@linux.vnet.ibm.com Cc: stable@vger.kernel.org Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Acked-by: Srikar Dronamraju Signed-off-by: Ravi Bangoria Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_uprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 40592e7b3568..268029ae1be6 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -608,7 +608,7 @@ static int probes_seq_show(struct seq_file *m, void *v) /* Don't print "0x (null)" when offset is 0 */ if (tu->offset) { - seq_printf(m, "0x%p", (void *)tu->offset); + seq_printf(m, "0x%px", (void *)tu->offset); } else { switch (sizeof(void *)) { case 4: From dd3dad0d716dbb9fccadcc8709900d9cac6ca252 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Dec 2017 15:37:32 -0800 Subject: [PATCH 5/6] ftrace: Mark function tracer test functions noinline/noclone The ftrace function tracer self tests calls some functions to verify the get traced. This relies on them not being inlined. Previously this was ensured by putting them into another file, but with LTO the compiler can inline across files, which makes the tests fail. Mark these functions as noinline and noclone. Link: http://lkml.kernel.org/r/20171221233732.31896-1-andi@firstfloor.org Signed-off-by: Andi Kleen Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_selftest_dynamic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c index 8cda06a10d66..c364cf777e1a 100644 --- a/kernel/trace/trace_selftest_dynamic.c +++ b/kernel/trace/trace_selftest_dynamic.c @@ -1,13 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include "trace.h" -int DYN_FTRACE_TEST_NAME(void) +noinline __noclone int DYN_FTRACE_TEST_NAME(void) { /* used to call mcount */ return 0; } -int DYN_FTRACE_TEST_NAME2(void) +noinline __noclone int DYN_FTRACE_TEST_NAME2(void) { /* used to call mcount */ return 0; From 841a915d20c7b22fc4f36f12368daf94d9f8cb10 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 28 Dec 2017 20:40:25 -0500 Subject: [PATCH 6/6] vsprintf: Do not have bprintf dereference pointers When trace_printk() was introduced, it was discussed that making it be as low overhead as possible, that the processing of the format string should be delayed until it is read. That is, a "trace_printk()" should not convert the %d into numbers and so on, but instead, save the fmt string and all the args in the buffer at the time of recording. When the trace_printk() data is read, it would then parse the format string and do the conversions of the saved arguments in the tracing buffer. The code to perform this was added to vsprintf where vbin_printf() would save the arguments of a specified format string in a buffer, then bstr_printf() could be used to convert the buffer with the same format string into the final output, as if vsprintf() was called in one go. The issue arises when dereferenced pointers are used. The problem is that something like %*pbl which reads a bitmask, will save the pointer to the bitmask in the buffer. Then the reading of the buffer via bstr_printf() will then look at the pointer to process the final output. Obviously the value of that pointer could have changed since the time it was recorded to the time the buffer is read. Worse yet, the bitmask could be unmapped, and the reading of the trace buffer could actually cause a kernel oops. Another problem is that user space tools such as perf and trace-cmd do not have access to the contents of these pointers, and they become useless when the tracing buffer is extracted. Instead of having vbin_printf() simply save the pointer in the buffer for later processing, have it perform the formatting at the time bin_printf() is called. This will fix the issue of dereferencing pointers at a later time, and has the extra benefit of having user space tools understand these values. Since perf and trace-cmd already can handle %p[sSfF] via saving kallsyms, their pointers are saved and not processed during vbin_printf(). If they were converted, it would break perf and trace-cmd, as they would not know how to deal with the conversion. Link: http://lkml.kernel.org/r/20171228204025.14a71d8f@gandalf.local.home Reported-by: Thomas Gleixner Signed-off-by: Steven Rostedt (VMware) --- lib/vsprintf.c | 82 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 13 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 01c3957b2de6..c0c3542d92fa 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2516,29 +2516,34 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) { struct printf_spec spec = {0}; char *str, *end; + int width; str = (char *)bin_buf; end = (char *)(bin_buf + size); #define save_arg(type) \ -do { \ +({ \ + unsigned long long value; \ if (sizeof(type) == 8) { \ - unsigned long long value; \ + unsigned long long val8; \ str = PTR_ALIGN(str, sizeof(u32)); \ - value = va_arg(args, unsigned long long); \ + val8 = va_arg(args, unsigned long long); \ if (str + sizeof(type) <= end) { \ - *(u32 *)str = *(u32 *)&value; \ - *(u32 *)(str + 4) = *((u32 *)&value + 1); \ + *(u32 *)str = *(u32 *)&val8; \ + *(u32 *)(str + 4) = *((u32 *)&val8 + 1); \ } \ + value = val8; \ } else { \ - unsigned long value; \ + unsigned int val4; \ str = PTR_ALIGN(str, sizeof(type)); \ - value = va_arg(args, int); \ + val4 = va_arg(args, int); \ if (str + sizeof(type) <= end) \ - *(typeof(type) *)str = (type)value; \ + *(typeof(type) *)str = (type)(long)val4; \ + value = (unsigned long long)val4; \ } \ str += sizeof(type); \ -} while (0) + value; \ +}) while (*fmt) { int read = format_decode(fmt, &spec); @@ -2554,7 +2559,10 @@ do { \ case FORMAT_TYPE_WIDTH: case FORMAT_TYPE_PRECISION: - save_arg(int); + width = (int)save_arg(int); + /* Pointers may require the width */ + if (*fmt == 'p') + set_field_width(&spec, width); break; case FORMAT_TYPE_CHAR: @@ -2576,7 +2584,27 @@ do { \ } case FORMAT_TYPE_PTR: - save_arg(void *); + /* Dereferenced pointers must be done now */ + switch (*fmt) { + /* Dereference of functions is still OK */ + case 'S': + case 's': + case 'F': + case 'f': + save_arg(void *); + break; + default: + if (!isalnum(*fmt)) { + save_arg(void *); + break; + } + str = pointer(fmt, str, end, va_arg(args, void *), + spec); + if (str + 1 < end) + *str++ = '\0'; + else + end[-1] = '\0'; /* Must be nul terminated */ + } /* skip all alphanumeric pointer suffixes */ while (isalnum(*fmt)) fmt++; @@ -2728,11 +2756,39 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) break; } - case FORMAT_TYPE_PTR: - str = pointer(fmt, str, end, get_arg(void *), spec); + case FORMAT_TYPE_PTR: { + bool process = false; + int copy, len; + /* Non function dereferences were already done */ + switch (*fmt) { + case 'S': + case 's': + case 'F': + case 'f': + process = true; + break; + default: + if (!isalnum(*fmt)) { + process = true; + break; + } + /* Pointer dereference was already processed */ + if (str < end) { + len = copy = strlen(args); + if (copy > end - str) + copy = end - str; + memcpy(str, args, copy); + str += len; + args += len; + } + } + if (process) + str = pointer(fmt, str, end, get_arg(void *), spec); + while (isalnum(*fmt)) fmt++; break; + } case FORMAT_TYPE_PERCENT_CHAR: if (str < end)