perf/core improvements and fixes:

New feature:
 
 - Introduce the 'delta-abs' 'perf diff' compute method, that orders the
   histogram entries by the absolute value of the percentage delta for a
   function in two perf.data files, i.e. the functions that changed the
   most (increase or decrease in samples) comes first (Namhyung Kim)
 
 User visible:
 
 - Improve message about tweaking the kernel.perf_event_paranoid setting,
   telling how to make the change permanent by editing /etc/sysctl.conf
   (Ingo Molnar)
 
 Infrastructure:
 
 - Introduce linux/compiler-gcc.h as a counterpart to the kernel's,
   initially containing the definition of __fallthrough, more to
   come (__maybe_unused, etc) (Arnaldo Carvalho de Melo)
 
 - Fixes for problems uncovered by building tools/perf with clang, such
   as always true tests of arrays against NULL and variables that sometimes
   were used without being initialized (Arnaldo Carvalho de Melo, Steven Rostedt)
 
 - Before loading a new ELF, clear global variables set by the
   samples/bpf loader (Mickaël Salaün)
 
 - Ignore already processed ELF sections in the samples/bpf
   loader (Mickaël Salaün)
 
 - Fix compile error in the scripting code with some perl5
   versions (Wang YanQing)
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQIcBAABCAAGBQJYolWFAAoJENZQFvNTUqpA67MQAJnigpmhmyOnBn9easBPGG2k
 +2kHX7GPczbuNXeVKKfUCpBadUFjOpN17RF8019lrxoMEZEYCWgxjLjxYUMm4I0Y
 Q09X5OX4aIjSdCsvqKPnJ1nmJ9sPdyjSGqTAXemaJLbwMAcwCPv8ICMumbnjQmr+
 LWNvHFDuRgsmlGRHGKLNlWFHuuGj4iR83XOozZ/4l5p+pOBMxs4yhP21VEK1Dgqz
 0VDHlzE4kXIgZanJIgEaoZJaajwfgDS047GJuf79tq7P0kSLTZs004shY2mSqUhl
 3EYN4UVoGwIpkpXU+HHmMa5eb0LKjNd/JXABJ8r1Q+blDdP2fYmsx4Ztknc/Ie41
 u8KkYUs4Eu01jZsJpVp4BEfujUk14sbg3fdI4VGNpWI/FnsfgBsJF8uzZ/bmFCH1
 UtsMkwZkWvR1a9r7AkgMySpzEZ4Sc/0xPkFZkE0zvHpSQ4nFH2hdNM4wxt8tmtNB
 me12bMYl2aEX8Z5QssgeGPxlgEmMgq/+9GEr+ivZWe9XITR3dvqzGhVdyQiCoaav
 NaFCu6oyVJUQEtq9xgFRV+2r8jG14V6HdEruqzxEYhvvD5BC4AekC5QMClj09xfn
 ZK3xv25Diiv0QM+UcL2fH9abpSDEs4ff1Fsr6HkrZbpxzyPwkBHh6bfR4kGonE6U
 jbuqniPpwxLk9M1RmhFV
 =LCz9
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-mingo-4.11-20170213' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

 - Introduce the 'delta-abs' 'perf diff' compute method, that orders the
   histogram entries by the absolute value of the percentage delta for a
   function in two perf.data files, i.e. the functions that changed the
   most (increase or decrease in samples) come first (Namhyung Kim)

User visible changes:

 - Improve message about tweaking the kernel.perf_event_paranoid setting,
   telling how to make the change permanent by editing /etc/sysctl.conf
   (Arnaldo Carvalho de Melo)

Infrastructure changes:

 - Introduce linux/compiler-gcc.h as a counterpart to the kernel's,
   initially containing the definition of __fallthrough, more to
   come (__maybe_unused, etc) (Arnaldo Carvalho de Melo)

 - Fixes for problems uncovered by building tools/perf with clang, such
   as always true tests of arrays against NULL and variables that sometimes
   were used without being initialized (Arnaldo Carvalho de Melo, Steven Rostedt)

 - Before loading a new ELF, clear global variables set by the
   samples/bpf loader (Mickaël Salaün)

 - Ignore already processed ELF sections in the samples/bpf
   loader (Mickaël Salaün)

 - Fix compile error in the scripting code with some perl5
   versions (Wang YanQing)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2017-02-14 07:29:38 +01:00
commit 277d6f1dca
23 changed files with 145 additions and 29 deletions

View File

@ -277,6 +277,11 @@ int load_bpf_file(char *path)
Elf_Data *data, *data_prog, *symbols = NULL;
char *shname, *shname_prog;
/* reset global variables */
kern_version = 0;
memset(license, 0, sizeof(license));
memset(processed_sec, 0, sizeof(processed_sec));
if (elf_version(EV_CURRENT) == EV_NONE)
return 1;
@ -328,6 +333,8 @@ int load_bpf_file(char *path)
/* load programs that need map fixup (relocations) */
for (i = 1; i < ehdr.e_shnum; i++) {
if (processed_sec[i])
continue;
if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
continue;

View File

@ -8,6 +8,7 @@
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/seccomp.h>
#include <uapi/linux/unistd.h>
#include "bpf_helpers.h"
#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F

View File

@ -0,0 +1,14 @@
/*
 * This header is meant to be pulled in through <linux/compiler.h> only:
 * it raises a compile error unless _TOOLS_LINUX_COMPILER_H_ is already
 * defined when it is processed.
 */
#ifndef _TOOLS_LINUX_COMPILER_H_
#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
#endif
/*
* Common definitions for all gcc versions go here.
*/
/*
 * GCC_VERSION packs the compiler version into one integer:
 * major * 10000 + minor * 100 + patchlevel, so 7.0.0 becomes 70000.
 */
#define GCC_VERSION (__GNUC__ * 10000 \
+ __GNUC_MINOR__ * 100 \
+ __GNUC_PATCHLEVEL__)
/*
 * __fallthrough expands to the 'fallthrough' attribute only when
 * GCC_VERSION is at least 70000 and __CHECKER__ is not defined
 * (NOTE(review): __CHECKER__ is presumably set by a static checker such as
 * sparse - confirm).  In every other case this header leaves __fallthrough
 * undefined.
 */
#if GCC_VERSION >= 70000 && !defined(__CHECKER__)
# define __fallthrough __attribute__ ((fallthrough))
#endif

View File

@ -1,6 +1,10 @@
#ifndef _TOOLS_LINUX_COMPILER_H_
#define _TOOLS_LINUX_COMPILER_H_
#ifdef __GNUC__
#include <linux/compiler-gcc.h>
#endif
/* Optimization barrier */
/* The "volatile" is due to gcc bugs */
#define barrier() __asm__ __volatile__("": : :"memory")
@ -128,11 +132,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
#ifndef __fallthrough
# if defined(__GNUC__) && __GNUC__ >= 7
# define __fallthrough __attribute__ ((fallthrough))
# else
# define __fallthrough
# endif
# define __fallthrough
#endif
#endif /* _TOOLS_LINUX_COMPILER_H */

View File

@ -315,6 +315,7 @@ static unsigned int old_update_pointers(struct kbuffer *kbuf)
extend += delta;
delta = extend;
ptr += 4;
length = 0;
break;
case OLD_RINGBUF_TYPE_TIME_STAMP:

View File

@ -130,7 +130,7 @@ static int function_handler(struct trace_seq *s, struct pevent_record *record,
unsigned long long pfunction;
const char *func;
const char *parent;
int index;
int index = 0;
if (pevent_get_field_val(s, event, "ip", record, &function, 1))
return trace_seq_putc(s, '!');

View File

@ -498,6 +498,18 @@ record.*::
But if this option is 'no-cache', it will not update the build-id cache.
'skip' skips post-processing and does not update the cache.
diff.*::
diff.order::
This option sets the number of the column used to sort the result.
The default is 1, which means sorting by the delta (or other
compute method selected) of the first data file.
Setting it to 0 will sort the result by baseline.
diff.compute::
This option sets the method for computing the diff result.
Possible values are 'delta', 'delta-abs', 'ratio' and
'wdiff'. Default is 'delta-abs'.
SEE ALSO
--------
linkperf:perf[1]

View File

@ -86,8 +86,9 @@ OPTIONS
-c::
--compute::
Differential computation selection - delta,ratio,wdiff (default is delta).
See COMPARISON METHODS section for more info.
Differential computation selection - delta, ratio, wdiff, delta-abs
(default is delta-abs). Default can be changed using diff.compute
config option. See COMPARISON METHODS section for more info.
-p::
--period::
@ -99,7 +100,11 @@ OPTIONS
-o::
--order::
Specify compute sorting column number.
Specify compute sorting column number. 0 means sorting by baseline
overhead and 1 (default) means sorting by computed value of column 1
(data from the first file other than the baseline). Values more than 1
can be used only if enough data files are provided.
The default value can be set using the diff.order config option.
--percentage::
Determine how to display the overhead percentage of filtered entries.
@ -181,6 +186,10 @@ with:
relative to how entries are filtered. Use --percentage=absolute to
prevent such fluctuation.
delta-abs
~~~~~~~~~
Same as the 'delta' method, but sorts the result by the absolute values.
ratio
~~~~~
If specified the 'Ratio' column is displayed with value 'r' computed as:

View File

@ -61,6 +61,7 @@ tools/include/asm-generic/bitops.h
tools/include/linux/atomic.h
tools/include/linux/bitops.h
tools/include/linux/compiler.h
tools/include/linux/compiler-gcc.h
tools/include/linux/coresight-pmu.h
tools/include/linux/filter.h
tools/include/linux/hash.h

View File

@ -17,6 +17,7 @@
#include "util/symbol.h"
#include "util/util.h"
#include "util/data.h"
#include "util/config.h"
#include <stdlib.h>
#include <math.h>
@ -30,6 +31,7 @@ enum {
PERF_HPP_DIFF__RATIO,
PERF_HPP_DIFF__WEIGHTED_DIFF,
PERF_HPP_DIFF__FORMULA,
PERF_HPP_DIFF__DELTA_ABS,
PERF_HPP_DIFF__MAX_INDEX
};
@ -64,7 +66,7 @@ static bool force;
static bool show_period;
static bool show_formula;
static bool show_baseline_only;
static unsigned int sort_compute;
static unsigned int sort_compute = 1;
static s64 compute_wdiff_w1;
static s64 compute_wdiff_w2;
@ -73,19 +75,22 @@ enum {
COMPUTE_DELTA,
COMPUTE_RATIO,
COMPUTE_WEIGHTED_DIFF,
COMPUTE_DELTA_ABS,
COMPUTE_MAX,
};
/*
 * Name of each compute method, indexed by its COMPUTE_* constant.
 * Entries are listed in the same order as the enum.
 */
const char *compute_names[COMPUTE_MAX] = {
	[COMPUTE_DELTA]		= "delta",
	[COMPUTE_RATIO]		= "ratio",
	[COMPUTE_WEIGHTED_DIFF]	= "wdiff",
	[COMPUTE_DELTA_ABS]	= "delta-abs",
};
static int compute;
static int compute = COMPUTE_DELTA_ABS;
/*
 * Maps each COMPUTE_* method to the PERF_HPP_DIFF__* column that
 * carries its result.  Entries are listed in the same order as the enum.
 */
static int compute_2_hpp[COMPUTE_MAX] = {
	[COMPUTE_DELTA]		= PERF_HPP_DIFF__DELTA,
	[COMPUTE_RATIO]		= PERF_HPP_DIFF__RATIO,
	[COMPUTE_WEIGHTED_DIFF]	= PERF_HPP_DIFF__WEIGHTED_DIFF,
	[COMPUTE_DELTA_ABS]	= PERF_HPP_DIFF__DELTA_ABS,
};
@ -111,6 +116,10 @@ static struct header_column {
.name = "Delta",
.width = 7,
},
[PERF_HPP_DIFF__DELTA_ABS] = {
.name = "Delta Abs",
.width = 7,
},
[PERF_HPP_DIFF__RATIO] = {
.name = "Ratio",
.width = 14,
@ -298,6 +307,7 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
{
switch (compute) {
case COMPUTE_DELTA:
case COMPUTE_DELTA_ABS:
return formula_delta(he, pair, buf, size);
case COMPUTE_RATIO:
return formula_ratio(he, pair, buf, size);
@ -461,6 +471,7 @@ static void hists__precompute(struct hists *hists)
switch (compute) {
case COMPUTE_DELTA:
case COMPUTE_DELTA_ABS:
compute_delta(he, pair);
break;
case COMPUTE_RATIO:
@ -498,6 +509,13 @@ __hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
return cmp_doubles(l, r);
}
case COMPUTE_DELTA_ABS:
{
double l = fabs(left->diff.period_ratio_delta);
double r = fabs(right->diff.period_ratio_delta);
return cmp_doubles(l, r);
}
case COMPUTE_RATIO:
{
double l = left->diff.period_ratio;
@ -564,7 +582,7 @@ hist_entry__cmp_compute_idx(struct hist_entry *left, struct hist_entry *right,
if (!p_left || !p_right)
return p_left ? -1 : 1;
if (c != COMPUTE_DELTA) {
if (c != COMPUTE_DELTA && c != COMPUTE_DELTA_ABS) {
/*
* The delta can be computed without the baseline, but
* others are not. Put those entries which have no
@ -606,6 +624,15 @@ hist_entry__cmp_delta(struct perf_hpp_fmt *fmt,
return hist_entry__cmp_compute(right, left, COMPUTE_DELTA, d->idx);
}
/*
 * Sort callback for a 'delta-abs' column.
 *
 * Looks up the data file that owns @fmt and compares the two entries with
 * the COMPUTE_DELTA_ABS method for that file's index.  The entries are
 * handed to hist_entry__cmp_compute() in swapped order (right, left).
 */
static int64_t
hist_entry__cmp_delta_abs(struct perf_hpp_fmt *fmt,
			  struct hist_entry *left, struct hist_entry *right)
{
	struct data__file *file = fmt_to_data_file(fmt);
	int64_t cmp;

	cmp = hist_entry__cmp_compute(right, left, COMPUTE_DELTA_ABS,
				      file->idx);
	return cmp;
}
static int64_t
hist_entry__cmp_ratio(struct perf_hpp_fmt *fmt,
struct hist_entry *left, struct hist_entry *right)
@ -632,6 +659,14 @@ hist_entry__cmp_delta_idx(struct perf_hpp_fmt *fmt __maybe_unused,
sort_compute);
}
/*
 * Sort callback used when ordering by the 'delta-abs' value of the column
 * selected through sort_compute.  @fmt is not used.  The entries are handed
 * to hist_entry__cmp_compute_idx() in swapped order (right, left).
 */
static int64_t
hist_entry__cmp_delta_abs_idx(struct perf_hpp_fmt *fmt __maybe_unused,
			      struct hist_entry *left, struct hist_entry *right)
{
	int64_t cmp;

	cmp = hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA_ABS,
					  sort_compute);
	return cmp;
}
static int64_t
hist_entry__cmp_ratio_idx(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right)
@ -775,7 +810,7 @@ static const struct option options[] = {
OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
"Show only items with match in baseline"),
OPT_CALLBACK('c', "compute", &compute,
"delta,ratio,wdiff:w1,w2 (default delta)",
"delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs)",
"Entries differential computation selection",
setup_compute),
OPT_BOOLEAN('p', "period", &show_period,
@ -945,6 +980,7 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
switch (idx) {
case PERF_HPP_DIFF__DELTA:
case PERF_HPP_DIFF__DELTA_ABS:
if (pair->diff.computed)
diff = pair->diff.period_ratio_delta;
else
@ -1118,6 +1154,10 @@ static void data__hpp_register(struct data__file *d, int idx)
fmt->color = hpp__color_wdiff;
fmt->sort = hist_entry__cmp_wdiff;
break;
case PERF_HPP_DIFF__DELTA_ABS:
fmt->color = hpp__color_delta;
fmt->sort = hist_entry__cmp_delta_abs;
break;
default:
fmt->sort = hist_entry__cmp_nop;
break;
@ -1195,6 +1235,9 @@ static int ui_init(void)
case COMPUTE_WEIGHTED_DIFF:
fmt->sort = hist_entry__cmp_wdiff_idx;
break;
case COMPUTE_DELTA_ABS:
fmt->sort = hist_entry__cmp_delta_abs_idx;
break;
default:
BUG_ON(1);
}
@ -1249,6 +1292,31 @@ static int data_init(int argc, const char **argv)
return 0;
}
/*
 * Config callback for the 'diff.*' variables (registered through
 * perf_config() in cmd_diff()).
 *
 *   diff.order   - sort column number, parsed with perf_config_int() and
 *                  stored in sort_compute.
 *   diff.compute - compute method name: "delta", "delta-abs", "ratio" or
 *                  "wdiff"; stored in compute.
 *
 * @var:   full variable name, e.g. "diff.compute".
 * @value: textual value of the variable.
 * @cb:    unused.
 *
 * Returns 0 when the variable was accepted or is not handled here, and -1
 * when diff.compute has a missing or unknown value.
 */
static int diff__config(const char *var, const char *value,
			void *cb __maybe_unused)
{
	if (!strcmp(var, "diff.order")) {
		sort_compute = perf_config_int(var, value);
		return 0;
	}
	if (!strcmp(var, "diff.compute")) {
		/*
		 * Never hand a NULL pointer to strcmp().
		 * NOTE(review): value is presumably NULL when the key is
		 * written without "= value" - confirm against the config
		 * parser.
		 */
		if (!value) {
			pr_err("Missing value for %s\n", var);
			return -1;
		}
		if (!strcmp(value, "delta")) {
			compute = COMPUTE_DELTA;
		} else if (!strcmp(value, "delta-abs")) {
			compute = COMPUTE_DELTA_ABS;
		} else if (!strcmp(value, "ratio")) {
			compute = COMPUTE_RATIO;
		} else if (!strcmp(value, "wdiff")) {
			compute = COMPUTE_WEIGHTED_DIFF;
		} else {
			pr_err("Invalid compute method: %s\n", value);
			return -1;
		}
	}
	return 0;
}
int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
{
int ret = hists__init();
@ -1256,6 +1324,8 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
if (ret < 0)
return ret;
perf_config(diff__config, NULL);
argc = parse_options(argc, argv, options, diff_usage, 0);
if (symbol__init(NULL) < 0)

View File

@ -1065,7 +1065,7 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines)
data = rb_entry(next, struct page_stat, node);
sym = machine__find_kernel_function(machine, data->callsite, &map);
if (sym && sym->name)
if (sym)
caller = sym->name;
else
scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
@ -1107,7 +1107,7 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines
data = rb_entry(next, struct page_stat, node);
sym = machine__find_kernel_function(machine, data->callsite, &map);
if (sym && sym->name)
if (sym)
caller = sym->name;
else
scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);

View File

@ -418,7 +418,7 @@ static int record__mmap(struct record *rec)
static int record__open(struct record *rec)
{
char msg[512];
char msg[BUFSIZ];
struct perf_evsel *pos;
struct perf_evlist *evlist = rec->evlist;
struct perf_session *session = rec->session;

View File

@ -2067,7 +2067,7 @@ static void save_task_callchain(struct perf_sched *sched,
break;
sym = node->sym;
if (sym && sym->name) {
if (sym) {
if (!strcmp(sym->name, "schedule") ||
!strcmp(sym->name, "__schedule") ||
!strcmp(sym->name, "preempt_schedule"))

View File

@ -533,7 +533,7 @@ static int store_counter_ids(struct perf_evsel *counter)
static int __run_perf_stat(int argc, const char **argv)
{
int interval = stat_config.interval;
char msg[512];
char msg[BUFSIZ];
unsigned long long t0, t1;
struct perf_evsel *counter;
struct timespec ts;

View File

@ -859,7 +859,7 @@ static void perf_top__mmap_read(struct perf_top *top)
static int perf_top__start_counters(struct perf_top *top)
{
char msg[512];
char msg[BUFSIZ];
struct perf_evsel *counter;
struct perf_evlist *evlist = top->evlist;
struct record_opts *opts = &top->record_opts;

View File

@ -66,7 +66,7 @@ int test__PERF_RECORD(int subtest __maybe_unused)
if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
evlist = perf_evlist__new_default();
if (evlist == NULL || argv == NULL) {
if (evlist == NULL) {
pr_debug("Not enough memory to create evlist\n");
goto out;
}

View File

@ -2469,7 +2469,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
" -1: Allow use of (almost) all events by all users\n"
">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n"
">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN",
">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
"To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
" kernel.perf_event_paranoid = -1\n" ,
target->system_wide ? "system-wide " : "",
perf_event_paranoid());
case ENOENT:

View File

@ -168,7 +168,6 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
if (symbol_conf.bt_stop_list &&
node->sym &&
node->sym->name &&
strlist__has_entry(symbol_conf.bt_stop_list,
node->sym->name)) {
break;

View File

@ -1565,7 +1565,7 @@ int machine__process_event(struct machine *machine, union perf_event *event,
static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
{
if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
if (!regexec(regex, sym->name, 0, NULL, 0))
return 1;
return 0;
}

View File

@ -387,10 +387,10 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
{
const char *dsoname = "[unknown]";
if (map && map->dso && (map->dso->name || map->dso->long_name)) {
if (map && map->dso) {
if (symbol_conf.show_kernel_path && map->dso->long_name)
dsoname = map->dso->long_name;
else if (map->dso->name)
else
dsoname = map->dso->name;
}

View File

@ -1,6 +1,6 @@
libperf-$(CONFIG_LIBPERL) += trace-event-perl.o
libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o
CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default
CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default
CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow

View File

@ -309,10 +309,10 @@ static SV *perl_process_callchain(struct perf_sample *sample,
if (node->map) {
struct map *map = node->map;
const char *dsoname = "[unknown]";
if (map && map->dso && (map->dso->name || map->dso->long_name)) {
if (map && map->dso) {
if (symbol_conf.show_kernel_path && map->dso->long_name)
dsoname = map->dso->long_name;
else if (map->dso->name)
else
dsoname = map->dso->name;
}
if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) {

View File

@ -21,7 +21,7 @@ size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
unsigned long offset;
size_t length;
if (sym && sym->name) {
if (sym) {
length = fprintf(fp, "%s", sym->name);
if (al && print_offsets) {
if (al->addr < sym->end)