2009-11-23 15:42:35 +01:00
|
|
|
#include "../../../include/linux/hw_breakpoint.h"
|
2009-05-26 11:10:09 +02:00
|
|
|
#include "util.h"
|
2009-09-04 21:39:51 +02:00
|
|
|
#include "../perf.h"
|
2011-01-11 23:56:53 +01:00
|
|
|
#include "evlist.h"
|
2011-01-03 19:39:04 +01:00
|
|
|
#include "evsel.h"
|
2009-05-26 11:10:09 +02:00
|
|
|
#include "parse-options.h"
|
|
|
|
#include "parse-events.h"
|
|
|
|
#include "exec_cmd.h"
|
2009-06-01 22:50:19 +02:00
|
|
|
#include "string.h"
|
2010-03-25 23:59:00 +01:00
|
|
|
#include "symbol.h"
|
2009-07-21 20:16:29 +02:00
|
|
|
#include "cache.h"
|
2009-09-12 07:52:51 +02:00
|
|
|
#include "header.h"
|
2009-11-08 16:03:07 +01:00
|
|
|
#include "debugfs.h"
|
2009-05-26 11:10:09 +02:00
|
|
|
|
|
|
|
struct event_symbol {
|
2009-08-15 12:26:57 +02:00
|
|
|
u8 type;
|
|
|
|
u64 config;
|
|
|
|
const char *symbol;
|
|
|
|
const char *alias;
|
2009-05-26 11:10:09 +02:00
|
|
|
};
|
|
|
|
|
2009-09-11 23:19:45 +02:00
|
|
|
enum event_result {
|
|
|
|
EVT_FAILED,
|
|
|
|
EVT_HANDLED,
|
|
|
|
EVT_HANDLED_ALL
|
|
|
|
};
|
|
|
|
|
2009-07-21 20:16:29 +02:00
|
|
|
char debugfs_path[MAXPATHLEN];
|
|
|
|
|
2009-06-22 13:13:14 +02:00
|
|
|
#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
|
|
|
|
#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
|
2009-06-06 09:58:57 +02:00
|
|
|
|
2009-05-26 11:10:09 +02:00
|
|
|
static struct event_symbol event_symbols[] = {
|
2011-04-29 14:41:28 +02:00
|
|
|
{ CHW(CPU_CYCLES), "cpu-cycles", "cycles" },
|
|
|
|
{ CHW(STALLED_CYCLES_FRONTEND), "stalled-cycles-frontend", "idle-cycles-frontend" },
|
|
|
|
{ CHW(STALLED_CYCLES_BACKEND), "stalled-cycles-backend", "idle-cycles-backend" },
|
|
|
|
{ CHW(INSTRUCTIONS), "instructions", "" },
|
|
|
|
{ CHW(CACHE_REFERENCES), "cache-references", "" },
|
|
|
|
{ CHW(CACHE_MISSES), "cache-misses", "" },
|
|
|
|
{ CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" },
|
|
|
|
{ CHW(BRANCH_MISSES), "branch-misses", "" },
|
|
|
|
{ CHW(BUS_CYCLES), "bus-cycles", "" },
|
|
|
|
|
|
|
|
{ CSW(CPU_CLOCK), "cpu-clock", "" },
|
|
|
|
{ CSW(TASK_CLOCK), "task-clock", "" },
|
|
|
|
{ CSW(PAGE_FAULTS), "page-faults", "faults" },
|
|
|
|
{ CSW(PAGE_FAULTS_MIN), "minor-faults", "" },
|
|
|
|
{ CSW(PAGE_FAULTS_MAJ), "major-faults", "" },
|
|
|
|
{ CSW(CONTEXT_SWITCHES), "context-switches", "cs" },
|
|
|
|
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
|
|
|
|
{ CSW(ALIGNMENT_FAULTS), "alignment-faults", "" },
|
|
|
|
{ CSW(EMULATION_FAULTS), "emulation-faults", "" },
|
2009-05-26 11:10:09 +02:00
|
|
|
};
|
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 12:02:48 +02:00
|
|
|
#define __PERF_EVENT_FIELD(config, name) \
|
|
|
|
((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT)
|
2009-05-26 09:17:18 +02:00
|
|
|
|
perf stat: Add stalled cycles to the default output
The new default output looks like this:
Performance counter stats for './loop_1b_instructions':
236.010686 task-clock # 0.996 CPUs utilized
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
99 page-faults # 0.000 M/sec
756,487,646 cycles # 3.205 GHz
354,938,996 stalled-cycles # 46.92% of all cycles are idle
1,001,403,797 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn
100,279,773 branches # 424.895 M/sec
12,646 branch-misses # 0.013 % of all branches
0.236902540 seconds time elapsed
We dropped cache-refs and cache-misses and added stalled-cycles - this is a
more generic "how well utilized is the CPU" metric.
If the stalled-cycles ratio is too high then more specific measurements can be
taken to figure out the source of the inefficiency.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 05:20:22 +02:00
|
|
|
#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW)
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 12:02:48 +02:00
|
|
|
#define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG)
|
perf stat: Add stalled cycles to the default output
The new default output looks like this:
Performance counter stats for './loop_1b_instructions':
236.010686 task-clock # 0.996 CPUs utilized
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
99 page-faults # 0.000 M/sec
756,487,646 cycles # 3.205 GHz
354,938,996 stalled-cycles # 46.92% of all cycles are idle
1,001,403,797 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn
100,279,773 branches # 424.895 M/sec
12,646 branch-misses # 0.013 % of all branches
0.236902540 seconds time elapsed
We dropped cache-refs and cache-misses and added stalled-cycles - this is a
more generic "how well utilized is the CPU" metric.
If the stalled-cycles ratio is too high then more specific measurements can be
taken to figure out the source of the inefficiency.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 05:20:22 +02:00
|
|
|
#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 12:02:48 +02:00
|
|
|
#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
|
2009-05-26 09:17:18 +02:00
|
|
|
|
perf stat: Analyze front-end and back-end stall counts
Sample output:
Performance counter stats for './loop_1b':
873.691065 task-clock # 1.000 CPUs utilized
1 context-switches # 0.000 M/sec
1 CPU-migrations # 0.000 M/sec
96 page-faults # 0.000 M/sec
2,012,637,222 cycles # 2.304 GHz (66.58%)
1,001,397,911 stalled-cycles-frontend # 49.76% frontend cycles idle (66.58%)
7,523,398 stalled-cycles-backend # 0.37% backend cycles idle (66.76%)
2,004,551,046 instructions # 1.00 insns per cycle
# 0.50 stalled cycles per insn (66.80%)
1,001,304,992 branches # 1146.063 M/sec (66.76%)
39,453 branch-misses # 0.00% of all branches (66.64%)
0.874046121 seconds time elapsed
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n003io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-29 13:49:08 +02:00
|
|
|
static const char *hw_event_names[PERF_COUNT_HW_MAX] = {
|
2009-06-06 13:58:12 +02:00
|
|
|
"cycles",
|
2009-05-26 09:17:18 +02:00
|
|
|
"instructions",
|
2009-06-06 13:58:12 +02:00
|
|
|
"cache-references",
|
|
|
|
"cache-misses",
|
2009-05-26 09:17:18 +02:00
|
|
|
"branches",
|
2009-06-06 13:58:12 +02:00
|
|
|
"branch-misses",
|
|
|
|
"bus-cycles",
|
perf stat: Analyze front-end and back-end stall counts
Sample output:
Performance counter stats for './loop_1b':
873.691065 task-clock # 1.000 CPUs utilized
1 context-switches # 0.000 M/sec
1 CPU-migrations # 0.000 M/sec
96 page-faults # 0.000 M/sec
2,012,637,222 cycles # 2.304 GHz (66.58%)
1,001,397,911 stalled-cycles-frontend # 49.76% frontend cycles idle (66.58%)
7,523,398 stalled-cycles-backend # 0.37% backend cycles idle (66.76%)
2,004,551,046 instructions # 1.00 insns per cycle
# 0.50 stalled cycles per insn (66.80%)
1,001,304,992 branches # 1146.063 M/sec (66.76%)
39,453 branch-misses # 0.00% of all branches (66.64%)
0.874046121 seconds time elapsed
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n003io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-29 13:49:08 +02:00
|
|
|
"stalled-cycles-frontend",
|
|
|
|
"stalled-cycles-backend",
|
2009-05-26 09:17:18 +02:00
|
|
|
};
|
|
|
|
|
perf stat: Analyze front-end and back-end stall counts
Sample output:
Performance counter stats for './loop_1b':
873.691065 task-clock # 1.000 CPUs utilized
1 context-switches # 0.000 M/sec
1 CPU-migrations # 0.000 M/sec
96 page-faults # 0.000 M/sec
2,012,637,222 cycles # 2.304 GHz (66.58%)
1,001,397,911 stalled-cycles-frontend # 49.76% frontend cycles idle (66.58%)
7,523,398 stalled-cycles-backend # 0.37% backend cycles idle (66.76%)
2,004,551,046 instructions # 1.00 insns per cycle
# 0.50 stalled cycles per insn (66.80%)
1,001,304,992 branches # 1146.063 M/sec (66.76%)
39,453 branch-misses # 0.00% of all branches (66.64%)
0.874046121 seconds time elapsed
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n003io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-29 13:49:08 +02:00
|
|
|
static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
|
2011-04-27 04:24:57 +02:00
|
|
|
"cpu-clock",
|
|
|
|
"task-clock",
|
2009-06-06 13:58:12 +02:00
|
|
|
"page-faults",
|
|
|
|
"context-switches",
|
|
|
|
"CPU-migrations",
|
|
|
|
"minor-faults",
|
|
|
|
"major-faults",
|
2009-10-18 03:09:29 +02:00
|
|
|
"alignment-faults",
|
|
|
|
"emulation-faults",
|
2009-05-26 09:17:18 +02:00
|
|
|
};
|
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
#define MAX_ALIASES 8
|
|
|
|
|
2011-07-13 22:58:18 +02:00
|
|
|
static const char *hw_cache[PERF_COUNT_HW_CACHE_MAX][MAX_ALIASES] = {
|
2009-07-06 14:01:31 +02:00
|
|
|
{ "L1-dcache", "l1-d", "l1d", "L1-data", },
|
|
|
|
{ "L1-icache", "l1-i", "l1i", "L1-instruction", },
|
2011-07-13 22:58:18 +02:00
|
|
|
{ "LLC", "L2", },
|
2009-06-25 14:55:22 +02:00
|
|
|
{ "dTLB", "d-tlb", "Data-TLB", },
|
|
|
|
{ "iTLB", "i-tlb", "Instruction-TLB", },
|
|
|
|
{ "branch", "branches", "bpu", "btb", "bpc", },
|
2011-07-13 22:58:18 +02:00
|
|
|
{ "node", },
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
};
|
|
|
|
|
2011-07-13 22:58:18 +02:00
|
|
|
static const char *hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][MAX_ALIASES] = {
|
2009-06-25 14:55:22 +02:00
|
|
|
{ "load", "loads", "read", },
|
|
|
|
{ "store", "stores", "write", },
|
|
|
|
{ "prefetch", "prefetches", "speculative-read", "speculative-load", },
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
};
|
|
|
|
|
2011-07-13 22:58:18 +02:00
|
|
|
static const char *hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
|
|
|
|
[MAX_ALIASES] = {
|
2009-06-25 14:55:22 +02:00
|
|
|
{ "refs", "Reference", "ops", "access", },
|
|
|
|
{ "misses", "miss", },
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
};
|
|
|
|
|
2009-06-25 13:46:07 +02:00
|
|
|
#define C(x) PERF_COUNT_HW_CACHE_##x
|
|
|
|
#define CACHE_READ (1 << C(OP_READ))
|
|
|
|
#define CACHE_WRITE (1 << C(OP_WRITE))
|
|
|
|
#define CACHE_PREFETCH (1 << C(OP_PREFETCH))
|
|
|
|
#define COP(x) (1 << x)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cache operartion stat
|
|
|
|
* L1I : Read and prefetch only
|
|
|
|
* ITLB and BPU : Read-only
|
|
|
|
*/
|
|
|
|
static unsigned long hw_cache_stat[C(MAX)] = {
|
|
|
|
[C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
|
|
|
|
[C(L1I)] = (CACHE_READ | CACHE_PREFETCH),
|
|
|
|
[C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
|
|
|
|
[C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
|
|
|
|
[C(ITLB)] = (CACHE_READ),
|
|
|
|
[C(BPU)] = (CACHE_READ),
|
2011-07-13 22:58:18 +02:00
|
|
|
[C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
|
2009-06-25 13:46:07 +02:00
|
|
|
};
|
|
|
|
|
2009-09-04 21:39:51 +02:00
|
|
|
#define for_each_subsystem(sys_dir, sys_dirent, sys_next) \
|
2009-07-21 18:20:22 +02:00
|
|
|
while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next) \
|
2009-09-04 21:39:51 +02:00
|
|
|
if (sys_dirent.d_type == DT_DIR && \
|
2009-07-21 18:20:22 +02:00
|
|
|
(strcmp(sys_dirent.d_name, ".")) && \
|
|
|
|
(strcmp(sys_dirent.d_name, "..")))
|
|
|
|
|
2009-08-06 16:48:54 +02:00
|
|
|
static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
|
|
|
|
{
|
|
|
|
char evt_path[MAXPATHLEN];
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
|
|
|
|
sys_dir->d_name, evt_dir->d_name);
|
|
|
|
fd = open(evt_path, O_RDONLY);
|
|
|
|
if (fd < 0)
|
|
|
|
return -EINVAL;
|
|
|
|
close(fd);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-09-04 21:39:51 +02:00
|
|
|
#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) \
|
2009-07-21 18:20:22 +02:00
|
|
|
while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \
|
2009-09-04 21:39:51 +02:00
|
|
|
if (evt_dirent.d_type == DT_DIR && \
|
2009-07-21 18:20:22 +02:00
|
|
|
(strcmp(evt_dirent.d_name, ".")) && \
|
2009-08-06 16:48:54 +02:00
|
|
|
(strcmp(evt_dirent.d_name, "..")) && \
|
|
|
|
(!tp_event_has_id(&sys_dirent, &evt_dirent)))
|
2009-07-21 18:20:22 +02:00
|
|
|
|
2009-09-17 10:34:51 +02:00
|
|
|
#define MAX_EVENT_LENGTH 512
|
2009-07-21 18:20:22 +02:00
|
|
|
|
|
|
|
|
2009-08-28 03:09:58 +02:00
|
|
|
struct tracepoint_path *tracepoint_id_to_path(u64 config)
|
2009-07-21 18:20:22 +02:00
|
|
|
{
|
2009-08-28 03:09:58 +02:00
|
|
|
struct tracepoint_path *path = NULL;
|
2009-07-21 18:20:22 +02:00
|
|
|
DIR *sys_dir, *evt_dir;
|
|
|
|
struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
|
|
|
|
char id_buf[4];
|
2009-09-24 15:39:09 +02:00
|
|
|
int fd;
|
2009-07-21 18:20:22 +02:00
|
|
|
u64 id;
|
|
|
|
char evt_path[MAXPATHLEN];
|
2009-09-24 15:39:09 +02:00
|
|
|
char dir_path[MAXPATHLEN];
|
2009-07-21 18:20:22 +02:00
|
|
|
|
2009-11-08 16:03:07 +01:00
|
|
|
if (debugfs_valid_mountpoint(debugfs_path))
|
2009-08-28 03:09:58 +02:00
|
|
|
return NULL;
|
2009-07-21 18:20:22 +02:00
|
|
|
|
2009-07-21 20:16:29 +02:00
|
|
|
sys_dir = opendir(debugfs_path);
|
2009-07-21 18:20:22 +02:00
|
|
|
if (!sys_dir)
|
2009-09-24 15:39:09 +02:00
|
|
|
return NULL;
|
2009-09-04 21:39:51 +02:00
|
|
|
|
|
|
|
for_each_subsystem(sys_dir, sys_dirent, sys_next) {
|
2009-09-24 15:39:09 +02:00
|
|
|
|
|
|
|
snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
|
|
|
|
sys_dirent.d_name);
|
|
|
|
evt_dir = opendir(dir_path);
|
|
|
|
if (!evt_dir)
|
2009-09-04 21:39:51 +02:00
|
|
|
continue;
|
2009-09-24 15:39:09 +02:00
|
|
|
|
2009-09-04 21:39:51 +02:00
|
|
|
for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
|
2009-09-24 15:39:09 +02:00
|
|
|
|
|
|
|
snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path,
|
2009-07-21 18:20:22 +02:00
|
|
|
evt_dirent.d_name);
|
2009-09-24 15:39:09 +02:00
|
|
|
fd = open(evt_path, O_RDONLY);
|
2009-07-21 18:20:22 +02:00
|
|
|
if (fd < 0)
|
|
|
|
continue;
|
|
|
|
if (read(fd, id_buf, sizeof(id_buf)) < 0) {
|
|
|
|
close(fd);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
close(fd);
|
|
|
|
id = atoll(id_buf);
|
|
|
|
if (id == config) {
|
|
|
|
closedir(evt_dir);
|
|
|
|
closedir(sys_dir);
|
2009-12-06 10:16:30 +01:00
|
|
|
path = zalloc(sizeof(*path));
|
2009-08-28 03:09:58 +02:00
|
|
|
path->system = malloc(MAX_EVENT_LENGTH);
|
|
|
|
if (!path->system) {
|
|
|
|
free(path);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
path->name = malloc(MAX_EVENT_LENGTH);
|
|
|
|
if (!path->name) {
|
|
|
|
free(path->system);
|
|
|
|
free(path);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
strncpy(path->system, sys_dirent.d_name,
|
|
|
|
MAX_EVENT_LENGTH);
|
|
|
|
strncpy(path->name, evt_dirent.d_name,
|
|
|
|
MAX_EVENT_LENGTH);
|
|
|
|
return path;
|
2009-07-21 18:20:22 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
closedir(evt_dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
closedir(sys_dir);
|
2009-08-28 03:09:58 +02:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define TP_PATH_LEN (MAX_EVENT_LENGTH * 2 + 1)
|
|
|
|
static const char *tracepoint_id_to_name(u64 config)
|
|
|
|
{
|
|
|
|
static char buf[TP_PATH_LEN];
|
|
|
|
struct tracepoint_path *path;
|
|
|
|
|
|
|
|
path = tracepoint_id_to_path(config);
|
|
|
|
if (path) {
|
|
|
|
snprintf(buf, TP_PATH_LEN, "%s:%s", path->system, path->name);
|
|
|
|
free(path->name);
|
|
|
|
free(path->system);
|
|
|
|
free(path);
|
|
|
|
} else
|
|
|
|
snprintf(buf, TP_PATH_LEN, "%s:%s", "unknown", "unknown");
|
|
|
|
|
|
|
|
return buf;
|
2009-07-21 18:20:22 +02:00
|
|
|
}
|
|
|
|
|
2009-06-25 13:46:07 +02:00
|
|
|
static int is_cache_op_valid(u8 cache_type, u8 cache_op)
|
|
|
|
{
|
|
|
|
if (hw_cache_stat[cache_type] & COP(cache_op))
|
|
|
|
return 1; /* valid */
|
|
|
|
else
|
|
|
|
return 0; /* invalid */
|
|
|
|
}
|
|
|
|
|
2009-06-25 14:55:22 +02:00
|
|
|
static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result)
|
|
|
|
{
|
|
|
|
static char name[50];
|
|
|
|
|
|
|
|
if (cache_result) {
|
|
|
|
sprintf(name, "%s-%s-%s", hw_cache[cache_type][0],
|
|
|
|
hw_cache_op[cache_op][0],
|
|
|
|
hw_cache_result[cache_result][0]);
|
|
|
|
} else {
|
|
|
|
sprintf(name, "%s-%s", hw_cache[cache_type][0],
|
|
|
|
hw_cache_op[cache_op][1]);
|
|
|
|
}
|
|
|
|
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
2011-03-10 06:23:28 +01:00
|
|
|
const char *event_type(int type)
|
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case PERF_TYPE_HARDWARE:
|
|
|
|
return "hardware";
|
|
|
|
|
|
|
|
case PERF_TYPE_SOFTWARE:
|
|
|
|
return "software";
|
|
|
|
|
|
|
|
case PERF_TYPE_TRACEPOINT:
|
|
|
|
return "tracepoint";
|
|
|
|
|
|
|
|
case PERF_TYPE_HW_CACHE:
|
|
|
|
return "hardware-cache";
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return "unknown";
|
|
|
|
}
|
|
|
|
|
2011-01-03 19:39:04 +01:00
|
|
|
const char *event_name(struct perf_evsel *evsel)
|
2009-05-26 09:17:18 +02:00
|
|
|
{
|
2011-01-03 19:39:04 +01:00
|
|
|
u64 config = evsel->attr.config;
|
|
|
|
int type = evsel->attr.type;
|
2009-08-06 19:40:28 +02:00
|
|
|
|
2011-02-16 14:10:01 +01:00
|
|
|
if (evsel->name)
|
|
|
|
return evsel->name;
|
|
|
|
|
2009-08-06 19:40:28 +02:00
|
|
|
return __event_name(type, config);
|
|
|
|
}
|
|
|
|
|
2009-08-15 12:26:57 +02:00
|
|
|
const char *__event_name(int type, u64 config)
|
2009-08-06 19:40:28 +02:00
|
|
|
{
|
2009-05-26 09:17:18 +02:00
|
|
|
static char buf[32];
|
|
|
|
|
2009-08-06 19:40:28 +02:00
|
|
|
if (type == PERF_TYPE_RAW) {
|
2011-01-22 23:37:02 +01:00
|
|
|
sprintf(buf, "raw 0x%" PRIx64, config);
|
2009-05-26 09:17:18 +02:00
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case PERF_TYPE_HARDWARE:
|
perf stat: Add stalled cycles to the default output
The new default output looks like this:
Performance counter stats for './loop_1b_instructions':
236.010686 task-clock # 0.996 CPUs utilized
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
99 page-faults # 0.000 M/sec
756,487,646 cycles # 3.205 GHz
354,938,996 stalled-cycles # 46.92% of all cycles are idle
1,001,403,797 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn
100,279,773 branches # 424.895 M/sec
12,646 branch-misses # 0.013 % of all branches
0.236902540 seconds time elapsed
We dropped cache-refs and cache-misses and added stalled-cycles - this is a
more generic "how well utilized is the CPU" metric.
If the stalled-cycles ratio is too high then more specific measurements can be
taken to figure out the source of the inefficiency.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 05:20:22 +02:00
|
|
|
if (config < PERF_COUNT_HW_MAX && hw_event_names[config])
|
2009-06-06 09:58:57 +02:00
|
|
|
return hw_event_names[config];
|
2009-05-26 09:17:18 +02:00
|
|
|
return "unknown-hardware";
|
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
case PERF_TYPE_HW_CACHE: {
|
perf_counter tools: Define and use our own u64, s64 etc. definitions
On 64-bit powerpc, __u64 is defined to be unsigned long rather than
unsigned long long. This causes compiler warnings every time we
print a __u64 value with %Lx.
Rather than changing __u64, we define our own u64 to be unsigned long
long on all architectures, and similarly s64 as signed long long.
For consistency we also define u32, s32, u16, s16, u8 and s8. These
definitions are put in a new header, types.h, because these definitions
are needed in util/string.h and util/symbol.h.
The main change here is the mechanical change of __[us]{64,32,16,8}
to remove the "__". The other changes are:
* Create types.h
* Include types.h in perf.h, util/string.h and util/symbol.h
* Add types.h to the LIB_H definition in Makefile
* Added (u64) casts in process_overflow_event() and print_sym_table()
to kill two remaining warnings.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: benh@kernel.crashing.org
LKML-Reference: <19003.33494.495844.956580@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-19 14:21:42 +02:00
|
|
|
u8 cache_type, cache_op, cache_result;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
|
|
|
|
cache_type = (config >> 0) & 0xff;
|
|
|
|
if (cache_type > PERF_COUNT_HW_CACHE_MAX)
|
|
|
|
return "unknown-ext-hardware-cache-type";
|
|
|
|
|
|
|
|
cache_op = (config >> 8) & 0xff;
|
2009-06-06 13:58:12 +02:00
|
|
|
if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX)
|
|
|
|
return "unknown-ext-hardware-cache-op";
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
|
|
|
|
cache_result = (config >> 16) & 0xff;
|
2009-06-06 13:58:12 +02:00
|
|
|
if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX)
|
|
|
|
return "unknown-ext-hardware-cache-result";
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
|
2009-06-25 13:46:07 +02:00
|
|
|
if (!is_cache_op_valid(cache_type, cache_op))
|
|
|
|
return "invalid-cache";
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
|
2009-06-25 14:55:22 +02:00
|
|
|
return event_cache_name(cache_type, cache_op, cache_result);
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
}
|
|
|
|
|
2009-05-26 09:17:18 +02:00
|
|
|
case PERF_TYPE_SOFTWARE:
|
perf stat: Add stalled cycles to the default output
The new default output looks like this:
Performance counter stats for './loop_1b_instructions':
236.010686 task-clock # 0.996 CPUs utilized
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
99 page-faults # 0.000 M/sec
756,487,646 cycles # 3.205 GHz
354,938,996 stalled-cycles # 46.92% of all cycles are idle
1,001,403,797 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn
100,279,773 branches # 424.895 M/sec
12,646 branch-misses # 0.013 % of all branches
0.236902540 seconds time elapsed
We dropped cache-refs and cache-misses and added stalled-cycles - this is a
more generic "how well utilized is the CPU" metric.
If the stalled-cycles ratio is too high then more specific measurements can be
taken to figure out the source of the inefficiency.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-27 05:20:22 +02:00
|
|
|
if (config < PERF_COUNT_SW_MAX && sw_event_names[config])
|
2009-06-06 09:58:57 +02:00
|
|
|
return sw_event_names[config];
|
2009-05-26 09:17:18 +02:00
|
|
|
return "unknown-software";
|
|
|
|
|
2009-07-21 18:20:22 +02:00
|
|
|
case PERF_TYPE_TRACEPOINT:
|
|
|
|
return tracepoint_id_to_name(config);
|
|
|
|
|
2009-05-26 09:17:18 +02:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return "unknown";
|
|
|
|
}
|
|
|
|
|
2009-08-15 12:26:57 +02:00
|
|
|
static int parse_aliases(const char **str, const char *names[][MAX_ALIASES], int size)
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
{
|
|
|
|
int i, j;
|
2009-07-01 05:04:34 +02:00
|
|
|
int n, longest = -1;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
|
|
|
|
for (i = 0; i < size; i++) {
|
2009-07-01 05:04:34 +02:00
|
|
|
for (j = 0; j < MAX_ALIASES && names[i][j]; j++) {
|
|
|
|
n = strlen(names[i][j]);
|
|
|
|
if (n > longest && !strncasecmp(*str, names[i][j], n))
|
|
|
|
longest = n;
|
|
|
|
}
|
|
|
|
if (longest > 0) {
|
|
|
|
*str += longest;
|
|
|
|
return i;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-06-06 21:04:17 +02:00
|
|
|
return -1;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
}
|
|
|
|
|
2009-09-11 23:19:45 +02:00
|
|
|
static enum event_result
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 12:02:48 +02:00
|
|
|
parse_generic_hw_event(const char **str, struct perf_event_attr *attr)
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
{
|
2009-07-01 05:04:34 +02:00
|
|
|
const char *s = *str;
|
|
|
|
int cache_type = -1, cache_op = -1, cache_result = -1;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
cache_type = parse_aliases(&s, hw_cache, PERF_COUNT_HW_CACHE_MAX);
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
/*
|
|
|
|
* No fallback - if we cannot get a clear cache type
|
|
|
|
* then bail out:
|
|
|
|
*/
|
|
|
|
if (cache_type == -1)
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_FAILED;
|
2009-07-01 05:04:34 +02:00
|
|
|
|
|
|
|
while ((cache_op == -1 || cache_result == -1) && *s == '-') {
|
|
|
|
++s;
|
|
|
|
|
|
|
|
if (cache_op == -1) {
|
|
|
|
cache_op = parse_aliases(&s, hw_cache_op,
|
|
|
|
PERF_COUNT_HW_CACHE_OP_MAX);
|
|
|
|
if (cache_op >= 0) {
|
|
|
|
if (!is_cache_op_valid(cache_type, cache_op))
|
2011-07-13 22:58:18 +02:00
|
|
|
return EVT_FAILED;
|
2009-07-01 05:04:34 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cache_result == -1) {
|
|
|
|
cache_result = parse_aliases(&s, hw_cache_result,
|
|
|
|
PERF_COUNT_HW_CACHE_RESULT_MAX);
|
|
|
|
if (cache_result >= 0)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Can't parse this as a cache op or result, so back up
|
|
|
|
* to the '-'.
|
|
|
|
*/
|
|
|
|
--s;
|
|
|
|
break;
|
|
|
|
}
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Fall back to reads:
|
|
|
|
*/
|
2009-06-06 21:04:17 +02:00
|
|
|
if (cache_op == -1)
|
|
|
|
cache_op = PERF_COUNT_HW_CACHE_OP_READ;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Fall back to accesses:
|
|
|
|
*/
|
|
|
|
if (cache_result == -1)
|
|
|
|
cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
|
|
|
|
|
|
|
|
attr->config = cache_type | (cache_op << 8) | (cache_result << 16);
|
|
|
|
attr->type = PERF_TYPE_HW_CACHE;
|
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
*str = s;
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_HANDLED;
|
|
|
|
}
|
|
|
|
|
|
|
|
static enum event_result
|
|
|
|
parse_single_tracepoint_event(char *sys_name,
|
|
|
|
const char *evt_name,
|
|
|
|
unsigned int evt_length,
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 12:02:48 +02:00
|
|
|
struct perf_event_attr *attr,
|
2009-09-11 23:19:45 +02:00
|
|
|
const char **strp)
|
|
|
|
{
|
|
|
|
char evt_path[MAXPATHLEN];
|
|
|
|
char id_buf[4];
|
|
|
|
u64 id;
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
|
|
|
|
sys_name, evt_name);
|
|
|
|
|
|
|
|
fd = open(evt_path, O_RDONLY);
|
|
|
|
if (fd < 0)
|
|
|
|
return EVT_FAILED;
|
|
|
|
|
|
|
|
if (read(fd, id_buf, sizeof(id_buf)) < 0) {
|
|
|
|
close(fd);
|
|
|
|
return EVT_FAILED;
|
|
|
|
}
|
|
|
|
|
|
|
|
close(fd);
|
|
|
|
id = atoll(id_buf);
|
|
|
|
attr->config = id;
|
|
|
|
attr->type = PERF_TYPE_TRACEPOINT;
|
2010-11-30 23:27:01 +01:00
|
|
|
*strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */
|
2009-09-11 23:19:45 +02:00
|
|
|
|
2010-04-21 18:06:01 +02:00
|
|
|
attr->sample_type |= PERF_SAMPLE_RAW;
|
|
|
|
attr->sample_type |= PERF_SAMPLE_TIME;
|
|
|
|
attr->sample_type |= PERF_SAMPLE_CPU;
|
|
|
|
|
|
|
|
attr->sample_period = 1;
|
|
|
|
|
|
|
|
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_HANDLED;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
}
|
|
|
|
|
2009-09-11 23:19:45 +02:00
|
|
|
/* sys + ':' + event + ':' + flags*/
|
|
|
|
#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
|
|
|
|
static enum event_result
|
2011-07-14 11:25:32 +02:00
|
|
|
parse_multiple_tracepoint_event(struct perf_evlist *evlist, char *sys_name,
|
2011-01-17 17:40:46 +01:00
|
|
|
const char *evt_exp, char *flags)
|
2009-09-11 23:19:45 +02:00
|
|
|
{
|
|
|
|
char evt_path[MAXPATHLEN];
|
|
|
|
struct dirent *evt_ent;
|
|
|
|
DIR *evt_dir;
|
|
|
|
|
|
|
|
snprintf(evt_path, MAXPATHLEN, "%s/%s", debugfs_path, sys_name);
|
|
|
|
evt_dir = opendir(evt_path);
|
|
|
|
|
|
|
|
if (!evt_dir) {
|
|
|
|
perror("Can't open event dir");
|
|
|
|
return EVT_FAILED;
|
|
|
|
}
|
|
|
|
|
|
|
|
while ((evt_ent = readdir(evt_dir))) {
|
|
|
|
char event_opt[MAX_EVOPT_LEN + 1];
|
|
|
|
int len;
|
|
|
|
|
|
|
|
if (!strcmp(evt_ent->d_name, ".")
|
|
|
|
|| !strcmp(evt_ent->d_name, "..")
|
|
|
|
|| !strcmp(evt_ent->d_name, "enable")
|
|
|
|
|| !strcmp(evt_ent->d_name, "filter"))
|
|
|
|
continue;
|
|
|
|
|
2010-01-05 23:47:17 +01:00
|
|
|
if (!strglobmatch(evt_ent->d_name, evt_exp))
|
|
|
|
continue;
|
|
|
|
|
2009-12-06 19:25:30 +01:00
|
|
|
len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name,
|
|
|
|
evt_ent->d_name, flags ? ":" : "",
|
|
|
|
flags ?: "");
|
2009-09-11 23:19:45 +02:00
|
|
|
if (len < 0)
|
|
|
|
return EVT_FAILED;
|
|
|
|
|
2011-07-14 11:25:32 +02:00
|
|
|
if (parse_events(evlist, event_opt, 0))
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_FAILED;
|
|
|
|
}
|
|
|
|
|
|
|
|
return EVT_HANDLED_ALL;
|
|
|
|
}
|
|
|
|
|
2011-01-17 17:40:46 +01:00
|
|
|
static enum event_result
|
2011-07-14 11:25:32 +02:00
|
|
|
parse_tracepoint_event(struct perf_evlist *evlist, const char **strp,
|
2011-01-17 17:40:46 +01:00
|
|
|
struct perf_event_attr *attr)
|
2009-07-21 18:20:22 +02:00
|
|
|
{
|
|
|
|
const char *evt_name;
|
2010-11-30 23:27:01 +01:00
|
|
|
char *flags = NULL, *comma_loc;
|
2009-07-21 18:20:22 +02:00
|
|
|
char sys_name[MAX_EVENT_LENGTH];
|
|
|
|
unsigned int sys_length, evt_length;
|
|
|
|
|
2009-11-08 16:03:07 +01:00
|
|
|
if (debugfs_valid_mountpoint(debugfs_path))
|
2009-07-21 18:20:22 +02:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
evt_name = strchr(*strp, ':');
|
|
|
|
if (!evt_name)
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_FAILED;
|
2009-07-21 18:20:22 +02:00
|
|
|
|
|
|
|
sys_length = evt_name - *strp;
|
|
|
|
if (sys_length >= MAX_EVENT_LENGTH)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
strncpy(sys_name, *strp, sys_length);
|
|
|
|
sys_name[sys_length] = '\0';
|
|
|
|
evt_name = evt_name + 1;
|
2009-08-13 10:27:18 +02:00
|
|
|
|
2010-11-30 23:27:01 +01:00
|
|
|
comma_loc = strchr(evt_name, ',');
|
|
|
|
if (comma_loc) {
|
|
|
|
/* take the event name up to the comma */
|
|
|
|
evt_name = strndup(evt_name, comma_loc - evt_name);
|
|
|
|
}
|
2009-08-13 10:27:18 +02:00
|
|
|
flags = strchr(evt_name, ':');
|
|
|
|
if (flags) {
|
2009-09-13 09:44:29 +02:00
|
|
|
/* split it out: */
|
|
|
|
evt_name = strndup(evt_name, flags - evt_name);
|
2009-08-13 10:27:18 +02:00
|
|
|
flags++;
|
|
|
|
}
|
|
|
|
|
2009-07-21 18:20:22 +02:00
|
|
|
evt_length = strlen(evt_name);
|
|
|
|
if (evt_length >= MAX_EVENT_LENGTH)
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_FAILED;
|
2010-01-05 23:47:17 +01:00
|
|
|
if (strpbrk(evt_name, "*?")) {
|
2011-01-17 17:25:06 +01:00
|
|
|
*strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */
|
2011-07-14 11:25:32 +02:00
|
|
|
return parse_multiple_tracepoint_event(evlist, sys_name,
|
|
|
|
evt_name, flags);
|
perf tools: Fix buffer overflow error when specifying all tracepoints
I found when specifying all tracepoints with -e to one of subcommand,
such as 'stat', the program will trigger a buffer overflow error, like
this:
*** buffer overflow detected ***: ./perf terminated
======= Backtrace: =========
/lib64/libc.so.6(__fortify_fail+0x37)[0x382cefb2c7]
....
The tracepoints are separated by comma, something like this:
$ perf stat -a -e `perf list |grep Tracepoint|awk -F'[' '{gsub(/[[:space:]]+/,"",$1);array[FNR]=$1}END{outputs=array[1];for (i=2;i<=FNR;i++){ outputs=outputs "," array[i];};print outputs}'`
The root reason of this problem is that store_event_type() is called for all
events, and will overflow the 'filename' at:
strncat(filename, orgname, strlen(orgname));
This patch fixes it by calling store_event_type() only when the event name has
been found.
LKML-Reference: <20110106093922.GB6713@hpt.nay.redhat.com>
Signed-off-by: Han Pingtian <phan@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2011-01-06 10:39:22 +01:00
|
|
|
} else {
|
2009-09-11 23:19:45 +02:00
|
|
|
return parse_single_tracepoint_event(sys_name, evt_name,
|
2010-04-14 20:05:17 +02:00
|
|
|
evt_length, attr, strp);
|
perf tools: Fix buffer overflow error when specifying all tracepoints
I found when specifying all tracepoints with -e to one of subcommand,
such as 'stat', the program will trigger a buffer overflow error, like
this:
*** buffer overflow detected ***: ./perf terminated
======= Backtrace: =========
/lib64/libc.so.6(__fortify_fail+0x37)[0x382cefb2c7]
....
The tracepoints are separated by comma, something like this:
$ perf stat -a -e `perf list |grep Tracepoint|awk -F'[' '{gsub(/[[:space:]]+/,"",$1);array[FNR]=$1}END{outputs=array[1];for (i=2;i<=FNR;i++){ outputs=outputs "," array[i];};print outputs}'`
The root reason of this problem is that store_event_type() is called for all
events, and will overflow the 'filename' at:
strncat(filename, orgname, strlen(orgname));
This patch fixes it by calling store_event_type() only when the event name has
been found.
LKML-Reference: <20110106093922.GB6713@hpt.nay.redhat.com>
Signed-off-by: Han Pingtian <phan@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2011-01-06 10:39:22 +01:00
|
|
|
}
|
2009-07-21 18:20:22 +02:00
|
|
|
}
|
|
|
|
|
2009-11-23 15:42:35 +01:00
|
|
|
static enum event_result
|
|
|
|
parse_breakpoint_type(const char *type, const char **strp,
|
|
|
|
struct perf_event_attr *attr)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < 3; i++) {
|
|
|
|
if (!type[i])
|
|
|
|
break;
|
|
|
|
|
|
|
|
switch (type[i]) {
|
|
|
|
case 'r':
|
|
|
|
attr->bp_type |= HW_BREAKPOINT_R;
|
|
|
|
break;
|
|
|
|
case 'w':
|
|
|
|
attr->bp_type |= HW_BREAKPOINT_W;
|
|
|
|
break;
|
|
|
|
case 'x':
|
|
|
|
attr->bp_type |= HW_BREAKPOINT_X;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return EVT_FAILED;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!attr->bp_type) /* Default */
|
|
|
|
attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
|
|
|
|
|
|
|
|
*strp = type + i;
|
|
|
|
|
|
|
|
return EVT_HANDLED;
|
|
|
|
}
|
|
|
|
|
|
|
|
static enum event_result
|
|
|
|
parse_breakpoint_event(const char **strp, struct perf_event_attr *attr)
|
|
|
|
{
|
|
|
|
const char *target;
|
|
|
|
const char *type;
|
|
|
|
char *endaddr;
|
|
|
|
u64 addr;
|
|
|
|
enum event_result err;
|
|
|
|
|
|
|
|
target = strchr(*strp, ':');
|
|
|
|
if (!target)
|
|
|
|
return EVT_FAILED;
|
|
|
|
|
|
|
|
if (strncmp(*strp, "mem", target - *strp) != 0)
|
|
|
|
return EVT_FAILED;
|
|
|
|
|
|
|
|
target++;
|
|
|
|
|
|
|
|
addr = strtoull(target, &endaddr, 0);
|
|
|
|
if (target == endaddr)
|
|
|
|
return EVT_FAILED;
|
|
|
|
|
|
|
|
attr->bp_addr = addr;
|
|
|
|
*strp = endaddr;
|
|
|
|
|
|
|
|
type = strchr(target, ':');
|
|
|
|
|
|
|
|
/* If no type is defined, just rw as default */
|
|
|
|
if (!type) {
|
|
|
|
attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
|
|
|
|
} else {
|
|
|
|
err = parse_breakpoint_type(++type, strp, attr);
|
|
|
|
if (err == EVT_FAILED)
|
|
|
|
return EVT_FAILED;
|
|
|
|
}
|
|
|
|
|
2010-06-24 21:36:19 +02:00
|
|
|
/*
|
|
|
|
* We should find a nice way to override the access length
|
|
|
|
* Provide some defaults for now
|
|
|
|
*/
|
|
|
|
if (attr->bp_type == HW_BREAKPOINT_X)
|
|
|
|
attr->bp_len = sizeof(long);
|
|
|
|
else
|
|
|
|
attr->bp_len = HW_BREAKPOINT_LEN_4;
|
|
|
|
|
2009-11-23 15:42:35 +01:00
|
|
|
attr->type = PERF_TYPE_BREAKPOINT;
|
|
|
|
|
|
|
|
return EVT_HANDLED;
|
|
|
|
}
|
|
|
|
|
2009-06-22 13:14:28 +02:00
|
|
|
static int check_events(const char *str, unsigned int i)
|
|
|
|
{
|
2009-07-01 05:04:34 +02:00
|
|
|
int n;
|
2009-06-22 13:14:28 +02:00
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
n = strlen(event_symbols[i].symbol);
|
2011-04-27 03:55:40 +02:00
|
|
|
if (!strncasecmp(str, event_symbols[i].symbol, n))
|
2009-07-01 05:04:34 +02:00
|
|
|
return n;
|
|
|
|
|
|
|
|
n = strlen(event_symbols[i].alias);
|
2011-04-27 03:55:40 +02:00
|
|
|
if (n) {
|
|
|
|
if (!strncasecmp(str, event_symbols[i].alias, n))
|
2009-07-01 05:04:34 +02:00
|
|
|
return n;
|
2011-04-27 03:55:40 +02:00
|
|
|
}
|
|
|
|
|
2009-06-22 13:14:28 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-09-11 23:19:45 +02:00
|
|
|
static enum event_result
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 12:02:48 +02:00
|
|
|
parse_symbolic_event(const char **strp, struct perf_event_attr *attr)
|
2009-05-26 11:10:09 +02:00
|
|
|
{
|
2009-07-01 05:04:34 +02:00
|
|
|
const char *str = *strp;
|
2009-05-26 11:10:09 +02:00
|
|
|
unsigned int i;
|
2009-07-01 05:04:34 +02:00
|
|
|
int n;
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
|
|
|
|
n = check_events(str, i);
|
|
|
|
if (n > 0) {
|
|
|
|
attr->type = event_symbols[i].type;
|
|
|
|
attr->config = event_symbols[i].config;
|
|
|
|
*strp = str + n;
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_HANDLED;
|
2009-07-01 05:04:34 +02:00
|
|
|
}
|
|
|
|
}
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_FAILED;
|
2009-07-01 05:04:34 +02:00
|
|
|
}
|
|
|
|
|
2009-09-11 23:19:45 +02:00
|
|
|
static enum event_result
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 12:02:48 +02:00
|
|
|
parse_raw_event(const char **strp, struct perf_event_attr *attr)
|
2009-07-01 05:04:34 +02:00
|
|
|
{
|
|
|
|
const char *str = *strp;
|
|
|
|
u64 config;
|
|
|
|
int n;
|
2009-06-06 09:58:57 +02:00
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
if (*str != 'r')
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_FAILED;
|
2009-07-01 05:04:34 +02:00
|
|
|
n = hex2u64(str + 1, &config);
|
|
|
|
if (n > 0) {
|
2011-07-23 04:10:43 +02:00
|
|
|
const char *end = str + n + 1;
|
|
|
|
if (*end != '\0' && *end != ',' && *end != ':')
|
|
|
|
return EVT_FAILED;
|
|
|
|
|
|
|
|
*strp = end;
|
2009-07-01 05:04:34 +02:00
|
|
|
attr->type = PERF_TYPE_RAW;
|
|
|
|
attr->config = config;
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_HANDLED;
|
2009-06-06 09:58:57 +02:00
|
|
|
}
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_FAILED;
|
2009-07-01 05:04:34 +02:00
|
|
|
}
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-09-11 23:19:45 +02:00
|
|
|
static enum event_result
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 12:02:48 +02:00
|
|
|
parse_numeric_event(const char **strp, struct perf_event_attr *attr)
|
2009-07-01 05:04:34 +02:00
|
|
|
{
|
|
|
|
const char *str = *strp;
|
|
|
|
char *endp;
|
|
|
|
unsigned long type;
|
|
|
|
u64 config;
|
|
|
|
|
|
|
|
type = strtoul(str, &endp, 0);
|
|
|
|
if (endp > str && type < PERF_TYPE_MAX && *endp == ':') {
|
|
|
|
str = endp + 1;
|
|
|
|
config = strtoul(str, &endp, 0);
|
|
|
|
if (endp > str) {
|
|
|
|
attr->type = type;
|
|
|
|
attr->config = config;
|
|
|
|
*strp = endp;
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_HANDLED;
|
2009-06-01 22:50:19 +02:00
|
|
|
}
|
2009-07-01 05:04:34 +02:00
|
|
|
}
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_FAILED;
|
2009-07-01 05:04:34 +02:00
|
|
|
}
|
|
|
|
|
2011-04-27 04:06:33 +02:00
|
|
|
static int
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 12:02:48 +02:00
|
|
|
parse_event_modifier(const char **strp, struct perf_event_attr *attr)
|
2009-07-01 05:04:34 +02:00
|
|
|
{
|
|
|
|
const char *str = *strp;
|
2010-04-08 23:03:20 +02:00
|
|
|
int exclude = 0;
|
|
|
|
int eu = 0, ek = 0, eh = 0, precise = 0;
|
2009-06-06 09:58:57 +02:00
|
|
|
|
2011-04-27 04:06:33 +02:00
|
|
|
if (!*str)
|
2009-06-06 09:58:57 +02:00
|
|
|
return 0;
|
2011-04-27 04:06:33 +02:00
|
|
|
|
2011-05-17 15:36:19 +02:00
|
|
|
if (*str == ',')
|
|
|
|
return 0;
|
|
|
|
|
2011-04-27 04:06:33 +02:00
|
|
|
if (*str++ != ':')
|
|
|
|
return -1;
|
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
while (*str) {
|
2010-04-08 23:03:20 +02:00
|
|
|
if (*str == 'u') {
|
|
|
|
if (!exclude)
|
|
|
|
exclude = eu = ek = eh = 1;
|
2009-07-01 05:04:34 +02:00
|
|
|
eu = 0;
|
2010-04-08 23:03:20 +02:00
|
|
|
} else if (*str == 'k') {
|
|
|
|
if (!exclude)
|
|
|
|
exclude = eu = ek = eh = 1;
|
2009-07-01 05:04:34 +02:00
|
|
|
ek = 0;
|
2010-04-08 23:03:20 +02:00
|
|
|
} else if (*str == 'h') {
|
|
|
|
if (!exclude)
|
|
|
|
exclude = eu = ek = eh = 1;
|
2009-07-01 05:04:34 +02:00
|
|
|
eh = 0;
|
2010-04-08 23:03:20 +02:00
|
|
|
} else if (*str == 'p') {
|
|
|
|
precise++;
|
|
|
|
} else
|
2009-07-01 05:04:34 +02:00
|
|
|
break;
|
2010-04-08 23:03:20 +02:00
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
++str;
|
2009-05-26 09:17:18 +02:00
|
|
|
}
|
2011-04-27 04:06:33 +02:00
|
|
|
if (str < *strp + 2)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
*strp = str;
|
|
|
|
|
|
|
|
attr->exclude_user = eu;
|
|
|
|
attr->exclude_kernel = ek;
|
|
|
|
attr->exclude_hv = eh;
|
|
|
|
attr->precise_ip = precise;
|
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
return 0;
|
|
|
|
}
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
/*
|
|
|
|
* Each event can have multiple symbolic names.
|
|
|
|
* Symbolic names are (almost) exactly matched.
|
|
|
|
*/
|
2009-09-11 23:19:45 +02:00
|
|
|
static enum event_result
|
2011-07-14 11:25:32 +02:00
|
|
|
parse_event_symbols(struct perf_evlist *evlist, const char **str,
|
2011-01-17 17:40:46 +01:00
|
|
|
struct perf_event_attr *attr)
|
2009-07-01 05:04:34 +02:00
|
|
|
{
|
2009-09-11 23:19:45 +02:00
|
|
|
enum event_result ret;
|
|
|
|
|
2011-07-14 11:25:32 +02:00
|
|
|
ret = parse_tracepoint_event(evlist, str, attr);
|
2009-09-11 23:19:45 +02:00
|
|
|
if (ret != EVT_FAILED)
|
|
|
|
goto modifier;
|
|
|
|
|
|
|
|
ret = parse_raw_event(str, attr);
|
|
|
|
if (ret != EVT_FAILED)
|
|
|
|
goto modifier;
|
2009-06-06 09:58:57 +02:00
|
|
|
|
2009-09-11 23:19:45 +02:00
|
|
|
ret = parse_numeric_event(str, attr);
|
|
|
|
if (ret != EVT_FAILED)
|
|
|
|
goto modifier;
|
|
|
|
|
|
|
|
ret = parse_symbolic_event(str, attr);
|
|
|
|
if (ret != EVT_FAILED)
|
|
|
|
goto modifier;
|
|
|
|
|
|
|
|
ret = parse_generic_hw_event(str, attr);
|
|
|
|
if (ret != EVT_FAILED)
|
|
|
|
goto modifier;
|
|
|
|
|
2009-11-23 15:42:35 +01:00
|
|
|
ret = parse_breakpoint_event(str, attr);
|
|
|
|
if (ret != EVT_FAILED)
|
|
|
|
goto modifier;
|
|
|
|
|
2009-10-27 01:33:04 +01:00
|
|
|
fprintf(stderr, "invalid or unsupported event: '%s'\n", *str);
|
|
|
|
fprintf(stderr, "Run 'perf list' for a list of valid events\n");
|
2009-09-11 23:19:45 +02:00
|
|
|
return EVT_FAILED;
|
|
|
|
|
|
|
|
modifier:
|
2011-04-27 04:06:33 +02:00
|
|
|
if (parse_event_modifier(str, attr) < 0) {
|
|
|
|
fprintf(stderr, "invalid event modifier: '%s'\n", *str);
|
|
|
|
fprintf(stderr, "Run 'perf list' for a list of valid events and modifiers\n");
|
|
|
|
|
|
|
|
return EVT_FAILED;
|
|
|
|
}
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-09-11 23:19:45 +02:00
|
|
|
return ret;
|
2009-05-26 11:10:09 +02:00
|
|
|
}
|
|
|
|
|
2011-07-14 11:25:32 +02:00
|
|
|
int parse_events(struct perf_evlist *evlist , const char *str, int unset __used)
|
2009-05-26 11:10:09 +02:00
|
|
|
{
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 12:02:48 +02:00
|
|
|
struct perf_event_attr attr;
|
2009-09-11 23:19:45 +02:00
|
|
|
enum event_result ret;
|
2011-02-16 14:10:01 +01:00
|
|
|
const char *ostr;
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
for (;;) {
|
2011-02-16 14:10:01 +01:00
|
|
|
ostr = str;
|
2009-07-01 05:04:34 +02:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
2011-07-14 11:25:32 +02:00
|
|
|
ret = parse_event_symbols(evlist, &str, &attr);
|
2009-09-11 23:19:45 +02:00
|
|
|
if (ret == EVT_FAILED)
|
2009-07-01 05:04:34 +02:00
|
|
|
return -1;
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
if (!(*str == 0 || *str == ',' || isspace(*str)))
|
|
|
|
return -1;
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-09-11 23:19:45 +02:00
|
|
|
if (ret != EVT_HANDLED_ALL) {
|
2011-01-03 19:39:04 +01:00
|
|
|
struct perf_evsel *evsel;
|
2011-01-11 23:56:53 +01:00
|
|
|
evsel = perf_evsel__new(&attr, evlist->nr_entries);
|
2011-01-03 19:39:04 +01:00
|
|
|
if (evsel == NULL)
|
|
|
|
return -1;
|
2011-01-11 23:56:53 +01:00
|
|
|
perf_evlist__add(evlist, evsel);
|
2011-02-16 14:10:01 +01:00
|
|
|
|
|
|
|
evsel->name = calloc(str - ostr + 1, 1);
|
|
|
|
if (!evsel->name)
|
|
|
|
return -1;
|
|
|
|
strncpy(evsel->name, ostr, str - ostr);
|
2009-09-11 23:19:45 +02:00
|
|
|
}
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-07-01 05:04:34 +02:00
|
|
|
if (*str == 0)
|
|
|
|
break;
|
|
|
|
if (*str == ',')
|
|
|
|
++str;
|
|
|
|
while (isspace(*str))
|
|
|
|
++str;
|
2009-05-26 11:10:09 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-07-14 11:25:32 +02:00
|
|
|
int parse_events_option(const struct option *opt, const char *str,
|
|
|
|
int unset __used)
|
|
|
|
{
|
|
|
|
struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
|
|
|
|
return parse_events(evlist, str, unset);
|
|
|
|
}
|
|
|
|
|
2011-01-11 23:56:53 +01:00
|
|
|
int parse_filter(const struct option *opt, const char *str,
|
2009-10-15 05:22:07 +02:00
|
|
|
int unset __used)
|
|
|
|
{
|
2011-01-11 23:56:53 +01:00
|
|
|
struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
|
2011-01-03 19:39:04 +01:00
|
|
|
struct perf_evsel *last = NULL;
|
2009-10-15 05:22:07 +02:00
|
|
|
|
2011-01-11 23:56:53 +01:00
|
|
|
if (evlist->nr_entries > 0)
|
|
|
|
last = list_entry(evlist->entries.prev, struct perf_evsel, node);
|
2011-01-03 19:39:04 +01:00
|
|
|
|
|
|
|
if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
|
2009-10-15 05:22:07 +02:00
|
|
|
fprintf(stderr,
|
|
|
|
"-F option should follow a -e tracepoint option\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2011-01-03 19:39:04 +01:00
|
|
|
last->filter = strdup(str);
|
|
|
|
if (last->filter == NULL) {
|
2009-10-15 05:22:07 +02:00
|
|
|
fprintf(stderr, "not enough memory to hold filter string\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-06-06 12:24:17 +02:00
|
|
|
static const char * const event_type_descriptors[] = {
|
|
|
|
"Hardware event",
|
|
|
|
"Software event",
|
|
|
|
"Tracepoint event",
|
|
|
|
"Hardware cache event",
|
2009-12-29 09:37:07 +01:00
|
|
|
"Raw hardware event descriptor",
|
|
|
|
"Hardware breakpoint",
|
2009-06-06 12:24:17 +02:00
|
|
|
};
|
|
|
|
|
2009-07-21 18:20:22 +02:00
|
|
|
/*
|
|
|
|
* Print the events from <debugfs_mount_point>/tracing/events
|
|
|
|
*/
|
|
|
|
|
2011-02-17 18:38:58 +01:00
|
|
|
void print_tracepoint_events(const char *subsys_glob, const char *event_glob)
|
2009-07-21 18:20:22 +02:00
|
|
|
{
|
|
|
|
DIR *sys_dir, *evt_dir;
|
|
|
|
struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
|
|
|
|
char evt_path[MAXPATHLEN];
|
2009-09-24 15:39:09 +02:00
|
|
|
char dir_path[MAXPATHLEN];
|
2009-07-21 18:20:22 +02:00
|
|
|
|
2009-11-08 16:03:07 +01:00
|
|
|
if (debugfs_valid_mountpoint(debugfs_path))
|
2009-07-21 18:20:22 +02:00
|
|
|
return;
|
|
|
|
|
2009-07-21 20:16:29 +02:00
|
|
|
sys_dir = opendir(debugfs_path);
|
2009-07-21 18:20:22 +02:00
|
|
|
if (!sys_dir)
|
2009-09-24 15:39:09 +02:00
|
|
|
return;
|
2009-09-04 21:39:51 +02:00
|
|
|
|
|
|
|
for_each_subsystem(sys_dir, sys_dirent, sys_next) {
|
2011-02-17 18:38:58 +01:00
|
|
|
if (subsys_glob != NULL &&
|
|
|
|
!strglobmatch(sys_dirent.d_name, subsys_glob))
|
|
|
|
continue;
|
2009-09-24 15:39:09 +02:00
|
|
|
|
|
|
|
snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
|
|
|
|
sys_dirent.d_name);
|
|
|
|
evt_dir = opendir(dir_path);
|
|
|
|
if (!evt_dir)
|
2009-09-04 21:39:51 +02:00
|
|
|
continue;
|
2009-09-24 15:39:09 +02:00
|
|
|
|
2009-09-04 21:39:51 +02:00
|
|
|
for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
|
2011-02-17 18:38:58 +01:00
|
|
|
if (event_glob != NULL &&
|
|
|
|
!strglobmatch(evt_dirent.d_name, event_glob))
|
|
|
|
continue;
|
|
|
|
|
2009-07-21 18:20:22 +02:00
|
|
|
snprintf(evt_path, MAXPATHLEN, "%s:%s",
|
|
|
|
sys_dirent.d_name, evt_dirent.d_name);
|
2011-04-29 22:52:42 +02:00
|
|
|
printf(" %-50s [%s]\n", evt_path,
|
2009-12-29 09:37:07 +01:00
|
|
|
event_type_descriptors[PERF_TYPE_TRACEPOINT]);
|
2009-07-21 18:20:22 +02:00
|
|
|
}
|
|
|
|
closedir(evt_dir);
|
|
|
|
}
|
|
|
|
closedir(sys_dir);
|
|
|
|
}
|
|
|
|
|
2011-01-03 17:50:45 +01:00
|
|
|
/*
|
|
|
|
* Check whether event is in <debugfs_mount_point>/tracing/events
|
|
|
|
*/
|
|
|
|
|
|
|
|
int is_valid_tracepoint(const char *event_string)
|
|
|
|
{
|
|
|
|
DIR *sys_dir, *evt_dir;
|
|
|
|
struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
|
|
|
|
char evt_path[MAXPATHLEN];
|
|
|
|
char dir_path[MAXPATHLEN];
|
|
|
|
|
|
|
|
if (debugfs_valid_mountpoint(debugfs_path))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
sys_dir = opendir(debugfs_path);
|
|
|
|
if (!sys_dir)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
for_each_subsystem(sys_dir, sys_dirent, sys_next) {
|
|
|
|
|
|
|
|
snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
|
|
|
|
sys_dirent.d_name);
|
|
|
|
evt_dir = opendir(dir_path);
|
|
|
|
if (!evt_dir)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
|
|
|
|
snprintf(evt_path, MAXPATHLEN, "%s:%s",
|
|
|
|
sys_dirent.d_name, evt_dirent.d_name);
|
|
|
|
if (!strcmp(evt_path, event_string)) {
|
|
|
|
closedir(evt_dir);
|
|
|
|
closedir(sys_dir);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
closedir(evt_dir);
|
|
|
|
}
|
|
|
|
closedir(sys_dir);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-02-17 18:38:58 +01:00
|
|
|
void print_events_type(u8 type)
|
|
|
|
{
|
|
|
|
struct event_symbol *syms = event_symbols;
|
|
|
|
unsigned int i;
|
|
|
|
char name[64];
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
|
|
|
|
if (type != syms->type)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (strlen(syms->alias))
|
|
|
|
snprintf(name, sizeof(name), "%s OR %s",
|
|
|
|
syms->symbol, syms->alias);
|
|
|
|
else
|
|
|
|
snprintf(name, sizeof(name), "%s", syms->symbol);
|
|
|
|
|
2011-04-29 22:52:42 +02:00
|
|
|
printf(" %-50s [%s]\n", name,
|
2011-02-17 18:38:58 +01:00
|
|
|
event_type_descriptors[type]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int print_hwcache_events(const char *event_glob)
|
|
|
|
{
|
|
|
|
unsigned int type, op, i, printed = 0;
|
|
|
|
|
|
|
|
for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
|
|
|
|
for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
|
|
|
|
/* skip invalid cache type */
|
|
|
|
if (!is_cache_op_valid(type, op))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
|
|
|
|
char *name = event_cache_name(type, op, i);
|
|
|
|
|
2011-04-29 22:52:42 +02:00
|
|
|
if (event_glob != NULL && !strglobmatch(name, event_glob))
|
2011-02-17 18:38:58 +01:00
|
|
|
continue;
|
|
|
|
|
2011-04-29 22:52:42 +02:00
|
|
|
printf(" %-50s [%s]\n", name,
|
2011-02-17 18:38:58 +01:00
|
|
|
event_type_descriptors[PERF_TYPE_HW_CACHE]);
|
|
|
|
++printed;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return printed;
|
|
|
|
}
|
|
|
|
|
2011-04-29 22:52:42 +02:00
|
|
|
#define MAX_NAME_LEN 100
|
|
|
|
|
2009-05-26 11:10:09 +02:00
|
|
|
/*
|
2009-06-06 12:24:17 +02:00
|
|
|
* Print the help text for the event symbols:
|
2009-05-26 11:10:09 +02:00
|
|
|
*/
|
2011-02-17 18:38:58 +01:00
|
|
|
void print_events(const char *event_glob)
|
2009-05-26 11:10:09 +02:00
|
|
|
{
|
2011-02-17 18:38:58 +01:00
|
|
|
unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0;
|
2011-04-29 22:52:42 +02:00
|
|
|
struct event_symbol *syms = event_symbols;
|
|
|
|
char name[MAX_NAME_LEN];
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-10-27 01:33:05 +01:00
|
|
|
printf("\n");
|
|
|
|
printf("List of pre-defined events (to be used in -e):\n");
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-06-06 12:24:17 +02:00
|
|
|
for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
|
2009-12-29 09:37:07 +01:00
|
|
|
type = syms->type;
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2011-02-17 18:38:58 +01:00
|
|
|
if (type != prev_type && printed) {
|
2009-10-27 01:33:05 +01:00
|
|
|
printf("\n");
|
2011-02-17 18:38:58 +01:00
|
|
|
printed = 0;
|
|
|
|
ntypes_printed++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (event_glob != NULL &&
|
|
|
|
!(strglobmatch(syms->symbol, event_glob) ||
|
|
|
|
(syms->alias && strglobmatch(syms->alias, event_glob))))
|
|
|
|
continue;
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-06-22 13:14:28 +02:00
|
|
|
if (strlen(syms->alias))
|
2011-04-29 22:52:42 +02:00
|
|
|
snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
|
2009-06-22 13:14:28 +02:00
|
|
|
else
|
2011-04-29 22:52:42 +02:00
|
|
|
strncpy(name, syms->symbol, MAX_NAME_LEN);
|
|
|
|
printf(" %-50s [%s]\n", name,
|
2009-06-06 12:24:17 +02:00
|
|
|
event_type_descriptors[type]);
|
2009-05-26 11:10:09 +02:00
|
|
|
|
2009-06-06 12:24:17 +02:00
|
|
|
prev_type = type;
|
2011-02-17 18:38:58 +01:00
|
|
|
++printed;
|
2009-05-26 11:10:09 +02:00
|
|
|
}
|
|
|
|
|
2011-02-17 18:38:58 +01:00
|
|
|
if (ntypes_printed) {
|
|
|
|
printed = 0;
|
|
|
|
printf("\n");
|
2009-07-01 15:06:18 +02:00
|
|
|
}
|
2011-02-17 18:38:58 +01:00
|
|
|
print_hwcache_events(event_glob);
|
|
|
|
|
|
|
|
if (event_glob != NULL)
|
|
|
|
return;
|
2009-07-01 15:06:18 +02:00
|
|
|
|
2009-10-27 01:33:05 +01:00
|
|
|
printf("\n");
|
2011-04-29 22:52:42 +02:00
|
|
|
printf(" %-50s [%s]\n",
|
2010-05-07 19:07:05 +02:00
|
|
|
"rNNN (see 'perf list --help' on how to encode it)",
|
|
|
|
event_type_descriptors[PERF_TYPE_RAW]);
|
2009-10-27 01:33:05 +01:00
|
|
|
printf("\n");
|
2009-06-06 12:24:17 +02:00
|
|
|
|
2011-04-29 22:52:42 +02:00
|
|
|
printf(" %-50s [%s]\n",
|
2009-12-29 09:37:07 +01:00
|
|
|
"mem:<addr>[:access]",
|
|
|
|
event_type_descriptors[PERF_TYPE_BREAKPOINT]);
|
2009-11-23 15:42:35 +01:00
|
|
|
printf("\n");
|
|
|
|
|
2011-02-17 18:38:58 +01:00
|
|
|
print_tracepoint_events(NULL, NULL);
|
2009-05-26 11:10:09 +02:00
|
|
|
}
|