perf/x86/intel: Add Icelake support

Add Icelake core PMU perf code, including constraint tables and the main
enable code.

Icelake expanded the generic counters to always 8 even with HT on, but a
range of events cannot be scheduled on the extra 4 counters.
Add new constraint ranges to describe this to the scheduler.
The number of constraints that need to be checked is larger now than
with earlier CPUs.
At some point we may need a new data structure to look them up more
efficiently than with linear search. So far it still seems to be
acceptable however.

Icelake added a new fixed counter SLOTS. Full support for it is added
later in the patch series.

The cache events table is identical to Skylake.

Compare to PEBS instruction event on generic counter, fixed counter 0
has less skid. Force instruction:ppp always in fixed counter 0.

Originally-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Cc: jolsa@kernel.org
Link: https://lkml.kernel.org/r/20190402194509.2832-9-kan.liang@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Kan Liang 2019-04-02 12:45:05 -07:00 committed by Ingo Molnar
parent 63b79f6ebc
commit 6017608936
5 changed files with 138 additions and 4 deletions

View File

@ -239,6 +239,35 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
static struct event_constraint intel_icl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */
INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */
INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */
INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */
INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf),
EVENT_CONSTRAINT_END
};
static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
EVENT_EXTRA_END
};
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
@ -3374,6 +3403,9 @@ static struct event_constraint counter0_constraint =
static struct event_constraint counter2_constraint =
EVENT_CONSTRAINT(0, 0x4, 0);
static struct event_constraint fixed0_constraint =
FIXED_EVENT_CONSTRAINT(0x00c0, 0);
static struct event_constraint *
hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@ -3392,6 +3424,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return c;
}
static struct event_constraint *
icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
/*
* Fixed counter 0 has less skid.
* Force instruction:ppp in Fixed counter 0
*/
if ((event->attr.precise_ip == 3) &&
constraint_match(&fixed0_constraint, event->hw.config))
return &fixed0_constraint;
return hsw_get_event_constraints(cpuc, idx, event);
}
static struct event_constraint *
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@ -4124,6 +4171,42 @@ static struct attribute *hsw_tsx_events_attrs[] = {
NULL
};
EVENT_ATTR_STR(tx-capacity-read, tx_capacity_read, "event=0x54,umask=0x80");
EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2");
EVENT_ATTR_STR(el-capacity-read, el_capacity_read, "event=0x54,umask=0x80");
EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2");
static struct attribute *icl_events_attrs[] = {
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
NULL,
};
static struct attribute *icl_tsx_events_attrs[] = {
EVENT_PTR(tx_start),
EVENT_PTR(tx_abort),
EVENT_PTR(tx_commit),
EVENT_PTR(tx_capacity_read),
EVENT_PTR(tx_capacity_write),
EVENT_PTR(tx_conflict),
EVENT_PTR(el_start),
EVENT_PTR(el_abort),
EVENT_PTR(el_commit),
EVENT_PTR(el_capacity_read),
EVENT_PTR(el_capacity_write),
EVENT_PTR(el_conflict),
EVENT_PTR(cycles_t),
EVENT_PTR(cycles_ct),
NULL,
};
static __init struct attribute **get_icl_events_attrs(void)
{
return boot_cpu_has(X86_FEATURE_RTM) ?
merge_attr(icl_events_attrs, icl_tsx_events_attrs) :
icl_events_attrs;
}
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
@ -4757,6 +4840,34 @@ __init int intel_pmu_init(void)
name = "skylake";
break;
case INTEL_FAM6_ICELAKE_MOBILE:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
intel_pmu_lbr_init_skl();
x86_pmu.event_constraints = intel_icl_event_constraints;
x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints;
x86_pmu.extra_regs = intel_icl_extra_regs;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = icl_get_event_constraints;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
extra_attr = merge_attr(extra_attr, skl_format_attr);
x86_pmu.cpu_events = get_icl_events_attrs();
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(false);
pr_cont("Icelake events, ");
name = "icelake";
break;
default:
switch (x86_pmu.version) {
case 1:

View File

@ -849,6 +849,26 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
struct event_constraint intel_icl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */
INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
/*
* Everything else is handled by PMU_FL_PEBS_ALL, because we
* need the full constraints from the main table.
*/
EVENT_CONSTRAINT_END
};
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;
@ -1053,7 +1073,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
cpuc->pebs_enabled |= 1ULL << hwc->idx;
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled |= 1ULL << 63;
@ -1105,7 +1125,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
(x86_pmu.version < 5))
cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);

View File

@ -999,6 +999,8 @@ extern struct event_constraint intel_bdw_pebs_event_constraints[];
extern struct event_constraint intel_skl_pebs_event_constraints[];
extern struct event_constraint intel_icl_pebs_event_constraints[];
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_add(struct perf_event *event);

View File

@ -8,7 +8,7 @@
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 8
#define MAX_FIXED_PEBS_EVENTS 3
#define MAX_FIXED_PEBS_EVENTS 4
/*
* A debug store configuration.

View File

@ -7,7 +7,7 @@
*/
#define INTEL_PMC_MAX_GENERIC 32
#define INTEL_PMC_MAX_FIXED 3
#define INTEL_PMC_MAX_FIXED 4
#define INTEL_PMC_IDX_FIXED 32
#define X86_PMC_IDX_MAX 64