Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "The main changes in this cycle were:

  Kernel side changes:

   - Kprobes and uprobes changes:
      - Make their trampolines read-only while they are used
      - Make UPROBES_EVENTS default-y, which is the distro practice
      - Apply misc fixes and robustness improvements to probe point insertion.

   - add support for AMD IOMMU events

   - extend hw events on Intel Goldmont CPUs

   - ... plus misc fixes and updates.

  Tooling side changes:

   - support s390 jump instructions in perf annotate (Christian
     Borntraeger)

   - vendor hardware events updates (Andi Kleen)

   - add argument support for SDT events in powerpc (Ravi Bangoria)

   - beautify the statx syscall arguments in 'perf trace' (Arnaldo
     Carvalho de Melo)

   - handle inline functions in callchains (Jin Yao)

   - enable sorting by srcline as key (Milian Wolff)

   - add 'brstackinsn' field in 'perf script' to reuse the x86
     instruction decoder used in the Intel PT code to study hot paths to
     samples (Andi Kleen)

   - add PERF_RECORD_NAMESPACES so that the kernel can record
     information required to associate samples to namespaces, helping in
     container problem characterization. (Hari Bathini)

   - allow sorting by symbol_size in 'perf report' and 'perf top'
     (Charles Baylis)

   - in perf stat, make system-wide (-a) the default option if no target
     was specified and one of the following conditions is met:
      - no workload specified (current behaviour)
      - a workload is specified but all requested events are system wide
        ones, like uncore ones. (Jiri Olsa)

   - ... plus lots of other updates, enhancements, cleanups and fixes"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (235 commits)
  perf tools: Fix the code to strip command name
  tools arch x86: Sync cpufeatures.h
  tools arch: Sync arch/x86/lib/memcpy_64.S with the kernel
  tools: Update asm-generic/mman-common.h copy from the kernel
  perf tools: Use just forward declarations for struct thread where possible
  perf tools: Add the right header to obtain PERF_ALIGN()
  perf tools: Remove poll.h and wait.h from util.h
  perf tools: Remove string.h, unistd.h and sys/stat.h from util.h
  perf tools: Remove stale prototypes from builtin.h
  perf tools: Remove string.h from util.h
  perf tools: Remove sys/ioctl.h from util.h
  perf tools: Remove a few more needless includes from util.h
  perf tools: Include sys/param.h where needed
  perf callchain: Move callchain specific routines from util.[ch]
  perf tools: Add compress.h for the *_decompress_to_file() headers
  perf mem: Fix display of data source snoop indication
  perf debug: Move dump_stack() and sighandler_dump_stack() to debug.h
  perf kvm: Make function only used by 'perf kvm' static
  perf tools: Move timestamp routines from util.h to time-utils.h
  perf tools: Move units conversion/formatting routines to separate object
  ...
Linus Torvalds 2017-05-01 20:23:17 -07:00
commit 7c8c03bfc7
360 changed files with 8148 additions and 2229 deletions

@ -8,8 +8,9 @@ Overview
--------
These events are similar to tracepoint based events. Instead of Tracepoint,
this is based on kprobes (kprobe and kretprobe). So it can probe wherever
kprobes can probe (this means, all functions body except for __kprobes
functions). Unlike the Tracepoint based event, this can be added and removed
kprobes can probe (this means, all functions except those with
__kprobes/nokprobe_inline annotation and those marked NOKPROBE_SYMBOL).
Unlike the Tracepoint based event, this can be added and removed
dynamically, on the fly.
To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
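
For readers new to this interface, a minimal stand-alone sketch (not part of this patch) of adding and removing such an event through the tracefs control file follows; the tracefs mount point, the group/event name and the probed symbol (do_sys_open) are illustrative assumptions.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *ctl = "/sys/kernel/debug/tracing/kprobe_events";
	FILE *f;

	/* Add a probe at the entry of do_sys_open, capturing its first argument. */
	f = fopen(ctl, "a");
	if (!f) { perror("fopen"); return EXIT_FAILURE; }
	fprintf(f, "p:mygroup/myopen do_sys_open dfd=%%di\n");
	fclose(f);

	/* ... enable events/mygroup/myopen/enable and read the trace here ... */

	/* Remove the probe again -- no rebuild or reboot needed. */
	f = fopen(ctl, "a");
	if (!f) { perror("fopen"); return EXIT_FAILURE; }
	fprintf(f, "-:mygroup/myopen\n");
	fclose(f);
	return 0;
}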

@ -11,6 +11,8 @@
* published by the Free Software Foundation.
*/
#define pr_fmt(fmt) "perf/amd_iommu: " fmt
#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/cpumask.h>
@ -21,44 +23,42 @@
#define COUNTER_SHIFT 16
#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8))
#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg))
/* iommu pmu conf masks */
#define GET_CSOURCE(x) ((x)->conf & 0xFFULL)
#define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL)
#define GET_DOMID(x) (((x)->conf >> 24) & 0xFFFFULL)
#define GET_PASID(x) (((x)->conf >> 40) & 0xFFFFFULL)
/* iommu pmu config masks */
#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL))
#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL)
#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL)
#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL)
#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL)
#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
/* iommu pmu conf1 masks */
#define GET_DEVID_MASK(x) ((x)->conf1 & 0xFFFFULL)
#define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL)
#define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL)
static struct perf_amd_iommu __perf_iommu;
#define IOMMU_NAME_SIZE 16
struct perf_amd_iommu {
struct list_head list;
struct pmu pmu;
struct amd_iommu *iommu;
char name[IOMMU_NAME_SIZE];
u8 max_banks;
u8 max_counters;
u64 cntr_assign_mask;
raw_spinlock_t lock;
const struct attribute_group *attr_groups[4];
};
#define format_group attr_groups[0]
#define cpumask_group attr_groups[1]
#define events_group attr_groups[2]
#define null_group attr_groups[3]
static LIST_HEAD(perf_amd_iommu_list);
/*---------------------------------------------
* sysfs format attributes
*---------------------------------------------*/
PMU_FORMAT_ATTR(csource, "config:0-7");
PMU_FORMAT_ATTR(devid, "config:8-23");
PMU_FORMAT_ATTR(pasid, "config:24-39");
PMU_FORMAT_ATTR(domid, "config:40-55");
PMU_FORMAT_ATTR(domid, "config:24-39");
PMU_FORMAT_ATTR(pasid, "config:40-59");
PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
PMU_FORMAT_ATTR(domid_mask, "config1:16-31");
PMU_FORMAT_ATTR(pasid_mask, "config1:32-51");
static struct attribute *iommu_format_attrs[] = {
&format_attr_csource.attr,
@ -79,6 +79,10 @@ static struct attribute_group amd_iommu_format_group = {
/*---------------------------------------------
* sysfs events attributes
*---------------------------------------------*/
static struct attribute_group amd_iommu_events_group = {
.name = "events",
};
struct amd_iommu_event_desc {
struct kobj_attribute attr;
const char *event;
@ -150,30 +154,34 @@ static struct attribute_group amd_iommu_cpumask_group = {
/*---------------------------------------------*/
static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
static int get_next_avail_iommu_bnk_cntr(struct perf_event *event)
{
struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu);
int max_cntrs = piommu->max_counters;
int max_banks = piommu->max_banks;
u32 shift, bank, cntr;
unsigned long flags;
int shift, bank, cntr, retval;
int max_banks = perf_iommu->max_banks;
int max_cntrs = perf_iommu->max_counters;
int retval;
raw_spin_lock_irqsave(&perf_iommu->lock, flags);
raw_spin_lock_irqsave(&piommu->lock, flags);
for (bank = 0, shift = 0; bank < max_banks; bank++) {
for (cntr = 0; cntr < max_cntrs; cntr++) {
shift = bank + (bank*3) + cntr;
if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
if (piommu->cntr_assign_mask & BIT_ULL(shift)) {
continue;
} else {
perf_iommu->cntr_assign_mask |= (1ULL<<shift);
retval = ((u16)((u16)bank<<8) | (u8)(cntr));
piommu->cntr_assign_mask |= BIT_ULL(shift);
event->hw.iommu_bank = bank;
event->hw.iommu_cntr = cntr;
retval = 0;
goto out;
}
}
}
retval = -ENOSPC;
out:
raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
raw_spin_unlock_irqrestore(&piommu->lock, flags);
return retval;
}
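
As an aside, the shift computed above packs each (bank, counter) pair into one bit of cntr_assign_mask; a tiny stand-alone sketch of the arithmetic (the four-counters-per-bank spacing is an assumption implied by the formula, not stated in this hunk):

#include <stdio.h>

int main(void)
{
	unsigned int bank = 2, cntr = 3;
	unsigned int shift = bank + (bank * 3) + cntr;	/* 4*bank + cntr = 11 */

	printf("bank %u, counter %u -> cntr_assign_mask bit %u\n", bank, cntr, shift);
	return 0;
}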
@ -202,8 +210,6 @@ static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
static int perf_iommu_event_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_amd_iommu *perf_iommu;
u64 config, config1;
/* test the event attr type check for PMU enumeration */
if (event->attr.type != event->pmu->type)
@ -225,80 +231,62 @@ static int perf_iommu_event_init(struct perf_event *event)
if (event->cpu < 0)
return -EINVAL;
perf_iommu = &__perf_iommu;
if (event->pmu != &perf_iommu->pmu)
return -ENOENT;
if (perf_iommu) {
config = event->attr.config;
config1 = event->attr.config1;
} else {
return -EINVAL;
}
/* integrate with iommu base devid (0000), assume one iommu */
perf_iommu->max_banks =
amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
perf_iommu->max_counters =
amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
return -EINVAL;
/* update the hw_perf_event struct with the iommu config data */
hwc->config = config;
hwc->extra_reg.config = config1;
hwc->conf = event->attr.config;
hwc->conf1 = event->attr.config1;
return 0;
}
static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev)
{
return (container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu;
}
static void perf_iommu_enable_event(struct perf_event *ev)
{
u8 csource = _GET_CSOURCE(ev);
u16 devid = _GET_DEVID(ev);
struct amd_iommu *iommu = perf_event_2_iommu(ev);
struct hw_perf_event *hwc = &ev->hw;
u8 bank = hwc->iommu_bank;
u8 cntr = hwc->iommu_cntr;
u64 reg = 0ULL;
reg = csource;
amd_iommu_pc_get_set_reg_val(devid,
_GET_BANK(ev), _GET_CNTR(ev) ,
IOMMU_PC_COUNTER_SRC_REG, &reg, true);
reg = GET_CSOURCE(hwc);
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, &reg);
reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
reg = GET_DEVID_MASK(hwc);
reg = GET_DEVID(hwc) | (reg << 32);
if (reg)
reg |= (1UL << 31);
amd_iommu_pc_get_set_reg_val(devid,
_GET_BANK(ev), _GET_CNTR(ev) ,
IOMMU_PC_DEVID_MATCH_REG, &reg, true);
reg |= BIT(31);
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg);
reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
reg = GET_PASID_MASK(hwc);
reg = GET_PASID(hwc) | (reg << 32);
if (reg)
reg |= (1UL << 31);
amd_iommu_pc_get_set_reg_val(devid,
_GET_BANK(ev), _GET_CNTR(ev) ,
IOMMU_PC_PASID_MATCH_REG, &reg, true);
reg |= BIT(31);
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg);
reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
reg = GET_DOMID_MASK(hwc);
reg = GET_DOMID(hwc) | (reg << 32);
if (reg)
reg |= (1UL << 31);
amd_iommu_pc_get_set_reg_val(devid,
_GET_BANK(ev), _GET_CNTR(ev) ,
IOMMU_PC_DOMID_MATCH_REG, &reg, true);
reg |= BIT(31);
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg);
}
static void perf_iommu_disable_event(struct perf_event *event)
{
struct amd_iommu *iommu = perf_event_2_iommu(event);
struct hw_perf_event *hwc = &event->hw;
u64 reg = 0ULL;
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
_GET_BANK(event), _GET_CNTR(event),
IOMMU_PC_COUNTER_SRC_REG, &reg, true);
amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
IOMMU_PC_COUNTER_SRC_REG, &reg);
}
static void perf_iommu_start(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
pr_debug("perf: amd_iommu:perf_iommu_start\n");
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return;
@ -306,10 +294,11 @@ static void perf_iommu_start(struct perf_event *event, int flags)
hwc->state = 0;
if (flags & PERF_EF_RELOAD) {
u64 prev_raw_count = local64_read(&hwc->prev_count);
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
_GET_BANK(event), _GET_CNTR(event),
IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
u64 prev_raw_count = local64_read(&hwc->prev_count);
struct amd_iommu *iommu = perf_event_2_iommu(event);
amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
IOMMU_PC_COUNTER_REG, &prev_raw_count);
}
perf_iommu_enable_event(event);
@ -319,37 +308,30 @@ static void perf_iommu_start(struct perf_event *event, int flags)
static void perf_iommu_read(struct perf_event *event)
{
u64 count = 0ULL;
u64 prev_raw_count = 0ULL;
u64 delta = 0ULL;
u64 count, prev, delta;
struct hw_perf_event *hwc = &event->hw;
pr_debug("perf: amd_iommu:perf_iommu_read\n");
struct amd_iommu *iommu = perf_event_2_iommu(event);
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
_GET_BANK(event), _GET_CNTR(event),
IOMMU_PC_COUNTER_REG, &count, false);
/* IOMMU pc counter register is only 48 bits */
count &= 0xFFFFFFFFFFFFULL;
prev_raw_count = local64_read(&hwc->prev_count);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
count) != prev_raw_count)
if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
IOMMU_PC_COUNTER_REG, &count))
return;
/* Handling 48-bit counter overflowing */
delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
/* IOMMU pc counter register is only 48 bits */
count &= GENMASK_ULL(47, 0);
prev = local64_read(&hwc->prev_count);
if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev)
return;
/* Handle 48-bit counter overflow */
delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
delta >>= COUNTER_SHIFT;
local64_add(delta, &event->count);
}
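
The COUNTER_SHIFT dance above is how the 48-bit IOMMU counter wrap is absorbed; a stand-alone sketch of the same arithmetic (the sample values are made up):

#include <stdint.h>
#include <stdio.h>

#define COUNTER_SHIFT 16

/* Shifting both samples into the top 48 bits of a u64 lets the unsigned
 * subtraction wrap correctly across a 48-bit counter overflow. */
static uint64_t delta48(uint64_t prev, uint64_t count)
{
	uint64_t delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);

	return delta >> COUNTER_SHIFT;
}

int main(void)
{
	uint64_t prev  = 0xFFFFFFFFFFF0ULL;	/* 2^48 - 16 */
	uint64_t count = 0x10ULL;		/* counter wrapped past 2^48 */

	printf("delta = %llu\n", (unsigned long long)delta48(prev, count)); /* 32 */
	return 0;
}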
static void perf_iommu_stop(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
u64 config;
pr_debug("perf: amd_iommu:perf_iommu_stop\n");
if (hwc->state & PERF_HES_UPTODATE)
return;
@ -361,7 +343,6 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
if (hwc->state & PERF_HES_UPTODATE)
return;
config = hwc->config;
perf_iommu_read(event);
hwc->state |= PERF_HES_UPTODATE;
}
@ -369,17 +350,12 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
static int perf_iommu_add(struct perf_event *event, int flags)
{
int retval;
struct perf_amd_iommu *perf_iommu =
container_of(event->pmu, struct perf_amd_iommu, pmu);
pr_debug("perf: amd_iommu:perf_iommu_add\n");
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
/* request an iommu bank/counter */
retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
if (retval != -ENOSPC)
event->hw.extra_reg.reg = (u16)retval;
else
retval = get_next_avail_iommu_bnk_cntr(event);
if (retval)
return retval;
if (flags & PERF_EF_START)
@ -390,115 +366,124 @@ static int perf_iommu_add(struct perf_event *event, int flags)
static void perf_iommu_del(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_amd_iommu *perf_iommu =
container_of(event->pmu, struct perf_amd_iommu, pmu);
pr_debug("perf: amd_iommu:perf_iommu_del\n");
perf_iommu_stop(event, PERF_EF_UPDATE);
/* clear the assigned iommu bank/counter */
clear_avail_iommu_bnk_cntr(perf_iommu,
_GET_BANK(event),
_GET_CNTR(event));
hwc->iommu_bank, hwc->iommu_cntr);
perf_event_update_userpage(event);
}
static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
static __init int _init_events_attrs(void)
{
struct attribute **attrs;
struct attribute_group *attr_group;
int i = 0, j;
struct attribute **attrs;
while (amd_iommu_v2_event_descs[i].attr.attr.name)
i++;
attr_group = kzalloc(sizeof(struct attribute *)
* (i + 1) + sizeof(*attr_group), GFP_KERNEL);
if (!attr_group)
attrs = kzalloc(sizeof(struct attribute **) * (i + 1), GFP_KERNEL);
if (!attrs)
return -ENOMEM;
attrs = (struct attribute **)(attr_group + 1);
for (j = 0; j < i; j++)
attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
attr_group->name = "events";
attr_group->attrs = attrs;
perf_iommu->events_group = attr_group;
amd_iommu_events_group.attrs = attrs;
return 0;
}
static __init void amd_iommu_pc_exit(void)
{
if (__perf_iommu.events_group != NULL) {
kfree(__perf_iommu.events_group);
__perf_iommu.events_group = NULL;
}
}
const struct attribute_group *amd_iommu_attr_groups[] = {
&amd_iommu_format_group,
&amd_iommu_cpumask_group,
&amd_iommu_events_group,
NULL,
};
static __init int _init_perf_amd_iommu(
struct perf_amd_iommu *perf_iommu, char *name)
static struct pmu iommu_pmu = {
.event_init = perf_iommu_event_init,
.add = perf_iommu_add,
.del = perf_iommu_del,
.start = perf_iommu_start,
.stop = perf_iommu_stop,
.read = perf_iommu_read,
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_iommu_attr_groups,
};
static __init int init_one_iommu(unsigned int idx)
{
struct perf_amd_iommu *perf_iommu;
int ret;
perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL);
if (!perf_iommu)
return -ENOMEM;
raw_spin_lock_init(&perf_iommu->lock);
/* Init format attributes */
perf_iommu->format_group = &amd_iommu_format_group;
perf_iommu->pmu = iommu_pmu;
perf_iommu->iommu = get_amd_iommu(idx);
perf_iommu->max_banks = amd_iommu_pc_get_max_banks(idx);
perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx);
/* Init cpumask attributes to only core 0 */
cpumask_set_cpu(0, &iommu_cpumask);
perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
/* Init events attributes */
if (_init_events_attrs(perf_iommu) != 0)
pr_err("perf: amd_iommu: Only support raw events.\n");
/* Init null attributes */
perf_iommu->null_group = NULL;
perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
if (ret) {
pr_err("perf: amd_iommu: Failed to initialized.\n");
amd_iommu_pc_exit();
} else {
pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
if (!perf_iommu->iommu ||
!perf_iommu->max_banks ||
!perf_iommu->max_counters) {
kfree(perf_iommu);
return -EINVAL;
}
snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx);
ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1);
if (!ret) {
pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n",
idx, perf_iommu->max_banks, perf_iommu->max_counters);
list_add_tail(&perf_iommu->list, &perf_amd_iommu_list);
} else {
pr_warn("Error initializing IOMMU %d.\n", idx);
kfree(perf_iommu);
}
return ret;
}
static struct perf_amd_iommu __perf_iommu = {
.pmu = {
.task_ctx_nr = perf_invalid_context,
.event_init = perf_iommu_event_init,
.add = perf_iommu_add,
.del = perf_iommu_del,
.start = perf_iommu_start,
.stop = perf_iommu_stop,
.read = perf_iommu_read,
},
.max_banks = 0x00,
.max_counters = 0x00,
.cntr_assign_mask = 0ULL,
.format_group = NULL,
.cpumask_group = NULL,
.events_group = NULL,
.null_group = NULL,
};
static __init int amd_iommu_pc_init(void)
{
unsigned int i, cnt = 0;
int ret;
/* Make sure the IOMMU PC resource is available */
if (!amd_iommu_pc_supported())
return -ENODEV;
_init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
ret = _init_events_attrs();
if (ret)
return ret;
/*
* An IOMMU PMU is specific to an IOMMU, and can function independently.
* So we go through all IOMMUs and ignore the one that fails init
* unless all IOMMUs are failing.
*/
for (i = 0; i < amd_iommu_get_num_iommus(); i++) {
ret = init_one_iommu(i);
if (!ret)
cnt++;
}
if (!cnt) {
kfree(amd_iommu_events_group.attrs);
return -ENODEV;
}
/* Init cpumask attributes to only core 0 */
cpumask_set_cpu(0, &iommu_cpumask);
return 0;
}
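
To see how the format attributes above are consumed, here is a hedged user-space sketch that composes a raw config/config1 pair according to the new field layout (csource in config:0-7, devid in config:8-23, domid in config:24-39, pasid in config:40-59, devid_mask in config1:0-15); the event source value, device id and the sysfs-derived PMU type are illustrative assumptions, not part of this patch.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t csource = 0x14;	/* assumed raw IOMMU event source */
	uint64_t devid   = 0x1234;	/* PCI device id to match */
	uint64_t domid   = 0x0001;
	uint64_t pasid   = 0x0;
	uint64_t config  = csource | (devid << 8) | (domid << 24) | (pasid << 40);
	uint64_t config1 = 0xFFFFULL;	/* devid_mask: match the device id exactly */

	printf("attr.config  = 0x%016llx\n", (unsigned long long)config);
	printf("attr.config1 = 0x%016llx\n", (unsigned long long)config1);
	/* attr.type would come from /sys/bus/event_source/devices/amd_iommu_0/type,
	 * and the pair passed to perf_event_open() on CPU 0 (the PMU cpumask above). */
	return 0;
}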

@ -24,17 +24,23 @@
#define PC_MAX_SPEC_BNKS 64
#define PC_MAX_SPEC_CNTRS 16
/* iommu pc reg masks*/
#define IOMMU_BASE_DEVID 0x0000
struct amd_iommu;
/* amd_iommu_init.c external support functions */
extern int amd_iommu_get_num_iommus(void);
extern bool amd_iommu_pc_supported(void);
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
extern u8 amd_iommu_pc_get_max_banks(unsigned int idx);
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
u8 fxn, u64 *value, bool is_write);
extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value);
extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value);
extern struct amd_iommu *get_amd_iommu(int idx);
#endif /*_PERF_EVENT_AMD_IOMMU_H_*/

@ -30,6 +30,9 @@
#define COUNTER_SHIFT 16
#undef pr_fmt
#define pr_fmt(fmt) "amd_uncore: " fmt
static int num_counters_llc;
static int num_counters_nb;
@ -509,51 +512,34 @@ static int __init amd_uncore_init(void)
int ret = -ENODEV;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
goto fail_nodev;
switch(boot_cpu_data.x86) {
case 23:
/* Family 17h: */
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L3;
/*
* For Family17h, the NorthBridge counters are
* re-purposed as Data Fabric counters. Also, support is
* added for L3 counters. The pmus are exported based on
* family as either L2 or L3 and NB or DF.
*/
amd_nb_pmu.name = "amd_df";
amd_llc_pmu.name = "amd_l3";
format_attr_event_df.show = &event_show_df;
format_attr_event_l3.show = &event_show_l3;
break;
case 22:
/* Family 16h - may change: */
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
amd_nb_pmu.name = "amd_nb";
amd_llc_pmu.name = "amd_l2";
format_attr_event_df = format_attr_event;
format_attr_event_l3 = format_attr_event;
break;
default:
/*
* All prior families have the same number of
* NorthBridge and Last Level Cache counters
*/
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
amd_nb_pmu.name = "amd_nb";
amd_llc_pmu.name = "amd_l2";
format_attr_event_df = format_attr_event;
format_attr_event_l3 = format_attr_event;
break;
}
amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
return -ENODEV;
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
goto fail_nodev;
return -ENODEV;
if (boot_cpu_data.x86 == 0x17) {
/*
* For F17h, the Northbridge counters are repurposed as Data
* Fabric counters. Also, L3 counters are supported too. The PMUs
* are exported based on family as either L2 or L3 and NB or DF.
*/
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L3;
amd_nb_pmu.name = "amd_df";
amd_llc_pmu.name = "amd_l3";
format_attr_event_df.show = &event_show_df;
format_attr_event_l3.show = &event_show_l3;
} else {
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
amd_nb_pmu.name = "amd_nb";
amd_llc_pmu.name = "amd_l2";
format_attr_event_df = format_attr_event;
format_attr_event_l3 = format_attr_event;
}
amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
@ -565,7 +551,7 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_nb;
pr_info("perf: AMD NB counters detected\n");
pr_info("AMD NB counters detected\n");
ret = 0;
}
@ -579,7 +565,7 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_llc;
pr_info("perf: AMD LLC counters detected\n");
pr_info("AMD LLC counters detected\n");
ret = 0;
}
@ -615,7 +601,6 @@ fail_nb:
if (amd_uncore_nb)
free_percpu(amd_uncore_nb);
fail_nodev:
return ret;
}
device_initcall(amd_uncore_init);

@ -63,7 +63,6 @@ struct bts_buffer {
unsigned int cur_buf;
bool snapshot;
local_t data_size;
local_t lost;
local_t head;
unsigned long end;
void **data_pages;
@ -199,7 +198,8 @@ static void bts_update(struct bts_ctx *bts)
return;
if (ds->bts_index >= ds->bts_absolute_maximum)
local_inc(&buf->lost);
perf_aux_output_flag(&bts->handle,
PERF_AUX_FLAG_TRUNCATED);
/*
* old and head are always in the same physical buffer, so we
@ -276,7 +276,7 @@ static void bts_event_start(struct perf_event *event, int flags)
return;
fail_end_stop:
perf_aux_output_end(&bts->handle, 0, false);
perf_aux_output_end(&bts->handle, 0);
fail_stop:
event->hw.state = PERF_HES_STOPPED;
@ -319,9 +319,8 @@ static void bts_event_stop(struct perf_event *event, int flags)
bts->handle.head =
local_xchg(&buf->data_size,
buf->nr_pages << PAGE_SHIFT);
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
!!local_xchg(&buf->lost, 0));
perf_aux_output_end(&bts->handle,
local_xchg(&buf->data_size, 0));
}
cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
@ -484,8 +483,7 @@ int intel_bts_interrupt(void)
if (old_head == local_read(&buf->head))
return handled;
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
!!local_xchg(&buf->lost, 0));
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0));
buf = perf_aux_output_begin(&bts->handle, event);
if (buf)
@ -500,7 +498,7 @@ int intel_bts_interrupt(void)
* cleared handle::event
*/
barrier();
perf_aux_output_end(&bts->handle, 0, false);
perf_aux_output_end(&bts->handle, 0);
}
}

@ -1553,6 +1553,27 @@ static __initconst const u64 slm_hw_cache_event_ids
},
};
EVENT_ATTR_STR(topdown-total-slots, td_total_slots_glm, "event=0x3c");
EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_glm, "3");
/* UOPS_NOT_DELIVERED.ANY */
EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_glm, "event=0x9c");
/* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */
EVENT_ATTR_STR(topdown-recovery-bubbles, td_recovery_bubbles_glm, "event=0xca,umask=0x02");
/* UOPS_RETIRED.ANY */
EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_glm, "event=0xc2");
/* UOPS_ISSUED.ANY */
EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_glm, "event=0x0e");
static struct attribute *glm_events_attrs[] = {
EVENT_PTR(td_total_slots_glm),
EVENT_PTR(td_total_slots_scale_glm),
EVENT_PTR(td_fetch_bubbles_glm),
EVENT_PTR(td_recovery_bubbles_glm),
EVENT_PTR(td_slots_issued_glm),
EVENT_PTR(td_slots_retired_glm),
NULL
};
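
These six events feed the usual level-1 Top-Down breakdown; how they are combined is not shown in this hunk, so the following stand-alone sketch uses the generic formulas and made-up counts purely for illustration. The scale of 3 on topdown-total-slots reflects three issue slots per cycle (slots = 3 * cycles).

#include <stdio.h>

int main(void)
{
	double total_slots      = 3.0e9;	/* 3 * cycles (event 0x3c, scale 3) */
	double fetch_bubbles    = 0.6e9;	/* UOPS_NOT_DELIVERED.ANY */
	double recovery_bubbles = 0.1e9;	/* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */
	double slots_issued     = 2.0e9;	/* UOPS_ISSUED.ANY */
	double slots_retired    = 1.8e9;	/* UOPS_RETIRED.ANY */

	double frontend = fetch_bubbles / total_slots;
	double badspec  = (slots_issued - slots_retired + recovery_bubbles) / total_slots;
	double retiring = slots_retired / total_slots;
	double backend  = 1.0 - frontend - badspec - retiring;

	printf("frontend %.2f  bad-spec %.2f  retiring %.2f  backend %.2f\n",
	       frontend, badspec, retiring, backend);
	return 0;
}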
static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
@ -2130,7 +2151,7 @@ again:
* counters from the GLOBAL_STATUS mask and we always process PEBS
* events via drain_pebs().
*/
status &= ~cpuc->pebs_enabled;
status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
/*
* PEBS overflow sets bit 62 in the global status register
@ -3750,6 +3771,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.cpu_events = glm_events_attrs;
pr_cont("Goldmont events, ");
break;

@ -1222,7 +1222,7 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
/* clear non-PEBS bit and re-check */
pebs_status = p->status & cpuc->pebs_enabled;
pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
pebs_status &= PEBS_COUNTER_MASK;
if (pebs_status == (1 << bit))
return at;
}

@ -28,6 +28,7 @@
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/intel_pt.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
#include "pt.h"
@ -98,6 +99,7 @@ static struct attribute_group pt_cap_group = {
.name = "caps",
};
PMU_FORMAT_ATTR(pt, "config:0" );
PMU_FORMAT_ATTR(cyc, "config:1" );
PMU_FORMAT_ATTR(pwr_evt, "config:4" );
PMU_FORMAT_ATTR(fup_on_ptw, "config:5" );
@ -105,11 +107,13 @@ PMU_FORMAT_ATTR(mtc, "config:9" );
PMU_FORMAT_ATTR(tsc, "config:10" );
PMU_FORMAT_ATTR(noretcomp, "config:11" );
PMU_FORMAT_ATTR(ptw, "config:12" );
PMU_FORMAT_ATTR(branch, "config:13" );
PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
PMU_FORMAT_ATTR(psb_period, "config:24-27" );
static struct attribute *pt_formats_attr[] = {
&format_attr_pt.attr,
&format_attr_cyc.attr,
&format_attr_pwr_evt.attr,
&format_attr_fup_on_ptw.attr,
@ -117,6 +121,7 @@ static struct attribute *pt_formats_attr[] = {
&format_attr_tsc.attr,
&format_attr_noretcomp.attr,
&format_attr_ptw.attr,
&format_attr_branch.attr,
&format_attr_mtc_period.attr,
&format_attr_cyc_thresh.attr,
&format_attr_psb_period.attr,
@ -197,6 +202,19 @@ static int __init pt_pmu_hw_init(void)
pt_pmu.tsc_art_den = eax;
}
/* model-specific quirks */
switch (boot_cpu_data.x86_model) {
case INTEL_FAM6_BROADWELL_CORE:
case INTEL_FAM6_BROADWELL_XEON_D:
case INTEL_FAM6_BROADWELL_GT3E:
case INTEL_FAM6_BROADWELL_X:
/* not setting BRANCH_EN will #GP, erratum BDM106 */
pt_pmu.branch_en_always_on = true;
break;
default:
break;
}
if (boot_cpu_has(X86_FEATURE_VMX)) {
/*
* Intel SDM, 36.5 "Tracing post-VMXON" says that
@ -263,8 +281,20 @@ fail:
#define RTIT_CTL_PTW (RTIT_CTL_PTW_EN | \
RTIT_CTL_FUP_ON_PTW)
#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \
/*
* Bit 0 (TraceEn) in the attr.config is meaningless as the
* corresponding bit in the RTIT_CTL can only be controlled
* by the driver; therefore, repurpose it to mean: pass
* through the bit that was previously assumed to be always
* on for PT, thereby allowing the user to *not* set it if
* they so wish. See also pt_event_valid() and pt_config().
*/
#define RTIT_CTL_PASSTHROUGH RTIT_CTL_TRACEEN
#define PT_CONFIG_MASK (RTIT_CTL_TRACEEN | \
RTIT_CTL_TSC_EN | \
RTIT_CTL_DISRETC | \
RTIT_CTL_BRANCH_EN | \
RTIT_CTL_CYC_PSB | \
RTIT_CTL_MTC | \
RTIT_CTL_PWR_EVT_EN | \
@ -332,6 +362,33 @@ static bool pt_event_valid(struct perf_event *event)
return false;
}
/*
* Setting bit 0 (TraceEn in RTIT_CTL MSR) in the attr.config
* clears the assumption that BranchEn must always be enabled,
* as was the case with the first implementation of PT.
* If this bit is not set, the legacy behavior is preserved
* for compatibility with the older userspace.
*
* Re-using bit 0 for this purpose is fine because it is never
* directly set by the user; previous attempts at setting it in
* the attr.config resulted in -EINVAL.
*/
if (config & RTIT_CTL_PASSTHROUGH) {
/*
* Disallow not setting BRANCH_EN where BRANCH_EN is
* always required.
*/
if (pt_pmu.branch_en_always_on &&
!(config & RTIT_CTL_BRANCH_EN))
return false;
} else {
/*
* Disallow BRANCH_EN without the PASSTHROUGH.
*/
if (config & RTIT_CTL_BRANCH_EN)
return false;
}
return true;
}
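
From the user-space side, the new semantics mean that setting config bit 0 (the "pt" format field) opts out of the implicit BranchEn, and config bit 13 (the "branch" field) then requests branch tracing explicitly. A hedged sketch of such an attribute setup follows; the hard-coded PMU type and the direct perf_event_open() call are illustrative assumptions.

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = 8;			/* assumed: intel_pt type read from sysfs */
	attr.config = (1ULL << 0)	/* pass-through: don't imply BRANCH_EN */
		    | (1ULL << 13);	/* ...but explicitly enable branch tracing */
	attr.exclude_kernel = 1;

	/* A pure PTWRITE/power-event session would simply leave bit 13 clear. */
	long fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

	return fd < 0;
}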
@ -411,6 +468,7 @@ static u64 pt_config_filters(struct perf_event *event)
static void pt_config(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
u64 reg;
if (!event->hw.itrace_started) {
@ -419,7 +477,20 @@ static void pt_config(struct perf_event *event)
}
reg = pt_config_filters(event);
reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
/*
* Previously, we had BRANCH_EN on by default, but now that PT has
* grown features outside of branch tracing, it is useful to allow
* the user to disable it. Setting bit 0 in the event's attr.config
* allows BRANCH_EN to pass through instead of being always on. See
* also the comment in pt_event_valid().
*/
if (event->attr.config & BIT(0)) {
reg |= event->attr.config & RTIT_CTL_BRANCH_EN;
} else {
reg |= RTIT_CTL_BRANCH_EN;
}
if (!event->attr.exclude_kernel)
reg |= RTIT_CTL_OS;
@ -429,11 +500,15 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
event->hw.config = reg;
wrmsrl(MSR_IA32_RTIT_CTL, reg);
if (READ_ONCE(pt->vmx_on))
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
else
wrmsrl(MSR_IA32_RTIT_CTL, reg);
}
static void pt_config_stop(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
u64 ctl = READ_ONCE(event->hw.config);
/* may be already stopped by a PMI */
@ -441,7 +516,8 @@ static void pt_config_stop(struct perf_event *event)
return;
ctl &= ~RTIT_CTL_TRACEEN;
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
if (!READ_ONCE(pt->vmx_on))
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
WRITE_ONCE(event->hw.config, ctl);
@ -753,7 +829,8 @@ static void pt_handle_status(struct pt *pt)
*/
if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
local_inc(&buf->lost);
perf_aux_output_flag(&pt->handle,
PERF_AUX_FLAG_TRUNCATED);
advance++;
}
}
@ -846,8 +923,10 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
/* can't stop in the middle of an output region */
if (buf->output_off + handle->size + 1 <
sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
return -EINVAL;
}
/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
@ -1171,12 +1250,6 @@ void intel_pt_interrupt(void)
if (!READ_ONCE(pt->handle_nmi))
return;
/*
* If VMX is on and PT does not support it, don't touch anything.
*/
if (READ_ONCE(pt->vmx_on))
return;
if (!event)
return;
@ -1192,8 +1265,7 @@ void intel_pt_interrupt(void)
pt_update_head(pt);
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
local_xchg(&buf->lost, 0));
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
if (!event->hw.state) {
int ret;
@ -1208,7 +1280,7 @@ void intel_pt_interrupt(void)
/* snapshot counters don't use PMI, so it's safe */
ret = pt_buffer_reset_markers(buf, &pt->handle);
if (ret) {
perf_aux_output_end(&pt->handle, 0, true);
perf_aux_output_end(&pt->handle, 0);
return;
}
@ -1237,12 +1309,19 @@ void intel_pt_handle_vmx(int on)
local_irq_save(flags);
WRITE_ONCE(pt->vmx_on, on);
if (on) {
/* prevent pt_config_stop() from writing RTIT_CTL */
event = pt->handle.event;
if (event)
event->hw.config = 0;
}
/*
* If an AUX transaction is in progress, it will contain
* gap(s), so flag it PARTIAL to inform the user.
*/
event = pt->handle.event;
if (event)
perf_aux_output_flag(&pt->handle,
PERF_AUX_FLAG_PARTIAL);
/* Turn PTs back on */
if (!on && event)
wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
@ -1257,9 +1336,6 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf;
if (READ_ONCE(pt->vmx_on))
return;
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf)
goto fail_stop;
@ -1280,7 +1356,7 @@ static void pt_event_start(struct perf_event *event, int mode)
return;
fail_end_stop:
perf_aux_output_end(&pt->handle, 0, true);
perf_aux_output_end(&pt->handle, 0);
fail_stop:
hwc->state = PERF_HES_STOPPED;
}
@ -1321,8 +1397,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
pt->handle.head =
local_xchg(&buf->data_size,
buf->nr_pages << PAGE_SHIFT);
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
local_xchg(&buf->lost, 0));
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
}
}

@ -110,6 +110,7 @@ struct pt_pmu {
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
bool vmx;
bool branch_en_always_on;
unsigned long max_nonturbo_ratio;
unsigned int tsc_art_num;
unsigned int tsc_art_den;
@ -143,7 +144,6 @@ struct pt_buffer {
size_t output_off;
unsigned long nr_pages;
local_t data_size;
local_t lost;
local64_t head;
bool snapshot;
unsigned long stop_pos, intr_pos;

@ -79,6 +79,7 @@ struct amd_nb {
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 8
#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
/*
* Flags PEBS can handle without an PMI.

@ -72,14 +72,13 @@ struct arch_specific_insn {
/* copy of the original instruction */
kprobe_opcode_t *insn;
/*
* boostable = -1: This instruction type is not boostable.
* boostable = 0: This instruction type is boostable.
* boostable = 1: This instruction has been boosted: we have
* boostable = false: This instruction type is not boostable.
* boostable = true: This instruction has been boosted: we have
* added a relative jump after the instruction copy in insn,
* so no single-step and fixup are needed (unless there's
* a post_handler or break_handler).
*/
int boostable;
bool boostable;
bool if_modifier;
};

@ -67,7 +67,7 @@
#endif
/* Ensure if the instruction can be boostable */
extern int can_boost(kprobe_opcode_t *instruction, void *addr);
extern int can_boost(struct insn *insn, void *orig_addr);
/* Recover instruction if given address is probed */
extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
unsigned long addr);
@ -75,7 +75,7 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
* Copy an instruction and adjust the displacement if the instruction
* uses the %rip-relative addressing mode.
*/
extern int __copy_instruction(u8 *dest, u8 *src);
extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn);
/* Generate a relative-jump/call instruction */
extern void synthesize_reljump(void *from, void *to);

@ -164,42 +164,38 @@ static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
NOKPROBE_SYMBOL(skip_prefixes);
/*
* Returns non-zero if opcode is boostable.
* Returns non-zero if INSN is boostable.
* RIP relative instructions are adjusted at copying time in 64 bits mode
*/
int can_boost(kprobe_opcode_t *opcodes, void *addr)
int can_boost(struct insn *insn, void *addr)
{
kprobe_opcode_t opcode;
kprobe_opcode_t *orig_opcodes = opcodes;
if (search_exception_tables((unsigned long)addr))
return 0; /* Page fault may occur on this address. */
retry:
if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
return 0;
opcode = *(opcodes++);
/* 2nd-byte opcode */
if (opcode == 0x0f) {
if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
return 0;
return test_bit(*opcodes,
if (insn->opcode.nbytes == 2)
return test_bit(insn->opcode.bytes[1],
(unsigned long *)twobyte_is_boostable);
}
if (insn->opcode.nbytes != 1)
return 0;
/* Can't boost Address-size override prefix */
if (unlikely(inat_is_address_size_prefix(insn->attr)))
return 0;
opcode = insn->opcode.bytes[0];
switch (opcode & 0xf0) {
#ifdef CONFIG_X86_64
case 0x40:
goto retry; /* REX prefix is boostable */
#endif
case 0x60:
if (0x63 < opcode && opcode < 0x67)
goto retry; /* prefixes */
/* can't boost Address-size override and bound */
return (opcode != 0x62 && opcode != 0x67);
/* can't boost "bound" */
return (opcode != 0x62);
case 0x70:
return 0; /* can't boost conditional jump */
case 0x90:
return opcode != 0x9a; /* can't boost call far */
case 0xc0:
/* can't boost software-interruptions */
return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
@ -210,14 +206,9 @@ retry:
/* can boost in/out and absolute jmps */
return ((opcode & 0x04) || opcode == 0xea);
case 0xf0:
if ((opcode & 0x0c) == 0 && opcode != 0xf1)
goto retry; /* lock/rep(ne) prefix */
/* clear and set flags are boostable */
return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
default:
/* segment override prefixes are boostable */
if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
goto retry; /* prefixes */
/* CS override prefix and call are not boostable */
return (opcode != 0x2e && opcode != 0x9a);
}
@ -264,7 +255,10 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
* Fortunately, we know that the original code is the ideal 5-byte
* long NOP.
*/
memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
if (probe_kernel_read(buf, (void *)addr,
MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
return 0UL;
if (faddr)
memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
else
@ -276,7 +270,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
* Recover the probed instruction at addr for further analysis.
* Caller must lock kprobes by kprobe_mutex, or disable preemption
* for preventing to release referencing kprobes.
* Returns zero if the instruction can not get recovered.
* Returns zero if the instruction can not get recovered (or access failed).
*/
unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
{
@ -348,37 +342,36 @@ static int is_IF_modifier(kprobe_opcode_t *insn)
}
/*
* Copy an instruction and adjust the displacement if the instruction
* uses the %rip-relative addressing mode.
* If it does, Return the address of the 32-bit displacement word.
* If not, return null.
* Only applicable to 64-bit x86.
* Copy an instruction with recovering modified instruction by kprobes
* and adjust the displacement if the instruction uses the %rip-relative
* addressing mode.
* This returns the length of copied instruction, or 0 if it has an error.
*/
int __copy_instruction(u8 *dest, u8 *src)
int __copy_instruction(u8 *dest, u8 *src, struct insn *insn)
{
struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];
int length;
unsigned long recovered_insn =
recover_probed_instruction(buf, (unsigned long)src);
if (!recovered_insn)
if (!recovered_insn || !insn)
return 0;
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
length = insn.length;
/* This can access kernel text if given address is not recovered */
if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE))
return 0;
kernel_insn_init(insn, dest, MAX_INSN_SIZE);
insn_get_length(insn);
/* Another subsystem puts a breakpoint, failed to recover */
if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
return 0;
memcpy(dest, insn.kaddr, length);
#ifdef CONFIG_X86_64
if (insn_rip_relative(&insn)) {
/* Only x86_64 has RIP relative instructions */
if (insn_rip_relative(insn)) {
s64 newdisp;
u8 *disp;
kernel_insn_init(&insn, dest, length);
insn_get_displacement(&insn);
/*
* The copied instruction uses the %rip-relative addressing
* mode. Adjust the displacement for the difference between
@ -391,36 +384,57 @@ int __copy_instruction(u8 *dest, u8 *src)
* extension of the original signed 32-bit displacement would
* have given.
*/
newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
newdisp = (u8 *) src + (s64) insn->displacement.value
- (u8 *) dest;
if ((s64) (s32) newdisp != newdisp) {
pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value);
pr_err("\tSrc: %p, Dest: %p, old disp: %x\n",
src, dest, insn->displacement.value);
return 0;
}
disp = (u8 *) dest + insn_offset_displacement(&insn);
disp = (u8 *) dest + insn_offset_displacement(insn);
*(s32 *) disp = (s32) newdisp;
}
#endif
return length;
return insn->length;
}
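
The displacement fix-up above boils down to newdisp = old_disp + (src - dest), so the copied instruction still reaches the same absolute target; a stand-alone sketch with made-up addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t src  = 0xffffffff81000000ULL;	/* original instruction address */
	uint64_t dest = 0xffffffffa0002000ULL;	/* out-of-line copy address */
	int32_t  disp = 0x1000;			/* original %rip-relative displacement */

	int64_t newdisp = (int64_t)(src - dest) + disp;

	/* As in the kernel code, the copy is rejected if the new displacement
	 * no longer fits into a signed 32-bit field. */
	if ((int64_t)(int32_t)newdisp != newdisp)
		printf("new displacement does not fit into s32\n");
	else
		printf("new displacement = %lld\n", (long long)newdisp);
	return 0;
}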
/* Prepare reljump right after instruction to boost */
static void prepare_boost(struct kprobe *p, struct insn *insn)
{
if (can_boost(insn, p->addr) &&
MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) {
/*
* These instructions can be executed directly if it
* jumps back to correct address.
*/
synthesize_reljump(p->ainsn.insn + insn->length,
p->addr + insn->length);
p->ainsn.boostable = true;
} else {
p->ainsn.boostable = false;
}
}
static int arch_copy_kprobe(struct kprobe *p)
{
int ret;
struct insn insn;
int len;
set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
/* Copy an instruction with recovering if other optprobe modifies it.*/
ret = __copy_instruction(p->ainsn.insn, p->addr);
if (!ret)
len = __copy_instruction(p->ainsn.insn, p->addr, &insn);
if (!len)
return -EINVAL;
/*
* __copy_instruction can modify the displacement of the instruction,
* but it doesn't affect boostable check.
*/
if (can_boost(p->ainsn.insn, p->addr))
p->ainsn.boostable = 0;
else
p->ainsn.boostable = -1;
prepare_boost(p, &insn);
set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
/* Check whether the instruction modifies Interrupt Flag or not */
p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn);
@ -459,7 +473,7 @@ void arch_disarm_kprobe(struct kprobe *p)
void arch_remove_kprobe(struct kprobe *p)
{
if (p->ainsn.insn) {
free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
p->ainsn.insn = NULL;
}
}
@ -531,7 +545,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
return;
#if !defined(CONFIG_PREEMPT)
if (p->ainsn.boostable == 1 && !p->post_handler) {
if (p->ainsn.boostable && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
if (!reenter)
reset_current_kprobe();
@ -851,7 +865,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
case 0xcf:
case 0xea: /* jmp absolute -- ip is correct */
/* ip is already adjusted, no more changes required */
p->ainsn.boostable = 1;
p->ainsn.boostable = true;
goto no_change;
case 0xe8: /* call relative - Fix return addr */
*tos = orig_ip + (*tos - copy_ip);
@ -876,28 +890,13 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
* jmp near and far, absolute indirect
* ip is correct. And this is boostable
*/
p->ainsn.boostable = 1;
p->ainsn.boostable = true;
goto no_change;
}
default:
break;
}
if (p->ainsn.boostable == 0) {
if ((regs->ip > copy_ip) &&
(regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
/*
* These instructions can be executed directly if it
* jumps back to correct address.
*/
synthesize_reljump((void *)regs->ip,
(void *)orig_ip + (regs->ip - copy_ip));
p->ainsn.boostable = 1;
} else {
p->ainsn.boostable = -1;
}
}
regs->ip += orig_ip - copy_ip;
no_change:

@ -94,6 +94,6 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler);
int arch_prepare_kprobe_ftrace(struct kprobe *p)
{
p->ainsn.insn = NULL;
p->ainsn.boostable = -1;
p->ainsn.boostable = false;
return 0;
}

@ -65,7 +65,10 @@ found:
* overwritten by jump destination address. In this case, original
* bytes must be recovered from op->optinsn.copied_insn buffer.
*/
memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
if (probe_kernel_read(buf, (void *)addr,
MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
return 0UL;
if (addr == (unsigned long)kp->addr) {
buf[0] = kp->opcode;
memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
@ -174,11 +177,12 @@ NOKPROBE_SYMBOL(optimized_callback);
static int copy_optimized_instructions(u8 *dest, u8 *src)
{
struct insn insn;
int len = 0, ret;
while (len < RELATIVEJUMP_SIZE) {
ret = __copy_instruction(dest + len, src + len);
if (!ret || !can_boost(dest + len, src + len))
ret = __copy_instruction(dest + len, src + len, &insn);
if (!ret || !can_boost(&insn, src + len))
return -EINVAL;
len += ret;
}
@ -350,6 +354,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
}
buf = (u8 *)op->optinsn.insn;
set_memory_rw((unsigned long)buf & PAGE_MASK, 1);
/* Copy instructions into the out-of-line buffer */
ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
@ -372,6 +377,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
(u8 *)op->kp.addr + op->optinsn.size);
set_memory_ro((unsigned long)buf & PAGE_MASK, 1);
flush_icache_range((unsigned long) buf,
(unsigned long) buf + TMPL_END_IDX +
op->optinsn.size + RELATIVEJUMP_SIZE);

@ -321,7 +321,7 @@ static int etb_set_buffer(struct coresight_device *csdev,
static unsigned long etb_reset_buffer(struct coresight_device *csdev,
struct perf_output_handle *handle,
void *sink_config, bool *lost)
void *sink_config)
{
unsigned long size = 0;
struct cs_buffers *buf = sink_config;
@ -343,7 +343,6 @@ static unsigned long etb_reset_buffer(struct coresight_device *csdev,
* resetting parameters here and squaring off with the ring
* buffer API in the tracer PMU is fine.
*/
*lost = !!local_xchg(&buf->lost, 0);
size = local_xchg(&buf->data_size, 0);
}
@ -385,7 +384,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
(unsigned long)write_ptr);
write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1);
local_inc(&buf->lost);
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
}
/*
@ -396,7 +395,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
*/
status = readl_relaxed(drvdata->base + ETB_STATUS_REG);
if (status & ETB_STATUS_RAM_FULL) {
local_inc(&buf->lost);
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
to_read = capacity;
read_ptr = write_ptr;
} else {
@ -429,7 +428,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
if (read_ptr > (drvdata->buffer_depth - 1))
read_ptr -= drvdata->buffer_depth;
/* let the decoder know we've skipped ahead */
local_inc(&buf->lost);
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
}
/* finally tell HW where we want to start reading from */

@ -302,7 +302,8 @@ out:
return;
fail_end_stop:
perf_aux_output_end(handle, 0, true);
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
perf_aux_output_end(handle, 0);
fail:
event->hw.state = PERF_HES_STOPPED;
goto out;
@ -310,7 +311,6 @@ fail:
static void etm_event_stop(struct perf_event *event, int mode)
{
bool lost;
int cpu = smp_processor_id();
unsigned long size;
struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
@ -348,10 +348,9 @@ static void etm_event_stop(struct perf_event *event, int mode)
return;
size = sink_ops(sink)->reset_buffer(sink, handle,
event_data->snk_config,
&lost);
event_data->snk_config);
perf_aux_output_end(handle, size, lost);
perf_aux_output_end(handle, size);
}
/* Disabling the path make its elements available to other sessions */

@ -76,7 +76,6 @@ enum cs_mode {
* @nr_pages: max number of pages granted to us
* @offset: offset within the current buffer
* @data_size: how much we collected in this run
* @lost: other than zero if we had a HW buffer wrap around
* @snapshot: is this run in snapshot mode
* @data_pages: a handle the ring buffer
*/
@ -85,7 +84,6 @@ struct cs_buffers {
unsigned int nr_pages;
unsigned long offset;
local_t data_size;
local_t lost;
bool snapshot;
void **data_pages;
};

@ -329,7 +329,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev,
static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
struct perf_output_handle *handle,
void *sink_config, bool *lost)
void *sink_config)
{
long size = 0;
struct cs_buffers *buf = sink_config;
@ -350,7 +350,6 @@ static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
* resetting parameters here and squaring off with the ring
* buffer API in the tracer PMU is fine.
*/
*lost = !!local_xchg(&buf->lost, 0);
size = local_xchg(&buf->data_size, 0);
}
@ -389,7 +388,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
*/
status = readl_relaxed(drvdata->base + TMC_STS);
if (status & TMC_STS_FULL) {
local_inc(&buf->lost);
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
to_read = drvdata->size;
} else {
to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size);
@ -434,7 +433,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
read_ptr -= drvdata->size;
/* Tell the HW */
writel_relaxed(read_ptr, drvdata->base + TMC_RRP);
local_inc(&buf->lost);
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
}
cur = buf->cur;

@ -1234,7 +1234,7 @@ static void __domain_flush_pages(struct protection_domain *domain,
build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
for (i = 0; i < amd_iommus_present; ++i) {
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
if (!domain->dev_iommu[i])
continue;
@ -1278,7 +1278,7 @@ static void domain_flush_complete(struct protection_domain *domain)
{
int i;
for (i = 0; i < amd_iommus_present; ++i) {
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
if (domain && !domain->dev_iommu[i])
continue;
@ -3363,7 +3363,7 @@ static int __flush_pasid(struct protection_domain *domain, int pasid,
* IOMMU TLB needs to be flushed before Device TLB to
* prevent device TLB refill from IOMMU TLB
*/
for (i = 0; i < amd_iommus_present; ++i) {
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
if (domain->dev_iommu[i] == 0)
continue;

@ -167,7 +167,9 @@ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
/* Array to assign indices to IOMMUs*/
struct amd_iommu *amd_iommus[MAX_IOMMUS];
int amd_iommus_present;
/* Number of IOMMUs present in the system */
static int amd_iommus_present;
/* IOMMUs have a non-present cache? */
bool amd_iommu_np_cache __read_mostly;
@ -254,10 +256,6 @@ static int amd_iommu_enable_interrupts(void);
static int __init iommu_go_to_state(enum iommu_init_state state);
static void init_device_table_dma(void);
static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
u8 bank, u8 cntr, u8 fxn,
u64 *value, bool is_write);
static inline void update_last_devid(u16 devid)
{
if (devid > amd_iommu_last_bdf)
@ -272,6 +270,11 @@ static inline unsigned long tbl_size(int entry_size)
return 1UL << shift;
}
int amd_iommu_get_num_iommus(void)
{
return amd_iommus_present;
}
/* Access to l1 and l2 indexed register spaces */
static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
@ -1336,7 +1339,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
/* Add IOMMU to internal data structures */
list_add_tail(&iommu->list, &amd_iommu_list);
iommu->index = amd_iommus_present++;
iommu->index = amd_iommus_present++;
if (unlikely(iommu->index >= MAX_IOMMUS)) {
WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
@ -1477,6 +1480,8 @@ static int __init init_iommu_all(struct acpi_table_header *table)
return 0;
}
static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value, bool is_write);
static void init_iommu_perf_ctr(struct amd_iommu *iommu)
{
@ -1488,8 +1493,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
amd_iommu_pc_present = true;
/* Check if the performance counters can be written to */
if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) ||
(0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) ||
if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
(iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
(val != val2)) {
pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
amd_iommu_pc_present = false;
@ -2711,6 +2716,18 @@ bool amd_iommu_v2_supported(void)
}
EXPORT_SYMBOL(amd_iommu_v2_supported);
struct amd_iommu *get_amd_iommu(unsigned int idx)
{
unsigned int i = 0;
struct amd_iommu *iommu;
for_each_iommu(iommu)
if (i++ == idx)
return iommu;
return NULL;
}
EXPORT_SYMBOL(get_amd_iommu);
/****************************************************************************
*
* IOMMU EFR Performance Counter support functionality. This code allows
@ -2718,17 +2735,14 @@ EXPORT_SYMBOL(amd_iommu_v2_supported);
*
****************************************************************************/
u8 amd_iommu_pc_get_max_banks(u16 devid)
u8 amd_iommu_pc_get_max_banks(unsigned int idx)
{
struct amd_iommu *iommu;
u8 ret = 0;
struct amd_iommu *iommu = get_amd_iommu(idx);
/* locate the iommu governing the devid */
iommu = amd_iommu_rlookup_table[devid];
if (iommu)
ret = iommu->max_banks;
return iommu->max_banks;
return ret;
return 0;
}
EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
@ -2738,62 +2752,69 @@ bool amd_iommu_pc_supported(void)
}
EXPORT_SYMBOL(amd_iommu_pc_supported);
u8 amd_iommu_pc_get_max_counters(u16 devid)
u8 amd_iommu_pc_get_max_counters(unsigned int idx)
{
struct amd_iommu *iommu;
u8 ret = 0;
struct amd_iommu *iommu = get_amd_iommu(idx);
/* locate the iommu governing the devid */
iommu = amd_iommu_rlookup_table[devid];
if (iommu)
ret = iommu->max_counters;
return iommu->max_counters;
return ret;
return 0;
}
EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
u8 bank, u8 cntr, u8 fxn,
u64 *value, bool is_write)
static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value, bool is_write)
{
u32 offset;
u32 max_offset_lim;
/* Check for valid iommu and pc register indexing */
if (WARN_ON((fxn > 0x28) || (fxn & 7)))
/* Make sure the IOMMU PC resource is available */
if (!amd_iommu_pc_present)
return -ENODEV;
offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn);
/* Check for valid iommu and pc register indexing */
if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
return -ENODEV;
offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
/* Limit the offset to the hw defined mmio region aperture */
max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) |
max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
(iommu->max_counters << 8) | 0x28);
if ((offset < MMIO_CNTR_REG_OFFSET) ||
(offset > max_offset_lim))
return -EINVAL;
if (is_write) {
writel((u32)*value, iommu->mmio_base + offset);
writel((*value >> 32), iommu->mmio_base + offset + 4);
u64 val = *value & GENMASK_ULL(47, 0);
writel((u32)val, iommu->mmio_base + offset);
writel((val >> 32), iommu->mmio_base + offset + 4);
} else {
*value = readl(iommu->mmio_base + offset + 4);
*value <<= 32;
*value = readl(iommu->mmio_base + offset);
*value |= readl(iommu->mmio_base + offset);
*value &= GENMASK_ULL(47, 0);
}
return 0;
}
EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val);
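
For reference, the offset formula used above maps a (bank, counter, function) triple into the IOMMU's performance-counter MMIO aperture; a small worked example (the register values are illustrative only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t bank = 1, cntr = 2, fxn = 0x08;
	uint32_t offset = (uint32_t)(((0x40 | bank) << 12) | (cntr << 8) | fxn);

	printf("bank %u, counter %u, fxn 0x%02x -> MMIO offset 0x%x\n",
	       (unsigned)bank, (unsigned)cntr, (unsigned)fxn, offset); /* 0x41208 */
	return 0;
}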
int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
u64 *value, bool is_write)
int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
{
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
return -EINVAL;
/* Make sure the IOMMU PC resource is available */
if (!amd_iommu_pc_present || iommu == NULL)
return -ENODEV;
return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn,
value, is_write);
return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
}
EXPORT_SYMBOL(amd_iommu_pc_get_reg);
int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
{
if (!iommu)
return -EINVAL;
return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
}
EXPORT_SYMBOL(amd_iommu_pc_set_reg);

@ -21,6 +21,7 @@
#include "amd_iommu_types.h"
extern int amd_iommu_get_num_iommus(void);
extern int amd_iommu_init_dma_ops(void);
extern int amd_iommu_init_passthrough(void);
extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
@ -56,13 +57,6 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
/* IOMMU Performance Counter functions */
extern bool amd_iommu_pc_supported(void);
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
u64 *value, bool is_write);
#ifdef CONFIG_IRQ_REMAP
extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
#else

View File

@ -611,9 +611,6 @@ extern struct list_head amd_iommu_list;
*/
extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
/* Number of IOMMUs present in the system */
extern int amd_iommus_present;
/*
* Declarations for the global list of all protection domains
*/

View File

@ -201,7 +201,7 @@ struct coresight_ops_sink {
void *sink_config);
unsigned long (*reset_buffer)(struct coresight_device *csdev,
struct perf_output_handle *handle,
void *sink_config, bool *lost);
void *sink_config);
void (*update_buffer)(struct coresight_device *csdev,
struct perf_output_handle *handle,
void *sink_config);

View File

@ -267,6 +267,8 @@ extern int arch_init_kprobes(void);
extern void show_registers(struct pt_regs *regs);
extern void kprobes_inc_nmissed_count(struct kprobe *p);
extern bool arch_within_kprobe_blacklist(unsigned long addr);
extern bool arch_function_offset_within_entry(unsigned long offset);
extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
extern bool within_kprobe_blacklist(unsigned long addr);

View File

@ -165,6 +165,13 @@ struct hw_perf_event {
struct list_head bp_list;
};
#endif
struct { /* amd_iommu */
u8 iommu_bank;
u8 iommu_cntr;
u16 padding;
u64 conf;
u64 conf1;
};
};
/*
* If the event is a per task event, this will point to the task in
@ -801,6 +808,7 @@ struct perf_output_handle {
struct ring_buffer *rb;
unsigned long wakeup;
unsigned long size;
u64 aux_flags;
union {
void *addr;
unsigned long head;
@ -849,10 +857,11 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
extern void *perf_aux_output_begin(struct perf_output_handle *handle,
struct perf_event *event);
extern void perf_aux_output_end(struct perf_output_handle *handle,
unsigned long size, bool truncated);
unsigned long size);
extern int perf_aux_output_skip(struct perf_output_handle *handle,
unsigned long size);
extern void *perf_get_aux(struct perf_output_handle *handle);
extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
extern void perf_pmu_unregister(struct pmu *pmu);
@ -1112,6 +1121,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);
/* Callchains */
@ -1267,8 +1277,8 @@ static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
struct perf_event *event) { return NULL; }
static inline void
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
bool truncated) { }
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
{ }
static inline int
perf_aux_output_skip(struct perf_output_handle *handle,
unsigned long size) { return -EINVAL; }
@ -1315,6 +1325,7 @@ static inline int perf_unregister_guest_info_callbacks
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
static inline int perf_swevent_get_recursion_context(void) { return -1; }

View File

@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
__reserved_1 : 36;
namespaces : 1, /* include namespaces data */
__reserved_1 : 35;
union {
__u32 wakeup_events; /* wakeup every n events */
@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};
struct perf_ns_link_info {
__u64 dev;
__u64 ino;
};
enum {
NET_NS_INDEX = 0,
UTS_NS_INDEX = 1,
IPC_NS_INDEX = 2,
PID_NS_INDEX = 3,
USER_NS_INDEX = 4,
MNT_NS_INDEX = 5,
CGROUP_NS_INDEX = 6,
NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
/*
@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,
/*
* struct {
* struct perf_event_header header;
* u32 pid;
* u32 tid;
* u64 nr_namespaces;
* { u64 dev, inode; } [nr_namespaces];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_NAMESPACES = 16,
PERF_RECORD_MAX, /* non-ABI */
};
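For tools that consume the new record type, the layout documented above maps onto a flat structure. The following reader-side declaration is only an illustrative sketch; the struct name and the flexible-array idiom are assumptions, not part of this patch:

/* Hypothetical reader-side view of PERF_RECORD_NAMESPACES, assuming the
 * layout documented above (header, pid/tid, count, then dev/ino pairs). */
struct namespaces_record {
	struct perf_event_header	header;
	__u32				pid;
	__u32				tid;
	__u64				nr_namespaces;
	struct perf_ns_link_info	link_info[];	/* nr_namespaces entries */
	/* followed by the usual struct sample_id block */
};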
@ -885,6 +915,7 @@ enum perf_callchain_context {
*/
#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#define PERF_FLAG_FD_OUTPUT (1UL << 1)

View File

@ -48,6 +48,8 @@
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>
#include "internal.h"
@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
atomic_dec(&nr_comm_events);
if (event->attr.namespaces)
atomic_dec(&nr_namespaces_events);
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
void perf_event_fork(struct task_struct *task)
{
perf_event_task(task, NULL, 1);
perf_event_namespaces(task);
}
/*
@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
perf_event_comm_event(&comm_event);
}
/*
* namespaces tracking
*/
struct perf_namespaces_event {
struct task_struct *task;
struct {
struct perf_event_header header;
u32 pid;
u32 tid;
u64 nr_namespaces;
struct perf_ns_link_info link_info[NR_NAMESPACES];
} event_id;
};
static int perf_event_namespaces_match(struct perf_event *event)
{
return event->attr.namespaces;
}
static void perf_event_namespaces_output(struct perf_event *event,
void *data)
{
struct perf_namespaces_event *namespaces_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
int ret;
if (!perf_event_namespaces_match(event))
return;
perf_event_header__init_id(&namespaces_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
namespaces_event->event_id.header.size);
if (ret)
return;
namespaces_event->event_id.pid = perf_event_pid(event,
namespaces_event->task);
namespaces_event->event_id.tid = perf_event_tid(event,
namespaces_event->task);
perf_output_put(&handle, namespaces_event->event_id);
perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
}
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
struct task_struct *task,
const struct proc_ns_operations *ns_ops)
{
struct path ns_path;
struct inode *ns_inode;
void *error;
error = ns_get_path(&ns_path, task, ns_ops);
if (!error) {
ns_inode = ns_path.dentry->d_inode;
ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
ns_link_info->ino = ns_inode->i_ino;
}
}
void perf_event_namespaces(struct task_struct *task)
{
struct perf_namespaces_event namespaces_event;
struct perf_ns_link_info *ns_link_info;
if (!atomic_read(&nr_namespaces_events))
return;
namespaces_event = (struct perf_namespaces_event){
.task = task,
.event_id = {
.header = {
.type = PERF_RECORD_NAMESPACES,
.misc = 0,
.size = sizeof(namespaces_event.event_id),
},
/* .pid */
/* .tid */
.nr_namespaces = NR_NAMESPACES,
/* .link_info[NR_NAMESPACES] */
},
};
ns_link_info = namespaces_event.event_id.link_info;
perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
task, &mntns_operations);
#ifdef CONFIG_USER_NS
perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
task, &userns_operations);
#endif
#ifdef CONFIG_NET_NS
perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
task, &netns_operations);
#endif
#ifdef CONFIG_UTS_NS
perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
task, &utsns_operations);
#endif
#ifdef CONFIG_IPC_NS
perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
task, &ipcns_operations);
#endif
#ifdef CONFIG_PID_NS
perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
task, &pidns_operations);
#endif
#ifdef CONFIG_CGROUPS
perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
task, &cgroupns_operations);
#endif
perf_iterate_sb(perf_event_namespaces_output,
&namespaces_event,
NULL);
}
/*
* mmap tracking
*/
@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
atomic_inc(&nr_comm_events);
if (event->attr.namespaces)
atomic_inc(&nr_namespaces_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq)
@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
return -EACCES;
}
if (attr.namespaces) {
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
}
if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
return -EINVAL;

View File

@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
rb->paused = 1;
}
void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
{
/*
* OVERWRITE is determined by perf_aux_output_end() and can't
* be passed in directly.
*/
if (WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE))
return;
handle->aux_flags |= flags;
}
EXPORT_SYMBOL_GPL(perf_aux_output_flag);
/*
* This is called before hardware starts writing to the AUX area to
* obtain an output handle and make sure there's room in the buffer.
@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
handle->event = event;
handle->head = aux_head;
handle->size = 0;
handle->aux_flags = 0;
/*
* In overwrite mode, AUX data stores do not depend on aux_tail,
@ -408,34 +422,32 @@ err:
* of the AUX buffer management code is that after pmu::stop(), the AUX
* transaction must be stopped and therefore drop the AUX reference count.
*/
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
bool truncated)
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
{
bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED);
struct ring_buffer *rb = handle->rb;
bool wakeup = truncated;
unsigned long aux_head;
u64 flags = 0;
if (truncated)
flags |= PERF_AUX_FLAG_TRUNCATED;
/* in overwrite mode, driver provides aux_head via handle */
if (rb->aux_overwrite) {
flags |= PERF_AUX_FLAG_OVERWRITE;
handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE;
aux_head = handle->head;
local_set(&rb->aux_head, aux_head);
} else {
handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE;
aux_head = local_read(&rb->aux_head);
local_add(size, &rb->aux_head);
}
if (size || flags) {
if (size || handle->aux_flags) {
/*
* Only send RECORD_AUX if we have something useful to communicate
*/
perf_event_aux_event(handle->event, aux_head, size, flags);
perf_event_aux_event(handle->event, aux_head, size,
handle->aux_flags);
}
aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
@ -446,7 +458,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
}
if (wakeup) {
if (truncated)
if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
handle->event->pending_disable = 1;
perf_output_wakeup(handle);
}
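With this rework, AUX-capable PMU drivers no longer pass a 'truncated' bool; they accumulate flags on the handle via perf_aux_output_flag() and then call the two-argument perf_aux_output_end(). A minimal sketch of the new calling convention follows; the driver function and its arguments are hypothetical:

/* Hypothetical driver-side stop path illustrating the reworked AUX API. */
static void example_pmu_stop_trace(struct perf_output_handle *handle,
				   unsigned long bytes, bool hw_overflowed)
{
	if (hw_overflowed)
		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
	/* PERF_AUX_FLAG_PARTIAL could be set here too if the trace has gaps */

	perf_aux_output_end(handle, bytes);	/* no 'truncated' argument any more */
}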

View File

@ -2353,6 +2353,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
}
}
perf_event_namespaces(current);
bad_unshare_cleanup_cred:
if (new_cred)
put_cred(new_cred);

View File

@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr)
* This returns encoded errors if it fails to look up the symbol or if the
* combination of parameters is invalid.
*/
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr,
const char *symbol_name, unsigned int offset)
{
kprobe_opcode_t *addr = p->addr;
if ((p->symbol_name && p->addr) ||
(!p->symbol_name && !p->addr))
if ((symbol_name && addr) || (!symbol_name && !addr))
goto invalid;
if (p->symbol_name) {
kprobe_lookup_name(p->symbol_name, addr);
if (symbol_name) {
kprobe_lookup_name(symbol_name, addr);
if (!addr)
return ERR_PTR(-ENOENT);
}
addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
addr = (kprobe_opcode_t *)(((char *)addr) + offset);
if (addr)
return addr;
@ -1413,6 +1411,11 @@ invalid:
return ERR_PTR(-EINVAL);
}
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
{
return _kprobe_addr(p->addr, p->symbol_name, p->offset);
}
/* Check passed kprobe is valid and return kprobe in kprobe_table. */
static struct kprobe *__get_valid_kprobe(struct kprobe *p)
{
@ -1740,11 +1743,12 @@ void unregister_kprobes(struct kprobe **kps, int num)
}
EXPORT_SYMBOL_GPL(unregister_kprobes);
int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
int __weak kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
return NOTIFY_DONE;
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
static struct notifier_block kprobe_exceptions_nb = {
.notifier_call = kprobe_exceptions_notify,
@ -1875,6 +1879,25 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
bool __weak arch_function_offset_within_entry(unsigned long offset)
{
return !offset;
}
bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
{
kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
if (IS_ERR(kp_addr))
return false;
if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
!arch_function_offset_within_entry(offset))
return false;
return true;
}
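register_kretprobe() below now rejects probe points whose offset does not fall within the function entry. A hedged illustration of what the new check validates; the module-side names and the probed symbol are hypothetical:

/* Hypothetical kretprobe registration; with this change a non-zero kp.offset
 * is only accepted when function_offset_within_entry() says it still lies
 * within the entry of the resolved symbol, otherwise -EINVAL is returned. */
static int example_ret_handler(struct kretprobe_instance *ri,
			       struct pt_regs *regs)
{
	return 0;
}

static struct kretprobe example_krp = {
	.kp.symbol_name	= "do_sys_open",
	.kp.offset	= 0,
	.handler	= example_ret_handler,
};
/* error handling omitted: register_kretprobe(&example_krp); */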
int register_kretprobe(struct kretprobe *rp)
{
int ret = 0;
@ -1882,6 +1905,9 @@ int register_kretprobe(struct kretprobe *rp)
int i;
void *addr;
if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
return -EINVAL;
if (kretprobe_blacklist_size) {
addr = kprobe_addr(&rp->kp);
if (IS_ERR(addr))

View File

@ -26,6 +26,7 @@
#include <linux/file.h>
#include <linux/syscalls.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>
static struct kmem_cache *nsproxy_cachep;
@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
goto out;
}
switch_task_namespaces(tsk, new_nsproxy);
perf_event_namespaces(tsk);
out:
fput(file);
return err;

View File

@ -455,7 +455,7 @@ config UPROBE_EVENTS
select UPROBES
select PROBE_EVENTS
select TRACING
default n
default y
help
This allows the user to add tracing events on top of userspace
dynamic events (similar to tracepoints) on the fly via the trace

View File

@ -4355,6 +4355,7 @@ static const char readme_msg[] =
"\t -:[<group>/]<event>\n"
#ifdef CONFIG_KPROBE_EVENTS
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
"place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
"\t place: <path>:<offset>\n"

View File

@ -681,10 +681,6 @@ static int create_trace_kprobe(int argc, char **argv)
return -EINVAL;
}
if (isdigit(argv[1][0])) {
if (is_return) {
pr_info("Return probe point must be a symbol.\n");
return -EINVAL;
}
/* an address specified */
ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
if (ret) {
@ -700,8 +696,9 @@ static int create_trace_kprobe(int argc, char **argv)
pr_info("Failed to parse symbol.\n");
return ret;
}
if (offset && is_return) {
pr_info("Return probe must be used without offset.\n");
if (offset && is_return &&
!function_offset_within_entry(NULL, symbol, offset)) {
pr_info("Given offset is not valid for return probe.\n");
return -EINVAL;
}
}

View File

@ -181,10 +181,23 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
#define VGIC_LEVEL_INFO_LINE_LEVEL 0
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
/* KVM_IRQ_LINE irq field index values */

View File

@ -201,10 +201,23 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
#define VGIC_LEVEL_INFO_LINE_LEVEL 0
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
/* Device Control API on vcpu fd */

View File

@ -413,6 +413,26 @@ struct kvm_get_htab_header {
__u16 n_invalid;
};
/* For KVM_PPC_CONFIGURE_V3_MMU */
struct kvm_ppc_mmuv3_cfg {
__u64 flags;
__u64 process_table; /* second doubleword of partition table entry */
};
/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */
#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */
/* For KVM_PPC_GET_RMMU_INFO */
struct kvm_ppc_rmmu_info {
struct kvm_ppc_radix_geom {
__u8 page_shift;
__u8 level_bits[4];
__u8 pad[3];
} geometries[8];
__u32 ap_encodings[8];
};
/* Per-vcpu XICS interrupt controller state */
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
@ -613,5 +633,7 @@ struct kvm_get_htab_header {
#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
#define KVM_XICS_MASKED (1ULL << 41)
#define KVM_XICS_PENDING (1ULL << 42)
#define KVM_XICS_PRESENTED (1ULL << 43)
#define KVM_XICS_QUEUED (1ULL << 44)
#endif /* __LINUX_KVM_POWERPC_H */

View File

@ -7,6 +7,8 @@
#define LOCK_PREFIX "\n\tlock; "
#include <asm/cmpxchg.h>
/*
* Atomic operations that C can't guarantee us. Useful for
* resource counting etc.
@ -62,4 +64,9 @@ static inline int atomic_dec_and_test(atomic_t *v)
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
}
static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{
return cmpxchg(&v->counter, old, new);
}
#endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */

View File

@ -0,0 +1,89 @@
#ifndef TOOLS_ASM_X86_CMPXCHG_H
#define TOOLS_ASM_X86_CMPXCHG_H
#include <linux/compiler.h>
/*
* Non-existent functions to indicate usage errors at link time
* (or compile-time if the compiler implements __compiletime_error()).
*/
extern void __cmpxchg_wrong_size(void)
__compiletime_error("Bad argument size for cmpxchg");
/*
* Constants for operation sizes. On 32-bit, the 64-bit size is set to
* -1 because sizeof will never return -1, thereby making those switch
* case statements guaranteed dead code which the compiler will
* eliminate, and allowing the "missing symbol in the default case" to
* indicate a usage error.
*/
#define __X86_CASE_B 1
#define __X86_CASE_W 2
#define __X86_CASE_L 4
#ifdef __x86_64__
#define __X86_CASE_Q 8
#else
#define __X86_CASE_Q -1 /* sizeof will never return -1 */
#endif
/*
* Atomic compare and exchange. Compare OLD with MEM, if identical,
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
#define __raw_cmpxchg(ptr, old, new, size, lock) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
switch (size) { \
case __X86_CASE_B: \
{ \
volatile u8 *__ptr = (volatile u8 *)(ptr); \
asm volatile(lock "cmpxchgb %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "q" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_W: \
{ \
volatile u16 *__ptr = (volatile u16 *)(ptr); \
asm volatile(lock "cmpxchgw %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_L: \
{ \
volatile u32 *__ptr = (volatile u32 *)(ptr); \
asm volatile(lock "cmpxchgl %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_Q: \
{ \
volatile u64 *__ptr = (volatile u64 *)(ptr); \
asm volatile(lock "cmpxchgq %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
__ret; \
})
#define __cmpxchg(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
#define cmpxchg(ptr, old, new) \
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
#endif /* TOOLS_ASM_X86_CMPXCHG_H */
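The macro mirrors the kernel-side cmpxchg(): it returns the value that was in memory, so success is detected by comparing the return value with the expected old value. A small usage sketch; the helper name and the u32 typedef from the tools' <linux/types.h> are assumptions:

/* Hypothetical helper: atomically claim a one-shot flag.
 * Only the caller that observes the old value 0 wins. */
static inline bool try_claim(u32 *flag)
{
	return cmpxchg(flag, 0, 1) == 0;
}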

View File

@ -100,7 +100,7 @@
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
@ -186,7 +186,8 @@
*
* Reuse free bits when adding new feature flags!
*/
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
@ -289,7 +290,8 @@
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
@ -321,5 +323,4 @@
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
#endif /* _ASM_X86_CPUFEATURES_H */

View File

@ -286,7 +286,7 @@ ENDPROC(memcpy_mcsafe_unrolled)
_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)

View File

@ -63,6 +63,7 @@ FEATURE_TESTS_BASIC := \
lzma \
get_cpuid \
bpf \
sched_getcpu \
sdt
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list

View File

@ -48,21 +48,22 @@ FILES= \
test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin \
test-jvmti.bin
test-jvmti.bin \
test-sched_getcpu.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
CC := $(CROSS_COMPILE)gcc -MD
CXX := $(CROSS_COMPILE)g++ -MD
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
CC ?= $(CROSS_COMPILE)gcc
CXX ?= $(CROSS_COMPILE)g++
PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
LLVM_CONFIG ?= llvm-config
all: $(FILES)
__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
###############################
@ -91,6 +92,9 @@ $(OUTPUT)test-libelf.bin:
$(OUTPUT)test-glibc.bin:
$(BUILD)
$(OUTPUT)test-sched_getcpu.bin:
$(BUILD)
DWARFLIBS := -ldw
ifeq ($(findstring -static,${LDFLAGS}),-static)
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
@ -171,7 +175,7 @@ $(OUTPUT)test-libperl.bin:
$(BUILD) $(FLAGS_PERL_EMBED)
$(OUTPUT)test-libpython.bin:
$(BUILD)
$(BUILD) $(FLAGS_PYTHON_EMBED)
$(OUTPUT)test-libpython-version.bin:
$(BUILD)

View File

@ -117,6 +117,10 @@
# include "test-pthread-attr-setaffinity-np.c"
#undef main
#define main main_test_sched_getcpu
# include "test-sched_getcpu.c"
#undef main
# if 0
/*
* Disable libbabeltrace check for test-all, because the requested
@ -182,6 +186,7 @@ int main(int argc, char *argv[])
main_test_get_cpuid();
main_test_bpf();
main_test_libcrypto();
main_test_sched_getcpu();
main_test_sdt();
return 0;

View File

@ -0,0 +1,7 @@
#define _GNU_SOURCE
#include <sched.h>
int main(void)
{
return sched_getcpu();
}

View File

@ -60,4 +60,12 @@ static inline int atomic_dec_and_test(atomic_t *v)
return __sync_sub_and_fetch(&v->counter, 1) == 0;
}
#define cmpxchg(ptr, oldval, newval) \
__sync_val_compare_and_swap(ptr, oldval, newval)
static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval)
{
return cmpxchg(&(v)->counter, oldval, newval);
}
#endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */

View File

@ -3,4 +3,10 @@
#include <asm/atomic.h>
/* atomic_cmpxchg_relaxed */
#ifndef atomic_cmpxchg_relaxed
#define atomic_cmpxchg_relaxed atomic_cmpxchg
#define atomic_cmpxchg_release atomic_cmpxchg
#endif /* atomic_cmpxchg_relaxed */
#endif /* __TOOLS_LINUX_ATOMIC_H */

tools/include/linux/bug.h (new file)
View File

@ -0,0 +1,10 @@
#ifndef _TOOLS_PERF_LINUX_BUG_H
#define _TOOLS_PERF_LINUX_BUG_H
/* Force a compilation error if condition is true, but also produce a
result (of value 0 and type size_t), so the expression can be used
e.g. in a structure initializer (or wherever else comma expressions
aren't permitted). */
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
#endif /* _TOOLS_PERF_LINUX_BUG_H */
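BUILD_BUG_ON_ZERO() evaluates to (size_t)0 when the condition is false and breaks the build (via a negative bit-field width) when it is true, which is what the __must_be_array()/ARRAY_SIZE() additions below rely on. A tiny illustration, hypothetical and not part of the patch:

/* Compiles to 0 when the assertion holds; fails to compile otherwise. */
size_t ok = BUILD_BUG_ON_ZERO(sizeof(int) > sizeof(long));
/* size_t bad = BUILD_BUG_ON_ZERO(1);	would not compile */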

View File

@ -12,3 +12,10 @@
#if GCC_VERSION >= 70000 && !defined(__CHECKER__)
# define __fallthrough __attribute__ ((fallthrough))
#endif
#if GCC_VERSION >= 40300
# define __compiletime_error(message) __attribute__((error(message)))
#endif /* GCC_VERSION >= 40300 */
/* &a[0] degrades to a pointer: a different type from an array */
#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))

View File

@ -5,6 +5,10 @@
#include <linux/compiler-gcc.h>
#endif
#ifndef __compiletime_error
# define __compiletime_error(message)
#endif
/* Optimization barrier */
/* The "volatile" is due to gcc bugs */
#define barrier() __asm__ __volatile__("": : :"memory")
@ -13,6 +17,11 @@
# define __always_inline inline __attribute__((always_inline))
#endif
/* Are two types/vars the same type (ignoring qualifiers)? */
#ifndef __same_type
# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
#endif
#ifdef __ANDROID__
/*
* FIXME: Big hammer to get rid of tons of:

View File

@ -13,10 +13,6 @@
#include <linux/hash.h>
#include <linux/log2.h>
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#define DEFINE_HASHTABLE(name, bits) \
struct hlist_head name[1 << (bits)] = \
{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }

View File

@ -4,6 +4,11 @@
#include <stdarg.h>
#include <stddef.h>
#include <assert.h>
#include <linux/compiler.h>
#ifndef UINT_MAX
#define UINT_MAX (~0U)
#endif
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
@ -72,6 +77,8 @@
int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
int scnprintf(char * buf, size_t size, const char * fmt, ...);
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
/*
* This looks more complex than it should be. But we need to
* get the type for the ~ right in round_down (it needs to be

View File

@ -12,6 +12,9 @@
#ifndef _TOOLS_LINUX_LOG2_H
#define _TOOLS_LINUX_LOG2_H
#include <linux/bitops.h>
#include <linux/types.h>
/*
* non-constant log of base 2 calculators
* - the arch may override these in asm/bitops.h if they can be implemented

View File

@ -0,0 +1,151 @@
#ifndef _TOOLS_LINUX_REFCOUNT_H
#define _TOOLS_LINUX_REFCOUNT_H
/*
* Variant of atomic_t specialized for reference counts.
*
* The interface matches the atomic_t interface (to aid in porting) but only
* provides the few functions one should use for reference counting.
*
* It differs in that the counter saturates at UINT_MAX and will not move once
* there. This avoids wrapping the counter and causing 'spurious'
* use-after-free issues.
*
* Memory ordering rules are slightly relaxed wrt regular atomic_t functions
* and provide only what is strictly required for refcounts.
*
* The increments are fully relaxed; these will not provide ordering. The
* rationale is that whatever is used to obtain the object we're increasing the
* reference count on will provide the ordering. For locked data structures,
* it's the lock acquire, for RCU/lockless data structures it's the dependent
* load.
*
* Do note that inc_not_zero() provides a control dependency which will order
* future stores against the inc; this ensures we'll never modify the object
* if we did not in fact acquire a reference.
*
* The decrements will provide release order, such that all the prior loads and
* stores will be issued before; it also provides a control dependency, which
* will order us against the subsequent free().
*
* The control dependency is against the load of the cmpxchg (ll/sc) that
* succeeded. This means the stores aren't fully ordered, but this is fine
* because the 1->0 transition indicates no concurrency.
*
* Note that the allocator is responsible for ordering things between free()
* and alloc().
*
*/
#include <linux/atomic.h>
#include <linux/kernel.h>
#ifdef NDEBUG
#define REFCOUNT_WARN(cond, str) (void)(cond)
#define __refcount_check
#else
#define REFCOUNT_WARN(cond, str) BUG_ON(cond)
#define __refcount_check __must_check
#endif
typedef struct refcount_struct {
atomic_t refs;
} refcount_t;
#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
static inline void refcount_set(refcount_t *r, unsigned int n)
{
atomic_set(&r->refs, n);
}
static inline unsigned int refcount_read(const refcount_t *r)
{
return atomic_read(&r->refs);
}
/*
* Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
*
* Provides no memory ordering; it is assumed the caller has guaranteed the
* object memory to be stable (RCU, etc.). It does provide a control dependency
* and thereby orders future stores. See the comment on top.
*/
static inline __refcount_check
bool refcount_inc_not_zero(refcount_t *r)
{
unsigned int old, new, val = atomic_read(&r->refs);
for (;;) {
new = val + 1;
if (!val)
return false;
if (unlikely(!new))
return true;
old = atomic_cmpxchg_relaxed(&r->refs, val, new);
if (old == val)
break;
val = old;
}
REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
return true;
}
/*
* Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
*
* Provides no memory ordering; it is assumed the caller already has a
* reference on the object, and will WARN when this is not so.
*/
static inline void refcount_inc(refcount_t *r)
{
REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
}
/*
* Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
* decrement when saturated at UINT_MAX.
*
* Provides release memory ordering, such that prior loads and stores are done
* before, and provides a control dependency such that free() must come after.
* See the comment on top.
*/
static inline __refcount_check
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
{
unsigned int old, new, val = atomic_read(&r->refs);
for (;;) {
if (unlikely(val == UINT_MAX))
return false;
new = val - i;
if (new > val) {
REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
return false;
}
old = atomic_cmpxchg_release(&r->refs, val, new);
if (old == val)
break;
val = old;
}
return !new;
}
static inline __refcount_check
bool refcount_dec_and_test(refcount_t *r)
{
return refcount_sub_and_test(1, r);
}
#endif /* _ATOMIC_LINUX_REFCOUNT_H */
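A short usage sketch of the new tools refcount_t, mirroring the kernel-side pattern of freeing an object only on the 1 -> 0 transition. The example object and helpers are hypothetical; it assumes the tools <linux/refcount.h> above plus <stdlib.h> for free():

#include <stdlib.h>
#include <linux/refcount.h>

struct example_obj {
	refcount_t	refcnt;
	/* ... payload ... */
};

static inline void example_obj__get(struct example_obj *obj)
{
	refcount_inc(&obj->refcnt);		/* caller must already hold a ref */
}

static inline void example_obj__put(struct example_obj *obj)
{
	if (obj && refcount_dec_and_test(&obj->refcnt))
		free(obj);			/* last reference dropped */
}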

View File

@ -7,6 +7,7 @@
#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
#include <asm/types.h>
#include <asm/posix_types.h>
struct page;
struct kmem_cache;

View File

@ -0,0 +1,72 @@
#ifndef _UAPI_LINUX_FCNTL_H
#define _UAPI_LINUX_FCNTL_H
#include <asm/fcntl.h>
#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
/*
* Cancel a blocking posix lock; internal use only until we expose an
* asynchronous lock api to userspace:
*/
#define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5)
/* Create a file descriptor with FD_CLOEXEC set. */
#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
/*
* Request notifications on a directory.
* See below for events that may be notified.
*/
#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2)
/*
* Set and get of pipe page size array
*/
#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
/*
* Set/Get seals
*/
#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
/*
* Types of seals
*/
#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
#define F_SEAL_WRITE 0x0008 /* prevent writes */
/* (1U << 31) is reserved for signed error codes */
/*
* Types of directory notifications that may be requested.
*/
#define DN_ACCESS 0x00000001 /* File accessed */
#define DN_MODIFY 0x00000002 /* File modified */
#define DN_CREATE 0x00000004 /* File created */
#define DN_DELETE 0x00000008 /* File removed */
#define DN_RENAME 0x00000010 /* File renamed */
#define DN_ATTRIB 0x00000020 /* File changed attributes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
#define AT_FDCWD -100 /* Special value used to indicate
openat should use the current
working directory. */
#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
#define AT_REMOVEDIR 0x200 /* Remove directory instead of
unlinking file. */
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
#endif /* _UAPI_LINUX_FCNTL_H */

View File

@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
__reserved_1 : 36;
namespaces : 1, /* include namespaces data */
__reserved_1 : 35;
union {
__u32 wakeup_events; /* wakeup every n events */
@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};
struct perf_ns_link_info {
__u64 dev;
__u64 ino;
};
enum {
NET_NS_INDEX = 0,
UTS_NS_INDEX = 1,
IPC_NS_INDEX = 2,
PID_NS_INDEX = 3,
USER_NS_INDEX = 4,
MNT_NS_INDEX = 5,
CGROUP_NS_INDEX = 6,
NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
/*
@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,
/*
* struct {
* struct perf_event_header header;
* u32 pid;
* u32 tid;
* u64 nr_namespaces;
* { u64 dev, inode; } [nr_namespaces];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_NAMESPACES = 16,
PERF_RECORD_MAX, /* non-ABI */
};
@ -885,6 +915,7 @@ enum perf_callchain_context {
*/
#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#define PERF_FLAG_FD_OUTPUT (1UL << 1)

View File

@ -0,0 +1,177 @@
#ifndef _UAPI_LINUX_STAT_H
#define _UAPI_LINUX_STAT_H
#include <linux/types.h>
#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)
#define S_IRWXU 00700
#define S_IRUSR 00400
#define S_IWUSR 00200
#define S_IXUSR 00100
#define S_IRWXG 00070
#define S_IRGRP 00040
#define S_IWGRP 00020
#define S_IXGRP 00010
#define S_IRWXO 00007
#define S_IROTH 00004
#define S_IWOTH 00002
#define S_IXOTH 00001
#endif
/*
* Timestamp structure for the timestamps in struct statx.
*
* tv_sec holds the number of seconds before (negative) or after (positive)
* 00:00:00 1st January 1970 UTC.
*
* tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is
* negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time.
*
* Note that if both tv_sec and tv_nsec are non-zero, then the two values must
* either be both positive or both negative.
*
* __reserved is held in case we need a yet finer resolution.
*/
struct statx_timestamp {
__s64 tv_sec;
__s32 tv_nsec;
__s32 __reserved;
};
/*
* Structures for the extended file attribute retrieval system call
* (statx()).
*
* The caller passes a mask of what they're specifically interested in as a
* parameter to statx(). What statx() actually got will be indicated in
* stx_mask upon return.
*
* For each bit in the mask argument:
*
* - if the datum is not supported:
*
* - the bit will be cleared, and
*
* - the datum will be set to an appropriate fabricated value if one is
* available (eg. CIFS can take a default uid and gid), otherwise
*
* - the field will be cleared;
*
* - otherwise, if explicitly requested:
*
* - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
* set or if the datum is considered out of date, and
*
* - the field will be filled in and the bit will be set;
*
* - otherwise, if not requested, but available in approximate form without any
* effort, it will be filled in anyway, and the bit will be set upon return
* (it might not be up to date, however, and no attempt will be made to
* synchronise the internal state first);
*
* - otherwise the field and the bit will be cleared before returning.
*
* Items in STATX_BASIC_STATS may be marked unavailable on return, but they
* will have values installed for compatibility purposes so that stat() and
* co. can be emulated in userspace.
*/
struct statx {
/* 0x00 */
__u32 stx_mask; /* What results were written [uncond] */
__u32 stx_blksize; /* Preferred general I/O size [uncond] */
__u64 stx_attributes; /* Flags conveying information about the file [uncond] */
/* 0x10 */
__u32 stx_nlink; /* Number of hard links */
__u32 stx_uid; /* User ID of owner */
__u32 stx_gid; /* Group ID of owner */
__u16 stx_mode; /* File mode */
__u16 __spare0[1];
/* 0x20 */
__u64 stx_ino; /* Inode number */
__u64 stx_size; /* File size */
__u64 stx_blocks; /* Number of 512-byte blocks allocated */
__u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
/* 0x40 */
struct statx_timestamp stx_atime; /* Last access time */
struct statx_timestamp stx_btime; /* File creation time */
struct statx_timestamp stx_ctime; /* Last attribute change time */
struct statx_timestamp stx_mtime; /* Last data modification time */
/* 0x80 */
__u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
__u32 stx_rdev_minor;
__u32 stx_dev_major; /* ID of device containing file [uncond] */
__u32 stx_dev_minor;
/* 0x90 */
__u64 __spare2[14]; /* Spare space for future expansion */
/* 0x100 */
};
/*
* Flags to be found in stx_mask
*
* Query request/result mask for statx() and struct statx::stx_mask.
*
* These bits should be set in the mask argument of statx() to request
* particular items when calling statx().
*/
#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */
#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */
#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */
#define STATX_UID 0x00000008U /* Want/got stx_uid */
#define STATX_GID 0x00000010U /* Want/got stx_gid */
#define STATX_ATIME 0x00000020U /* Want/got stx_atime */
#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */
#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */
#define STATX_INO 0x00000100U /* Want/got stx_ino */
#define STATX_SIZE 0x00000200U /* Want/got stx_size */
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
#define STATX_ALL 0x00000fffU /* All currently supported flags */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
/*
* Attributes to be found in stx_attributes and masked in stx_attributes_mask.
*
* These give information about the features or the state of a file that might
* be of use to ordinary userspace programs such as GUIs or ls rather than
* specialised tools.
*
* Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
* semantically. Where possible, the numerical value is picked to correspond
* also.
*/
#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */
#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */
#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
#endif /* _UAPI_LINUX_STAT_H */
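A hedged user-space sketch of the new interface: query a file's creation time via the raw syscall, since libc wrappers for statx() may not exist yet. The availability of __NR_statx and the chosen path are assumptions of this example:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/fcntl.h>
#include <linux/stat.h>

int main(void)
{
	struct statx stx;

	/* ask only for the birth time and check it was actually returned */
	if (syscall(__NR_statx, AT_FDCWD, "/etc/hostname", 0,
		    STATX_BTIME, &stx) == 0 &&
	    (stx.stx_mask & STATX_BTIME))
		printf("btime: %lld\n", (long long)stx.stx_btime.tv_sec);

	return 0;
}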

View File

@ -439,6 +439,35 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
return filename__read_str(path, buf, sizep);
}
int sysfs__read_bool(const char *entry, bool *value)
{
char *buf;
size_t size;
int ret;
ret = sysfs__read_str(entry, &buf, &size);
if (ret < 0)
return ret;
switch (buf[0]) {
case '1':
case 'y':
case 'Y':
*value = true;
break;
case '0':
case 'n':
case 'N':
*value = false;
break;
default:
ret = -1;
}
free(buf);
return ret;
}
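A caller-side sketch for the new helper; on failure it returns a negative value and leaves *value untouched, so callers should initialise it. The sysfs attribute name here is only illustrative:

bool enabled = false;

/* path is relative to the sysfs mount point */
if (sysfs__read_bool("devices/cpu/rdpmc", &enabled) < 0)
	enabled = false;	/* attribute missing or not parseable */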
int sysctl__read_int(const char *sysctl, int *value)
{
char path[PATH_MAX];

View File

@ -37,4 +37,5 @@ int sysctl__read_int(const char *sysctl, int *value);
int sysfs__read_int(const char *entry, int *value);
int sysfs__read_ull(const char *entry, unsigned long long *value);
int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
int sysfs__read_bool(const char *entry, bool *value);
#endif /* __API_FS__ */

View File

@ -2,6 +2,7 @@
#define __SUBCMD_HELP_H
#include <sys/types.h>
#include <stdio.h>
struct cmdnames {
size_t alloc;

View File

@ -1,3 +1,4 @@
#include <ctype.h>
#include "symbol/kallsyms.h"
#include <stdio.h>
#include <stdlib.h>

View File

@ -36,8 +36,7 @@
#include "warn.h"
#include <linux/hashtable.h>
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#include <linux/kernel.h>
#define STATE_FP_SAVED 0x1
#define STATE_FP_SETUP 0x2

View File

@ -31,11 +31,10 @@
#include <stdlib.h>
#include <subcmd/exec-cmd.h>
#include <subcmd/pager.h>
#include <linux/kernel.h>
#include "builtin.h"
#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
struct cmd_struct {
const char *name;
int (*fn)(int, const char **);

View File

@ -31,3 +31,5 @@ config.mak.autogen
.config-detected
util/intel-pt-decoder/inat-tables.c
arch/*/include/generated/
pmu-events/pmu-events.c
pmu-events/jevents

View File

@ -50,5 +50,6 @@ libperf-y += util/
libperf-y += arch/
libperf-y += ui/
libperf-y += scripts/
libperf-y += trace/beauty/
gtk-y += ui/gtk/

View File

@ -30,6 +30,24 @@ OPTIONS
--verbose=::
Verbosity level.
-p::
--pid=::
Trace on existing process id (comma separated list).
-a::
--all-cpus::
Force system-wide collection. Scripts run without a <command>
normally use -a by default, while scripts run with a <command>
normally don't - this option allows the latter to be run in
system-wide mode.
-C::
--cpu=::
Only trace for the list of CPUs provided. Multiple CPUs can
be provided as a comma separated list with no space like: 0,1.
Ranges of CPUs are specified with -: 0-2.
Default is to trace on all online CPUs.
SEE ALSO
--------

View File

@ -8,7 +8,7 @@ perf-list - List all symbolic event types
SYNOPSIS
--------
[verse]
'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob]
'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]
DESCRIPTION
-----------
@ -24,6 +24,10 @@ Don't print descriptions.
--long-desc::
Print longer event descriptions.
--details::
Print how named events are resolved internally into perf events, and also
any extra expressions computed by perf stat.
[[EVENT_MODIFIERS]]
EVENT MODIFIERS
@ -240,6 +244,8 @@ To limit the list use:
. 'pmu' to print the kernel supplied PMU events.
. 'sdt' to list all Statically Defined Tracepoint events.
. If none of the above is matched, it will apply the supplied glob to all
events, printing the ones that match.

View File

Enable weighted sampling. An additional weight is recorded per sample and can be
displayed with the weight and local_weight sort keys. This currently works for TSX
abort events and some memory events in precise mode on modern Intel CPUs.
--namespaces::
Record events of type PERF_RECORD_NAMESPACES.
--transaction::
Record transaction flags for transaction related events.

View File

@ -72,7 +72,8 @@ OPTIONS
--sort=::
Sort histogram entries by given key(s) - multiple keys can be specified
in CSV format. Following sort keys are available:
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
local_weight, cgroup_id.
Each key has following meaning:
@ -80,6 +81,7 @@ OPTIONS
- pid: command and tid of the task
- dso: name of library or module executed at the time of sample
- symbol: name of function executed at the time of sample
- symbol_size: size of function executed at the time of sample
- parent: name of function matched to the parent regex filter. Unmatched
entries are displayed as "[other]".
- cpu: cpu number the task ran at the time of sample
@ -91,6 +93,7 @@ OPTIONS
- weight: Event specific weight, e.g. memory latency or transaction
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
- cgroup_id: ID derived from cgroup namespace device and inode numbers.
- transaction: Transaction abort flags.
- overhead: Overhead percentage of sample
- overhead_sys: Overhead percentage of sample running in system mode
@ -172,6 +175,9 @@ OPTIONS
By default, every sort keys not specified in -F will be appended
automatically.
If the keys starts with a prefix '+', then it will append the specified
field(s) to the default field order. For example: perf report -F +period,sample.
-p::
--parent=<regex>::
A regex filter to identify parent. The parent is a caller of this
@ -229,6 +235,7 @@ OPTIONS
sort_key can be:
- function: compare on functions (default)
- address: compare on individual code addresses
- srcline: compare on source filename and line number
branch can be:
- branch: include last branch information in callgraph when available.
@ -424,6 +431,10 @@ include::itrace.txt[]
--hierarchy::
Enable hierarchical output.
--inline::
If a callgraph address belongs to an inlined function, the inline stack
will be printed. Each entry is function name or file/line.
include::callchain-overhead-calculation.txt[]
SEE ALSO

View File

@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist'
--migrations::
Show migration events.
-n::
--next::
Show next task.
-I::
--idle-hist::
Show idle-related events only.

View File

@ -116,7 +116,7 @@ OPTIONS
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
@ -189,15 +189,20 @@ OPTIONS
i.e., -F "" is not allowed.
The brstack output includes branch related information with raw addresses using the
/v/v/v/v/ syntax in the following order:
/v/v/v/v/cycles syntax in the following order:
FROM: branch source instruction
TO : branch target instruction
M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
X/- : X=branch inside a transactional region, -=not in transaction region or not supported
A/- : A=TSX abort entry, -=not aborted region or not supported
cycles
The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
When brstackinsn is specified, the full assembler sequences of the branch paths for each sample
are printed. This is the full execution path leading to the sample. This is only supported when the
sample was recorded with perf record -b or -j any.
-k::
--vmlinux=<file>::
vmlinux pathname
@ -248,6 +253,9 @@ OPTIONS
--show-mmap-events
Display mmap related events (e.g. MMAP, MMAP2).
--show-namespace-events
Display namespace events i.e. events of type PERF_RECORD_NAMESPACES.
--show-switch-events
Display context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.
@ -299,6 +307,10 @@ include::itrace.txt[]
stop time is not given (i.e, time string is 'x.y,') then analysis goes
to end of file.
--max-blocks::
Set the maximum number of program blocks to print with brstackinsn for
each sample.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],

View File

@ -94,8 +94,7 @@ to activate system-wide monitoring. Default is to count on all CPUs.
-A::
--no-aggr::
Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
This option is only valid in system-wide mode.
Do not aggregate counts across all monitored CPUs.
-n::
--null::
@ -237,6 +236,9 @@ To interpret the results it is usually needed to know on which
CPUs the workload runs on. If needed the CPUs can be forced using
taskset.
--no-merge::
Do not merge results from same PMUs.
EXAMPLES
--------

View File

@ -123,7 +123,8 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
major or all pagefaults. Default value is maj.
--syscalls::
Trace system calls. This options is enabled by default.
Trace system calls. This option is enabled by default; disable it with
--no-syscalls.
--call-graph [mode,type,min[,limit],order[,key][,branch]]::
Setup and enable call-graph (stack chain/backtrace) recording.

View File

@ -11,8 +11,8 @@ All fields are in native-endian of the machine that generated the perf.data.
When perf is writing to a pipe it uses a special version of the file
format that does not rely on seeking to adjust data offsets. This
format is not described here. The pipe version can be converted to
normal perf.data with perf inject.
format is described in the "Pipe-mode data" section. The pipe data version can be
augmented with additional events using perf inject.
The file starts with a perf_header:
@ -411,6 +411,21 @@ An array bound by the perf_file_section size.
ids points to a array of uint64_t defining the ids for event attr attr.
Pipe-mode data
Pipe-mode avoids seeks in the file by removing the perf_file_section and flags
from the struct perf_header. The trimmed header is:
struct perf_pipe_file_header {
u64 magic;
u64 size;
};
The information about attrs, data, and event_types is instead in the
synthesized events PERF_RECORD_ATTR, PERF_RECORD_HEADER_TRACING_DATA and
PERF_RECORD_HEADER_EVENT_TYPE that are generated by perf record in pipe-mode.
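A minimal consumer-side sketch of reading the trimmed pipe header shown above; the function name is hypothetical, and a real reader would also validate the magic (e.g. "PERFILE2") and handle byte order:

#include <stdint.h>
#include <stdio.h>

struct perf_pipe_file_header {
	uint64_t magic;
	uint64_t size;
};

static int read_pipe_header(FILE *in, struct perf_pipe_file_header *hdr)
{
	if (fread(hdr, sizeof(*hdr), 1, in) != 1)
		return -1;
	/* magic and endianness checks go here in a real implementation */
	return 0;
}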
References:
include/uapi/linux/perf_event.h

View File

@ -12,6 +12,7 @@ tools/arch/sparc/include/asm/barrier_32.h
tools/arch/sparc/include/asm/barrier_64.h
tools/arch/tile/include/asm/barrier.h
tools/arch/x86/include/asm/barrier.h
tools/arch/x86/include/asm/cmpxchg.h
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/disabled-features.h
tools/arch/x86/include/asm/required-features.h
@ -63,6 +64,7 @@ tools/include/linux/bitops.h
tools/include/linux/compiler.h
tools/include/linux/compiler-gcc.h
tools/include/linux/coresight-pmu.h
tools/include/linux/bug.h
tools/include/linux/filter.h
tools/include/linux/hash.h
tools/include/linux/kernel.h
@ -72,12 +74,15 @@ tools/include/uapi/asm-generic/mman-common.h
tools/include/uapi/asm-generic/mman.h
tools/include/uapi/linux/bpf.h
tools/include/uapi/linux/bpf_common.h
tools/include/uapi/linux/fcntl.h
tools/include/uapi/linux/hw_breakpoint.h
tools/include/uapi/linux/mman.h
tools/include/uapi/linux/perf_event.h
tools/include/uapi/linux/stat.h
tools/include/linux/poison.h
tools/include/linux/rbtree.h
tools/include/linux/rbtree_augmented.h
tools/include/linux/refcount.h
tools/include/linux/string.h
tools/include/linux/stringify.h
tools/include/linux/types.h

View File

@ -170,13 +170,20 @@ PYTHON2_CONFIG := \
override PYTHON_CONFIG := \
$(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))
PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
ifeq ($(CC), clang)
PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
ifdef PYTHON_CONFIG
PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
ifeq ($(CC), clang)
PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
endif
FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
endif
FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
@ -267,6 +274,7 @@ ifdef NO_LIBELF
NO_LIBUNWIND := 1
NO_LIBDW_DWARF_UNWIND := 1
NO_LIBBPF := 1
NO_JVMTI := 1
else
ifeq ($(feature-libelf), 0)
ifeq ($(feature-glibc), 1)
@ -276,7 +284,7 @@ else
LIBC_SUPPORT := 1
endif
ifeq ($(LIBC_SUPPORT),1)
msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install libelf-dev, libelf-devel or elfutils-libelf-devel);
msg := $(warning No libelf found. Disables 'probe' tool, jvmti and BPF support in 'perf record'. Please install libelf-dev, libelf-devel or elfutils-libelf-devel);
NO_LIBELF := 1
NO_DWARF := 1
@ -284,6 +292,7 @@ else
NO_LIBUNWIND := 1
NO_LIBDW_DWARF_UNWIND := 1
NO_LIBBPF := 1
NO_JVMTI := 1
else
ifneq ($(filter s% -static%,$(LDFLAGS)),)
msg := $(error No static glibc found, please install glibc-static);
@ -317,6 +326,10 @@ ifdef NO_DWARF
NO_LIBDW_DWARF_UNWIND := 1
endif
ifeq ($(feature-sched_getcpu), 1)
CFLAGS += -DHAVE_SCHED_GETCPU_SUPPORT
endif
ifndef NO_LIBELF
CFLAGS += -DHAVE_LIBELF_SUPPORT
EXTLIBS += -lelf
@ -550,8 +563,6 @@ ifndef NO_GTK2
endif
endif
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))
ifdef NO_LIBPERL
CFLAGS += -DNO_LIBPERL
@ -599,21 +610,9 @@ else
$(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev)
else
PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
ifeq ($(CC), clang)
PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
endif
FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
ifneq ($(feature-libpython), 1)
$(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev)
else
ifneq ($(feature-libpython-version), 1)
$(warning Python 3 is not yet supported; please set)
$(warning PYTHON and/or PYTHON_CONFIG appropriately.)

View File

@ -33,6 +33,7 @@
#include "../../util/cs-etm.h"
#include <stdlib.h>
#include <sys/stat.h>
#define ENABLE_SINK_MAX 128
#define CS_BUS_DEVICE_PATH "/bus/coresight/devices/"

View File

@ -9,6 +9,7 @@
*/
#include <stddef.h>
#include <linux/stringify.h>
#include <dwarf-regs.h>
struct pt_regs_dwarfnum {
@ -16,10 +17,9 @@ struct pt_regs_dwarfnum {
unsigned int dwarfnum;
};
#define STR(s) #s
#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
#define GPR_DWARFNUM_NAME(num) \
{.name = STR(%r##num), .dwarfnum = num}
{.name = __stringify(%r##num), .dwarfnum = num}
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
/*

View File

@ -1,6 +1,7 @@
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
#include "../../util/event.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{

View File

@ -8,9 +8,12 @@
* published by the Free Software Foundation.
*/
#include <errno.h>
#include <stddef.h>
#include <string.h>
#include <dwarf-regs.h>
#include <linux/ptrace.h> /* for struct user_pt_regs */
#include <linux/stringify.h>
#include "util.h"
struct pt_regs_dwarfnum {
@ -20,7 +23,7 @@ struct pt_regs_dwarfnum {
#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
#define GPR_DWARFNUM_NAME(num) \
{.name = STR(%x##num), .dwarfnum = num}
{.name = __stringify(%x##num), .dwarfnum = num}
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
#define DWARFNUM2OFFSET(index) \
(index * sizeof((struct user_pt_regs *)0)->regs[0])

View File

@ -1,6 +1,6 @@
#include <errno.h>
#ifndef REMOTE_UNWIND_LIBUNWIND
#include <errno.h>
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"

View File

@ -4,6 +4,8 @@
#include "../util/util.h"
#include "../util/debug.h"
#include "sane_ctype.h"
const char *const arm_triplets[] = {
"arm-eabi-",
"arm-linux-androideabi-",

View File

@ -15,6 +15,7 @@
#include <dwarf-regs.h>
#include <linux/ptrace.h>
#include <linux/kernel.h>
#include <linux/stringify.h>
#include "util.h"
struct pt_regs_dwarfnum {
@ -24,10 +25,10 @@ struct pt_regs_dwarfnum {
};
#define REG_DWARFNUM_NAME(r, num) \
{.name = STR(%)STR(r), .dwarfnum = num, \
{.name = __stringify(%)__stringify(r), .dwarfnum = num, \
.ptregs_offset = offsetof(struct pt_regs, r)}
#define GPR_DWARFNUM_NAME(num) \
{.name = STR(%gpr##num), .dwarfnum = num, \
{.name = __stringify(%gpr##num), .dwarfnum = num, \
.ptregs_offset = offsetof(struct pt_regs, gpr[num])}
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}

View File

@ -1,3 +1,4 @@
#include <errno.h>
#include "util/kvm-stat.h"
#include "util/parse-events.h"
#include "util/debug.h"

View File

@ -1,5 +1,11 @@
#include <errno.h>
#include <string.h>
#include <regex.h>
#include "../../perf.h"
#include "../../util/util.h"
#include "../../util/perf_regs.h"
#include "../../util/debug.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(r0, PERF_REG_POWERPC_R0),
@ -47,3 +53,109 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
SMPL_REG_END
};
/* REG or %rREG */
#define SDT_OP_REGEX1 "^(%r)?([1-2]?[0-9]|3[0-1])$"
/* -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) */
#define SDT_OP_REGEX2 "^(\\-)?([0-9]+)\\((%r)?([1-2]?[0-9]|3[0-1])\\)$"
static regex_t sdt_op_regex1, sdt_op_regex2;
static int sdt_init_op_regex(void)
{
static int initialized;
int ret = 0;
if (initialized)
return 0;
ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED);
if (ret)
goto error;
ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED);
if (ret)
goto free_regex1;
initialized = 1;
return 0;
free_regex1:
regfree(&sdt_op_regex1);
error:
pr_debug4("Regex compilation error.\n");
return ret;
}
/*
* Parse OP and convert it into uprobe format, which is, +/-NUM(%gprREG).
* Possible variants of OP are:
* Format Example
* -------------------------
* NUM(REG) 48(18)
* -NUM(REG) -48(18)
* NUM(%rREG) 48(%r18)
* -NUM(%rREG) -48(%r18)
* REG 18
* %rREG %r18
* iNUM i0
* i-NUM i-1
*
* SDT marker arguments on Powerpc use the %rREG form with the -mregnames flag
* and the REG form with -mno-regnames. Here REG is a general purpose register
* in the 0 to 31 range.
*/
int arch_sdt_arg_parse_op(char *old_op, char **new_op)
{
int ret, new_len;
regmatch_t rm[5];
char prefix;
/* Constant argument. Uprobe does not support it */
if (old_op[0] == 'i') {
pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
return SDT_ARG_SKIP;
}
ret = sdt_init_op_regex();
if (ret < 0)
return ret;
if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) {
/* REG or %rREG --> %gprREG */
new_len = 5; /* % g p r NULL */
new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
*new_op = zalloc(new_len);
if (!*new_op)
return -ENOMEM;
scnprintf(*new_op, new_len, "%%gpr%.*s",
(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so);
} else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) {
/*
* -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) -->
* +/-NUM(%gprREG)
*/
prefix = (rm[1].rm_so == -1) ? '+' : '-';
new_len = 8; /* +/- ( % g p r ) NULL */
new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
new_len += (int)(rm[4].rm_eo - rm[4].rm_so);
*new_op = zalloc(new_len);
if (!*new_op)
return -ENOMEM;
scnprintf(*new_op, new_len, "%c%.*s(%%gpr%.*s)", prefix,
(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
(int)(rm[4].rm_eo - rm[4].rm_so), old_op + rm[4].rm_so);
} else {
pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
return SDT_ARG_SKIP;
}
return SDT_ARG_VALID;
}
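As a rough illustration (a hypothetical harness, not part of this patch) of the conversion implemented above, assuming the perf tool headers are on the include path and the program is linked against the perf util objects so that arch_sdt_arg_parse_op(), SDT_ARG_VALID and SDT_ARG_SKIP resolve:

#include <stdio.h>
#include <stdlib.h>

#include "util/perf_regs.h"	/* arch_sdt_arg_parse_op(), SDT_ARG_* */

static void try_op(const char *op)
{
	char in[32], *out = NULL;

	/* The parser takes a mutable string, so work on a copy. */
	snprintf(in, sizeof(in), "%s", op);

	if (arch_sdt_arg_parse_op(in, &out) == SDT_ARG_VALID)
		printf("%-12s -> %s\n", op, out);
	else
		printf("%-12s -> skipped\n", op);
	free(out);
}

int main(void)
{
	try_op("18");		/* REG          -> %gpr18       */
	try_op("%r18");		/* %rREG        -> %gpr18       */
	try_op("48(18)");	/* NUM(REG)     -> +48(%gpr18)  */
	try_op("-48(%r18)");	/* -NUM(%rREG)  -> -48(%gpr18)  */
	try_op("i42");		/* constant     -> SDT_ARG_SKIP */
	return 0;
}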

View File

@ -10,6 +10,7 @@
#include "symbol.h"
#include "map.h"
#include "probe-event.h"
#include "probe-file.h"
#ifdef HAVE_LIBELF_SUPPORT
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
@ -79,13 +80,18 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
* However, if the user specifies an offset, we fall back to using the
* GEP since all userspace applications (objdump/readelf) show function
* disassembly with offsets from the GEP.
*
* In addition, we shouldn't specify an offset for kretprobes.
*/
if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) ||
!map || !sym)
if (pev->point.offset || !map || !sym)
return;
/* For kretprobes, add an offset only if the kernel supports it */
if (!pev->uprobes && pev->point.retprobe) {
#ifdef HAVE_LIBELF_SUPPORT
if (!kretprobe_offset_is_supported())
#endif
return;
}
lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)

View File

@ -0,0 +1,30 @@
static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
{
	struct ins_ops *ops = NULL;

	/* catch all kinds of jumps */
	if (strchr(name, 'j') ||
	    !strncmp(name, "bct", 3) ||
	    !strncmp(name, "br", 2))
		ops = &jump_ops;
	/* override call/returns */
	if (!strcmp(name, "bras") ||
	    !strcmp(name, "brasl") ||
	    !strcmp(name, "basr"))
		ops = &call_ops;
	if (!strcmp(name, "br"))
		ops = &ret_ops;

	arch__associate_ins_ops(arch, name, ops);
	return ops;
}

static int s390__annotate_init(struct arch *arch)
{
	if (!arch->initialized) {
		arch->initialized = true;
		arch->associate_instruction_ops = s390__associate_ins_ops;
	}
	return 0;
}

View File

@ -9,6 +9,7 @@
* as published by the Free Software Foundation.
*/
#include <errno.h>
#include "../../util/kvm-stat.h"
#include <asm/sie.h>

View File

@ -338,6 +338,7 @@
329 common pkey_mprotect sys_pkey_mprotect
330 common pkey_alloc sys_pkey_alloc
331 common pkey_free sys_pkey_free
332 common statx sys_statx
#
# x32-specific system call numbers start at 512 to avoid cache impact

View File

@ -6,7 +6,10 @@
#include "evsel.h"
#include "arch-tests.h"
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <errno.h>
#include <string.h>
static pid_t spawn(void)

View File

@ -1,3 +1,5 @@
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/types.h>

View File

@ -13,6 +13,7 @@
*
*/
#include <errno.h>
#include <stdbool.h>
#include "../../util/header.h"

View File

@ -13,6 +13,7 @@
*
*/
#include <errno.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>

View File

@ -13,6 +13,7 @@
*
*/
#include <errno.h>
#include <stdbool.h>
#include <linux/kernel.h>
#include <linux/types.h>

View File

@ -1,3 +1,4 @@
#include <errno.h>
#include "../../util/kvm-stat.h"
#include <asm/svm.h>
#include <asm/vmx.h>

View File

@ -1,5 +1,11 @@
#include <errno.h>
#include <string.h>
#include <regex.h>
#include "../../perf.h"
#include "../../util/util.h"
#include "../../util/perf_regs.h"
#include "../../util/debug.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
@ -26,3 +32,224 @@ const struct sample_reg sample_reg_masks[] = {
#endif
SMPL_REG_END
};
struct sdt_name_reg {
const char *sdt_name;
const char *uprobe_name;
};
#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m}
#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL}
static const struct sdt_name_reg sdt_reg_tbl[] = {
SDT_NAME_REG(eax, ax),
SDT_NAME_REG(rax, ax),
SDT_NAME_REG(al, ax),
SDT_NAME_REG(ah, ax),
SDT_NAME_REG(ebx, bx),
SDT_NAME_REG(rbx, bx),
SDT_NAME_REG(bl, bx),
SDT_NAME_REG(bh, bx),
SDT_NAME_REG(ecx, cx),
SDT_NAME_REG(rcx, cx),
SDT_NAME_REG(cl, cx),
SDT_NAME_REG(ch, cx),
SDT_NAME_REG(edx, dx),
SDT_NAME_REG(rdx, dx),
SDT_NAME_REG(dl, dx),
SDT_NAME_REG(dh, dx),
SDT_NAME_REG(esi, si),
SDT_NAME_REG(rsi, si),
SDT_NAME_REG(sil, si),
SDT_NAME_REG(edi, di),
SDT_NAME_REG(rdi, di),
SDT_NAME_REG(dil, di),
SDT_NAME_REG(ebp, bp),
SDT_NAME_REG(rbp, bp),
SDT_NAME_REG(bpl, bp),
SDT_NAME_REG(rsp, sp),
SDT_NAME_REG(esp, sp),
SDT_NAME_REG(spl, sp),
/* rNN registers */
SDT_NAME_REG(r8b, r8),
SDT_NAME_REG(r8w, r8),
SDT_NAME_REG(r8d, r8),
SDT_NAME_REG(r9b, r9),
SDT_NAME_REG(r9w, r9),
SDT_NAME_REG(r9d, r9),
SDT_NAME_REG(r10b, r10),
SDT_NAME_REG(r10w, r10),
SDT_NAME_REG(r10d, r10),
SDT_NAME_REG(r11b, r11),
SDT_NAME_REG(r11w, r11),
SDT_NAME_REG(r11d, r11),
SDT_NAME_REG(r12b, r12),
SDT_NAME_REG(r12w, r12),
SDT_NAME_REG(r12d, r12),
SDT_NAME_REG(r13b, r13),
SDT_NAME_REG(r13w, r13),
SDT_NAME_REG(r13d, r13),
SDT_NAME_REG(r14b, r14),
SDT_NAME_REG(r14w, r14),
SDT_NAME_REG(r14d, r14),
SDT_NAME_REG(r15b, r15),
SDT_NAME_REG(r15w, r15),
SDT_NAME_REG(r15d, r15),
SDT_NAME_REG_END,
};
/*
* Perf only supports OP which is in +/-NUM(REG) form.
* Here plus-minus sign, NUM and parenthesis are optional,
* only REG is mandatory.
*
* SDT events also supports indirect addressing mode with a
* symbol as offset, scaled mode and constants in OP. But
* perf does not support them yet. Below are few examples.
*
* OP with scaled mode:
* (%rax,%rsi,8)
* 10(%rax,%rsi,8)
*
* OP with indirect addressing mode:
* check_action(%rip)
* mp_+52(%rip)
* 44+mp_(%rip)
*
* OP with constant values:
* $0
* $123
* $-1
*/
#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$"
static regex_t sdt_op_regex;
static int sdt_init_op_regex(void)
{
static int initialized;
int ret = 0;
if (initialized)
return 0;
ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED);
if (ret) {
pr_debug4("Regex compilation error.\n");
return -EINVAL;
}
initialized = 1;
return 0;
}
/*
* Max x86 register name length is 5 (e.g. %r15d), so the 6th char
* should always contain NULL. This helps to find the register name
* length using strlen, instead of maintaining one more variable.
*/
#define SDT_REG_NAME_SIZE 6
/*
* The uprobe parser does not support all gas register names;
* so we have to replace them (e.g. for x86_64: %rax -> %ax).
* Note: if a register does not require renaming, copy it
* as-is, but don't leave it empty.
*/
static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg)
{
int i = 0;
for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) {
if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) {
strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name);
return;
}
}
strncpy(uprobe_reg, sdt_reg, sdt_len);
}
int arch_sdt_arg_parse_op(char *old_op, char **new_op)
{
char new_reg[SDT_REG_NAME_SIZE] = {0};
int new_len = 0, ret;
/*
* rm[0]: +/-NUM(REG)
* rm[1]: +/-
* rm[2]: NUM
* rm[3]: (
* rm[4]: REG
* rm[5]: )
*/
regmatch_t rm[6];
/*
* Max prefix length is 2 as it may contain a sign (+/-)
* and displacement 0 (both sign and displacement 0 are
* optional, so it may be empty). Use one more character
* to hold the trailing NULL so that strlen can be used to find
* the prefix length, instead of maintaining one more variable.
*/
char prefix[3] = {0};
ret = sdt_init_op_regex();
if (ret < 0)
return ret;
/*
* If unsupported OR does not match with regex OR
* register name too long, skip it.
*/
if (strchr(old_op, ',') || strchr(old_op, '$') ||
regexec(&sdt_op_regex, old_op, 6, rm, 0) ||
rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) {
pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
return SDT_ARG_SKIP;
}
/*
* Prepare prefix.
* If SDT OP has parenthesis but does not provide
* displacement, add 0 for displacement.
* SDT Uprobe Prefix
* -----------------------------
* +24(%rdi) +24(%di) +
* 24(%rdi) +24(%di) +
* %rdi %di
* (%rdi) +0(%di) +0
* -80(%rbx) -80(%bx) -
*/
if (rm[3].rm_so != rm[3].rm_eo) {
if (rm[1].rm_so != rm[1].rm_eo)
prefix[0] = *(old_op + rm[1].rm_so);
else if (rm[2].rm_so != rm[2].rm_eo)
prefix[0] = '+';
else
strncpy(prefix, "+0", 2);
}
/* Rename register */
sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so,
new_reg);
/* Prepare final OP which should be valid for uprobe_events */
new_len = strlen(prefix) +
(rm[2].rm_eo - rm[2].rm_so) +
(rm[3].rm_eo - rm[3].rm_so) +
strlen(new_reg) +
(rm[5].rm_eo - rm[5].rm_so) +
1; /* NULL */
*new_op = zalloc(new_len);
if (!*new_op)
return -ENOMEM;
scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s",
(int)strlen(prefix), prefix,
(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
(int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so,
(int)strlen(new_reg), new_reg,
(int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so);
return SDT_ARG_VALID;
}
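Purely as an illustration (hypothetical checks, not part of this patch), the prefix and renaming table above can be expressed as assertions, under the same assumption that the perf tool headers and util objects are available:

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "util/perf_regs.h"	/* arch_sdt_arg_parse_op(), SDT_ARG_VALID */

static void expect(const char *sdt, const char *uprobe)
{
	char in[32], *out = NULL;

	strcpy(in, sdt);	/* the parser wants a mutable string */
	assert(arch_sdt_arg_parse_op(in, &out) == SDT_ARG_VALID);
	assert(strcmp(out, uprobe) == 0);
	free(out);
}

int main(void)
{
	expect("+24(%rdi)", "+24(%di)");
	expect("24(%rdi)",  "+24(%di)");
	expect("%rdi",      "%di");
	expect("(%rdi)",    "+0(%di)");
	expect("-80(%rbx)", "-80(%bx)");
	return 0;
}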

View File

@ -1,6 +1,7 @@
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
#include "../../util/event.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{

View File

@ -25,17 +25,17 @@
# endif
#endif
int bench_numa(int argc, const char **argv, const char *prefix);
int bench_sched_messaging(int argc, const char **argv, const char *prefix);
int bench_sched_pipe(int argc, const char **argv, const char *prefix);
int bench_mem_memcpy(int argc, const char **argv, const char *prefix);
int bench_mem_memset(int argc, const char **argv, const char *prefix);
int bench_futex_hash(int argc, const char **argv, const char *prefix);
int bench_futex_wake(int argc, const char **argv, const char *prefix);
int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix);
int bench_futex_requeue(int argc, const char **argv, const char *prefix);
int bench_numa(int argc, const char **argv);
int bench_sched_messaging(int argc, const char **argv);
int bench_sched_pipe(int argc, const char **argv);
int bench_mem_memcpy(int argc, const char **argv);
int bench_mem_memset(int argc, const char **argv);
int bench_futex_hash(int argc, const char **argv);
int bench_futex_wake(int argc, const char **argv);
int bench_futex_wake_parallel(int argc, const char **argv);
int bench_futex_requeue(int argc, const char **argv);
/* pi futexes */
int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
int bench_futex_lock_pi(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0

Some files were not shown because too many files have changed in this diff.