Infrastructure:

We were not handling POLLHUP notifications for event file descriptors.
 
 Fix it by filtering entries in the events file descriptor array after
 poll returns, refcounting mmaps so that when the last fd pointing to
 a perf mmap goes away we do the unmap.
 
 User visible:
 
 Now 'record' and 'trace' properly exit when a target thread exits.
 
 Arnaldo Carvalho de Melo (14):
   perf evlist: Introduce perf_evlist__filter_pollfd method
   perf tests: Add test for perf_evlist__filter_pollfd()
   perf evlist: Monitor POLLERR and POLLHUP events too
   perf evlist: We need to poll all event file descriptors
   perf evlist: Allow growing pollfd on add method
   perf tests: Add pollfd growing test
   perf kvm stat live: Use perf_evlist__add_pollfd() instead of local equivalent
   perf evlist: Introduce poll method for common code idiom
   tools lib api: Adopt fdarray class from perf's evlist
   perf evlist: Refcount mmaps
   tools lib fd array: Allow associating an integer cookie with each entry
   perf evlist: Unmap when all refcounts to fd are gone and events drained
   perf record: Filter out POLLHUP'ed file descriptors
   perf trace: Filter out POLLHUP'ed file descriptors
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJUJI75AAoJENZQFvNTUqpA0TIP/1M8t0AnGhdq2+PtP3z8p3ik
 UP9vaa0T3GnA03jcV3CrjR6KwKXTvJsRrvfFOWjTiLUmIU+3YNLICKaeAfSukARk
 zp17TKP8GBXUur8Lo8VX+dl4lN16axYUBRRCwndWrVzEbG8vZtEe8Fgo/RH5jQ1V
 J19jA7YPubFV9iG4gQjJ0lg+ONuB10QgrwnBjsM5OckT2I37RiYasw6cTHNln0AY
 WRF93S63D+7/798D8zcDR+7c4OBPBJARfxHV8LtoAG/47BTndUHJUxL//kOaQEMf
 UkoNu3DW3KXsBR3BYdwyxo2H6GWyZlquvW6MA+bRLPJBSuelYgXIs7Ogjii6tXZc
 8jJT7EUoegpcr6ircNEkvG4mmLjUReU3DofVqj4nozVZ7NnJbIncCTX2d5PyJ1qV
 1m5EluGyZY0DG4A3pFxK9JnnrC0TTWynqAKvbFTFmLA9Fe39sCLvWVDdKtiBedap
 lj9bgpJnqVh54WiPXSaoZIGJ/fxdlHzAS6hyylclt1S1hnby0WMUM3ZG3kz2nY5S
 64GXGa+Wp0Me3g3lsv0Vv9yU+hBHbhnp97vKfE9Ucd6W1AT387yA9UNDYGiwC/hI
 mp8wPMERqjbaUArjjDvCjqu31fiIQrLz3G+znnhS9x5l8EkJ7ubMAazV9dPgXKmL
 JvS2Um2j5KbXG8d7I8Yi
 =84lL
 -----END PGP SIGNATURE-----

Merge tag 'perf-fdarray-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf tooling updates from Arnaldo Carvalho de Melo.

Infrastructure changes:

  * We were not handling POLLHUP notifications for event file descriptors.

    Fix it by filtering entries in the events file descriptor array after
    poll() returns, refcounting mmaps so that when the last fd pointing to
    a perf mmap goes away we do the unmap. (Arnaldo Carvalho de Melo)

User visible changes:

  * Now 'record' and 'trace' properly exit when a target thread exits.
    (Arnaldo Carvalho de Melo)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2014-09-26 11:12:46 +02:00
commit cf8102f64c
17 changed files with 501 additions and 43 deletions

View File

@ -10,9 +10,14 @@ LIB_OBJS=
LIB_H += fs/debugfs.h
LIB_H += fs/fs.h
# See comment below about piggybacking...
LIB_H += fd/array.h
LIB_OBJS += $(OUTPUT)fs/debugfs.o
LIB_OBJS += $(OUTPUT)fs/fs.o
# XXX piggybacking here, need to introduce libapikfd, or rename this
# to plain libapik.a and make it have it all api goodies
LIB_OBJS += $(OUTPUT)fd/array.o
LIBFILE = libapikfs.a
@ -29,7 +34,7 @@ $(LIBFILE): $(LIB_OBJS)
$(LIB_OBJS): $(LIB_H)
libapi_dirs:
$(QUIET_MKDIR)mkdir -p $(OUTPUT)fs/
$(QUIET_MKDIR)mkdir -p $(OUTPUT)fd $(OUTPUT)fs
$(OUTPUT)%.o: %.c libapi_dirs
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $<

127
tools/lib/api/fd/array.c Normal file
View File

@ -0,0 +1,127 @@
/*
* Copyright (C) 2014, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
#include "array.h"
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Put @fda into the empty state: no pollfd entries, no priv area and zero
 * used/allocated slots. @nr_autogrow is stored as the number of slots that
 * fdarray__add() requests from fdarray__grow() once the array is full.
 *
 * Nothing is freed here, so any memory @fda still owns must be released
 * by the caller beforehand.
 */
void fdarray__init(struct fdarray *fda, int nr_autogrow)
{
	fda->nr_autogrow = nr_autogrow;
	fda->nr_alloc	 = 0;
	fda->nr		 = 0;
	fda->priv	 = NULL;
	fda->entries	 = NULL;
}
/*
 * Make room for @nr more entries in @fda by reallocating both the pollfd
 * array and the parallel priv array.
 *
 * Returns 0 on success and -ENOMEM when either reallocation fails. On
 * failure @fda keeps its previous capacity (nr_alloc is untouched) and all
 * of its pointers remain valid.
 */
int fdarray__grow(struct fdarray *fda, int nr)
{
	void *priv;
	int nr_alloc = fda->nr_alloc + nr;
	size_t psize = sizeof(fda->priv[0]) * nr_alloc;
	size_t size  = sizeof(struct pollfd) * nr_alloc;
	struct pollfd *entries = realloc(fda->entries, size);

	if (entries == NULL)
		return -ENOMEM;

	/*
	 * A successful realloc() may have moved (and released) the old block,
	 * so the new pointer has to be published immediately. Freeing it on
	 * the error path below, as was done before, left fda->entries
	 * dangling and made fdarray__exit() free it a second time.
	 */
	fda->entries = entries;

	priv = realloc(fda->priv, psize);
	if (priv == NULL) {
		/*
		 * The entries block is now bigger than nr_alloc says, which is
		 * harmless: nobody indexes past nr_alloc.
		 */
		return -ENOMEM;
	}

	fda->priv     = priv;
	fda->nr_alloc = nr_alloc;
	return 0;
}
/*
 * Allocate a zeroed fdarray with room for @nr_alloc entries; @nr_autogrow
 * is stored for later use by fdarray__add().
 *
 * Returns the new array, or NULL if either the descriptor itself or its
 * initial storage could not be allocated.
 */
struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow)
{
	struct fdarray *fda = calloc(1, sizeof(*fda));

	if (fda == NULL)
		return NULL;

	if (fdarray__grow(fda, nr_alloc) != 0) {
		free(fda);
		return NULL;
	}

	fda->nr_autogrow = nr_autogrow;
	return fda;
}
/*
 * Release the storage owned by @fda (pollfd entries and priv area) and
 * reset it to the empty state with autogrow disabled. @fda itself is not
 * freed, so this also suits arrays embedded in another structure.
 */
void fdarray__exit(struct fdarray *fda)
{
	free(fda->priv);
	free(fda->entries);
	fdarray__init(fda, 0);
}
/*
 * Destroy an array obtained from fdarray__new(): release its storage and
 * then the descriptor itself.
 *
 * Like free(), a NULL @fda is accepted and ignored, so error paths can
 * call this unconditionally.
 */
void fdarray__delete(struct fdarray *fda)
{
	if (fda == NULL)
		return;

	fdarray__exit(fda);
	free(fda);
}
/*
 * Append @fd to @fda.
 *
 * @revents: despite its name, this is the mask of *requested* events; it
 *	     is stored in pollfd.events.
 *
 * When the array is full it is grown by fda->nr_autogrow slots first.
 *
 * Returns the position of the new entry (>= 0), or -ENOMEM when the array
 * is full and cannot be grown. The matching fda->priv[] slot is left for
 * the caller to fill in.
 */
int fdarray__add(struct fdarray *fda, int fd, short revents)
{
	int pos = fda->nr;

	if (fda->nr == fda->nr_alloc) {
		/*
		 * Growing by zero (e.g. after fdarray__exit()) or by a negative
		 * amount would "succeed" without providing a free slot, and the
		 * stores below would then land past the end of the arrays.
		 */
		if (fda->nr_autogrow <= 0 ||
		    fdarray__grow(fda, fda->nr_autogrow) < 0)
			return -ENOMEM;
	}

	fda->entries[pos].fd	  = fd;
	fda->entries[pos].events  = revents;
	/* realloc()ed memory is not zeroed and fdarray__filter() reads this */
	fda->entries[pos].revents = 0;
	fda->nr++;
	return pos;
}
/*
 * Drop every entry whose pollfd.revents has at least one bit of @revents
 * set, compacting the survivors (and their priv slots) towards the start
 * of the arrays while keeping their relative order.
 *
 * @entry_destructor: optional; called with the array and the *index* of
 *		      each entry that is about to be dropped.
 *
 * Returns the number of entries left, which also becomes fda->nr.
 */
int fdarray__filter(struct fdarray *fda, short revents,
		    void (*entry_destructor)(struct fdarray *fda, int fd))
{
	int src, kept = 0;

	if (fda->nr == 0)
		return 0;

	for (src = 0; src < fda->nr; src++) {
		const int hit = fda->entries[src].revents & revents;

		if (!hit) {
			/* survivor: slide it down unless it is already in place */
			if (src != kept) {
				fda->entries[kept] = fda->entries[src];
				fda->priv[kept]	   = fda->priv[src];
			}
			kept++;
		} else if (entry_destructor != NULL) {
			entry_destructor(fda, src);
		}
	}

	fda->nr = kept;
	return kept;
}
/*
 * poll() the fda->nr entries currently in use, waiting up to @timeout
 * milliseconds. The return value is that of poll().
 */
int fdarray__poll(struct fdarray *fda, int timeout)
{
	struct pollfd *fds = fda->entries;
	int nr_fds = fda->nr;

	return poll(fds, nr_fds, timeout);
}
/*
 * Print @fda to @fp as "<nr> [ fd0, fd1, ... ]".
 *
 * Returns the sum of the values returned by the individual fprintf() calls.
 */
int fdarray__fprintf(struct fdarray *fda, FILE *fp)
{
	const char *sep = "";
	int i, total;

	total = fprintf(fp, "%d [ ", fda->nr);

	for (i = 0; i < fda->nr; i++) {
		total += fprintf(fp, "%s%d", sep, fda->entries[i].fd);
		sep = ", ";
	}

	total += fprintf(fp, " ]");
	return total;
}

46
tools/lib/api/fd/array.h Normal file
View File

@ -0,0 +1,46 @@
#ifndef __API_FD_ARRAY__
#define __API_FD_ARRAY__
#include <stdio.h>
struct pollfd;
/**
 * struct fdarray: Array of file descriptors
 *
 * @nr: Number of entries currently in use, i.e. what gets passed to poll().
 * @nr_alloc: Number of entries both @entries and @priv have room for.
 * @nr_autogrow: Number of extra entries fdarray__add() asks for when the
 *		 array is full.
 * @entries: The pollfd array itself.
 * @priv: Per array entry priv area, users should access just its contents,
 *	  not set it to anything, as it is kept in synch with @entries, being
 *	  realloc'ed, for instance, in fdarray__grow() and compacted in
 *	  fdarray__filter().
 *
 *	  I.e. using 'fda->priv[N].idx = value' where N < fda->nr is ok,
 *	  but doing 'fda->priv = malloc(M)' is not allowed.
 */
struct fdarray {
int nr;
int nr_alloc;
int nr_autogrow;
struct pollfd *entries;
union {
int idx;
} *priv;
};
void fdarray__init(struct fdarray *fda, int nr_autogrow);
void fdarray__exit(struct fdarray *fda);
struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
void fdarray__delete(struct fdarray *fda);
int fdarray__add(struct fdarray *fda, int fd, short revents);
int fdarray__poll(struct fdarray *fda, int timeout);
int fdarray__filter(struct fdarray *fda, short revents,
void (*entry_destructor)(struct fdarray *fda, int fd));
int fdarray__grow(struct fdarray *fda, int extra);
int fdarray__fprintf(struct fdarray *fda, FILE *fp);
/* Number of allocated slots in @fda that are not yet in use. */
static inline int fdarray__available_entries(struct fdarray *fda)
{
	int in_use = fda->nr;

	return fda->nr_alloc - in_use;
}
#endif /* __API_FD_ARRAY__ */

View File

@ -402,6 +402,7 @@ LIB_OBJS += $(OUTPUT)tests/perf-record.o
LIB_OBJS += $(OUTPUT)tests/rdpmc.o
LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
LIB_OBJS += $(OUTPUT)tests/fdarray.o
LIB_OBJS += $(OUTPUT)tests/pmu.o
LIB_OBJS += $(OUTPUT)tests/hists_common.o
LIB_OBJS += $(OUTPUT)tests/hists_link.o
@ -769,7 +770,7 @@ $(LIBTRACEEVENT)-clean:
install-traceevent-plugins: $(LIBTRACEEVENT)
$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins
LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch])
LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch] $(LIB_PATH)fd/*.[ch])
# if subdir is set, we've been called from above so target has been built
# already

View File

@ -919,15 +919,8 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
signal(SIGINT, sig_handler);
signal(SIGTERM, sig_handler);
/* copy pollfds -- need to add timerfd and stdin */
nr_fds = kvm->evlist->nr_fds;
pollfds = zalloc(sizeof(struct pollfd) * (nr_fds + 2));
if (!pollfds) {
err = -ENOMEM;
goto out;
}
memcpy(pollfds, kvm->evlist->pollfd,
sizeof(struct pollfd) * kvm->evlist->nr_fds);
/* use pollfds -- need to add timerfd and stdin */
nr_fds = kvm->evlist->pollfd.nr;
/* add timer fd */
if (perf_kvm__timerfd_create(kvm) < 0) {
@ -935,17 +928,21 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
goto out;
}
pollfds[nr_fds].fd = kvm->timerfd;
pollfds[nr_fds].events = POLLIN;
if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd))
goto out;
nr_fds++;
pollfds[nr_fds].fd = fileno(stdin);
pollfds[nr_fds].events = POLLIN;
if (perf_evlist__add_pollfd(kvm->evlist, fileno(stdin)))
goto out;
nr_stdin = nr_fds;
nr_fds++;
if (fd_set_nonblock(fileno(stdin)) != 0)
goto out;
pollfds = kvm->evlist->pollfd.entries;
/* everything is good - enable the events and process */
perf_evlist__enable(kvm->evlist);
@ -979,7 +976,6 @@ out:
close(kvm->timerfd);
tcsetattr(0, TCSAFLUSH, &save);
free(pollfds);
return err;
}

View File

@ -308,7 +308,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
struct record_opts *opts = &rec->opts;
struct perf_data_file *file = &rec->file;
struct perf_session *session;
bool disabled = false;
bool disabled = false, draining = false;
rec->progname = argv[0];
@ -457,9 +457,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
}
if (hits == rec->samples) {
if (done)
if (done || draining)
break;
err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
err = perf_evlist__poll(rec->evlist, -1);
/*
* Propagate error, only if there's any. Ignore positive
* number of returned events and interrupt error.
@ -467,6 +467,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (err > 0 || (err < 0 && errno == EINTR))
err = 0;
waking++;
if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
draining = true;
}
/*

View File

@ -964,7 +964,7 @@ static int __cmd_top(struct perf_top *top)
perf_evlist__enable(top->evlist);
/* Wait for a minimal set of events before starting the snapshot */
poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
perf_evlist__poll(top->evlist, 100);
perf_top__mmap_read(top);
@ -991,7 +991,7 @@ static int __cmd_top(struct perf_top *top)
perf_top__mmap_read(top);
if (hits == top->samples)
ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
ret = perf_evlist__poll(top->evlist, 100);
}
ret = 0;

View File

@ -2044,6 +2044,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
int err = -1, i;
unsigned long before;
const bool forks = argc > 0;
bool draining = false;
char sbuf[STRERR_BUFSIZE];
trace->live = true;
@ -2171,8 +2172,12 @@ next_event:
if (trace->nr_events == before) {
int timeout = done ? 100 : -1;
if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
draining = true;
goto again;
}
} else {
goto again;
}

View File

@ -157,6 +157,14 @@ static struct test {
.desc = "Test tracking with sched_switch",
.func = test__switch_tracking,
},
{
.desc = "Filter fds with revents mask in a fdarray",
.func = test__fdarray__filter,
},
{
.desc = "Add fd to a fdarray, making it autogrow",
.func = test__fdarray__add,
},
{
.func = NULL,
},

174
tools/perf/tests/fdarray.c Normal file
View File

@ -0,0 +1,174 @@
#include <api/fd/array.h>
#include "util/debug.h"
#include "tests/tests.h"
/*
 * Test helper: mark every allocated slot of @fda as in use, giving slot i
 * the fake descriptor number (nr_alloc - i) and the returned-events mask
 * @revents, as if poll() had just reported it.
 */
static void fdarray__init_revents(struct fdarray *fda, short revents)
{
	const int total = fda->nr_alloc;
	struct pollfd *slot = fda->entries;
	int remaining;

	fda->nr = total;

	for (remaining = total; remaining > 0; --remaining, ++slot) {
		slot->fd      = remaining;
		slot->revents = revents;
	}
}
/*
 * Test helper: when running verbosely, print "\n<prefix>: " followed by
 * the fdarray__fprintf() dump of @fda. Prints nothing otherwise.
 *
 * Returns the accumulated fprintf() results, 0 when not verbose.
 */
static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE *fp)
{
	int nr_chars = 0;

	if (verbose) {
		nr_chars  = fprintf(fp, "\n%s: ", prefix);
		nr_chars += fdarray__fprintf(fda, fp);
	}

	return nr_chars;
}
/*
 * Exercise fdarray__filter() on a 5 entry array, without a destructor:
 *
 *  1. no entry matches the mask   -> nothing is removed
 *  2. every entry matches         -> the array ends up empty
 *  3. all but entries[2] match    -> it survives and moves to entries[0]
 *  4. all but entries[0,3] match  -> both survive, in their original order
 *
 * Each step starts with fdarray__init_revents(), which puts all the
 * allocated slots back in use.
 *
 * Returns 0 on success, TEST_FAIL otherwise.
 */
int test__fdarray__filter(void)
{
int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
struct fdarray *fda = fdarray__new(5, 5);
if (fda == NULL) {
pr_debug("\nfdarray__new() failed!");
goto out;
}
/* Step 1: only POLLIN reported, filtering on POLLHUP must keep everything */
fdarray__init_revents(fda, POLLIN);
nr_fds = fdarray__filter(fda, POLLHUP, NULL);
if (nr_fds != fda->nr_alloc) {
pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything",
nr_fds, fda->nr_alloc);
goto out_delete;
}
/* Step 2: POLLHUP reported everywhere, nothing must be left */
fdarray__init_revents(fda, POLLHUP);
nr_fds = fdarray__filter(fda, POLLHUP, NULL);
if (nr_fds != 0) {
pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds",
nr_fds, fda->nr_alloc);
goto out_delete;
}
/* Step 3: a single survivor in the middle, remember its fd before filtering */
fdarray__init_revents(fda, POLLHUP);
fda->entries[2].revents = POLLIN;
expected_fd[0] = fda->entries[2].fd;
pr_debug("\nfiltering all but fda->entries[2]:");
fdarray__fprintf_prefix(fda, "before", stderr);
nr_fds = fdarray__filter(fda, POLLHUP, NULL);
fdarray__fprintf_prefix(fda, " after", stderr);
if (nr_fds != 1) {
pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds);
goto out_delete;
}
/* The survivor must have been compacted into the first slot */
if (fda->entries[0].fd != expected_fd[0]) {
pr_debug("\nfda->entries[0].fd=%d != %d\n",
fda->entries[0].fd, expected_fd[0]);
goto out_delete;
}
/* Step 4: two survivors, one already in place and one that has to move */
fdarray__init_revents(fda, POLLHUP);
fda->entries[0].revents = POLLIN;
expected_fd[0] = fda->entries[0].fd;
fda->entries[3].revents = POLLIN;
expected_fd[1] = fda->entries[3].fd;
pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
fdarray__fprintf_prefix(fda, "before", stderr);
nr_fds = fdarray__filter(fda, POLLHUP, NULL);
fdarray__fprintf_prefix(fda, " after", stderr);
if (nr_fds != 2) {
pr_debug("\nfdarray__filter()=%d != 2, should have left just two events",
nr_fds);
goto out_delete;
}
/* Survivors must occupy slots 0 and 1, in their original relative order */
for (fd = 0; fd < 2; ++fd) {
if (fda->entries[fd].fd != expected_fd[fd]) {
pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd,
fda->entries[fd].fd, expected_fd[fd]);
goto out_delete;
}
}
pr_debug("\n");
err = 0;
out_delete:
fdarray__delete(fda);
out:
return err;
}
/*
 * Exercise fdarray__add() on an array created with room for two entries
 * and an autogrow step of two: the first two adds fit, the third has to
 * grow the array and the fourth fills the grown space. After each add the
 * entry count and the stored fd/events are checked, and at the end all
 * four entries are re-checked to make sure growing preserved them.
 *
 * Returns 0 on success, TEST_FAIL otherwise.
 */
int test__fdarray__add(void)
{
	int err = TEST_FAIL;
	struct fdarray *fda = fdarray__new(2, 2);

	if (fda == NULL) {
		pr_debug("\nfdarray__new() failed!");
		goto out;
	}

/*
 * Check that slot _idx holds descriptor _fd and requested-events mask
 * _revents. The diagnostics report the slot and the field actually being
 * compared (they used to print entries[1].fd and, for the events
 * mismatch, the fd instead of the events mask).
 */
#define FDA_CHECK(_idx, _fd, _revents)					   \
	do {								   \
		if (fda->entries[(_idx)].fd != (_fd)) {			   \
			pr_debug("\n%d: fda->entries[%d](%d) != %d!",	   \
				 __LINE__, (_idx),			   \
				 fda->entries[(_idx)].fd, (_fd));	   \
			goto out_delete;				   \
		}							   \
		if (fda->entries[(_idx)].events != (_revents)) {	   \
			pr_debug("\n%d: fda->entries[%d].events(%d) != %d!", \
				 __LINE__, (_idx),			   \
				 fda->entries[(_idx)].events, (_revents)); \
			goto out_delete;				   \
		}							   \
	} while (0)

/* Add (_fd, _revents), expect _nr entries afterwards, then check slot _idx. */
#define FDA_ADD(_idx, _fd, _revents, _nr)				   \
	do {								   \
		if (fdarray__add(fda, (_fd), (_revents)) < 0) {		   \
			pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!", \
				 __LINE__, (_fd), (_revents));		   \
			goto out_delete;				   \
		}							   \
		if (fda->nr != (_nr)) {					   \
			pr_debug("\n%d: fdarray__add(fda, %d, %d)=%d != %d", \
				 __LINE__, (_fd), (_revents), fda->nr, (_nr)); \
			goto out_delete;				   \
		}							   \
		FDA_CHECK(_idx, _fd, _revents);				   \
	} while (0)

	FDA_ADD(0, 1, POLLIN, 1);
	FDA_ADD(1, 2, POLLERR, 2);

	fdarray__fprintf_prefix(fda, "before growing array", stderr);

	/* The initial two slots are taken, this one has to autogrow */
	FDA_ADD(2, 35, POLLHUP, 3);

	if (fda->entries == NULL) {
		pr_debug("\nfdarray__add(fda, 35, POLLHUP) should have allocated fda->pollfd!");
		goto out_delete;
	}

	fdarray__fprintf_prefix(fda, "after 3rd add", stderr);

	FDA_ADD(3, 88, POLLIN | POLLOUT, 4);

	fdarray__fprintf_prefix(fda, "after 4th add", stderr);

	/* Growing must not have disturbed what was already stored */
	FDA_CHECK(0, 1, POLLIN);
	FDA_CHECK(1, 2, POLLERR);
	FDA_CHECK(2, 35, POLLHUP);
	FDA_CHECK(3, 88, POLLIN | POLLOUT);

#undef FDA_ADD
#undef FDA_CHECK

	pr_debug("\n");

	err = 0;
out_delete:
	fdarray__delete(fda);
out:
	return err;
}

View File

@ -105,7 +105,7 @@ int test__syscall_open_tp_fields(void)
}
if (nr_events == before)
poll(evlist->pollfd, evlist->nr_fds, 10);
perf_evlist__poll(evlist, 10);
if (++nr_polls > 5) {
pr_debug("%s: no events!\n", __func__);

View File

@ -268,7 +268,7 @@ int test__PERF_RECORD(void)
* perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does.
*/
if (total_events == before && false)
poll(evlist->pollfd, evlist->nr_fds, -1);
perf_evlist__poll(evlist, -1);
sleep(1);
if (++wakeups > 5) {

View File

@ -105,7 +105,7 @@ retry:
}
if (!exited || !nr_exit) {
poll(evlist->pollfd, evlist->nr_fds, -1);
perf_evlist__poll(evlist, -1);
goto retry;
}

View File

@ -49,6 +49,8 @@ int test__thread_mg_share(void);
int test__hists_output(void);
int test__hists_cumulate(void);
int test__switch_tracking(void);
int test__fdarray__filter(void);
int test__fdarray__add(void);
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT

View File

@ -25,6 +25,9 @@
#include <linux/bitops.h>
#include <linux/hash.h>
static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
@ -37,6 +40,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
INIT_HLIST_HEAD(&evlist->heads[i]);
INIT_LIST_HEAD(&evlist->entries);
perf_evlist__set_maps(evlist, cpus, threads);
fdarray__init(&evlist->pollfd, 64);
evlist->workload.pid = -1;
}
@ -102,7 +106,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
zfree(&evlist->mmap);
zfree(&evlist->pollfd);
fdarray__exit(&evlist->pollfd);
}
void perf_evlist__delete(struct perf_evlist *evlist)
@ -402,7 +406,7 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
return perf_evlist__enable_event_thread(evlist, evsel, idx);
}
static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
int nr_cpus = cpu_map__nr(evlist->cpus);
int nr_threads = thread_map__nr(evlist->threads);
@ -416,16 +420,50 @@ static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
nfds += nr_cpus * nr_threads;
}
evlist->pollfd = malloc(sizeof(struct pollfd) * nfds);
return evlist->pollfd != NULL ? 0 : -ENOMEM;
if (fdarray__available_entries(&evlist->pollfd) < nfds &&
fdarray__grow(&evlist->pollfd, nfds) < 0)
return -ENOMEM;
return 0;
}
void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
{
fcntl(fd, F_SETFL, O_NONBLOCK);
evlist->pollfd[evlist->nr_fds].fd = fd;
evlist->pollfd[evlist->nr_fds].events = POLLIN;
evlist->nr_fds++;
int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
/*
* Save the idx so that when we filter out fds POLLHUP'ed we can
* close the associated evlist->mmap[] entry.
*/
if (pos >= 0) {
evlist->pollfd.priv[pos].idx = idx;
fcntl(fd, F_SETFL, O_NONBLOCK);
}
return pos;
}
/*
 * Add @fd to the evlist's pollfd array, recording -1 as the saved mmap
 * index.
 *
 * Returns what __perf_evlist__add_pollfd() returns.
 */
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	const int idx = -1;

	return __perf_evlist__add_pollfd(evlist, fd, idx);
}
/*
 * fdarray__filter() destructor: @fd is the *position* in @fda of the entry
 * being dropped. Release the reference that entry held on its
 * evlist->mmap[] slot.
 *
 * Entries added through perf_evlist__add_pollfd() carry idx == -1; they
 * must be skipped, otherwise evlist->mmap[-1] would be accessed.
 */
static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
{
	struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);
	int idx = fda->priv[fd].idx;

	if (idx < 0)
		return;

	perf_evlist__mmap_put(evlist, idx);
}
/*
 * Remove from the evlist's pollfd array every entry whose revents has a
 * bit of @revents_and_mask set, calling perf_evlist__munmap_filtered() for
 * each removed entry.
 *
 * Returns the number of entries left in the array.
 */
int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	struct fdarray *fda = &evlist->pollfd;

	return fdarray__filter(fda, revents_and_mask, perf_evlist__munmap_filtered);
}
/*
 * poll() the evlist's file descriptors for up to @timeout milliseconds.
 * The return value is that of fdarray__poll().
 */
int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	struct fdarray *fda = &evlist->pollfd;

	return fdarray__poll(fda, timeout);
}
static void perf_evlist__id_hash(struct perf_evlist *evlist,
@ -638,14 +676,36 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
return event;
}
/*
 * True when there is nothing left to read in @md, i.e. the ring buffer
 * head matches the position up to which we have already read (md->prev).
 *
 * The comparison used to be inverted ('!='), reporting "empty" exactly
 * when events were still pending, which made perf_evlist__mmap_consume()
 * drop the last reference on a buffer that still had data in it.
 */
static bool perf_mmap__empty(struct perf_mmap *md)
{
	return perf_mmap__read_head(md) == md->prev;
}
static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
++evlist->mmap[idx].refcnt;
}
static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
BUG_ON(evlist->mmap[idx].refcnt == 0);
if (--evlist->mmap[idx].refcnt == 0)
__perf_evlist__munmap(evlist, idx);
}
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
struct perf_mmap *md = &evlist->mmap[idx];
if (!evlist->overwrite) {
struct perf_mmap *md = &evlist->mmap[idx];
unsigned int old = md->prev;
perf_mmap__write_tail(md, old);
}
if (md->refcnt == 1 && perf_mmap__empty(md))
perf_evlist__mmap_put(evlist, idx);
}
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
@ -653,6 +713,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
if (evlist->mmap[idx].base != NULL) {
munmap(evlist->mmap[idx].base, evlist->mmap_len);
evlist->mmap[idx].base = NULL;
evlist->mmap[idx].refcnt = 0;
}
}
@ -686,6 +747,20 @@ struct mmap_params {
static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
struct mmap_params *mp, int fd)
{
/*
* The last one will be done at perf_evlist__mmap_consume(), so that we
* make sure we don't prevent tools from consuming every last event in
* the ring buffer.
*
* I.e. we can get the POLLHUP meaning that the fd doesn't exist
* anymore, but the last events for it are still in the ring buffer,
* waiting to be consumed.
*
* Tools can choose to ignore this at their own discretion, but the
* evlist layer can't just drop it when filtering events in
* perf_evlist__filter_pollfd().
*/
evlist->mmap[idx].refcnt = 2;
evlist->mmap[idx].prev = 0;
evlist->mmap[idx].mask = mp->mask;
evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
@ -697,7 +772,6 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
return -1;
}
perf_evlist__add_pollfd(evlist, fd);
return 0;
}
@ -722,6 +796,13 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
} else {
if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
return -1;
perf_evlist__mmap_get(evlist, idx);
}
if (__perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
perf_evlist__mmap_put(evlist, idx);
return -1;
}
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
@ -881,7 +962,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
return -ENOMEM;
if (evlist->pollfd == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;
evlist->overwrite = overwrite;

View File

@ -2,6 +2,7 @@
#define __PERF_EVLIST_H 1
#include <linux/list.h>
#include <api/fd/array.h>
#include <stdio.h>
#include "../perf.h"
#include "event.h"
@ -17,9 +18,15 @@ struct record_opts;
#define PERF_EVLIST__HLIST_BITS 8
#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
/**
 * struct perf_mmap - perf's ring buffer mmap details
 *
 * @base - address returned by mmap(), reset to NULL once unmapped
 * @mask - copied from the mmap parameters when the buffer is set up
 * @refcnt - number of users of this ring buffer: set to 2 when it is
 *	     mmap'ed, incremented for each further fd that is redirected
 *	     into it using PERF_EVENT_IOC_SET_OUTPUT, decremented via
 *	     perf_evlist__mmap_put(); the buffer is munmap'ed when it
 *	     reaches zero
 * @prev - position up to which the buffer has been read so far; it is
 *	   compared against the ring buffer head to tell if it is empty
 */
struct perf_mmap {
void *base;
int mask;
int refcnt;
unsigned int prev;
char event_copy[PERF_SAMPLE_MAX_SIZE];
};
@ -29,7 +36,6 @@ struct perf_evlist {
struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
int nr_entries;
int nr_groups;
int nr_fds;
int nr_mmaps;
size_t mmap_len;
int id_pos;
@ -40,8 +46,8 @@ struct perf_evlist {
pid_t pid;
} workload;
bool overwrite;
struct fdarray pollfd;
struct perf_mmap *mmap;
struct pollfd *pollfd;
struct thread_map *threads;
struct cpu_map *cpus;
struct perf_evsel *selected;
@ -82,7 +88,11 @@ perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
int cpu, int thread, u64 id);
void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask);
int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);

View File

@ -736,7 +736,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
return NULL;
n = poll(evlist->pollfd, evlist->nr_fds, timeout);
n = perf_evlist__poll(evlist, timeout);
if (n < 0) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
@ -753,9 +753,9 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
PyObject *list = PyList_New(0);
int i;
for (i = 0; i < evlist->nr_fds; ++i) {
for (i = 0; i < evlist->pollfd.nr; ++i) {
PyObject *file;
FILE *fp = fdopen(evlist->pollfd[i].fd, "r");
FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r");
if (fp == NULL)
goto free_list;