From 1ddec7f0d0ab5b71cf2cc5a782441c20e7afbcfb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Aug 2014 23:04:11 -0300 Subject: [PATCH 01/14] perf evlist: Introduce perf_evlist__filter_pollfd method To remove all entries in evlist->pollfd[] that have revents matching at least one of the bits in the specified mask. It'll adjust evlist->nr_fds to the number of unfiltered fds and will return this value, as a convenience and to avoid requiring direct access to internal state of perf_evlist objects. This will be used after polling the evlist fds so that we remove fds that were closed by the kernel. Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-y2sca7z3wicvvy40a50lozwm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 21 +++++++++++++++++++++ tools/perf/util/evlist.h | 2 ++ 2 files changed, 23 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a3e28b49128a..023bc3873ae9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -428,6 +428,27 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) evlist->nr_fds++; } +int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) +{ + int fd, nr_fds = 0; + + if (evlist->nr_fds == 0) + return 0; + + for (fd = 0; fd < evlist->nr_fds; ++fd) { + if (evlist->pollfd[fd].revents & revents_and_mask) + continue; + + if (fd != nr_fds) + evlist->pollfd[nr_fds] = evlist->pollfd[fd]; + + ++nr_fds; + } + + evlist->nr_fds = nr_fds; + return nr_fds; +} + static void perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, u64 id) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 106de53a6a74..1082420951f9 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -84,6 +84,8 @@ void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); +int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask); + struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); From 54dbfae3007b0c61727abba45af1e4c226908d82 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Aug 2014 23:34:06 -0300 Subject: [PATCH 02/14] perf tests: Add test for perf_evlist__filter_pollfd() That will use a synthetic evlist with just what is touched by this new method to check that it works as expected. Output in verbose mode: $ perf test -v pollfd 33: Filter fds with revents mask in a pollfd array : --- start --- filtering all but pollfd[2]: before: 5 [ 5, 4, 3, 2, 1 ] after: 1 [ 3 ] filtering all but (pollfd[0], pollfd[3]): before: 5 [ 5, 4, 3, 2, 1 ] after: 2 [ 5, 2 ] test child finished with 0 ---- end ---- Filter fds with revents mask in a pollfd array: Ok $ Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-x7c8liszdvc3ocmanf2cet8p@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 1 + tools/perf/tests/builtin-test.c | 4 ++ tools/perf/tests/evlist.c | 103 ++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 4 files changed, 109 insertions(+) create mode 100644 tools/perf/tests/evlist.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 171f4e65601b..f287c2522cf5 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -400,6 +400,7 @@ LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o LIB_OBJS += $(OUTPUT)tests/mmap-basic.o LIB_OBJS += $(OUTPUT)tests/perf-record.o LIB_OBJS += $(OUTPUT)tests/rdpmc.o +LIB_OBJS += $(OUTPUT)tests/evlist.o LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o LIB_OBJS += $(OUTPUT)tests/pmu.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 6a4145e5ad2c..41e556edbe02 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -157,6 +157,10 @@ static struct test { .desc = "Test tracking with sched_switch", .func = test__switch_tracking, }, + { + .desc = "Filter fds with revents mask in a pollfd array", + .func = test__perf_evlist__filter_pollfd, + }, { .func = NULL, }, diff --git a/tools/perf/tests/evlist.c b/tools/perf/tests/evlist.c new file mode 100644 index 000000000000..77579158f4d9 --- /dev/null +++ b/tools/perf/tests/evlist.c @@ -0,0 +1,103 @@ +#include "util/evlist.h" +#include "util/debug.h" +#include "tests/tests.h" + +static void perf_evlist__init_pollfd(struct perf_evlist *evlist, + int nr_fds_alloc, short revents) +{ + int fd; + + evlist->nr_fds = nr_fds_alloc; + + for (fd = 0; fd < nr_fds_alloc; ++fd) { + evlist->pollfd[fd].fd = nr_fds_alloc - fd; + evlist->pollfd[fd].revents = revents; + } +} + +static int perf_evlist__fprintf_pollfd(struct perf_evlist *evlist, + const char *prefix, FILE *fp) +{ + int printed = 0, fd; + + if (!verbose) + return 0; + + printed += fprintf(fp, "\n%s: %3d [ ", prefix, evlist->nr_fds); + for (fd = 0; fd < evlist->nr_fds; ++fd) + printed += fprintf(fp, "%s%d", fd ? ", " : "", evlist->pollfd[fd].fd); + printed += fprintf(fp, " ]"); + return printed; +} + +int test__perf_evlist__filter_pollfd(void) +{ + const int nr_fds_alloc = 5; + int nr_fds, expected_fd[2], fd; + struct pollfd pollfd[nr_fds_alloc]; + struct perf_evlist evlist_alloc = { + .pollfd = pollfd, + }, *evlist = &evlist_alloc; + + perf_evlist__init_pollfd(evlist, nr_fds_alloc, POLLIN); + nr_fds = perf_evlist__filter_pollfd(evlist, POLLHUP); + if (nr_fds != nr_fds_alloc) { + pr_debug("\nperf_evlist__filter_pollfd()=%d != %d shouldn't have filtered anything", + nr_fds, nr_fds_alloc); + return TEST_FAIL; + } + + perf_evlist__init_pollfd(evlist, nr_fds_alloc, POLLHUP); + nr_fds = perf_evlist__filter_pollfd(evlist, POLLHUP); + if (nr_fds != 0) { + pr_debug("\nperf_evlist__filter_pollfd()=%d != %d, should have filtered all fds", + nr_fds, nr_fds_alloc); + return TEST_FAIL; + } + + perf_evlist__init_pollfd(evlist, nr_fds_alloc, POLLHUP); + pollfd[2].revents = POLLIN; + expected_fd[0] = pollfd[2].fd; + + pr_debug("\nfiltering all but pollfd[2]:"); + perf_evlist__fprintf_pollfd(evlist, "before", stderr); + nr_fds = perf_evlist__filter_pollfd(evlist, POLLHUP); + perf_evlist__fprintf_pollfd(evlist, " after", stderr); + if (nr_fds != 1) { + pr_debug("\nperf_evlist__filter_pollfd()=%d != 1, should have left just one event", + nr_fds); + return TEST_FAIL; + } + + if (pollfd[0].fd != expected_fd[0]) { + pr_debug("\npollfd[0].fd=%d != %d\n", pollfd[0].fd, expected_fd[0]); + return TEST_FAIL; + } + + perf_evlist__init_pollfd(evlist, nr_fds_alloc, POLLHUP); + pollfd[0].revents = POLLIN; + expected_fd[0] = pollfd[0].fd; + pollfd[3].revents = POLLIN; + expected_fd[1] = pollfd[3].fd; + + pr_debug("\nfiltering all but (pollfd[0], pollfd[3]):"); + perf_evlist__fprintf_pollfd(evlist, "before", stderr); + nr_fds = perf_evlist__filter_pollfd(evlist, POLLHUP); + perf_evlist__fprintf_pollfd(evlist, " after", stderr); + if (nr_fds != 2) { + pr_debug("\nperf_evlist__filter_pollfd()=%d != 2, should have left just two events", + nr_fds); + return TEST_FAIL; + } + + for (fd = 0; fd < 2; ++fd) { + if (pollfd[fd].fd != expected_fd[fd]) { + pr_debug("\npollfd[%d].fd=%d != %d\n", fd, pollfd[fd].fd, expected_fd[fd]); + return TEST_FAIL; + } + } + + pr_debug("\n"); + + return 0; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index be8be10e3957..72c4c039bd80 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -49,6 +49,7 @@ int test__thread_mg_share(void); int test__hists_output(void); int test__hists_cumulate(void); int test__switch_tracking(void); +int test__perf_evlist__filter_pollfd(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) #ifdef HAVE_DWARF_UNWIND_SUPPORT From 8179672c2f7b9c41a7ef3e8c907d214fa92ed614 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Aug 2014 11:26:21 -0300 Subject: [PATCH 03/14] perf evlist: Monitor POLLERR and POLLHUP events too We want to know when the fd went away, like when a monitored thread exits. If we do not monitor such events, then the tools will wait forever on events from a vanished thread, like when running: $ sleep 5s & $ perf record -p `pidof sleep` This builds upon the kernel patch by Jiri Olsa that actually makes a poll on those file descriptors to return POLLHUP. It is also needed to change the tools to use perf_evlist__filter_pollfd() to check if there are remainings fds to monitor or if all are gone, in which case they will exit the poll/mmap/read loop. Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-a4fslwspov0bs69nj825hqpq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 023bc3873ae9..502cd11ab17e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -424,7 +424,7 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) { fcntl(fd, F_SETFL, O_NONBLOCK); evlist->pollfd[evlist->nr_fds].fd = fd; - evlist->pollfd[evlist->nr_fds].events = POLLIN; + evlist->pollfd[evlist->nr_fds].events = POLLIN | POLLERR | POLLHUP; evlist->nr_fds++; } From 033fa713db66b96d5779e6a93d50ff821bc1abd2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Sep 2014 12:55:12 -0300 Subject: [PATCH 04/14] perf evlist: We need to poll all event file descriptors Because we want to notice when they get POLLHUP'ed, so that we can figure out when all threads exited in a workload being monitored. We can't just monitor the fds that were mmaped, we need to notice when all the fds that were PERF_EVENT_IOC_SET_OUTPUT'ed too, because the mmap stays even after the fd that originally was used to do the mmap call went away, its only when all the set-output fds for a mmap are gone that the mmap is. Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20140908151016.GH17728@krava.brq.redhat.com Link: http://lkml.kernel.org/n/tip-24omlq5asrfg4uo3muuzn2bl@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 502cd11ab17e..6b13bfa7ac2c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -717,8 +717,6 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, evlist->mmap[idx].base = NULL; return -1; } - - perf_evlist__add_pollfd(evlist, fd); return 0; } @@ -745,6 +743,8 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, return -1; } + perf_evlist__add_pollfd(evlist, fd); + if ((evsel->attr.read_format & PERF_FORMAT_ID) && perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) return -1; From ad6765dd3b2f043e819bdec565db8f5a2f781e06 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 18 Aug 2014 16:44:06 -0300 Subject: [PATCH 05/14] perf evlist: Allow growing pollfd on add method This way we will be able to add more file descriptors to be polled, like stdin or some timer fd. At this point we might as well yank the pollfd class from evlist so that it can be used in other places. Cc: Adrian Hunter Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-o2mzsjl7taumsoc35ryol00i@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 38 +++++++++++++++++++++++++++++++++----- tools/perf/util/evlist.h | 5 +++-- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6b13bfa7ac2c..bcf157c8a9da 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -402,7 +402,21 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, return perf_evlist__enable_event_thread(evlist, evsel, idx); } -static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) +static int perf_evlist__grow_pollfd(struct perf_evlist *evlist, int hint) +{ + int nr_fds_alloc = evlist->nr_fds_alloc + hint; + size_t size = sizeof(struct pollfd) * nr_fds_alloc; + struct pollfd *pollfd = realloc(evlist->pollfd, size); + + if (pollfd == NULL) + return -ENOMEM; + + evlist->nr_fds_alloc = nr_fds_alloc; + evlist->pollfd = pollfd; + return 0; +} + +int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { int nr_cpus = cpu_map__nr(evlist->cpus); int nr_threads = thread_map__nr(evlist->threads); @@ -416,16 +430,28 @@ static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) nfds += nr_cpus * nr_threads; } - evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); - return evlist->pollfd != NULL ? 0 : -ENOMEM; + if (evlist->nr_fds_alloc - evlist->nr_fds < nfds && + perf_evlist__grow_pollfd(evlist, nfds) < 0) + return -ENOMEM; + + return 0; } -void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) +int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) { + /* + * XXX: 64 is arbitrary, just not to call realloc at each fd. + * Find a better autogrowing heuristic + */ + if (evlist->nr_fds == evlist->nr_fds_alloc && + perf_evlist__grow_pollfd(evlist, 64) < 0) + return -ENOMEM; + fcntl(fd, F_SETFL, O_NONBLOCK); evlist->pollfd[evlist->nr_fds].fd = fd; evlist->pollfd[evlist->nr_fds].events = POLLIN | POLLERR | POLLHUP; evlist->nr_fds++; + return 0; } int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) @@ -717,6 +743,7 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, evlist->mmap[idx].base = NULL; return -1; } + return 0; } @@ -743,7 +770,8 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, return -1; } - perf_evlist__add_pollfd(evlist, fd); + if (perf_evlist__add_pollfd(evlist, fd) < 0) + return -1; if ((evsel->attr.read_format & PERF_FORMAT_ID) && perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 1082420951f9..bbc2fd01b5c5 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -30,6 +30,7 @@ struct perf_evlist { int nr_entries; int nr_groups; int nr_fds; + int nr_fds_alloc; int nr_mmaps; size_t mmap_len; int id_pos; @@ -82,8 +83,8 @@ perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist, void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, u64 id); -void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); - +int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); +int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); From 9ae28035b8677b82e1d71cea4f793cb5504ec104 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 18 Aug 2014 16:49:00 -0300 Subject: [PATCH 06/14] perf tests: Add pollfd growing test [acme@ssdandy linux]$ perf test "Add fd" 34: Add fd to pollfd array, making it autogrow : Ok [acme@ssdandy linux]$ perf test -v "Add fd" 34: Add fd to pollfd array, making it autogrow : --- start --- test child forked, pid 19817 before growing array: 2 [ 1, 2 ] after 3rd add_pollfd: 3 [ 1, 2, 35 ] after 4th add_pollfd: 4 [ 1, 2, 35, 88 ] test child finished with 0 ---- end ---- Add fd to pollfd array, making it autogrow: Ok [acme@ssdandy linux]$ Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-smflpyta146bzog7z0effjss@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 4 ++ tools/perf/tests/evlist.c | 111 ++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 3 files changed, 116 insertions(+) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 41e556edbe02..174c3ffc5713 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -161,6 +161,10 @@ static struct test { .desc = "Filter fds with revents mask in a pollfd array", .func = test__perf_evlist__filter_pollfd, }, + { + .desc = "Add fd to pollfd array, making it autogrow", + .func = test__perf_evlist__add_pollfd, + }, { .func = NULL, }, diff --git a/tools/perf/tests/evlist.c b/tools/perf/tests/evlist.c index 77579158f4d9..99d7dfd4e20a 100644 --- a/tools/perf/tests/evlist.c +++ b/tools/perf/tests/evlist.c @@ -1,5 +1,7 @@ #include "util/evlist.h" #include "util/debug.h" +#include "util/thread_map.h" +#include "util/cpumap.h" #include "tests/tests.h" static void perf_evlist__init_pollfd(struct perf_evlist *evlist, @@ -101,3 +103,112 @@ int test__perf_evlist__filter_pollfd(void) return 0; } + +int test__perf_evlist__add_pollfd(void) +{ + struct perf_evsel evsel = { + .system_wide = false, + }; + struct thread_map threads = { + .nr = 2, + }; + struct perf_evlist evlist_alloc = { + .pollfd = NULL, + .threads = &threads, + }, *evlist = &evlist_alloc; + + INIT_LIST_HEAD(&evlist->entries); + list_add(&evsel.node, &evlist->entries); + + if (perf_evlist__alloc_pollfd(evlist) < 0) { + pr_debug("\nperf_evlist__alloc_pollfd(evlist) failed!"); + return TEST_FAIL; + } + + if (evlist->nr_fds_alloc != threads.nr) { + pr_debug("\n_evlist__alloc_pollfd: nr_fds_alloc=%d != (threads->nr(%d) * cpu_map->nr(%d))=%d", + evlist->nr_fds_alloc, thread_map__nr(evlist->threads), cpu_map__nr(evlist->cpus), + thread_map__nr(evlist->threads) * cpu_map__nr(evlist->cpus)); + return TEST_FAIL; + } + + if (perf_evlist__add_pollfd(evlist, 1) < 0) { + pr_debug("\nperf_evlist__add_pollfd(evlist, 1) failed!"); + return TEST_FAIL; + } + + if (evlist->nr_fds != 1) { + pr_debug("\nperf_evlist__add_pollfd(evlist, 1)=%d != 1", evlist->nr_fds); + return TEST_FAIL; + } + + if (perf_evlist__add_pollfd(evlist, 2) < 0) { + pr_debug("\nperf_evlist__add_pollfd(evlist, 2) failed!"); + return TEST_FAIL; + } + + if (evlist->nr_fds != 2) { + pr_debug("\nperf_evlist__add_pollfd(evlist, 2)=%d != 2", evlist->nr_fds); + return TEST_FAIL; + } + + perf_evlist__fprintf_pollfd(evlist, "before growing array", stderr); + + if (perf_evlist__add_pollfd(evlist, 35) < 0) { + pr_debug("\nperf_evlist__add_pollfd(evlist, 35) failed!"); + return TEST_FAIL; + } + + if (evlist->nr_fds != 3) { + pr_debug("\nperf_evlist__add_pollfd(evlist, 35)=%d != 3", evlist->nr_fds); + return TEST_FAIL; + } + + if (evlist->pollfd == NULL) { + pr_debug("\nperf_evlist__add_pollfd(evlist, 35) should have allocated evlist->pollfd!"); + return TEST_FAIL; + } + + perf_evlist__fprintf_pollfd(evlist, "after 3rd add_pollfd", stderr); + + if (evlist->pollfd[2].fd != 35) { + pr_debug("\nevlist->pollfd[2](%d) != 35!", evlist->pollfd[2].fd); + return TEST_FAIL; + } + + if (perf_evlist__add_pollfd(evlist, 88) < 0) { + pr_debug("\nperf_evlist__add_pollfd(evlist, 88) failed!"); + return TEST_FAIL; + } + + if (evlist->nr_fds != 4) { + pr_debug("\nperf_evlist__add_pollfd(evlist, 88)=%d != 2", evlist->nr_fds); + return TEST_FAIL; + } + + perf_evlist__fprintf_pollfd(evlist, "after 4th add_pollfd", stderr); + + if (evlist->pollfd[0].fd != 1) { + pr_debug("\nevlist->pollfd[0](%d) != 1!", evlist->pollfd[0].fd); + return TEST_FAIL; + } + + if (evlist->pollfd[1].fd != 2) { + pr_debug("\nevlist->pollfd[1](%d) != 2!", evlist->pollfd[1].fd); + return TEST_FAIL; + } + + if (evlist->pollfd[2].fd != 35) { + pr_debug("\nevlist->pollfd[2](%d) != 35!", evlist->pollfd[2].fd); + return TEST_FAIL; + } + + if (evlist->pollfd[3].fd != 88) { + pr_debug("\nevlist->pollfd[3](%d) != 88!", evlist->pollfd[3].fd); + return TEST_FAIL; + } + + pr_debug("\n"); + + return 0; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 72c4c039bd80..699d4bb61dc7 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -50,6 +50,7 @@ int test__hists_output(void); int test__hists_cumulate(void); int test__switch_tracking(void); int test__perf_evlist__filter_pollfd(void); +int test__perf_evlist__add_pollfd(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) #ifdef HAVE_DWARF_UNWIND_SUPPORT From 0a04c9e0b2181aff8348b5e80d9d96ec8df1ffb3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 18 Aug 2014 17:12:30 -0300 Subject: [PATCH 07/14] perf kvm stat live: Use perf_evlist__add_pollfd() instead of local equivalent Since we can add file descriptors to the evlist pollfd and it will autogrow, no need to copy all events to a local pollfd array, just add the timer and stdin file descriptors. Reviewed-by: David Ahern Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-2hvp9iromiheh6rl4oaa08x5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index f5d3ae483110..a440219b0be0 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -919,15 +919,8 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) signal(SIGINT, sig_handler); signal(SIGTERM, sig_handler); - /* copy pollfds -- need to add timerfd and stdin */ + /* use pollfds -- need to add timerfd and stdin */ nr_fds = kvm->evlist->nr_fds; - pollfds = zalloc(sizeof(struct pollfd) * (nr_fds + 2)); - if (!pollfds) { - err = -ENOMEM; - goto out; - } - memcpy(pollfds, kvm->evlist->pollfd, - sizeof(struct pollfd) * kvm->evlist->nr_fds); /* add timer fd */ if (perf_kvm__timerfd_create(kvm) < 0) { @@ -935,17 +928,21 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) goto out; } - pollfds[nr_fds].fd = kvm->timerfd; - pollfds[nr_fds].events = POLLIN; + if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd)) + goto out; + nr_fds++; - pollfds[nr_fds].fd = fileno(stdin); - pollfds[nr_fds].events = POLLIN; + if (perf_evlist__add_pollfd(kvm->evlist, fileno(stdin))) + goto out; + nr_stdin = nr_fds; nr_fds++; if (fd_set_nonblock(fileno(stdin)) != 0) goto out; + pollfds = kvm->evlist->pollfd; + /* everything is good - enable the events and process */ perf_evlist__enable(kvm->evlist); @@ -979,7 +976,6 @@ out: close(kvm->timerfd); tcsetattr(0, TCSAFLUSH, &save); - free(pollfds); return err; } From f66a889dbc96dd342c87232d74f0956076707746 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 18 Aug 2014 17:25:59 -0300 Subject: [PATCH 08/14] perf evlist: Introduce poll method for common code idiom Since we have access two evlist members in all these poll calls, provide a helper. This will also help to make the patch introducing the pollfd class more clear, as the evlist specific uses will be hiden away perf_evlist__poll(). Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-jr9d4aop4lvy9453qahbcgp0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-top.c | 4 ++-- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/open-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 5 +++++ tools/perf/util/evlist.h | 2 ++ tools/perf/util/python.c | 2 +- 9 files changed, 15 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a1b040394170..a8c2e9dfb125 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -459,7 +459,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (hits == rec->samples) { if (done) break; - err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1); + err = perf_evlist__poll(rec->evlist, -1); /* * Propagate error, only if there's any. Ignore positive * number of returned events and interrupt error. diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index e13864be2acb..832fb527ed90 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -964,7 +964,7 @@ static int __cmd_top(struct perf_top *top) perf_evlist__enable(top->evlist); /* Wait for a minimal set of events before starting the snapshot */ - poll(top->evlist->pollfd, top->evlist->nr_fds, 100); + perf_evlist__poll(top->evlist, 100); perf_top__mmap_read(top); @@ -991,7 +991,7 @@ static int __cmd_top(struct perf_top *top) perf_top__mmap_read(top); if (hits == top->samples) - ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100); + ret = perf_evlist__poll(top->evlist, 100); } ret = 0; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a9e96ff49c7f..b8fedf3f9921 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2171,7 +2171,7 @@ next_event: if (trace->nr_events == before) { int timeout = done ? 100 : -1; - if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0) + if (perf_evlist__poll(evlist, timeout) > 0) goto again; } else { goto again; diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/open-syscall-tp-fields.c index 922bdb627950..127dcae0b760 100644 --- a/tools/perf/tests/open-syscall-tp-fields.c +++ b/tools/perf/tests/open-syscall-tp-fields.c @@ -105,7 +105,7 @@ int test__syscall_open_tp_fields(void) } if (nr_events == before) - poll(evlist->pollfd, evlist->nr_fds, 10); + perf_evlist__poll(evlist, 10); if (++nr_polls > 5) { pr_debug("%s: no events!\n", __func__); diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 2ce753c1db63..7a228a2a070b 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -268,7 +268,7 @@ int test__PERF_RECORD(void) * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does. */ if (total_events == before && false) - poll(evlist->pollfd, evlist->nr_fds, -1); + perf_evlist__poll(evlist, -1); sleep(1); if (++wakeups > 5) { diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 87522f01c7ad..3a8fedef83bc 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -105,7 +105,7 @@ retry: } if (!exited || !nr_exit) { - poll(evlist->pollfd, evlist->nr_fds, -1); + perf_evlist__poll(evlist, -1); goto retry; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index bcf157c8a9da..5ff3c667542f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -475,6 +475,11 @@ int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mas return nr_fds; } +int perf_evlist__poll(struct perf_evlist *evlist, int timeout) +{ + return poll(evlist->pollfd, evlist->nr_fds, timeout); +} + static void perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, u64 id) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index bbc2fd01b5c5..d7e99b67c94f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -87,6 +87,8 @@ int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask); +int perf_evlist__poll(struct perf_evlist *evlist, int timeout); + struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 12aa9b0d0ba1..4472f8be8e35 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -736,7 +736,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist, if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout)) return NULL; - n = poll(evlist->pollfd, evlist->nr_fds, timeout); + n = perf_evlist__poll(evlist, timeout); if (n < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; From 1b85337d0685d1dc5a6f9061434ba4316d69f3b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 3 Sep 2014 18:02:59 -0300 Subject: [PATCH 09/14] tools lib api: Adopt fdarray class from perf's evlist The extensible file description array that grew in the perf_evlist class can be useful for other tools, as it is not something that only evlists need, so move it to tools/lib/api/fd to ease sharing it. v2: Don't use {} like in: libapi_dirs: $(QUIET_MKDIR)mkdir -p $(OUTPUT){fs,fd}/ in Makefiles, as it will not work in some systems, as in ubuntu13.10. v3: Add fd/*.[ch] to LIBAPIKFS_SOURCES (Fix from Jiri Olsa) v4: Leave the fcntl(fd, O_NONBLOCK) in the evlist layer, remains to be checked if it is really needed there, but has no place in the fdarray class (Fix from Jiri Olsa) v5: Remove evlist details from fdarray grow/filter tests. Improve it a bit doing more tests about expected internal state. Cc: Adrian Hunter Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-kleuni3hckbc3s0lu6yb9x40@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 7 +- tools/lib/api/fd/array.c | 107 ++++++++++++++++ tools/lib/api/fd/array.h | 32 +++++ tools/perf/Makefile.perf | 4 +- tools/perf/builtin-kvm.c | 4 +- tools/perf/tests/builtin-test.c | 8 +- tools/perf/tests/evlist.c | 214 -------------------------------- tools/perf/tests/fdarray.c | 174 ++++++++++++++++++++++++++ tools/perf/tests/tests.h | 4 +- tools/perf/util/evlist.c | 57 ++------- tools/perf/util/evlist.h | 5 +- tools/perf/util/python.c | 4 +- 12 files changed, 342 insertions(+), 278 deletions(-) create mode 100644 tools/lib/api/fd/array.c create mode 100644 tools/lib/api/fd/array.h delete mode 100644 tools/perf/tests/evlist.c create mode 100644 tools/perf/tests/fdarray.c diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index ce00f7ee6455..36c08b1f4afb 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -10,9 +10,14 @@ LIB_OBJS= LIB_H += fs/debugfs.h LIB_H += fs/fs.h +# See comment below about piggybacking... +LIB_H += fd/array.h LIB_OBJS += $(OUTPUT)fs/debugfs.o LIB_OBJS += $(OUTPUT)fs/fs.o +# XXX piggybacking here, need to introduce libapikfd, or rename this +# to plain libapik.a and make it have it all api goodies +LIB_OBJS += $(OUTPUT)fd/array.o LIBFILE = libapikfs.a @@ -29,7 +34,7 @@ $(LIBFILE): $(LIB_OBJS) $(LIB_OBJS): $(LIB_H) libapi_dirs: - $(QUIET_MKDIR)mkdir -p $(OUTPUT)fs/ + $(QUIET_MKDIR)mkdir -p $(OUTPUT)fd $(OUTPUT)fs $(OUTPUT)%.o: %.c libapi_dirs $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c new file mode 100644 index 000000000000..4889c7d42961 --- /dev/null +++ b/tools/lib/api/fd/array.c @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2014, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. (and only v2, not any later version) + */ +#include "array.h" +#include +#include +#include +#include +#include + +void fdarray__init(struct fdarray *fda, int nr_autogrow) +{ + fda->entries = NULL; + fda->nr = fda->nr_alloc = 0; + fda->nr_autogrow = nr_autogrow; +} + +int fdarray__grow(struct fdarray *fda, int nr) +{ + int nr_alloc = fda->nr_alloc + nr; + size_t size = sizeof(struct pollfd) * nr_alloc; + struct pollfd *entries = realloc(fda->entries, size); + + if (entries == NULL) + return -ENOMEM; + + fda->nr_alloc = nr_alloc; + fda->entries = entries; + return 0; +} + +struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow) +{ + struct fdarray *fda = calloc(1, sizeof(*fda)); + + if (fda != NULL) { + if (fdarray__grow(fda, nr_alloc)) { + free(fda); + fda = NULL; + } else { + fda->nr_autogrow = nr_autogrow; + } + } + + return fda; +} + +void fdarray__exit(struct fdarray *fda) +{ + free(fda->entries); + fdarray__init(fda, 0); +} + +void fdarray__delete(struct fdarray *fda) +{ + fdarray__exit(fda); + free(fda); +} + +int fdarray__add(struct fdarray *fda, int fd, short revents) +{ + if (fda->nr == fda->nr_alloc && + fdarray__grow(fda, fda->nr_autogrow) < 0) + return -ENOMEM; + + fda->entries[fda->nr].fd = fd; + fda->entries[fda->nr].events = revents; + fda->nr++; + return 0; +} + +int fdarray__filter(struct fdarray *fda, short revents) +{ + int fd, nr = 0; + + if (fda->nr == 0) + return 0; + + for (fd = 0; fd < fda->nr; ++fd) { + if (fda->entries[fd].revents & revents) + continue; + + if (fd != nr) + fda->entries[nr] = fda->entries[fd]; + + ++nr; + } + + return fda->nr = nr; +} + +int fdarray__poll(struct fdarray *fda, int timeout) +{ + return poll(fda->entries, fda->nr, timeout); +} + +int fdarray__fprintf(struct fdarray *fda, FILE *fp) +{ + int fd, printed = fprintf(fp, "%d [ ", fda->nr); + + for (fd = 0; fd < fda->nr; ++fd) + printed += fprintf(fp, "%s%d", fd ? ", " : "", fda->entries[fd].fd); + + return printed + fprintf(fp, " ]"); +} diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h new file mode 100644 index 000000000000..de38361ba69e --- /dev/null +++ b/tools/lib/api/fd/array.h @@ -0,0 +1,32 @@ +#ifndef __API_FD_ARRAY__ +#define __API_FD_ARRAY__ + +#include + +struct pollfd; + +struct fdarray { + int nr; + int nr_alloc; + int nr_autogrow; + struct pollfd *entries; +}; + +void fdarray__init(struct fdarray *fda, int nr_autogrow); +void fdarray__exit(struct fdarray *fda); + +struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow); +void fdarray__delete(struct fdarray *fda); + +int fdarray__add(struct fdarray *fda, int fd, short revents); +int fdarray__poll(struct fdarray *fda, int timeout); +int fdarray__filter(struct fdarray *fda, short revents); +int fdarray__grow(struct fdarray *fda, int extra); +int fdarray__fprintf(struct fdarray *fda, FILE *fp); + +static inline int fdarray__available_entries(struct fdarray *fda) +{ + return fda->nr_alloc - fda->nr; +} + +#endif /* __API_FD_ARRAY__ */ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index f287c2522cf5..262916f4a377 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -400,9 +400,9 @@ LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o LIB_OBJS += $(OUTPUT)tests/mmap-basic.o LIB_OBJS += $(OUTPUT)tests/perf-record.o LIB_OBJS += $(OUTPUT)tests/rdpmc.o -LIB_OBJS += $(OUTPUT)tests/evlist.o LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o +LIB_OBJS += $(OUTPUT)tests/fdarray.o LIB_OBJS += $(OUTPUT)tests/pmu.o LIB_OBJS += $(OUTPUT)tests/hists_common.o LIB_OBJS += $(OUTPUT)tests/hists_link.o @@ -770,7 +770,7 @@ $(LIBTRACEEVENT)-clean: install-traceevent-plugins: $(LIBTRACEEVENT) $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins -LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch]) +LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch] $(LIB_PATH)fd/*.[ch]) # if subdir is set, we've been called from above so target has been built # already diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index a440219b0be0..1e639d6265cc 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -920,7 +920,7 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) signal(SIGTERM, sig_handler); /* use pollfds -- need to add timerfd and stdin */ - nr_fds = kvm->evlist->nr_fds; + nr_fds = kvm->evlist->pollfd.nr; /* add timer fd */ if (perf_kvm__timerfd_create(kvm) < 0) { @@ -941,7 +941,7 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) if (fd_set_nonblock(fileno(stdin)) != 0) goto out; - pollfds = kvm->evlist->pollfd; + pollfds = kvm->evlist->pollfd.entries; /* everything is good - enable the events and process */ perf_evlist__enable(kvm->evlist); diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 174c3ffc5713..ac655b0700e7 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -158,12 +158,12 @@ static struct test { .func = test__switch_tracking, }, { - .desc = "Filter fds with revents mask in a pollfd array", - .func = test__perf_evlist__filter_pollfd, + .desc = "Filter fds with revents mask in a fdarray", + .func = test__fdarray__filter, }, { - .desc = "Add fd to pollfd array, making it autogrow", - .func = test__perf_evlist__add_pollfd, + .desc = "Add fd to a fdarray, making it autogrow", + .func = test__fdarray__add, }, { .func = NULL, diff --git a/tools/perf/tests/evlist.c b/tools/perf/tests/evlist.c deleted file mode 100644 index 99d7dfd4e20a..000000000000 --- a/tools/perf/tests/evlist.c +++ /dev/null @@ -1,214 +0,0 @@ -#include "util/evlist.h" -#include "util/debug.h" -#include "util/thread_map.h" -#include "util/cpumap.h" -#include "tests/tests.h" - -static void perf_evlist__init_pollfd(struct perf_evlist *evlist, - int nr_fds_alloc, short revents) -{ - int fd; - - evlist->nr_fds = nr_fds_alloc; - - for (fd = 0; fd < nr_fds_alloc; ++fd) { - evlist->pollfd[fd].fd = nr_fds_alloc - fd; - evlist->pollfd[fd].revents = revents; - } -} - -static int perf_evlist__fprintf_pollfd(struct perf_evlist *evlist, - const char *prefix, FILE *fp) -{ - int printed = 0, fd; - - if (!verbose) - return 0; - - printed += fprintf(fp, "\n%s: %3d [ ", prefix, evlist->nr_fds); - for (fd = 0; fd < evlist->nr_fds; ++fd) - printed += fprintf(fp, "%s%d", fd ? ", " : "", evlist->pollfd[fd].fd); - printed += fprintf(fp, " ]"); - return printed; -} - -int test__perf_evlist__filter_pollfd(void) -{ - const int nr_fds_alloc = 5; - int nr_fds, expected_fd[2], fd; - struct pollfd pollfd[nr_fds_alloc]; - struct perf_evlist evlist_alloc = { - .pollfd = pollfd, - }, *evlist = &evlist_alloc; - - perf_evlist__init_pollfd(evlist, nr_fds_alloc, POLLIN); - nr_fds = perf_evlist__filter_pollfd(evlist, POLLHUP); - if (nr_fds != nr_fds_alloc) { - pr_debug("\nperf_evlist__filter_pollfd()=%d != %d shouldn't have filtered anything", - nr_fds, nr_fds_alloc); - return TEST_FAIL; - } - - perf_evlist__init_pollfd(evlist, nr_fds_alloc, POLLHUP); - nr_fds = perf_evlist__filter_pollfd(evlist, POLLHUP); - if (nr_fds != 0) { - pr_debug("\nperf_evlist__filter_pollfd()=%d != %d, should have filtered all fds", - nr_fds, nr_fds_alloc); - return TEST_FAIL; - } - - perf_evlist__init_pollfd(evlist, nr_fds_alloc, POLLHUP); - pollfd[2].revents = POLLIN; - expected_fd[0] = pollfd[2].fd; - - pr_debug("\nfiltering all but pollfd[2]:"); - perf_evlist__fprintf_pollfd(evlist, "before", stderr); - nr_fds = perf_evlist__filter_pollfd(evlist, POLLHUP); - perf_evlist__fprintf_pollfd(evlist, " after", stderr); - if (nr_fds != 1) { - pr_debug("\nperf_evlist__filter_pollfd()=%d != 1, should have left just one event", - nr_fds); - return TEST_FAIL; - } - - if (pollfd[0].fd != expected_fd[0]) { - pr_debug("\npollfd[0].fd=%d != %d\n", pollfd[0].fd, expected_fd[0]); - return TEST_FAIL; - } - - perf_evlist__init_pollfd(evlist, nr_fds_alloc, POLLHUP); - pollfd[0].revents = POLLIN; - expected_fd[0] = pollfd[0].fd; - pollfd[3].revents = POLLIN; - expected_fd[1] = pollfd[3].fd; - - pr_debug("\nfiltering all but (pollfd[0], pollfd[3]):"); - perf_evlist__fprintf_pollfd(evlist, "before", stderr); - nr_fds = perf_evlist__filter_pollfd(evlist, POLLHUP); - perf_evlist__fprintf_pollfd(evlist, " after", stderr); - if (nr_fds != 2) { - pr_debug("\nperf_evlist__filter_pollfd()=%d != 2, should have left just two events", - nr_fds); - return TEST_FAIL; - } - - for (fd = 0; fd < 2; ++fd) { - if (pollfd[fd].fd != expected_fd[fd]) { - pr_debug("\npollfd[%d].fd=%d != %d\n", fd, pollfd[fd].fd, expected_fd[fd]); - return TEST_FAIL; - } - } - - pr_debug("\n"); - - return 0; -} - -int test__perf_evlist__add_pollfd(void) -{ - struct perf_evsel evsel = { - .system_wide = false, - }; - struct thread_map threads = { - .nr = 2, - }; - struct perf_evlist evlist_alloc = { - .pollfd = NULL, - .threads = &threads, - }, *evlist = &evlist_alloc; - - INIT_LIST_HEAD(&evlist->entries); - list_add(&evsel.node, &evlist->entries); - - if (perf_evlist__alloc_pollfd(evlist) < 0) { - pr_debug("\nperf_evlist__alloc_pollfd(evlist) failed!"); - return TEST_FAIL; - } - - if (evlist->nr_fds_alloc != threads.nr) { - pr_debug("\n_evlist__alloc_pollfd: nr_fds_alloc=%d != (threads->nr(%d) * cpu_map->nr(%d))=%d", - evlist->nr_fds_alloc, thread_map__nr(evlist->threads), cpu_map__nr(evlist->cpus), - thread_map__nr(evlist->threads) * cpu_map__nr(evlist->cpus)); - return TEST_FAIL; - } - - if (perf_evlist__add_pollfd(evlist, 1) < 0) { - pr_debug("\nperf_evlist__add_pollfd(evlist, 1) failed!"); - return TEST_FAIL; - } - - if (evlist->nr_fds != 1) { - pr_debug("\nperf_evlist__add_pollfd(evlist, 1)=%d != 1", evlist->nr_fds); - return TEST_FAIL; - } - - if (perf_evlist__add_pollfd(evlist, 2) < 0) { - pr_debug("\nperf_evlist__add_pollfd(evlist, 2) failed!"); - return TEST_FAIL; - } - - if (evlist->nr_fds != 2) { - pr_debug("\nperf_evlist__add_pollfd(evlist, 2)=%d != 2", evlist->nr_fds); - return TEST_FAIL; - } - - perf_evlist__fprintf_pollfd(evlist, "before growing array", stderr); - - if (perf_evlist__add_pollfd(evlist, 35) < 0) { - pr_debug("\nperf_evlist__add_pollfd(evlist, 35) failed!"); - return TEST_FAIL; - } - - if (evlist->nr_fds != 3) { - pr_debug("\nperf_evlist__add_pollfd(evlist, 35)=%d != 3", evlist->nr_fds); - return TEST_FAIL; - } - - if (evlist->pollfd == NULL) { - pr_debug("\nperf_evlist__add_pollfd(evlist, 35) should have allocated evlist->pollfd!"); - return TEST_FAIL; - } - - perf_evlist__fprintf_pollfd(evlist, "after 3rd add_pollfd", stderr); - - if (evlist->pollfd[2].fd != 35) { - pr_debug("\nevlist->pollfd[2](%d) != 35!", evlist->pollfd[2].fd); - return TEST_FAIL; - } - - if (perf_evlist__add_pollfd(evlist, 88) < 0) { - pr_debug("\nperf_evlist__add_pollfd(evlist, 88) failed!"); - return TEST_FAIL; - } - - if (evlist->nr_fds != 4) { - pr_debug("\nperf_evlist__add_pollfd(evlist, 88)=%d != 2", evlist->nr_fds); - return TEST_FAIL; - } - - perf_evlist__fprintf_pollfd(evlist, "after 4th add_pollfd", stderr); - - if (evlist->pollfd[0].fd != 1) { - pr_debug("\nevlist->pollfd[0](%d) != 1!", evlist->pollfd[0].fd); - return TEST_FAIL; - } - - if (evlist->pollfd[1].fd != 2) { - pr_debug("\nevlist->pollfd[1](%d) != 2!", evlist->pollfd[1].fd); - return TEST_FAIL; - } - - if (evlist->pollfd[2].fd != 35) { - pr_debug("\nevlist->pollfd[2](%d) != 35!", evlist->pollfd[2].fd); - return TEST_FAIL; - } - - if (evlist->pollfd[3].fd != 88) { - pr_debug("\nevlist->pollfd[3](%d) != 88!", evlist->pollfd[3].fd); - return TEST_FAIL; - } - - pr_debug("\n"); - - return 0; -} diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c new file mode 100644 index 000000000000..a0fea62ec368 --- /dev/null +++ b/tools/perf/tests/fdarray.c @@ -0,0 +1,174 @@ +#include +#include "util/debug.h" +#include "tests/tests.h" + +static void fdarray__init_revents(struct fdarray *fda, short revents) +{ + int fd; + + fda->nr = fda->nr_alloc; + + for (fd = 0; fd < fda->nr; ++fd) { + fda->entries[fd].fd = fda->nr - fd; + fda->entries[fd].revents = revents; + } +} + +static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE *fp) +{ + int printed = 0; + + if (!verbose) + return 0; + + printed += fprintf(fp, "\n%s: ", prefix); + return printed + fdarray__fprintf(fda, fp); +} + +int test__fdarray__filter(void) +{ + int nr_fds, expected_fd[2], fd, err = TEST_FAIL; + struct fdarray *fda = fdarray__new(5, 5); + + if (fda == NULL) { + pr_debug("\nfdarray__new() failed!"); + goto out; + } + + fdarray__init_revents(fda, POLLIN); + nr_fds = fdarray__filter(fda, POLLHUP); + if (nr_fds != fda->nr_alloc) { + pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything", + nr_fds, fda->nr_alloc); + goto out_delete; + } + + fdarray__init_revents(fda, POLLHUP); + nr_fds = fdarray__filter(fda, POLLHUP); + if (nr_fds != 0) { + pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds", + nr_fds, fda->nr_alloc); + goto out_delete; + } + + fdarray__init_revents(fda, POLLHUP); + fda->entries[2].revents = POLLIN; + expected_fd[0] = fda->entries[2].fd; + + pr_debug("\nfiltering all but fda->entries[2]:"); + fdarray__fprintf_prefix(fda, "before", stderr); + nr_fds = fdarray__filter(fda, POLLHUP); + fdarray__fprintf_prefix(fda, " after", stderr); + if (nr_fds != 1) { + pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds); + goto out_delete; + } + + if (fda->entries[0].fd != expected_fd[0]) { + pr_debug("\nfda->entries[0].fd=%d != %d\n", + fda->entries[0].fd, expected_fd[0]); + goto out_delete; + } + + fdarray__init_revents(fda, POLLHUP); + fda->entries[0].revents = POLLIN; + expected_fd[0] = fda->entries[0].fd; + fda->entries[3].revents = POLLIN; + expected_fd[1] = fda->entries[3].fd; + + pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):"); + fdarray__fprintf_prefix(fda, "before", stderr); + nr_fds = fdarray__filter(fda, POLLHUP); + fdarray__fprintf_prefix(fda, " after", stderr); + if (nr_fds != 2) { + pr_debug("\nfdarray__filter()=%d != 2, should have left just two events", + nr_fds); + goto out_delete; + } + + for (fd = 0; fd < 2; ++fd) { + if (fda->entries[fd].fd != expected_fd[fd]) { + pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd, + fda->entries[fd].fd, expected_fd[fd]); + goto out_delete; + } + } + + pr_debug("\n"); + + err = 0; +out_delete: + fdarray__delete(fda); +out: + return err; +} + +int test__fdarray__add(void) +{ + int err = TEST_FAIL; + struct fdarray *fda = fdarray__new(2, 2); + + if (fda == NULL) { + pr_debug("\nfdarray__new() failed!"); + goto out; + } + +#define FDA_CHECK(_idx, _fd, _revents) \ + if (fda->entries[_idx].fd != _fd) { \ + pr_debug("\n%d: fda->entries[%d](%d) != %d!", \ + __LINE__, _idx, fda->entries[1].fd, _fd); \ + goto out_delete; \ + } \ + if (fda->entries[_idx].events != (_revents)) { \ + pr_debug("\n%d: fda->entries[%d].revents(%d) != %d!", \ + __LINE__, _idx, fda->entries[_idx].fd, _revents); \ + goto out_delete; \ + } + +#define FDA_ADD(_idx, _fd, _revents, _nr) \ + if (fdarray__add(fda, _fd, _revents) < 0) { \ + pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!", \ + __LINE__,_fd, _revents); \ + goto out_delete; \ + } \ + if (fda->nr != _nr) { \ + pr_debug("\n%d: fdarray__add(fda, %d, %d)=%d != %d", \ + __LINE__,_fd, _revents, fda->nr, _nr); \ + goto out_delete; \ + } \ + FDA_CHECK(_idx, _fd, _revents) + + FDA_ADD(0, 1, POLLIN, 1); + FDA_ADD(1, 2, POLLERR, 2); + + fdarray__fprintf_prefix(fda, "before growing array", stderr); + + FDA_ADD(2, 35, POLLHUP, 3); + + if (fda->entries == NULL) { + pr_debug("\nfdarray__add(fda, 35, POLLHUP) should have allocated fda->pollfd!"); + goto out_delete; + } + + fdarray__fprintf_prefix(fda, "after 3rd add", stderr); + + FDA_ADD(3, 88, POLLIN | POLLOUT, 4); + + fdarray__fprintf_prefix(fda, "after 4th add", stderr); + + FDA_CHECK(0, 1, POLLIN); + FDA_CHECK(1, 2, POLLERR); + FDA_CHECK(2, 35, POLLHUP); + FDA_CHECK(3, 88, POLLIN | POLLOUT); + +#undef FDA_ADD +#undef FDA_CHECK + + pr_debug("\n"); + + err = 0; +out_delete: + fdarray__delete(fda); +out: + return err; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 699d4bb61dc7..00e776a87a9c 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -49,8 +49,8 @@ int test__thread_mg_share(void); int test__hists_output(void); int test__hists_cumulate(void); int test__switch_tracking(void); -int test__perf_evlist__filter_pollfd(void); -int test__perf_evlist__add_pollfd(void); +int test__fdarray__filter(void); +int test__fdarray__add(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5ff3c667542f..398dab1a08cc 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -37,6 +37,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); perf_evlist__set_maps(evlist, cpus, threads); + fdarray__init(&evlist->pollfd, 64); evlist->workload.pid = -1; } @@ -102,7 +103,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist) { zfree(&evlist->mmap); - zfree(&evlist->pollfd); + fdarray__exit(&evlist->pollfd); } void perf_evlist__delete(struct perf_evlist *evlist) @@ -402,20 +403,6 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, return perf_evlist__enable_event_thread(evlist, evsel, idx); } -static int perf_evlist__grow_pollfd(struct perf_evlist *evlist, int hint) -{ - int nr_fds_alloc = evlist->nr_fds_alloc + hint; - size_t size = sizeof(struct pollfd) * nr_fds_alloc; - struct pollfd *pollfd = realloc(evlist->pollfd, size); - - if (pollfd == NULL) - return -ENOMEM; - - evlist->nr_fds_alloc = nr_fds_alloc; - evlist->pollfd = pollfd; - return 0; -} - int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { int nr_cpus = cpu_map__nr(evlist->cpus); @@ -430,8 +417,8 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) nfds += nr_cpus * nr_threads; } - if (evlist->nr_fds_alloc - evlist->nr_fds < nfds && - perf_evlist__grow_pollfd(evlist, nfds) < 0) + if (fdarray__available_entries(&evlist->pollfd) < nfds && + fdarray__grow(&evlist->pollfd, nfds) < 0) return -ENOMEM; return 0; @@ -439,45 +426,19 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) { - /* - * XXX: 64 is arbitrary, just not to call realloc at each fd. - * Find a better autogrowing heuristic - */ - if (evlist->nr_fds == evlist->nr_fds_alloc && - perf_evlist__grow_pollfd(evlist, 64) < 0) - return -ENOMEM; - fcntl(fd, F_SETFL, O_NONBLOCK); - evlist->pollfd[evlist->nr_fds].fd = fd; - evlist->pollfd[evlist->nr_fds].events = POLLIN | POLLERR | POLLHUP; - evlist->nr_fds++; - return 0; + + return fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP); } int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) { - int fd, nr_fds = 0; - - if (evlist->nr_fds == 0) - return 0; - - for (fd = 0; fd < evlist->nr_fds; ++fd) { - if (evlist->pollfd[fd].revents & revents_and_mask) - continue; - - if (fd != nr_fds) - evlist->pollfd[nr_fds] = evlist->pollfd[fd]; - - ++nr_fds; - } - - evlist->nr_fds = nr_fds; - return nr_fds; + return fdarray__filter(&evlist->pollfd, revents_and_mask); } int perf_evlist__poll(struct perf_evlist *evlist, int timeout) { - return poll(evlist->pollfd, evlist->nr_fds, timeout); + return fdarray__poll(&evlist->pollfd, timeout); } static void perf_evlist__id_hash(struct perf_evlist *evlist, @@ -935,7 +896,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) return -ENOMEM; - if (evlist->pollfd == NULL && perf_evlist__alloc_pollfd(evlist) < 0) + if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; evlist->overwrite = overwrite; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index d7e99b67c94f..fc013704d903 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -2,6 +2,7 @@ #define __PERF_EVLIST_H 1 #include +#include #include #include "../perf.h" #include "event.h" @@ -29,8 +30,6 @@ struct perf_evlist { struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; int nr_entries; int nr_groups; - int nr_fds; - int nr_fds_alloc; int nr_mmaps; size_t mmap_len; int id_pos; @@ -41,8 +40,8 @@ struct perf_evlist { pid_t pid; } workload; bool overwrite; + struct fdarray pollfd; struct perf_mmap *mmap; - struct pollfd *pollfd; struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 4472f8be8e35..3dda85ca50c1 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -753,9 +753,9 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, PyObject *list = PyList_New(0); int i; - for (i = 0; i < evlist->nr_fds; ++i) { + for (i = 0; i < evlist->pollfd.nr; ++i) { PyObject *file; - FILE *fp = fdopen(evlist->pollfd[i].fd, "r"); + FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r"); if (fp == NULL) goto free_list; From 82396986032915c1572bfb74b224fcc2e4e8ba7c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Sep 2014 13:26:35 -0300 Subject: [PATCH 10/14] perf evlist: Refcount mmaps We need to know how many fds are using a perf mmap via PERF_EVENT_IOC_SET_OUTPUT, so that we can know when to ditch an mmap, refcount it. v2: Automatically unmap it when the refcount hits one, which will happen when all fds are filtered by perf_evlist__filter_pollfd(), in later patches. Cc: Adrian Hunter Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20140908153824.GG2773@kernel.org Link: http://lkml.kernel.org/n/tip-cpv7v2lw0g74ucmxa39xdpms@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 47 ++++++++++++++++++++++++++++++++++++++-- tools/perf/util/evlist.h | 6 +++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 398dab1a08cc..efddee5a23e9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -28,6 +28,8 @@ #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) +static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx); + void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads) { @@ -651,14 +653,36 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) return event; } +static bool perf_mmap__empty(struct perf_mmap *md) +{ + return perf_mmap__read_head(md) != md->prev; +} + +static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx) +{ + ++evlist->mmap[idx].refcnt; +} + +static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx) +{ + BUG_ON(evlist->mmap[idx].refcnt == 0); + + if (--evlist->mmap[idx].refcnt == 0) + __perf_evlist__munmap(evlist, idx); +} + void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) { + struct perf_mmap *md = &evlist->mmap[idx]; + if (!evlist->overwrite) { - struct perf_mmap *md = &evlist->mmap[idx]; unsigned int old = md->prev; perf_mmap__write_tail(md, old); } + + if (md->refcnt == 1 && perf_mmap__empty(md)) + perf_evlist__mmap_put(evlist, idx); } static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) @@ -666,6 +690,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) if (evlist->mmap[idx].base != NULL) { munmap(evlist->mmap[idx].base, evlist->mmap_len); evlist->mmap[idx].base = NULL; + evlist->mmap[idx].refcnt = 0; } } @@ -699,6 +724,20 @@ struct mmap_params { static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, struct mmap_params *mp, int fd) { + /* + * The last one will be done at perf_evlist__mmap_consume(), so that we + * make sure we don't prevent tools from consuming every last event in + * the ring buffer. + * + * I.e. we can get the POLLHUP meaning that the fd doesn't exist + * anymore, but the last events for it are still in the ring buffer, + * waiting to be consumed. + * + * Tools can chose to ignore this at their own discretion, but the + * evlist layer can't just drop it when filtering events in + * perf_evlist__filter_pollfd(). + */ + evlist->mmap[idx].refcnt = 2; evlist->mmap[idx].prev = 0; evlist->mmap[idx].mask = mp->mask; evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot, @@ -734,10 +773,14 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, } else { if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) return -1; + + perf_evlist__mmap_get(evlist, idx); } - if (perf_evlist__add_pollfd(evlist, fd) < 0) + if (perf_evlist__add_pollfd(evlist, fd) < 0) { + perf_evlist__mmap_put(evlist, idx); return -1; + } if ((evsel->attr.read_format & PERF_FORMAT_ID) && perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index fc013704d903..bd312b01e876 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -18,9 +18,15 @@ struct record_opts; #define PERF_EVLIST__HLIST_BITS 8 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) +/** + * struct perf_mmap - perf's ring buffer mmap details + * + * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this + */ struct perf_mmap { void *base; int mask; + int refcnt; unsigned int prev; char event_copy[PERF_SAMPLE_MAX_SIZE]; }; From 2171a9256862ec139a042832a9ae737b942ca882 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Sep 2014 11:24:01 -0300 Subject: [PATCH 11/14] tools lib fd array: Allow associating an integer cookie with each entry We will use this in perf's evlist class so that it can, at fdarray__filter() time, to unmap the associated ring buffer. We may need to have further info associated with each fdarray entry, in that case we'll make that int array a 'union fdarray_priv' one and put a pointer there so that users can stash whatever they want there. For now, an int is enough tho. v2: Add clarification to the per array entry priv area, as well as make it a union, which makes usage a bit longer, but if/when we make it use more space by allowing per entry pointers existing users source code will not have to be changed, just rebuilt. Cc: Adrian Hunter Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/n/tip-0p00bn83quck3fio3kcs9vca@git.kernel.org --- tools/lib/api/fd/array.c | 30 +++++++++++++++++++++++++----- tools/lib/api/fd/array.h | 16 +++++++++++++++- tools/perf/tests/fdarray.c | 8 ++++---- tools/perf/util/evlist.c | 2 +- 4 files changed, 45 insertions(+), 11 deletions(-) diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c index 4889c7d42961..0e636c4339b8 100644 --- a/tools/lib/api/fd/array.c +++ b/tools/lib/api/fd/array.c @@ -13,21 +13,31 @@ void fdarray__init(struct fdarray *fda, int nr_autogrow) { fda->entries = NULL; + fda->priv = NULL; fda->nr = fda->nr_alloc = 0; fda->nr_autogrow = nr_autogrow; } int fdarray__grow(struct fdarray *fda, int nr) { + void *priv; int nr_alloc = fda->nr_alloc + nr; + size_t psize = sizeof(fda->priv[0]) * nr_alloc; size_t size = sizeof(struct pollfd) * nr_alloc; struct pollfd *entries = realloc(fda->entries, size); if (entries == NULL) return -ENOMEM; + priv = realloc(fda->priv, psize); + if (priv == NULL) { + free(entries); + return -ENOMEM; + } + fda->nr_alloc = nr_alloc; fda->entries = entries; + fda->priv = priv; return 0; } @@ -50,6 +60,7 @@ struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow) void fdarray__exit(struct fdarray *fda) { free(fda->entries); + free(fda->priv); fdarray__init(fda, 0); } @@ -61,6 +72,8 @@ void fdarray__delete(struct fdarray *fda) int fdarray__add(struct fdarray *fda, int fd, short revents) { + int pos = fda->nr; + if (fda->nr == fda->nr_alloc && fdarray__grow(fda, fda->nr_autogrow) < 0) return -ENOMEM; @@ -68,10 +81,11 @@ int fdarray__add(struct fdarray *fda, int fd, short revents) fda->entries[fda->nr].fd = fd; fda->entries[fda->nr].events = revents; fda->nr++; - return 0; + return pos; } -int fdarray__filter(struct fdarray *fda, short revents) +int fdarray__filter(struct fdarray *fda, short revents, + void (*entry_destructor)(struct fdarray *fda, int fd)) { int fd, nr = 0; @@ -79,11 +93,17 @@ int fdarray__filter(struct fdarray *fda, short revents) return 0; for (fd = 0; fd < fda->nr; ++fd) { - if (fda->entries[fd].revents & revents) - continue; + if (fda->entries[fd].revents & revents) { + if (entry_destructor) + entry_destructor(fda, fd); - if (fd != nr) + continue; + } + + if (fd != nr) { fda->entries[nr] = fda->entries[fd]; + fda->priv[nr] = fda->priv[fd]; + } ++nr; } diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h index de38361ba69e..45db01818f45 100644 --- a/tools/lib/api/fd/array.h +++ b/tools/lib/api/fd/array.h @@ -5,11 +5,24 @@ struct pollfd; +/** + * struct fdarray: Array of file descriptors + * + * @priv: Per array entry priv area, users should access just its contents, + * not set it to anything, as it is kept in synch with @entries, being + * realloc'ed, * for instance, in fdarray__{grow,filter}. + * + * I.e. using 'fda->priv[N].idx = * value' where N < fda->nr is ok, + * but doing 'fda->priv = malloc(M)' is not allowed. + */ struct fdarray { int nr; int nr_alloc; int nr_autogrow; struct pollfd *entries; + union { + int idx; + } *priv; }; void fdarray__init(struct fdarray *fda, int nr_autogrow); @@ -20,7 +33,8 @@ void fdarray__delete(struct fdarray *fda); int fdarray__add(struct fdarray *fda, int fd, short revents); int fdarray__poll(struct fdarray *fda, int timeout); -int fdarray__filter(struct fdarray *fda, short revents); +int fdarray__filter(struct fdarray *fda, short revents, + void (*entry_destructor)(struct fdarray *fda, int fd)); int fdarray__grow(struct fdarray *fda, int extra); int fdarray__fprintf(struct fdarray *fda, FILE *fp); diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c index a0fea62ec368..d24b837951d4 100644 --- a/tools/perf/tests/fdarray.c +++ b/tools/perf/tests/fdarray.c @@ -36,7 +36,7 @@ int test__fdarray__filter(void) } fdarray__init_revents(fda, POLLIN); - nr_fds = fdarray__filter(fda, POLLHUP); + nr_fds = fdarray__filter(fda, POLLHUP, NULL); if (nr_fds != fda->nr_alloc) { pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything", nr_fds, fda->nr_alloc); @@ -44,7 +44,7 @@ int test__fdarray__filter(void) } fdarray__init_revents(fda, POLLHUP); - nr_fds = fdarray__filter(fda, POLLHUP); + nr_fds = fdarray__filter(fda, POLLHUP, NULL); if (nr_fds != 0) { pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds", nr_fds, fda->nr_alloc); @@ -57,7 +57,7 @@ int test__fdarray__filter(void) pr_debug("\nfiltering all but fda->entries[2]:"); fdarray__fprintf_prefix(fda, "before", stderr); - nr_fds = fdarray__filter(fda, POLLHUP); + nr_fds = fdarray__filter(fda, POLLHUP, NULL); fdarray__fprintf_prefix(fda, " after", stderr); if (nr_fds != 1) { pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds); @@ -78,7 +78,7 @@ int test__fdarray__filter(void) pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):"); fdarray__fprintf_prefix(fda, "before", stderr); - nr_fds = fdarray__filter(fda, POLLHUP); + nr_fds = fdarray__filter(fda, POLLHUP, NULL); fdarray__fprintf_prefix(fda, " after", stderr); if (nr_fds != 2) { pr_debug("\nfdarray__filter()=%d != 2, should have left just two events", diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index efddee5a23e9..61d18dc83e8e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -435,7 +435,7 @@ int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) { - return fdarray__filter(&evlist->pollfd, revents_and_mask); + return fdarray__filter(&evlist->pollfd, revents_and_mask, NULL); } int perf_evlist__poll(struct perf_evlist *evlist, int timeout) From e4b356b56cfe77b800a9bc2e6efefa6a069b8a78 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Sep 2014 11:27:49 -0300 Subject: [PATCH 12/14] perf evlist: Unmap when all refcounts to fd are gone and events drained As noticed by receiving a POLLHUP for all its pollfd entries. That will remove the refcount taken in perf_evlist__mmap_per_evsel(), and when all events are consumed via perf_evlist__mmap_read() + perf_evlist__mmap_consume(), the ring buffer will be unmap'ed. Thanks to Jiri Olsa for pointing out that we must wait till all events are consumed, not being ok to unmmap just when receiving all the POLLHUPs. Cc: Adrian Hunter Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-t10w1xk4myp7ca7m9fvip6a0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 61d18dc83e8e..3cebc9a8d52e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -25,11 +25,12 @@ #include #include +static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx); +static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx); + #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) -static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx); - void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads) { @@ -426,16 +427,38 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) return 0; } +static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx) +{ + int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP); + /* + * Save the idx so that when we filter out fds POLLHUP'ed we can + * close the associated evlist->mmap[] entry. + */ + if (pos >= 0) { + evlist->pollfd.priv[pos].idx = idx; + + fcntl(fd, F_SETFL, O_NONBLOCK); + } + + return pos; +} + int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) { - fcntl(fd, F_SETFL, O_NONBLOCK); + return __perf_evlist__add_pollfd(evlist, fd, -1); +} - return fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP); +static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd) +{ + struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd); + + perf_evlist__mmap_put(evlist, fda->priv[fd].idx); } int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) { - return fdarray__filter(&evlist->pollfd, revents_and_mask, NULL); + return fdarray__filter(&evlist->pollfd, revents_and_mask, + perf_evlist__munmap_filtered); } int perf_evlist__poll(struct perf_evlist *evlist, int timeout) @@ -777,7 +800,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, perf_evlist__mmap_get(evlist, idx); } - if (perf_evlist__add_pollfd(evlist, fd) < 0) { + if (__perf_evlist__add_pollfd(evlist, fd, idx) < 0) { perf_evlist__mmap_put(evlist, idx); return -1; } From 6dcf45ef9877863fb68c065e5ade3cdb6217c504 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Aug 2014 11:33:59 -0300 Subject: [PATCH 13/14] perf record: Filter out POLLHUP'ed file descriptors So that we don't continue polling on vanished file descriptors, i.e. file descriptors for events monitoring threads that exited. I.e. the following 'perf record' command now exits as expected, instead of staying in an eternal loop: $ sleep 5s & $ perf record -p `pidof sleep` Reported-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-8dg8o21t2ntzly2bfh53p3sg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a8c2e9dfb125..320b198b54dd 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -308,7 +308,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct record_opts *opts = &rec->opts; struct perf_data_file *file = &rec->file; struct perf_session *session; - bool disabled = false; + bool disabled = false, draining = false; rec->progname = argv[0]; @@ -457,7 +457,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } if (hits == rec->samples) { - if (done) + if (done || draining) break; err = perf_evlist__poll(rec->evlist, -1); /* @@ -467,6 +467,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (err > 0 || (err < 0 && errno == EINTR)) err = 0; waking++; + + if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) + draining = true; } /* From 46fb3c21d20415dd2693570c33d0ea6eb8745e04 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Sep 2014 14:39:48 -0300 Subject: [PATCH 14/14] perf trace: Filter out POLLHUP'ed file descriptors So that we don't continue polling on vanished file descriptors, i.e. file descriptors for events monitoring threads that exited. I.e. the following 'trace' command now exits as expected, instead of staying in an eternal loop: $ sleep 5s & $ trace -p `pidof sleep` Reported-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-6qegv786zbf6i8us6t4rxug9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b8fedf3f9921..fe39dc620179 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2044,6 +2044,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) int err = -1, i; unsigned long before; const bool forks = argc > 0; + bool draining = false; char sbuf[STRERR_BUFSIZE]; trace->live = true; @@ -2171,8 +2172,12 @@ next_event: if (trace->nr_events == before) { int timeout = done ? 100 : -1; - if (perf_evlist__poll(evlist, timeout) > 0) + if (!draining && perf_evlist__poll(evlist, timeout) > 0) { + if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0) + draining = true; + goto again; + } } else { goto again; }