aa38e19f05
Unlike ppoll(2) and epoll(7), Linux io_uring completions can be polled from userspace. Previously userspace polling was only allowed when all AioHandler's had an ->io_poll() callback. This prevented starvation of fds by userspace pollable handlers. Add the FDMonOps->need_wait() callback that enables userspace polling even when some AioHandlers lack ->io_poll(). For example, it's now possible to do userspace polling when a TCP/IP socket is monitored thanks to Linux io_uring. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Link: https://lore.kernel.org/r/20200305170806.1313245-7-stefanha@redhat.com Message-Id: <20200305170806.1313245-7-stefanha@redhat.com>
108 lines
2.9 KiB
C
108 lines
2.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * poll(2) file descriptor monitoring
 *
 * Uses ppoll(2) when available, g_poll() otherwise.
 */
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "aio-posix.h"
|
|
#include "qemu/rcu_queue.h"
|
|
|
|
/*
 * These thread-local variables are used only in fdmon_poll_wait() around the
 * call to the poll() system call.  In particular they are not used while
 * aio_poll is performing callbacks, which makes it much easier to think about
 * reentrancy!
 *
 * Stack-allocated arrays would be perfect but they have size limitations;
 * heap allocation is expensive enough that we want to reuse arrays across
 * calls to aio_poll().  And because poll() has to be called without holding
 * any lock, the arrays cannot be stored in AioContext.  Thread-local data
 * has none of the disadvantages of these three options.
 */
|
|
static __thread GPollFD *pollfds;
|
|
static __thread AioHandler **nodes;
|
|
static __thread unsigned npfd, nalloc;
|
|
static __thread Notifier pollfds_cleanup_notifier;
|
|
|
|
/*
 * Notifier callback that releases this thread's pollfds[] and nodes[] arrays.
 *
 * add_pollfd() registers it through qemu_thread_atexit_add().  Neither
 * parameter is used.  It must not run while a poll array is being built or
 * waited on, hence the npfd == 0 assertion.
 */
static void pollfds_cleanup(Notifier *n, void *unused)
{
    g_assert(npfd == 0);

    g_free(pollfds);
    g_free(nodes);

    /*
     * Clear the pointers together with nalloc: add_pollfd() treats
     * nalloc == 0 as "nothing allocated yet" and hands both pointers to
     * g_renew(), which must never be given an already freed address.
     */
    pollfds = NULL;
    nodes = NULL;
    nalloc = 0;
}
|
|
|
|
/*
 * Append @node to this thread's pollfds[]/nodes[] arrays.
 *
 * The arrays start out with room for 8 entries and double in size whenever
 * they are full.  The very first allocation also registers pollfds_cleanup()
 * through qemu_thread_atexit_add().
 */
static void add_pollfd(AioHandler *node)
{
    GPollFD *slot;

    if (npfd == nalloc) {
        if (nalloc != 0) {
            /* Keeps the doubling below from overflowing the unsigned count */
            g_assert(nalloc <= INT_MAX);
            nalloc *= 2;
        } else {
            pollfds_cleanup_notifier.notify = pollfds_cleanup;
            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
            nalloc = 8;
        }

        /* Both arrays always share the same capacity */
        pollfds = g_renew(GPollFD, pollfds, nalloc);
        nodes = g_renew(AioHandler *, nodes, nalloc);
    }

    nodes[npfd] = node;

    slot = &pollfds[npfd];
    slot->fd = node->pfd.fd;
    slot->events = node->pfd.events;
    slot->revents = 0;

    npfd++;
}
|
|
|
|
/*
 * Wait for activity on the file descriptors of @ctx using qemu_poll_ns().
 *
 * @ctx: context whose aio_handlers list is scanned for fds to monitor
 * @ready_list: handlers that reported events are added here through
 *              aio_add_ready_handler()
 * @timeout: passed unchanged to qemu_poll_ns() (or to the replacement
 *           ->wait() callback, see below)
 *
 * Handlers that are queued for deletion, have no events set, or fail
 * aio_node_check() are skipped.
 *
 * Returns: the value returned by qemu_poll_ns().  If
 * fdmon_epoll_try_upgrade() reports success, the call is instead delegated
 * to ctx->fdmon_ops->wait() and that callback's result is returned.
 */
static int fdmon_poll_wait(AioContext *ctx, AioHandlerList *ready_list,
                           int64_t timeout)
{
    AioHandler *node;
    int ret;

    /* The per-thread array must be empty on entry; see the resets below */
    assert(npfd == 0);

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events
                && aio_node_check(ctx, node->is_external)) {
            add_pollfd(node);
        }
    }

    /* epoll(7) is faster above a certain number of fds */
    if (fdmon_epoll_try_upgrade(ctx, npfd)) {
        /*
         * pollfds[] is not going to be used.  Reset npfd before leaving,
         * otherwise the next fdmon_poll_wait() call on this thread (and
         * pollfds_cleanup()) would trip over their npfd == 0 assertions.
         */
        npfd = 0;
        return ctx->fdmon_ops->wait(ctx, ready_list, timeout);
    }

    ret = qemu_poll_ns(pollfds, npfd, timeout);
    if (ret > 0) {
        unsigned i;

        /* Hand every fd that reported events to the caller's ready list */
        for (i = 0; i < npfd; i++) {
            int revents = pollfds[i].revents;

            if (revents) {
                aio_add_ready_handler(ready_list, nodes[i], revents);
            }
        }
    }

    npfd = 0;
    return ret;
}
|
|
|
|
static void fdmon_poll_update(AioContext *ctx,
|
|
AioHandler *old_node,
|
|
AioHandler *new_node)
|
|
{
|
|
/* Do nothing, AioHandler already contains the state we'll need */
|
|
}
|
|
|
|
const FDMonOps fdmon_poll_ops = {
|
|
.update = fdmon_poll_update,
|
|
.wait = fdmon_poll_wait,
|
|
.need_wait = aio_poll_disabled,
|
|
};
|