aa38e19f05
Unlike ppoll(2) and epoll(7), Linux io_uring completions can be polled from userspace. Previously userspace polling was only allowed when all AioHandlers had an ->io_poll() callback. This prevented starvation of fds by userspace pollable handlers. Add the FDMonOps->need_wait() callback that enables userspace polling even when some AioHandlers lack ->io_poll(). For example, it's now possible to do userspace polling when a TCP/IP socket is monitored thanks to Linux io_uring. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Link: https://lore.kernel.org/r/20200305170806.1313245-7-stefanha@redhat.com Message-Id: <20200305170806.1313245-7-stefanha@redhat.com>
156 lines
4.0 KiB
C
156 lines
4.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
/*
|
|
* epoll(7) file descriptor monitoring
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include <sys/epoll.h>
|
|
#include "qemu/rcu_queue.h"
|
|
#include "aio-posix.h"
|
|
|
|
/* The fd number threshold to switch to epoll
 * (see fdmon_epoll_try_upgrade(): the upgrade is attempted only once the
 * number of monitored fds reaches this value)
 */
#define EPOLL_ENABLE_THRESHOLD 64
|
|
|
|
/*
 * Stop using epoll(7) for @ctx and revert to the poll-based backend.
 *
 * Safe to call when epoll was never enabled: the fd close is skipped and
 * only the ops pointer is (re)set.
 */
void fdmon_epoll_disable(AioContext *ctx)
{
    if (ctx->epollfd >= 0) {
        close(ctx->epollfd);
        ctx->epollfd = -1;
    }

    /* Switch back to the fallback monitoring implementation */
    ctx->fdmon_ops = &fdmon_poll_ops;
}
|
|
|
|
static inline int epoll_events_from_pfd(int pfd_events)
|
|
{
|
|
return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
|
|
(pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
|
|
(pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
|
|
(pfd_events & G_IO_ERR ? EPOLLERR : 0);
|
|
}
|
|
|
|
static void fdmon_epoll_update(AioContext *ctx,
|
|
AioHandler *old_node,
|
|
AioHandler *new_node)
|
|
{
|
|
struct epoll_event event = {
|
|
.data.ptr = new_node,
|
|
.events = new_node ? epoll_events_from_pfd(new_node->pfd.events) : 0,
|
|
};
|
|
int r;
|
|
|
|
if (!new_node) {
|
|
r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, old_node->pfd.fd, &event);
|
|
} else if (!old_node) {
|
|
r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, new_node->pfd.fd, &event);
|
|
} else {
|
|
r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, new_node->pfd.fd, &event);
|
|
}
|
|
|
|
if (r) {
|
|
fdmon_epoll_disable(ctx);
|
|
}
|
|
}
|
|
|
|
/*
 * Wait for fd events using epoll(7) and queue ready handlers.
 *
 * Appends each ready AioHandler to @ready_list with its G_IO_* revents.
 * Returns the number of ready fds, 0 on timeout, or a negative value on
 * error (the return of qemu_poll_ns()/epoll_wait(2)).
 */
static int fdmon_epoll_wait(AioContext *ctx, AioHandlerList *ready_list,
                            int64_t timeout)
{
    GPollFD pfd = {
        .fd = ctx->epollfd,
        .events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR,
    };
    AioHandler *node;
    int i, ret = 0;
    struct epoll_event events[128]; /* max events harvested per call */

    /* Fall back while external clients are disabled */
    if (atomic_read(&ctx->external_disable_cnt)) {
        return fdmon_poll_ops.wait(ctx, ready_list, timeout);
    }

    /*
     * Block on the epollfd itself via qemu_poll_ns() first — presumably
     * because epoll_wait(2) only has millisecond timeout resolution while
     * qemu_poll_ns() takes nanoseconds (TODO confirm against
     * util/qemu-timer.c).  If the epollfd became readable, clear the
     * timeout so the epoll_wait() below only drains pending events.
     */
    if (timeout > 0) {
        ret = qemu_poll_ns(&pfd, 1, timeout);
        if (ret > 0) {
            timeout = 0;
        }
    }
    /*
     * Reached with timeout <= 0 (non-blocking or block-forever) or with
     * events pending (ret > 0, timeout forced to 0 above).  Note that a
     * positive timeout never reaches epoll_wait(), so the implicit
     * int64_t -> int narrowing of @timeout only ever passes 0 or a
     * negative value.
     */
    if (timeout <= 0 || ret > 0) {
        ret = epoll_wait(ctx->epollfd, events,
                         ARRAY_SIZE(events),
                         timeout);
        if (ret <= 0) {
            goto out;
        }
        /* Map EPOLL* bits back to G_IO_* and queue each ready handler */
        for (i = 0; i < ret; i++) {
            int ev = events[i].events;
            int revents = (ev & EPOLLIN ? G_IO_IN : 0) |
                          (ev & EPOLLOUT ? G_IO_OUT : 0) |
                          (ev & EPOLLHUP ? G_IO_HUP : 0) |
                          (ev & EPOLLERR ? G_IO_ERR : 0);

            /* data.ptr was set to the AioHandler in fdmon_epoll_update() */
            node = events[i].data.ptr;
            aio_add_ready_handler(ready_list, node, revents);
        }
    }
out:
    return ret;
}
|
|
|
|
/*
 * File descriptor monitoring backend based on epoll(7), installed by
 * fdmon_epoll_try_enable().
 */
static const FDMonOps fdmon_epoll_ops = {
    .update = fdmon_epoll_update,
    .wait = fdmon_epoll_wait,
    /* aio_poll_disabled is defined elsewhere; per the commit description,
     * need_wait() gates whether userspace polling may be used instead of
     * blocking in ->wait() */
    .need_wait = aio_poll_disabled,
};
|
|
|
|
/*
 * Register every live, interested AioHandler with the epoll instance and
 * switch @ctx over to fdmon_epoll_ops.
 *
 * Returns true on success.  Returns false if any epoll_ctl(2) ADD fails;
 * in that case some fds may already have been added, so the caller is
 * expected to tear down via fdmon_epoll_disable().
 */
static bool fdmon_epoll_try_enable(AioContext *ctx)
{
    AioHandler *node;
    struct epoll_event event;

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        /* Skip handlers being deleted and those with no events of interest */
        if (QLIST_IS_INSERTED(node, node_deleted) || !node->pfd.events) {
            continue;
        }

        event.data.ptr = node;
        event.events = epoll_events_from_pfd(node->pfd.events);
        if (epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event) != 0) {
            return false;
        }
    }

    ctx->fdmon_ops = &fdmon_epoll_ops;
    return true;
}
|
|
|
|
/*
 * Attempt to upgrade @ctx from poll-based monitoring to epoll(7) once the
 * number of monitored fds (@npfd) reaches EPOLL_ENABLE_THRESHOLD.
 *
 * Returns true if the upgrade succeeded, false otherwise (no epollfd,
 * external clients disabled, below the threshold, or registration failed).
 */
bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
{
    if (ctx->epollfd < 0) {
        return false;
    }

    /* Do not upgrade while external clients are disabled */
    if (atomic_read(&ctx->external_disable_cnt)) {
        return false;
    }

    if (npfd < EPOLL_ENABLE_THRESHOLD) {
        return false;
    }

    if (!fdmon_epoll_try_enable(ctx)) {
        /* Partial registration may have occurred; clean up fully */
        fdmon_epoll_disable(ctx);
        return false;
    }

    return true;
}
|
|
|
|
/*
 * Create the epoll instance for @ctx.
 *
 * On failure ctx->epollfd is left at -1 and a diagnostic is printed to
 * stderr; fdmon_epoll_try_upgrade() then refuses the upgrade and the
 * context keeps using the poll-based backend.
 */
void fdmon_epoll_setup(AioContext *ctx)
{
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
    if (ctx->epollfd == -1) {
        /* Fix: the message previously lacked a trailing newline, so it ran
         * into whatever was printed to stderr next. */
        fprintf(stderr, "Failed to create epoll instance: %s\n",
                strerror(errno));
    }
}
|