From 8a947c7a586e16a048894e1a0a73d154435e90ef Mon Sep 17 00:00:00 2001 From: Haiyue Wang Date: Tue, 22 Feb 2022 00:24:01 +0800 Subject: [PATCH 1/2] aio-posix: fix build failure io_uring 2.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The io_uring fixed "Don't truncate addr fields to 32-bit on 32-bit": https://git.kernel.dk/cgit/liburing/commit/?id=d84c29b19ed0b130000619cff40141bb1fc3615b This leads to build failure: ../util/fdmon-io_uring.c: In function ‘add_poll_remove_sqe’: ../util/fdmon-io_uring.c:182:36: error: passing argument 2 of ‘io_uring_prep_poll_remove’ makes integer from pointer without a cast [-Werror=int-conversion] 182 | io_uring_prep_poll_remove(sqe, node); | ^~~~ | | | AioHandler * In file included from /root/io/qemu/include/block/aio.h:18, from ../util/aio-posix.h:20, from ../util/fdmon-io_uring.c:49: /usr/include/liburing.h:415:17: note: expected ‘__u64’ {aka ‘long long unsigned int’} but argument is of type ‘AioHandler *’ 415 | __u64 user_data) | ~~~~~~^~~~~~~~~ cc1: all warnings being treated as errors Use LIBURING_HAVE_DATA64 to check whether the io_uring supports 64-bit variants of the get/set userdata, to convert the paramter to the right data type. Signed-off-by: Haiyue Wang Message-Id: <20220221162401.45415-1-haiyue.wang@intel.com> Signed-off-by: Stefan Hajnoczi --- util/fdmon-io_uring.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c index 1461dfa407..ab43052dd7 100644 --- a/util/fdmon-io_uring.c +++ b/util/fdmon-io_uring.c @@ -179,7 +179,11 @@ static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node) { struct io_uring_sqe *sqe = get_sqe(ctx); +#ifdef LIBURING_HAVE_DATA64 + io_uring_prep_poll_remove(sqe, (__u64)(uintptr_t)node); +#else io_uring_prep_poll_remove(sqe, node); +#endif } /* Add a timeout that self-cancels when another cqe becomes ready */ From fc8796465c6cd4091efe6a2f8b353f07324f49c7 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 23 Feb 2022 15:57:03 +0000 Subject: [PATCH 2/2] aio-posix: fix spurious ->poll_ready() callbacks in main loop When ->poll() succeeds the AioHandler is placed on the ready list with revents set to the magic value 0. This magic value causes aio_dispatch_handler() to invoke ->poll_ready() instead of ->io_read() for G_IO_IN or ->io_write() for G_IO_OUT. This magic value 0 hack works for the IOThread where AioHandlers are placed on ->ready_list and processed by aio_dispatch_ready_handlers(). It does not work for the main loop where all AioHandlers are processed by aio_dispatch_handlers(), even those that are not ready and have a revents value of 0. As a result the main loop invokes ->poll_ready() on AioHandlers that are not ready. These spurious ->poll_ready() calls waste CPU cycles and could lead to crashes if the code assumes ->poll() must have succeeded before ->poll_ready() is called (a reasonable asumption but I haven't seen it in practice). Stop using revents to track whether ->poll_ready() will be called on an AioHandler. Introduce a separate AioHandler->poll_ready field instead. This eliminates spurious ->poll_ready() calls in the main loop. Fixes: 826cc32423db2a99d184dbf4f507c737d7e7a4ae ("aio-posix: split poll check from ready handler") Signed-off-by: Stefan Hajnoczi Reported-by: Jason Wang Tested-by: Jason Wang Message-id: 20220223155703.136833-1-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi --- util/aio-posix.c | 32 ++++++++++++++++++-------------- util/aio-posix.h | 1 + 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/util/aio-posix.c b/util/aio-posix.c index 7b9f629218..be0182a3c6 100644 --- a/util/aio-posix.c +++ b/util/aio-posix.c @@ -23,15 +23,6 @@ #include "trace.h" #include "aio-posix.h" -/* - * G_IO_IN and G_IO_OUT are not appropriate revents values for polling, since - * the handler may not need to access the file descriptor. For example, the - * handler doesn't need to read from an EventNotifier if it polled a memory - * location and a read syscall would be slow. Define our own unique revents - * value to indicate that polling determined this AioHandler is ready. - */ -#define REVENTS_POLL_READY 0 - /* Stop userspace polling on a handler if it isn't active for some time */ #define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND) @@ -49,6 +40,14 @@ void aio_add_ready_handler(AioHandlerList *ready_list, QLIST_INSERT_HEAD(ready_list, node, node_ready); } +static void aio_add_poll_ready_handler(AioHandlerList *ready_list, + AioHandler *node) +{ + QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */ + node->poll_ready = true; + QLIST_INSERT_HEAD(ready_list, node, node_ready); +} + static AioHandler *find_aio_handler(AioContext *ctx, int fd) { AioHandler *node; @@ -76,6 +75,7 @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node) } node->pfd.revents = 0; + node->poll_ready = false; /* If the fd monitor has already marked it deleted, leave it alone */ if (QLIST_IS_INSERTED(node, node_deleted)) { @@ -247,7 +247,7 @@ static bool poll_set_started(AioContext *ctx, AioHandlerList *ready_list, /* Poll one last time in case ->io_poll_end() raced with the event */ if (!started && node->io_poll(node->opaque)) { - aio_add_ready_handler(ready_list, node, REVENTS_POLL_READY); + aio_add_poll_ready_handler(ready_list, node); progress = true; } } @@ -282,6 +282,7 @@ bool aio_pending(AioContext *ctx) QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { int revents; + /* TODO should this check poll ready? */ revents = node->pfd.revents & node->pfd.events; if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read && aio_node_check(ctx, node->is_external)) { @@ -323,11 +324,15 @@ static void aio_free_deleted_handlers(AioContext *ctx) static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node) { bool progress = false; + bool poll_ready; int revents; revents = node->pfd.revents & node->pfd.events; node->pfd.revents = 0; + poll_ready = node->poll_ready; + node->poll_ready = false; + /* * Start polling AioHandlers when they become ready because activity is * likely to continue. Note that starvation is theoretically possible when @@ -344,7 +349,7 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node) QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll); } if (!QLIST_IS_INSERTED(node, node_deleted) && - revents == 0 && + poll_ready && revents == 0 && aio_node_check(ctx, node->is_external) && node->io_poll_ready) { node->io_poll_ready(node->opaque); @@ -432,7 +437,7 @@ static bool run_poll_handlers_once(AioContext *ctx, QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) { if (aio_node_check(ctx, node->is_external) && node->io_poll(node->opaque)) { - aio_add_ready_handler(ready_list, node, REVENTS_POLL_READY); + aio_add_poll_ready_handler(ready_list, node); node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS; @@ -491,8 +496,7 @@ static bool remove_idle_poll_handlers(AioContext *ctx, * this causes progress. */ if (node->io_poll(node->opaque)) { - aio_add_ready_handler(ready_list, node, - REVENTS_POLL_READY); + aio_add_poll_ready_handler(ready_list, node); progress = true; } } diff --git a/util/aio-posix.h b/util/aio-posix.h index 7f2c37a684..80b927c7f4 100644 --- a/util/aio-posix.h +++ b/util/aio-posix.h @@ -37,6 +37,7 @@ struct AioHandler { unsigned flags; /* see fdmon-io_uring.c */ #endif int64_t poll_idle_timeout; /* when to stop userspace polling */ + bool poll_ready; /* has polling detected an event? */ bool is_external; };