From 00eb93b5886519a35a06bf403e5be4c4cb3df25a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 7 Nov 2022 19:35:55 +0300 Subject: [PATCH 01/50] block: Inline bdrv_detach_child() The only caller is bdrv_root_unref_child(), let's just do the logic directly in it. It simplifies further conversion of bdrv_root_unref_child() to transaction actions. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221107163558.618889-2-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/block.c b/block.c index a18f052374..c0c1b3df91 100644 --- a/block.c +++ b/block.c @@ -3070,30 +3070,6 @@ static BdrvChild *bdrv_attach_child_noperm(BlockDriverState *parent_bs, tran, errp); } -static void bdrv_detach_child(BdrvChild *child) -{ - BlockDriverState *old_bs = child->bs; - - GLOBAL_STATE_CODE(); - bdrv_replace_child_noperm(child, NULL); - bdrv_child_free(child); - - if (old_bs) { - /* - * Update permissions for old node. We're just taking a parent away, so - * we're loosening restrictions. Errors of permission update are not - * fatal in this case, ignore them. - */ - bdrv_refresh_perms(old_bs, NULL); - - /* - * When the parent requiring a non-default AioContext is removed, the - * node moves back to the main AioContext - */ - bdrv_try_change_aio_context(old_bs, qemu_get_aio_context(), NULL, NULL); - } -} - /* * This function steals the reference to child_bs from the caller. * That reference is later dropped by bdrv_root_unref_child(). @@ -3182,12 +3158,28 @@ out: /* Callers must ensure that child->frozen is false. */ void bdrv_root_unref_child(BdrvChild *child) { - BlockDriverState *child_bs; + BlockDriverState *child_bs = child->bs; GLOBAL_STATE_CODE(); + bdrv_replace_child_noperm(child, NULL); + bdrv_child_free(child); + + if (child_bs) { + /* + * Update permissions for old node. We're just taking a parent away, so + * we're loosening restrictions. Errors of permission update are not + * fatal in this case, ignore them. + */ + bdrv_refresh_perms(child_bs, NULL); + + /* + * When the parent requiring a non-default AioContext is removed, the + * node moves back to the main AioContext + */ + bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL, + NULL); + } - child_bs = child->bs; - bdrv_detach_child(child); bdrv_unref(child_bs); } From f38eaec4c3618dfc4a23e20435cefb5bf8325264 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 7 Nov 2022 19:35:56 +0300 Subject: [PATCH 02/50] block: drop bdrv_remove_filter_or_cow_child Drop this simple wrapper used only in one place. We have too many graph modifying functions even without it. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/block.c b/block.c index c0c1b3df91..dc761209ac 100644 --- a/block.c +++ b/block.c @@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs); static void bdrv_remove_child(BdrvChild *child, Transaction *tran); -static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, - Transaction *tran); static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, @@ -5065,17 +5063,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) tran_add(tran, &bdrv_remove_child_drv, child); } -/* - * A function to remove backing-chain child of @bs if exists: cow child for - * format nodes (always .backing) and filter child for filters (may be .file or - * .backing) - */ -static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, - Transaction *tran) -{ - bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); -} - static int bdrv_replace_node_noperm(BlockDriverState *from, BlockDriverState *to, bool auto_skip, Transaction *tran, @@ -5160,7 +5147,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, } if (detach_subchain) { - bdrv_remove_filter_or_cow_child(to_cow_parent, tran); + bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); } found = g_hash_table_new(NULL, NULL); From f1316edbfcea3b31181f22d737ac7cf80b395355 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 7 Nov 2022 19:35:57 +0300 Subject: [PATCH 03/50] block: bdrv_refresh_perms(): allow external tran Allow passing external Transaction pointer, stop creating extra Transaction objects. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221107163558.618889-4-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/block.c b/block.c index dc761209ac..f2f9178832 100644 --- a/block.c +++ b/block.c @@ -2591,15 +2591,24 @@ char *bdrv_perm_names(uint64_t perm) } -static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) +/* @tran is allowed to be NULL. In this case no rollback is possible */ +static int bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, + Error **errp) { int ret; - Transaction *tran = tran_new(); + Transaction *local_tran = NULL; g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); GLOBAL_STATE_CODE(); + if (!tran) { + tran = local_tran = tran_new(); + } + ret = bdrv_list_refresh_perms(list, NULL, tran, errp); - tran_finalize(tran, ret); + + if (local_tran) { + tran_finalize(local_tran, ret); + } return ret; } @@ -2615,7 +2624,7 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, bdrv_child_set_perm(c, perm, shared, tran); - ret = bdrv_refresh_perms(c->bs, &local_err); + ret = bdrv_refresh_perms(c->bs, tran, &local_err); tran_finalize(tran, ret); @@ -3099,7 +3108,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, goto out; } - ret = bdrv_refresh_perms(child_bs, errp); + ret = bdrv_refresh_perms(child_bs, tran, errp); out: tran_finalize(tran, ret); @@ -3140,7 +3149,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, goto out; } - ret = bdrv_refresh_perms(parent_bs, errp); + ret = bdrv_refresh_perms(parent_bs, tran, errp); if (ret < 0) { goto out; } @@ -3168,7 +3177,7 @@ void bdrv_root_unref_child(BdrvChild *child) * we're loosening restrictions. Errors of permission update are not * fatal in this case, ignore them. */ - bdrv_refresh_perms(child_bs, NULL); + bdrv_refresh_perms(child_bs, NULL, NULL); /* * When the parent requiring a non-default AioContext is removed, the @@ -3410,7 +3419,7 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, goto out; } - ret = bdrv_refresh_perms(bs, errp); + ret = bdrv_refresh_perms(bs, tran, errp); out: tran_finalize(tran, ret); @@ -5223,7 +5232,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, goto out; } - ret = bdrv_refresh_perms(bs_new, errp); + ret = bdrv_refresh_perms(bs_new, tran, errp); out: tran_finalize(tran, ret); @@ -6523,7 +6532,7 @@ int bdrv_activate(BlockDriverState *bs, Error **errp) */ if (bs->open_flags & BDRV_O_INACTIVE) { bs->open_flags &= ~BDRV_O_INACTIVE; - ret = bdrv_refresh_perms(bs, errp); + ret = bdrv_refresh_perms(bs, NULL, errp); if (ret < 0) { bs->open_flags |= BDRV_O_INACTIVE; return ret; @@ -6668,7 +6677,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) * We only tried to loosen restrictions, so errors are not fatal, ignore * them. */ - bdrv_refresh_perms(bs, NULL); + bdrv_refresh_perms(bs, NULL, NULL); /* Recursively inactivate children */ QLIST_FOREACH(child, &bs->children, next) { From fb0ff4d1baf8012e7f358daf007967d65e1f545a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 7 Nov 2022 19:35:58 +0300 Subject: [PATCH 04/50] block: refactor bdrv_list_refresh_perms to allow any list of nodes We are going to increase usage of collecting nodes in a list to then update, and calling bdrv_topological_dfs() each time is not convenient, and not correct as we are going to interleave graph modifying with filling the node list. So, let's switch to a function that takes any list of nodes, adds all their subtrees and do topological sort. And finally, refresh permissions. While being here, make the function public, as we'll want to use it from blockdev.c in near future. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221107163558.618889-5-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 51 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/block.c b/block.c index f2f9178832..385ed3cd53 100644 --- a/block.c +++ b/block.c @@ -2521,8 +2521,12 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, return 0; } -static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, - Transaction *tran, Error **errp) +/* + * @list is a product of bdrv_topological_dfs() (may be called several times) - + * a topologically sorted subgraph. + */ +static int bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q, + Transaction *tran, Error **errp) { int ret; BlockDriverState *bs; @@ -2544,6 +2548,24 @@ static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, return 0; } +/* + * @list is any list of nodes. List is completed by all subtrees and + * topologically sorted. It's not a problem if some node occurs in the @list + * several times. + */ +static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, + Transaction *tran, Error **errp) +{ + g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL); + g_autoptr(GSList) refresh_list = NULL; + + for ( ; list; list = list->next) { + refresh_list = bdrv_topological_dfs(refresh_list, found, list->data); + } + + return bdrv_do_refresh_perms(refresh_list, q, tran, errp); +} + void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, uint64_t *shared_perm) { @@ -2604,7 +2626,7 @@ static int bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, tran = local_tran = tran_new(); } - ret = bdrv_list_refresh_perms(list, NULL, tran, errp); + ret = bdrv_do_refresh_perms(list, NULL, tran, errp); if (local_tran) { tran_finalize(local_tran, ret); @@ -4360,7 +4382,6 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) BlockReopenQueueEntry *bs_entry, *next; AioContext *ctx; Transaction *tran = tran_new(); - g_autoptr(GHashTable) found = NULL; g_autoptr(GSList) refresh_list = NULL; assert(qemu_get_current_aio_context() == qemu_get_aio_context()); @@ -4390,18 +4411,15 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) bs_entry->prepared = true; } - found = g_hash_table_new(NULL, NULL); QTAILQ_FOREACH(bs_entry, bs_queue, entry) { BDRVReopenState *state = &bs_entry->state; - refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs); + refresh_list = g_slist_prepend(refresh_list, state->bs); if (state->old_backing_bs) { - refresh_list = bdrv_topological_dfs(refresh_list, found, - state->old_backing_bs); + refresh_list = g_slist_prepend(refresh_list, state->old_backing_bs); } if (state->old_file_bs) { - refresh_list = bdrv_topological_dfs(refresh_list, found, - state->old_file_bs); + refresh_list = g_slist_prepend(refresh_list, state->old_file_bs); } } @@ -5118,7 +5136,6 @@ static int bdrv_replace_node_common(BlockDriverState *from, Error **errp) { Transaction *tran = tran_new(); - g_autoptr(GHashTable) found = NULL; g_autoptr(GSList) refresh_list = NULL; BlockDriverState *to_cow_parent = NULL; int ret; @@ -5159,10 +5176,8 @@ static int bdrv_replace_node_common(BlockDriverState *from, bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); } - found = g_hash_table_new(NULL, NULL); - - refresh_list = bdrv_topological_dfs(refresh_list, found, to); - refresh_list = bdrv_topological_dfs(refresh_list, found, from); + refresh_list = g_slist_prepend(refresh_list, to); + refresh_list = g_slist_prepend(refresh_list, from); ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); if (ret < 0) { @@ -5247,7 +5262,6 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, { int ret; Transaction *tran = tran_new(); - g_autoptr(GHashTable) found = NULL; g_autoptr(GSList) refresh_list = NULL; BlockDriverState *old_bs = child->bs; @@ -5259,9 +5273,8 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, bdrv_replace_child_tran(child, new_bs, tran); - found = g_hash_table_new(NULL, NULL); - refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); - refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs); + refresh_list = g_slist_prepend(refresh_list, old_bs); + refresh_list = g_slist_prepend(refresh_list, new_bs); ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); From 6d47eb0c8bf2d50682c7dccae74d24104076fe23 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:40:56 +0100 Subject: [PATCH 05/50] qed: Don't yield in bdrv_qed_co_drain_begin() We want to change .bdrv_co_drained_begin() back to be a non-coroutine callback, so in preparation, avoid yielding in its implementation. Because we increase bs->in_flight and bdrv_drained_begin() polls, the behaviour is unchanged. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-2-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/qed.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/block/qed.c b/block/qed.c index 0ab159b468..af2fbec0b7 100644 --- a/block/qed.c +++ b/block/qed.c @@ -282,9 +282,8 @@ static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s) qemu_co_mutex_unlock(&s->table_lock); } -static void coroutine_fn qed_need_check_timer_entry(void *opaque) +static void coroutine_fn qed_need_check_timer(BDRVQEDState *s) { - BDRVQEDState *s = opaque; int ret; trace_qed_need_check_timer_cb(s); @@ -310,9 +309,20 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque) (void) ret; } +static void coroutine_fn qed_need_check_timer_entry(void *opaque) +{ + BDRVQEDState *s = opaque; + + qed_need_check_timer(opaque); + bdrv_dec_in_flight(s->bs); +} + static void qed_need_check_timer_cb(void *opaque) { + BDRVQEDState *s = opaque; Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque); + + bdrv_inc_in_flight(s->bs); qemu_coroutine_enter(co); } @@ -363,8 +373,12 @@ static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) * header is flushed. */ if (s->need_check_timer && timer_pending(s->need_check_timer)) { + Coroutine *co; + qed_cancel_need_check_timer(s); - qed_need_check_timer_entry(s); + co = qemu_coroutine_create(qed_need_check_timer_entry, s); + bdrv_inc_in_flight(bs); + aio_co_enter(bdrv_get_aio_context(bs), co); } } From 7bce1c299834557bffd92294608ea528648cfe75 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:40:57 +0100 Subject: [PATCH 06/50] test-bdrv-drain: Don't yield in .bdrv_co_drained_begin/end() We want to change .bdrv_co_drained_begin/end() back to be non-coroutine callbacks, so in preparation, avoid yielding in their implementation. This does almost the same as the existing logic in bdrv_drain_invoke(), by creating and entering coroutines internally. However, since the test case is by far the heaviest user of coroutine code in drain callbacks, it is preferable to have the complexity in the test case rather than the drain core, which is already complicated enough without this. The behaviour for bdrv_drain_begin() is unchanged because we increase bs->in_flight and this is still polled. However, bdrv_drain_end() doesn't wait for the spawned coroutine to complete any more. This is fine, we don't rely on bdrv_drain_end() restarting all operations immediately before the next aio_poll(). Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-3-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- tests/unit/test-bdrv-drain.c | 64 ++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 09dc4a4891..24f34e24ad 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -38,12 +38,22 @@ typedef struct BDRVTestState { bool sleep_in_drain_begin; } BDRVTestState; +static void coroutine_fn sleep_in_drain_begin(void *opaque) +{ + BlockDriverState *bs = opaque; + + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); + bdrv_dec_in_flight(bs); +} + static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) { BDRVTestState *s = bs->opaque; s->drain_count++; if (s->sleep_in_drain_begin) { - qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); + Coroutine *co = qemu_coroutine_create(sleep_in_drain_begin, bs); + bdrv_inc_in_flight(bs); + aio_co_enter(bdrv_get_aio_context(bs), co); } } @@ -1916,6 +1926,21 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs, return 0; } +static void coroutine_fn bdrv_replace_test_drain_co(void *opaque) +{ + BlockDriverState *bs = opaque; + BDRVReplaceTestState *s = bs->opaque; + + /* Keep waking io_co up until it is done */ + while (s->io_co) { + aio_co_wake(s->io_co); + s->io_co = NULL; + qemu_coroutine_yield(); + } + s->drain_co = NULL; + bdrv_dec_in_flight(bs); +} + /** * If .drain_count is 0, wake up .io_co if there is one; and set * .was_drained. @@ -1926,20 +1951,27 @@ static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) BDRVReplaceTestState *s = bs->opaque; if (!s->drain_count) { - /* Keep waking io_co up until it is done */ - s->drain_co = qemu_coroutine_self(); - while (s->io_co) { - aio_co_wake(s->io_co); - s->io_co = NULL; - qemu_coroutine_yield(); - } - s->drain_co = NULL; - + s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); + bdrv_inc_in_flight(bs); + aio_co_enter(bdrv_get_aio_context(bs), s->drain_co); s->was_drained = true; } s->drain_count++; } +static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) +{ + BlockDriverState *bs = opaque; + char data; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); + int ret; + + /* Queue a read request post-drain */ + ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); + g_assert(ret >= 0); + bdrv_dec_in_flight(bs); +} + /** * Reduce .drain_count, set .was_undrained once it reaches 0. * If .drain_count reaches 0 and the node has a backing file, issue a @@ -1951,17 +1983,13 @@ static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) g_assert(s->drain_count > 0); if (!--s->drain_count) { - int ret; - s->was_undrained = true; if (bs->backing) { - char data; - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); - - /* Queue a read request post-drain */ - ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); - g_assert(ret >= 0); + Coroutine *co = qemu_coroutine_create(bdrv_replace_test_read_entry, + bs); + bdrv_inc_in_flight(bs); + aio_co_enter(bdrv_get_aio_context(bs), co); } } } From 5e8ac21717373cbe96ef7a91e216bf5788815d63 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:40:58 +0100 Subject: [PATCH 07/50] block: Revert .bdrv_drained_begin/end to non-coroutine_fn Polling during bdrv_drained_end() can be problematic (and in the future, we may get cases for bdrv_drained_begin() where polling is forbidden, and we don't care about already in-flight requests, but just want to prevent new requests from arriving). The .bdrv_drained_begin/end callbacks running in a coroutine is the only reason why we have to do this polling, so make them non-coroutine callbacks again. None of the callers actually yield any more. This means that bdrv_drained_end() effectively doesn't poll any more, even if AIO_WAIT_WHILE() loops are still there (their condition is false from the beginning). This is generally not a problem, but in test-bdrv-drain, some additional explicit aio_poll() calls need to be added because the test case wants to verify the final state after BHs have executed. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-4-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 4 +-- block/io.c | 49 +++++--------------------------- block/qed.c | 6 ++-- block/throttle.c | 8 +++--- include/block/block_int-common.h | 10 ++++--- tests/unit/test-bdrv-drain.c | 18 ++++++------ 6 files changed, 32 insertions(+), 63 deletions(-) diff --git a/block.c b/block.c index 385ed3cd53..466770b9ac 100644 --- a/block.c +++ b/block.c @@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, assert(is_power_of_2(bs->bl.request_alignment)); for (i = 0; i < bs->quiesce_counter; i++) { - if (drv->bdrv_co_drain_begin) { - drv->bdrv_co_drain_begin(bs); + if (drv->bdrv_drain_begin) { + drv->bdrv_drain_begin(bs); } } diff --git a/block/io.c b/block/io.c index b9424024f9..c2ed4b2af9 100644 --- a/block/io.c +++ b/block/io.c @@ -252,55 +252,20 @@ typedef struct { int *drained_end_counter; } BdrvCoDrainData; -static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) -{ - BdrvCoDrainData *data = opaque; - BlockDriverState *bs = data->bs; - - if (data->begin) { - bs->drv->bdrv_co_drain_begin(bs); - } else { - bs->drv->bdrv_co_drain_end(bs); - } - - /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */ - qatomic_mb_set(&data->done, true); - if (!data->begin) { - qatomic_dec(data->drained_end_counter); - } - bdrv_dec_in_flight(bs); - - g_free(data); -} - -/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ +/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, int *drained_end_counter) { - BdrvCoDrainData *data; - - if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || - (!begin && !bs->drv->bdrv_co_drain_end)) { + if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || + (!begin && !bs->drv->bdrv_drain_end)) { return; } - data = g_new(BdrvCoDrainData, 1); - *data = (BdrvCoDrainData) { - .bs = bs, - .done = false, - .begin = begin, - .drained_end_counter = drained_end_counter, - }; - - if (!begin) { - qatomic_inc(drained_end_counter); + if (begin) { + bs->drv->bdrv_drain_begin(bs); + } else { + bs->drv->bdrv_drain_end(bs); } - - /* Make sure the driver callback completes during the polling phase for - * drain_begin. */ - bdrv_inc_in_flight(bs); - data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data); - aio_co_schedule(bdrv_get_aio_context(bs), data->co); } /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ diff --git a/block/qed.c b/block/qed.c index af2fbec0b7..e8ee332542 100644 --- a/block/qed.c +++ b/block/qed.c @@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s) assert(!s->allocating_write_reqs_plugged); if (s->allocating_acb != NULL) { /* Another allocating write came concurrently. This cannot happen - * from bdrv_qed_co_drain_begin, but it can happen when the timer runs. + * from bdrv_qed_drain_begin, but it can happen when the timer runs. */ qemu_co_mutex_unlock(&s->table_lock); return false; @@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, } } -static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) +static void bdrv_qed_drain_begin(BlockDriverState *bs) { BDRVQEDState *s = bs->opaque; @@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = { .bdrv_co_check = bdrv_qed_co_check, .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, - .bdrv_co_drain_begin = bdrv_qed_co_drain_begin, + .bdrv_drain_begin = bdrv_qed_drain_begin, }; static void bdrv_qed_init(void) diff --git a/block/throttle.c b/block/throttle.c index 131eba3ab4..88851c84f4 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state) reopen_state->opaque = NULL; } -static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) +static void throttle_drain_begin(BlockDriverState *bs) { ThrottleGroupMember *tgm = bs->opaque; if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) { @@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) } } -static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs) +static void throttle_drain_end(BlockDriverState *bs) { ThrottleGroupMember *tgm = bs->opaque; assert(tgm->io_limits_disabled); @@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = { .bdrv_reopen_commit = throttle_reopen_commit, .bdrv_reopen_abort = throttle_reopen_abort, - .bdrv_co_drain_begin = throttle_co_drain_begin, - .bdrv_co_drain_end = throttle_co_drain_end, + .bdrv_drain_begin = throttle_drain_begin, + .bdrv_drain_end = throttle_drain_end, .is_filter = true, .strong_runtime_opts = throttle_strong_runtime_opts, diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 31ae91e56e..40d646d1ed 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -735,17 +735,19 @@ struct BlockDriver { void (*bdrv_io_unplug)(BlockDriverState *bs); /** - * bdrv_co_drain_begin is called if implemented in the beginning of a + * bdrv_drain_begin is called if implemented in the beginning of a * drain operation to drain and stop any internal sources of requests in * the driver. - * bdrv_co_drain_end is called if implemented at the end of the drain. + * bdrv_drain_end is called if implemented at the end of the drain. * * They should be used by the driver to e.g. manage scheduled I/O * requests, or toggle an internal state. After the end of the drain new * requests will continue normally. + * + * Implementations of both functions must not call aio_poll(). */ - void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); - void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); + void (*bdrv_drain_begin)(BlockDriverState *bs); + void (*bdrv_drain_end)(BlockDriverState *bs); bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)( diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 24f34e24ad..695519ee02 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque) bdrv_dec_in_flight(bs); } -static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) +static void bdrv_test_drain_begin(BlockDriverState *bs) { BDRVTestState *s = bs->opaque; s->drain_count++; @@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) } } -static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) +static void bdrv_test_drain_end(BlockDriverState *bs) { BDRVTestState *s = bs->opaque; s->drain_count--; @@ -111,8 +111,8 @@ static BlockDriver bdrv_test = { .bdrv_close = bdrv_test_close, .bdrv_co_preadv = bdrv_test_co_preadv, - .bdrv_co_drain_begin = bdrv_test_co_drain_begin, - .bdrv_co_drain_end = bdrv_test_co_drain_end, + .bdrv_drain_begin = bdrv_test_drain_begin, + .bdrv_drain_end = bdrv_test_drain_end, .bdrv_child_perm = bdrv_default_perms, @@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void) bdrv_drained_begin(bs_child); g_assert(!job_has_completed); bdrv_drained_end(bs_child); + aio_poll(qemu_get_aio_context(), false); g_assert(job_has_completed); bdrv_unref(bs_parents[0]); @@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void) g_assert(!job_has_completed); ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); + aio_poll(qemu_get_aio_context(), false); g_assert(ret == 0); g_assert(job_has_completed); @@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque) * .was_drained. * Increment .drain_count. */ -static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) +static void bdrv_replace_test_drain_begin(BlockDriverState *bs) { BDRVReplaceTestState *s = bs->opaque; @@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) * If .drain_count reaches 0 and the node has a backing file, issue a * read request. */ -static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) +static void bdrv_replace_test_drain_end(BlockDriverState *bs) { BDRVReplaceTestState *s = bs->opaque; @@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = { .bdrv_close = bdrv_replace_test_close, .bdrv_co_preadv = bdrv_replace_test_co_preadv, - .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, - .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, + .bdrv_drain_begin = bdrv_replace_test_drain_begin, + .bdrv_drain_end = bdrv_replace_test_drain_end, .bdrv_child_perm = bdrv_default_perms, }; From 2f65df6e16dea2d6e7212fa675f4779d9281e26f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:40:59 +0100 Subject: [PATCH 08/50] block: Remove drained_end_counter drained_end_counter is unused now, nobody changes its value any more. It can be removed. In cases where we had two almost identical functions that only differed in whether the caller passes drained_end_counter, or whether they would poll for a local drained_end_counter to reach 0, these become a single function. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Emanuele Giuseppe Esposito Message-Id: <20221118174110.55183-5-kwolf@redhat.com> Reviewed-by: Hanna Reitz Signed-off-by: Kevin Wolf --- block.c | 5 +- block/block-backend.c | 4 +- block/io.c | 98 ++++++++------------------------ blockjob.c | 2 +- include/block/block-io.h | 24 -------- include/block/block_int-common.h | 6 +- 6 files changed, 30 insertions(+), 109 deletions(-) diff --git a/block.c b/block.c index 466770b9ac..ce71545551 100644 --- a/block.c +++ b/block.c @@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child) return bdrv_drain_poll(bs, false, NULL, false); } -static void bdrv_child_cb_drained_end(BdrvChild *child, - int *drained_end_counter) +static void bdrv_child_cb_drained_end(BdrvChild *child) { BlockDriverState *bs = child->opaque; - bdrv_drained_end_no_poll(bs, drained_end_counter); + bdrv_drained_end(bs); } static int bdrv_child_cb_inactivate(BdrvChild *child) diff --git a/block/block-backend.c b/block/block-backend.c index bf0ea3cfed..91e1d33a58 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, } static void blk_root_drained_begin(BdrvChild *child); static bool blk_root_drained_poll(BdrvChild *child); -static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter); +static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); @@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child) return busy || !!blk->in_flight; } -static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) +static void blk_root_drained_end(BdrvChild *child) { BlockBackend *blk = child->opaque; assert(blk->quiesce_counter); diff --git a/block/io.c b/block/io.c index c2ed4b2af9..f4ca62b034 100644 --- a/block/io.c +++ b/block/io.c @@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, } } -static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c, - int *drained_end_counter) +void bdrv_parent_drained_end_single(BdrvChild *c) { + IO_OR_GS_CODE(); + assert(c->parent_quiesce_counter > 0); c->parent_quiesce_counter--; if (c->klass->drained_end) { - c->klass->drained_end(c, drained_end_counter); + c->klass->drained_end(c); } } -void bdrv_parent_drained_end_single(BdrvChild *c) -{ - int drained_end_counter = 0; - AioContext *ctx = bdrv_child_get_parent_aio_context(c); - IO_OR_GS_CODE(); - bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter); - AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0); -} - static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, - bool ignore_bds_parents, - int *drained_end_counter) + bool ignore_bds_parents) { BdrvChild *c; @@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { continue; } - bdrv_parent_drained_end_single_no_poll(c, drained_end_counter); + bdrv_parent_drained_end_single(c); } } @@ -249,12 +240,10 @@ typedef struct { bool poll; BdrvChild *parent; bool ignore_bds_parents; - int *drained_end_counter; } BdrvCoDrainData; /* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ -static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, - int *drained_end_counter) +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) { if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || (!begin && !bs->drv->bdrv_drain_end)) { @@ -305,8 +294,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, BdrvChild *parent, bool ignore_bds_parents, bool poll); static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents, - int *drained_end_counter); + BdrvChild *parent, bool ignore_bds_parents); static void bdrv_co_drain_bh_cb(void *opaque) { @@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque) aio_context_acquire(ctx); bdrv_dec_in_flight(bs); if (data->begin) { - assert(!data->drained_end_counter); bdrv_do_drained_begin(bs, data->recursive, data->parent, data->ignore_bds_parents, data->poll); } else { assert(!data->poll); bdrv_do_drained_end(bs, data->recursive, data->parent, - data->ignore_bds_parents, - data->drained_end_counter); + data->ignore_bds_parents); } aio_context_release(ctx); } else { @@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, bool begin, bool recursive, BdrvChild *parent, bool ignore_bds_parents, - bool poll, - int *drained_end_counter) + bool poll) { BdrvCoDrainData data; Coroutine *self = qemu_coroutine_self(); @@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, .parent = parent, .ignore_bds_parents = ignore_bds_parents, .poll = poll, - .drained_end_counter = drained_end_counter, }; if (bs) { @@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, } bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); - bdrv_drain_invoke(bs, true, NULL); + bdrv_drain_invoke(bs, true); } static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, @@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, if (qemu_in_coroutine()) { bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, - poll, NULL); + poll); return; } @@ -461,38 +445,24 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs) /** * This function does not poll, nor must any of its recursively called - * functions. The *drained_end_counter pointee will be incremented - * once for every background operation scheduled, and decremented once - * the operation settles. Therefore, the pointer must remain valid - * until the pointee reaches 0. That implies that whoever sets up the - * pointee has to poll until it is 0. - * - * We use atomic operations to access *drained_end_counter, because - * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of - * @bs may contain nodes in different AioContexts, - * (2) bdrv_drain_all_end() uses the same counter for all nodes, - * regardless of which AioContext they are in. + * functions. */ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents, - int *drained_end_counter) + BdrvChild *parent, bool ignore_bds_parents) { BdrvChild *child; int old_quiesce_counter; - assert(drained_end_counter != NULL); - if (qemu_in_coroutine()) { bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, - false, drained_end_counter); + false); return; } assert(bs->quiesce_counter > 0); /* Re-enable things in child-to-parent order */ - bdrv_drain_invoke(bs, false, drained_end_counter); - bdrv_parent_drained_end(bs, parent, ignore_bds_parents, - drained_end_counter); + bdrv_drain_invoke(bs, false); + bdrv_parent_drained_end(bs, parent, ignore_bds_parents); old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); if (old_quiesce_counter == 1) { @@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, assert(!ignore_bds_parents); bs->recursive_quiesce_counter--; QLIST_FOREACH(child, &bs->children, next) { - bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents, - drained_end_counter); + bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); } } } void bdrv_drained_end(BlockDriverState *bs) { - int drained_end_counter = 0; IO_OR_GS_CODE(); - bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter); - BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); -} - -void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter) -{ - IO_CODE(); - bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter); + bdrv_do_drained_end(bs, false, NULL, false); } void bdrv_subtree_drained_end(BlockDriverState *bs) { - int drained_end_counter = 0; IO_OR_GS_CODE(); - bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter); - BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); + bdrv_do_drained_end(bs, true, NULL, false); } void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) @@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) { - int drained_end_counter = 0; int i; IO_OR_GS_CODE(); for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { - bdrv_do_drained_end(child->bs, true, child, false, - &drained_end_counter); + bdrv_do_drained_end(child->bs, true, child, false); } - - BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0); } void bdrv_drain(BlockDriverState *bs) @@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void) GLOBAL_STATE_CODE(); if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL); + bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); return; } @@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void) void bdrv_drain_all_end_quiesce(BlockDriverState *bs) { - int drained_end_counter = 0; GLOBAL_STATE_CODE(); g_assert(bs->quiesce_counter > 0); g_assert(!bs->refcnt); while (bs->quiesce_counter) { - bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); + bdrv_do_drained_end(bs, false, NULL, true); } - BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); } void bdrv_drain_all_end(void) { BlockDriverState *bs = NULL; - int drained_end_counter = 0; GLOBAL_STATE_CODE(); /* @@ -680,13 +632,11 @@ void bdrv_drain_all_end(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); + bdrv_do_drained_end(bs, false, NULL, true); aio_context_release(aio_context); } assert(qemu_get_current_aio_context() == qemu_get_aio_context()); - AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0); - assert(bdrv_drain_all_count > 0); bdrv_drain_all_count--; } diff --git a/blockjob.c b/blockjob.c index 3c8f3543a2..b7daf2a9f6 100644 --- a/blockjob.c +++ b/blockjob.c @@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c) } } -static void child_job_drained_end(BdrvChild *c, int *drained_end_counter) +static void child_job_drained_end(BdrvChild *c) { BlockJob *job = c->opaque; job_resume(&job->job); diff --git a/include/block/block-io.h b/include/block/block-io.h index b099d7db45..054e964c9b 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); -/** - * bdrv_drained_end_no_poll: - * - * Same as bdrv_drained_end(), but do not poll for the subgraph to - * actually become unquiesced. Therefore, no graph changes will occur - * with this function. - * - * *drained_end_counter is incremented for every background operation - * that is scheduled, and will be decremented for every operation once - * it settles. The caller must poll until it reaches 0. The counter - * should be accessed using atomic operations only. - */ -void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); - - /* * "I/O or GS" API functions. These functions can run without * the BQL, but only in one specific iothread/main loop. @@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); * bdrv_parent_drained_end_single: * * End a quiesced section for the parent of @c. - * - * This polls @bs's AioContext until all scheduled sub-drained_ends - * have settled, which may result in graph changes. */ void bdrv_parent_drained_end_single(BdrvChild *c); @@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); * bdrv_drained_end: * * End a quiescent section started by bdrv_drained_begin(). - * - * This polls @bs's AioContext until all scheduled sub-drained_ends - * have settled. On one hand, that may result in graph changes. On - * the other, this requires that the caller either runs in the main - * loop; or that all involved nodes (@bs and all of its parents) are - * in the caller's AioContext. */ void bdrv_drained_end(BlockDriverState *bs); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 40d646d1ed..2b97576f6d 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -939,15 +939,11 @@ struct BdrvChildClass { * These functions must not change the graph (and therefore also must not * call aio_poll(), which could change the graph indirectly). * - * If drained_end() schedules background operations, it must atomically - * increment *drained_end_counter for each such operation and atomically - * decrement it once the operation has settled. - * * Note that this can be nested. If drained_begin() was called twice, new * I/O is allowed only after drained_end() was called twice, too. */ void (*drained_begin)(BdrvChild *child); - void (*drained_end)(BdrvChild *child, int *drained_end_counter); + void (*drained_end)(BdrvChild *child); /* * Returns whether the parent has pending requests for the child. This From c7bc05f78ab31fb02fc9635f60b9bd22efc8d121 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:00 +0100 Subject: [PATCH 09/50] block: Inline bdrv_drain_invoke() bdrv_drain_invoke() has now two entirely separate cases that share no code any more and are selected depending on a bool parameter. Each case has only one caller. Just inline the function. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Stefan Hajnoczi Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-6-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/io.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/block/io.c b/block/io.c index f4ca62b034..a25103be6f 100644 --- a/block/io.c +++ b/block/io.c @@ -242,21 +242,6 @@ typedef struct { bool ignore_bds_parents; } BdrvCoDrainData; -/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ -static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) -{ - if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || - (!begin && !bs->drv->bdrv_drain_end)) { - return; - } - - if (begin) { - bs->drv->bdrv_drain_begin(bs); - } else { - bs->drv->bdrv_drain_end(bs); - } -} - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, BdrvChild *ignore_parent, bool ignore_bds_parents) @@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, } bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); - bdrv_drain_invoke(bs, true); + if (bs->drv && bs->drv->bdrv_drain_begin) { + bs->drv->bdrv_drain_begin(bs); + } } static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, @@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, assert(bs->quiesce_counter > 0); /* Re-enable things in child-to-parent order */ - bdrv_drain_invoke(bs, false); + if (bs->drv && bs->drv->bdrv_drain_end) { + bs->drv->bdrv_drain_end(bs); + } bdrv_parent_drained_end(bs, parent, ignore_bds_parents); old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); From 2e117866d7c96cc17e84cd2946fee1bf3292d814 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:01 +0100 Subject: [PATCH 10/50] block: Fix locking for bdrv_reopen_queue_child() Callers don't agree whether bdrv_reopen_queue_child() should be called with the AioContext lock held or not. Standardise on holding the lock (as done by QMP blockdev-reopen and the replication block driver) and fix bdrv_reopen() to do the same. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-7-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index ce71545551..3266519455 100644 --- a/block.c +++ b/block.c @@ -4174,6 +4174,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, * bs_queue, or the existing bs_queue being used. * * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). + * + * To be called with bs->aio_context locked. */ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, @@ -4332,6 +4334,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, return bs_queue; } +/* To be called with bs->aio_context locked */ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, bool keep_old_opts) @@ -4492,11 +4495,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, GLOBAL_STATE_CODE(); bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + if (ctx != qemu_get_aio_context()) { aio_context_release(ctx); } - - queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); ret = bdrv_reopen_multiple(queue, errp); if (ctx != qemu_get_aio_context()) { From d22933acd2f470eeef779e4d444e848f76dcfaf8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:02 +0100 Subject: [PATCH 11/50] block: Drain individual nodes during reopen bdrv_reopen() and friends use subtree drains as a lazy way of covering all the nodes they touch. Turns out that this lazy way is a lot more complicated than just draining the nodes individually, even not accounting for the additional complexity in the drain mechanism itself. Simplify the code by switching to draining the individual nodes that are already managed in the BlockReopenQueue anyway. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-8-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 16 +++++++++------- block/replication.c | 6 ------ blockdev.c | 13 ------------- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/block.c b/block.c index 3266519455..dd329a16ce 100644 --- a/block.c +++ b/block.c @@ -4173,7 +4173,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, * returns a pointer to bs_queue, which is either the newly allocated * bs_queue, or the existing bs_queue being used. * - * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). + * bs is drained here and undrained by bdrv_reopen_queue_free(). * * To be called with bs->aio_context locked. */ @@ -4195,12 +4195,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, int flags; QemuOpts *opts; - /* Make sure that the caller remembered to use a drained section. This is - * important to avoid graph changes between the recursive queuing here and - * bdrv_reopen_multiple(). */ - assert(bs->quiesce_counter > 0); GLOBAL_STATE_CODE(); + bdrv_drained_begin(bs); + if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); QTAILQ_INIT(bs_queue); @@ -4351,6 +4349,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) if (bs_queue) { BlockReopenQueueEntry *bs_entry, *next; QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); + + aio_context_acquire(ctx); + bdrv_drained_end(bs_entry->state.bs); + aio_context_release(ctx); + qobject_unref(bs_entry->state.explicit_options); qobject_unref(bs_entry->state.options); g_free(bs_entry); @@ -4494,7 +4498,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, GLOBAL_STATE_CODE(); - bdrv_subtree_drained_begin(bs); queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); if (ctx != qemu_get_aio_context()) { @@ -4505,7 +4508,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, if (ctx != qemu_get_aio_context()) { aio_context_acquire(ctx); } - bdrv_subtree_drained_end(bs); return ret; } diff --git a/block/replication.c b/block/replication.c index f1eed25e43..c62f48a874 100644 --- a/block/replication.c +++ b/block/replication.c @@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs); } - bdrv_subtree_drained_begin(hidden_disk->bs); - bdrv_subtree_drained_begin(secondary_disk->bs); - if (s->orig_hidden_read_only) { QDict *opts = qdict_new(); qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); @@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, aio_context_acquire(ctx); } } - - bdrv_subtree_drained_end(hidden_disk->bs); - bdrv_subtree_drained_end(secondary_disk->bs); } static void backup_job_cleanup(BlockDriverState *bs) diff --git a/blockdev.c b/blockdev.c index 75eef8535e..d2f80b0386 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3515,8 +3515,6 @@ fail: void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) { BlockReopenQueue *queue = NULL; - GSList *drained = NULL; - GSList *p; /* Add each one of the BDS that we want to reopen to the queue */ for (; reopen_list != NULL; reopen_list = reopen_list->next) { @@ -3553,9 +3551,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) ctx = bdrv_get_aio_context(bs); aio_context_acquire(ctx); - bdrv_subtree_drained_begin(bs); queue = bdrv_reopen_queue(queue, bs, qdict, false); - drained = g_slist_prepend(drained, bs); aio_context_release(ctx); } @@ -3566,15 +3562,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) fail: bdrv_reopen_queue_free(queue); - for (p = drained; p; p = p->next) { - BlockDriverState *bs = p->data; - AioContext *ctx = bdrv_get_aio_context(bs); - - aio_context_acquire(ctx); - bdrv_subtree_drained_end(bs); - aio_context_release(ctx); - } - g_slist_free(drained); } void qmp_blockdev_del(const char *node_name, Error **errp) From 631086deefc32690ee56efed1c5b891dec31ae37 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:03 +0100 Subject: [PATCH 12/50] block: Don't use subtree drains in bdrv_drop_intermediate() Instead of using a subtree drain from the top node (which also drains child nodes of base that we're not even interested in), use a normal drain for base, which automatically drains all of the parents, too. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-9-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index dd329a16ce..db043346d8 100644 --- a/block.c +++ b/block.c @@ -5600,7 +5600,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, GLOBAL_STATE_CODE(); bdrv_ref(top); - bdrv_subtree_drained_begin(top); + bdrv_drained_begin(base); if (!top->drv || !base->drv) { goto exit; @@ -5673,7 +5673,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, ret = 0; exit: - bdrv_subtree_drained_end(top); + bdrv_drained_end(base); bdrv_unref(top); return ret; } From 92140b9f3f07d80e2c27edcc6e32f392be2135e6 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:04 +0100 Subject: [PATCH 13/50] stream: Replace subtree drain with a single node drain The subtree drain was introduced in commit b1e1af394d9 as a way to avoid graph changes between finding the base node and changing the block graph as necessary on completion of the image streaming job. The block graph could change between these two points because bdrv_set_backing_hd() first drains the parent node, which involved polling and can do anything. Subtree draining was an imperfect way to make this less likely (because with it, fewer callbacks are called during this window). Everyone agreed that it's not really the right solution, and it was only committed as a stopgap solution. This replaces the subtree drain with a solution that simply drains the parent node before we try to find the base node, and then call a version of bdrv_set_backing_hd() that doesn't drain, but just asserts that the parent node is already drained. This way, any graph changes caused by draining happen before we start looking at the graph and things stay consistent between finding the base node and changing the graph. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-10-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 17 ++++++++++++++--- block/stream.c | 26 ++++++++++++++++---------- include/block/block-global-state.h | 3 +++ 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/block.c b/block.c index db043346d8..97bfb1494f 100644 --- a/block.c +++ b/block.c @@ -3426,14 +3426,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs, return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); } -int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp) +int bdrv_set_backing_hd_drained(BlockDriverState *bs, + BlockDriverState *backing_hd, + Error **errp) { int ret; Transaction *tran = tran_new(); GLOBAL_STATE_CODE(); - bdrv_drained_begin(bs); + assert(bs->quiesce_counter > 0); ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); if (ret < 0) { @@ -3443,7 +3444,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, ret = bdrv_refresh_perms(bs, tran, errp); out: tran_finalize(tran, ret); + return ret; +} +int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp) +{ + int ret; + GLOBAL_STATE_CODE(); + + bdrv_drained_begin(bs); + ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); bdrv_drained_end(bs); return ret; diff --git a/block/stream.c b/block/stream.c index 694709bd25..8744ad103f 100644 --- a/block/stream.c +++ b/block/stream.c @@ -64,13 +64,16 @@ static int stream_prepare(Job *job) bdrv_cor_filter_drop(s->cor_filter_bs); s->cor_filter_bs = NULL; - bdrv_subtree_drained_begin(s->above_base); + /* + * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain + * already here and use bdrv_set_backing_hd_drained() instead because + * the polling during drained_begin() might change the graph, and if we do + * this only later, we may end up working with the wrong base node (or it + * might even have gone away by the time we want to use it). + */ + bdrv_drained_begin(unfiltered_bs); base = bdrv_filter_or_cow_bs(s->above_base); - if (base) { - bdrv_ref(base); - } - unfiltered_base = bdrv_skip_filters(base); if (bdrv_cow_child(unfiltered_bs)) { @@ -82,7 +85,13 @@ static int stream_prepare(Job *job) } } - bdrv_set_backing_hd(unfiltered_bs, base, &local_err); + bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); + + /* + * This call will do I/O, so the graph can change again from here on. + * We have already completed the graph change, so we are not in danger + * of operating on the wrong node any more if this happens. + */ ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false); if (local_err) { error_report_err(local_err); @@ -92,10 +101,7 @@ static int stream_prepare(Job *job) } out: - if (base) { - bdrv_unref(base); - } - bdrv_subtree_drained_end(s->above_base); + bdrv_drained_end(unfiltered_bs); return ret; } diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index c7bd4a2088..00e0cf8aea 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename, BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, Error **errp); +int bdrv_set_backing_hd_drained(BlockDriverState *bs, + BlockDriverState *backing_hd, + Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); BlockDriverState *bdrv_open(const char *filename, const char *reference, From 299403aedaeb7f08d8e98aa8614b29d4e5546066 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:05 +0100 Subject: [PATCH 14/50] block: Remove subtree drains Subtree drains are not used any more. Remove them. After this, BdrvChildClass.attach/detach() don't poll any more. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-11-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 20 +-- block/io.c | 121 +++----------- include/block/block-io.h | 18 +-- include/block/block_int-common.h | 1 - include/block/block_int-io.h | 12 -- tests/unit/test-bdrv-drain.c | 261 ++----------------------------- 6 files changed, 44 insertions(+), 389 deletions(-) diff --git a/block.c b/block.c index 97bfb1494f..7ea0b82049 100644 --- a/block.c +++ b/block.c @@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child) static bool bdrv_child_cb_drained_poll(BdrvChild *child) { BlockDriverState *bs = child->opaque; - return bdrv_drain_poll(bs, false, NULL, false); + return bdrv_drain_poll(bs, NULL, false); } static void bdrv_child_cb_drained_end(BdrvChild *child) @@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child) assert(!bs->file); bs->file = child; } - - bdrv_apply_subtree_drain(child, bs); } static void bdrv_child_cb_detach(BdrvChild *child) @@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child) bdrv_backing_detach(child); } - bdrv_unapply_subtree_drain(child, bs); - assert_bdrv_graph_writable(bs); QLIST_REMOVE(child, next); if (child == bs->backing) { @@ -2882,9 +2878,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } if (old_bs) { - /* Detach first so that the recursive drain sections coming from @child - * are already gone and we only end the drain sections that came from - * elsewhere. */ if (child->klass->detach) { child->klass->detach(child); } @@ -2899,17 +2892,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child, QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); /* - * Detaching the old node may have led to the new node's - * quiesce_counter having been decreased. Not a problem, we - * just need to recognize this here and then invoke - * drained_end appropriately more often. + * Polling in bdrv_parent_drained_begin_single() may have led to the new + * node's quiesce_counter having been decreased. Not a problem, we just + * need to recognize this here and then invoke drained_end appropriately + * more often. */ assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; - /* Attach only after starting new drained sections, so that recursive - * drain sections coming from @child don't get an extra .drained_begin - * callback. */ if (child->klass->attach) { child->klass->attach(child); } diff --git a/block/io.c b/block/io.c index a25103be6f..75224480d0 100644 --- a/block/io.c +++ b/block/io.c @@ -236,17 +236,15 @@ typedef struct { BlockDriverState *bs; bool done; bool begin; - bool recursive; bool poll; BdrvChild *parent; bool ignore_bds_parents; } BdrvCoDrainData; /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ -bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent, bool ignore_bds_parents) +bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, + bool ignore_bds_parents) { - BdrvChild *child, *next; IO_OR_GS_CODE(); if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { @@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, return true; } - if (recursive) { - assert(!ignore_bds_parents); - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - if (bdrv_drain_poll(child->bs, recursive, child, false)) { - return true; - } - } - } - return false; } -static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, +static bool bdrv_drain_poll_top_level(BlockDriverState *bs, BdrvChild *ignore_parent) { - return bdrv_drain_poll(bs, recursive, ignore_parent, false); + return bdrv_drain_poll(bs, ignore_parent, false); } -static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents, - bool poll); -static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents); +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + bool ignore_bds_parents, bool poll); +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + bool ignore_bds_parents); static void bdrv_co_drain_bh_cb(void *opaque) { @@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque) aio_context_acquire(ctx); bdrv_dec_in_flight(bs); if (data->begin) { - bdrv_do_drained_begin(bs, data->recursive, data->parent, - data->ignore_bds_parents, data->poll); + bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, + data->poll); } else { assert(!data->poll); - bdrv_do_drained_end(bs, data->recursive, data->parent, - data->ignore_bds_parents); + bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); } aio_context_release(ctx); } else { @@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque) } static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - bool begin, bool recursive, + bool begin, BdrvChild *parent, bool ignore_bds_parents, bool poll) @@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, .bs = bs, .done = false, .begin = begin, - .recursive = recursive, .parent = parent, .ignore_bds_parents = ignore_bds_parents, .poll = poll, @@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, } } -static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents, - bool poll) +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + bool ignore_bds_parents, bool poll) { - BdrvChild *child, *next; - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, - poll); + bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); return; } bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); - if (recursive) { - assert(!ignore_bds_parents); - bs->recursive_quiesce_counter++; - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, - false); - } - } - /* * Wait for drained requests to finish. * @@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, */ if (poll) { assert(!ignore_bds_parents); - BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); + BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); } } void bdrv_drained_begin(BlockDriverState *bs) { IO_OR_GS_CODE(); - bdrv_do_drained_begin(bs, false, NULL, false, true); -} - -void bdrv_subtree_drained_begin(BlockDriverState *bs) -{ - IO_OR_GS_CODE(); - bdrv_do_drained_begin(bs, true, NULL, false, true); + bdrv_do_drained_begin(bs, NULL, false, true); } /** * This function does not poll, nor must any of its recursively called * functions. */ -static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents) +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + bool ignore_bds_parents) { - BdrvChild *child; int old_quiesce_counter; if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, - false); + bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); return; } assert(bs->quiesce_counter > 0); @@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, if (old_quiesce_counter == 1) { aio_enable_external(bdrv_get_aio_context(bs)); } - - if (recursive) { - assert(!ignore_bds_parents); - bs->recursive_quiesce_counter--; - QLIST_FOREACH(child, &bs->children, next) { - bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); - } - } } void bdrv_drained_end(BlockDriverState *bs) { IO_OR_GS_CODE(); - bdrv_do_drained_end(bs, false, NULL, false); -} - -void bdrv_subtree_drained_end(BlockDriverState *bs) -{ - IO_OR_GS_CODE(); - bdrv_do_drained_end(bs, true, NULL, false); -} - -void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) -{ - int i; - IO_OR_GS_CODE(); - - for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { - bdrv_do_drained_begin(child->bs, true, child, false, true); - } -} - -void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) -{ - int i; - IO_OR_GS_CODE(); - - for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { - bdrv_do_drained_end(child->bs, true, child, false); - } + bdrv_do_drained_end(bs, NULL, false); } void bdrv_drain(BlockDriverState *bs) @@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void) while ((bs = bdrv_next_all_states(bs))) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - result |= bdrv_drain_poll(bs, false, NULL, true); + result |= bdrv_drain_poll(bs, NULL, true); aio_context_release(aio_context); } @@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void) GLOBAL_STATE_CODE(); if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); + bdrv_co_yield_to_drain(NULL, true, NULL, true, true); return; } @@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_begin(bs, false, NULL, true, false); + bdrv_do_drained_begin(bs, NULL, true, false); aio_context_release(aio_context); } @@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) g_assert(!bs->refcnt); while (bs->quiesce_counter) { - bdrv_do_drained_end(bs, false, NULL, true); + bdrv_do_drained_end(bs, NULL, true); } } @@ -621,7 +552,7 @@ void bdrv_drain_all_end(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_end(bs, false, NULL, true); + bdrv_do_drained_end(bs, NULL, true); aio_context_release(aio_context); } diff --git a/include/block/block-io.h b/include/block/block-io.h index 054e964c9b..9c36a16a1f 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c); /** * bdrv_drain_poll: * - * Poll for pending requests in @bs, its parents (except for @ignore_parent), - * and if @recursive is true its children as well (used for subtree drain). + * Poll for pending requests in @bs and its parents (except for @ignore_parent). * * If @ignore_bds_parents is true, parents that are BlockDriverStates must * ignore the drain request because they will be drained separately (used for @@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c); * * This is part of bdrv_drained_begin. */ -bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent, bool ignore_bds_parents); +bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, + bool ignore_bds_parents); /** * bdrv_drained_begin: @@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs); void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent, bool ignore_bds_parents); -/** - * Like bdrv_drained_begin, but recursively begins a quiesced section for - * exclusive access to all child nodes as well. - */ -void bdrv_subtree_drained_begin(BlockDriverState *bs); - /** * bdrv_drained_end: * @@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); */ void bdrv_drained_end(BlockDriverState *bs); -/** - * End a quiescent section started by bdrv_subtree_drained_begin(). - */ -void bdrv_subtree_drained_end(BlockDriverState *bs); - #endif /* BLOCK_IO_H */ diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 2b97576f6d..791dddfd7d 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -1184,7 +1184,6 @@ struct BlockDriverState { /* Accessed with atomic ops. */ int quiesce_counter; - int recursive_quiesce_counter; unsigned int write_gen; /* Current data generation */ diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h index 4b0b3e17ef..8bc061ebb8 100644 --- a/include/block/block_int-io.h +++ b/include/block/block_int-io.h @@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs, */ void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); - -/* - * "I/O or GS" API functions. These functions can run without - * the BQL, but only in one specific iothread/main loop. - * - * See include/block/block-io.h for more information about - * the "I/O or GS" API. - */ - -void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); -void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); - #endif /* BLOCK_INT_IO_H */ diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 695519ee02..dda08de8db 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void)) enum drain_type { BDRV_DRAIN_ALL, BDRV_DRAIN, - BDRV_SUBTREE_DRAIN, DRAIN_TYPE_MAX, }; @@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) switch (drain_type) { case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; case BDRV_DRAIN: bdrv_drained_begin(bs); break; - case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; default: g_assert_not_reached(); } } @@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) switch (drain_type) { case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; case BDRV_DRAIN: bdrv_drained_end(bs); break; - case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; default: g_assert_not_reached(); } } @@ -271,11 +268,6 @@ static void test_drv_cb_drain(void) test_drv_cb_common(BDRV_DRAIN, false); } -static void test_drv_cb_drain_subtree(void) -{ - test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); -} - static void test_drv_cb_co_drain_all(void) { call_in_coroutine(test_drv_cb_drain_all); @@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void) call_in_coroutine(test_drv_cb_drain); } -static void test_drv_cb_co_drain_subtree(void) -{ - call_in_coroutine(test_drv_cb_drain_subtree); -} - static void test_quiesce_common(enum drain_type drain_type, bool recursive) { BlockBackend *blk; @@ -332,11 +319,6 @@ static void test_quiesce_drain(void) test_quiesce_common(BDRV_DRAIN, false); } -static void test_quiesce_drain_subtree(void) -{ - test_quiesce_common(BDRV_SUBTREE_DRAIN, true); -} - static void test_quiesce_co_drain_all(void) { call_in_coroutine(test_quiesce_drain_all); @@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void) call_in_coroutine(test_quiesce_drain); } -static void test_quiesce_co_drain_subtree(void) -{ - call_in_coroutine(test_quiesce_drain_subtree); -} - static void test_nested(void) { BlockBackend *blk; @@ -402,158 +379,6 @@ static void test_nested(void) blk_unref(blk); } -static void test_multiparent(void) -{ - BlockBackend *blk_a, *blk_b; - BlockDriverState *bs_a, *bs_b, *backing; - BDRVTestState *a_s, *b_s, *backing_s; - - blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, - &error_abort); - a_s = bs_a->opaque; - blk_insert_bs(blk_a, bs_a, &error_abort); - - blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, - &error_abort); - b_s = bs_b->opaque; - blk_insert_bs(blk_b, bs_b, &error_abort); - - backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); - backing_s = backing->opaque; - bdrv_set_backing_hd(bs_a, backing, &error_abort); - bdrv_set_backing_hd(bs_b, backing, &error_abort); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 0); - g_assert_cmpint(bs_b->quiesce_counter, ==, 0); - g_assert_cmpint(backing->quiesce_counter, ==, 0); - g_assert_cmpint(a_s->drain_count, ==, 0); - g_assert_cmpint(b_s->drain_count, ==, 0); - g_assert_cmpint(backing_s->drain_count, ==, 0); - - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 1); - g_assert_cmpint(bs_b->quiesce_counter, ==, 1); - g_assert_cmpint(backing->quiesce_counter, ==, 1); - g_assert_cmpint(a_s->drain_count, ==, 1); - g_assert_cmpint(b_s->drain_count, ==, 1); - g_assert_cmpint(backing_s->drain_count, ==, 1); - - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 2); - g_assert_cmpint(bs_b->quiesce_counter, ==, 2); - g_assert_cmpint(backing->quiesce_counter, ==, 2); - g_assert_cmpint(a_s->drain_count, ==, 2); - g_assert_cmpint(b_s->drain_count, ==, 2); - g_assert_cmpint(backing_s->drain_count, ==, 2); - - do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 1); - g_assert_cmpint(bs_b->quiesce_counter, ==, 1); - g_assert_cmpint(backing->quiesce_counter, ==, 1); - g_assert_cmpint(a_s->drain_count, ==, 1); - g_assert_cmpint(b_s->drain_count, ==, 1); - g_assert_cmpint(backing_s->drain_count, ==, 1); - - do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 0); - g_assert_cmpint(bs_b->quiesce_counter, ==, 0); - g_assert_cmpint(backing->quiesce_counter, ==, 0); - g_assert_cmpint(a_s->drain_count, ==, 0); - g_assert_cmpint(b_s->drain_count, ==, 0); - g_assert_cmpint(backing_s->drain_count, ==, 0); - - bdrv_unref(backing); - bdrv_unref(bs_a); - bdrv_unref(bs_b); - blk_unref(blk_a); - blk_unref(blk_b); -} - -static void test_graph_change_drain_subtree(void) -{ - BlockBackend *blk_a, *blk_b; - BlockDriverState *bs_a, *bs_b, *backing; - BDRVTestState *a_s, *b_s, *backing_s; - - blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, - &error_abort); - a_s = bs_a->opaque; - blk_insert_bs(blk_a, bs_a, &error_abort); - - blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, - &error_abort); - b_s = bs_b->opaque; - blk_insert_bs(blk_b, bs_b, &error_abort); - - backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); - backing_s = backing->opaque; - bdrv_set_backing_hd(bs_a, backing, &error_abort); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 0); - g_assert_cmpint(bs_b->quiesce_counter, ==, 0); - g_assert_cmpint(backing->quiesce_counter, ==, 0); - g_assert_cmpint(a_s->drain_count, ==, 0); - g_assert_cmpint(b_s->drain_count, ==, 0); - g_assert_cmpint(backing_s->drain_count, ==, 0); - - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); - - bdrv_set_backing_hd(bs_b, backing, &error_abort); - g_assert_cmpint(bs_a->quiesce_counter, ==, 5); - g_assert_cmpint(bs_b->quiesce_counter, ==, 5); - g_assert_cmpint(backing->quiesce_counter, ==, 5); - g_assert_cmpint(a_s->drain_count, ==, 5); - g_assert_cmpint(b_s->drain_count, ==, 5); - g_assert_cmpint(backing_s->drain_count, ==, 5); - - bdrv_set_backing_hd(bs_b, NULL, &error_abort); - g_assert_cmpint(bs_a->quiesce_counter, ==, 3); - g_assert_cmpint(bs_b->quiesce_counter, ==, 2); - g_assert_cmpint(backing->quiesce_counter, ==, 3); - g_assert_cmpint(a_s->drain_count, ==, 3); - g_assert_cmpint(b_s->drain_count, ==, 2); - g_assert_cmpint(backing_s->drain_count, ==, 3); - - bdrv_set_backing_hd(bs_b, backing, &error_abort); - g_assert_cmpint(bs_a->quiesce_counter, ==, 5); - g_assert_cmpint(bs_b->quiesce_counter, ==, 5); - g_assert_cmpint(backing->quiesce_counter, ==, 5); - g_assert_cmpint(a_s->drain_count, ==, 5); - g_assert_cmpint(b_s->drain_count, ==, 5); - g_assert_cmpint(backing_s->drain_count, ==, 5); - - do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); - do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); - do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); - do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); - do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 0); - g_assert_cmpint(bs_b->quiesce_counter, ==, 0); - g_assert_cmpint(backing->quiesce_counter, ==, 0); - g_assert_cmpint(a_s->drain_count, ==, 0); - g_assert_cmpint(b_s->drain_count, ==, 0); - g_assert_cmpint(backing_s->drain_count, ==, 0); - - bdrv_unref(backing); - bdrv_unref(bs_a); - bdrv_unref(bs_b); - blk_unref(blk_a); - blk_unref(blk_b); -} - static void test_graph_change_drain_all(void) { BlockBackend *blk_a, *blk_b; @@ -773,12 +598,6 @@ static void test_iothread_drain(void) test_iothread_common(BDRV_DRAIN, 1); } -static void test_iothread_drain_subtree(void) -{ - test_iothread_common(BDRV_SUBTREE_DRAIN, 0); - test_iothread_common(BDRV_SUBTREE_DRAIN, 1); -} - typedef struct TestBlockJob { BlockJob common; @@ -863,7 +682,6 @@ enum test_job_result { enum test_job_drain_node { TEST_JOB_DRAIN_SRC, TEST_JOB_DRAIN_SRC_CHILD, - TEST_JOB_DRAIN_SRC_PARENT, }; static void test_blockjob_common_drain_node(enum drain_type drain_type, @@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, case TEST_JOB_DRAIN_SRC_CHILD: drain_bs = src_backing; break; - case TEST_JOB_DRAIN_SRC_PARENT: - drain_bs = src_overlay; - break; default: g_assert_not_reached(); } @@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, TEST_JOB_DRAIN_SRC); test_blockjob_common_drain_node(drain_type, use_iothread, result, TEST_JOB_DRAIN_SRC_CHILD); - if (drain_type == BDRV_SUBTREE_DRAIN) { - test_blockjob_common_drain_node(drain_type, use_iothread, result, - TEST_JOB_DRAIN_SRC_PARENT); - } } static void test_blockjob_drain_all(void) @@ -1071,11 +882,6 @@ static void test_blockjob_drain(void) test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS); } -static void test_blockjob_drain_subtree(void) -{ - test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS); -} - static void test_blockjob_error_drain_all(void) { test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN); @@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void) test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE); } -static void test_blockjob_error_drain_subtree(void) -{ - test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN); - test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE); -} - static void test_blockjob_iothread_drain_all(void) { test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS); @@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void) test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS); } -static void test_blockjob_iothread_drain_subtree(void) -{ - test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS); -} - static void test_blockjob_iothread_error_drain_all(void) { test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN); @@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void) test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE); } -static void test_blockjob_iothread_error_drain_subtree(void) -{ - test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN); - test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE); -} - typedef struct BDRVTestTopState { BdrvChild *wait_child; @@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, bdrv_drain(child_bs); bdrv_unref(child_bs); break; - case BDRV_SUBTREE_DRAIN: - /* Would have to ref/unref bs here for !detach_instead_of_delete, but - * then the whole test becomes pointless because the graph changes - * don't occur during the drain any more. */ - assert(detach_instead_of_delete); - bdrv_subtree_drained_begin(bs); - bdrv_subtree_drained_end(bs); - break; case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); bdrv_drain_all_end(); @@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void) do_test_delete_by_drain(true, BDRV_DRAIN); } -static void test_detach_by_drain_subtree(void) -{ - do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN); -} - struct detach_by_parent_data { BlockDriverState *parent_b; @@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb) g_assert(acb != NULL); /* Drain and check the expected result */ - bdrv_subtree_drained_begin(parent_b); + bdrv_drained_begin(parent_b); + bdrv_drained_begin(a); + bdrv_drained_begin(b); + bdrv_drained_begin(c); g_assert(detach_by_parent_data.child_c != NULL); @@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb) g_assert(QLIST_NEXT(child_a, next) == NULL); g_assert_cmpint(parent_a->quiesce_counter, ==, 1); - g_assert_cmpint(parent_b->quiesce_counter, ==, 1); + g_assert_cmpint(parent_b->quiesce_counter, ==, 3); g_assert_cmpint(a->quiesce_counter, ==, 1); - g_assert_cmpint(b->quiesce_counter, ==, 0); + g_assert_cmpint(b->quiesce_counter, ==, 1); g_assert_cmpint(c->quiesce_counter, ==, 1); - bdrv_subtree_drained_end(parent_b); + bdrv_drained_end(parent_b); + bdrv_drained_end(a); + bdrv_drained_end(b); + bdrv_drained_end(c); bdrv_unref(parent_b); blk_unref(blk); @@ -2202,70 +1984,47 @@ int main(int argc, char **argv) g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); - g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", - test_drv_cb_drain_subtree); g_test_add_func("/bdrv-drain/driver-cb/co/drain_all", test_drv_cb_co_drain_all); g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); - g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", - test_drv_cb_co_drain_subtree); - g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); - g_test_add_func("/bdrv-drain/quiesce/drain_subtree", - test_quiesce_drain_subtree); g_test_add_func("/bdrv-drain/quiesce/co/drain_all", test_quiesce_co_drain_all); g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); - g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", - test_quiesce_co_drain_subtree); g_test_add_func("/bdrv-drain/nested", test_nested); - g_test_add_func("/bdrv-drain/multiparent", test_multiparent); - g_test_add_func("/bdrv-drain/graph-change/drain_subtree", - test_graph_change_drain_subtree); g_test_add_func("/bdrv-drain/graph-change/drain_all", test_graph_change_drain_all); g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all); g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain); - g_test_add_func("/bdrv-drain/iothread/drain_subtree", - test_iothread_drain_subtree); g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); - g_test_add_func("/bdrv-drain/blockjob/drain_subtree", - test_blockjob_drain_subtree); g_test_add_func("/bdrv-drain/blockjob/error/drain_all", test_blockjob_error_drain_all); g_test_add_func("/bdrv-drain/blockjob/error/drain", test_blockjob_error_drain); - g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree", - test_blockjob_error_drain_subtree); g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", test_blockjob_iothread_drain_all); g_test_add_func("/bdrv-drain/blockjob/iothread/drain", test_blockjob_iothread_drain); - g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", - test_blockjob_iothread_drain_subtree); g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all", test_blockjob_iothread_error_drain_all); g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain", test_blockjob_iothread_error_drain); - g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree", - test_blockjob_iothread_error_drain_subtree); g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); - g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb); From 57e05be343f33f4e5899a8d8946a8596d68424a1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:06 +0100 Subject: [PATCH 15/50] block: Call drain callbacks only once We only need to call both the BlockDriver's callback and the parent callbacks when going from undrained to drained or vice versa. A second drain section doesn't make a difference for the driver or the parent, they weren't supposed to send new requests before and after the second drain. One thing that gets in the way is the 'ignore_bds_parents' parameter in bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that bdrv_drain_all_begin() increases bs->quiesce_counter, but does not quiesce the parent through BdrvChildClass callbacks. If an additional drain section is started now, bs->quiesce_counter will be non-zero, but we would still need to quiesce the parent through BdrvChildClass in order to keep things consistent (and unquiesce it on the matching bdrv_drained_end(), even though the counter would not reach 0 yet as long as the bdrv_drain_all() section is still active). Instead of keeping track of this, let's just get rid of the parameter. It was introduced in commit 6cd5c9d7b2d as an optimisation so that during bdrv_drain_all(), we wouldn't recursively drain all parents up to the root for each node, resulting in quadratic complexity. As it happens, calling the callbacks only once solves the same problem, so as of this patch, we'll still have O(n) complexity and ignore_bds_parents is not needed any more. This patch only ignores the 'ignore_bds_parents' parameter. It will be removed in a separate patch. Signed-off-by: Kevin Wolf Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-12-kwolf@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 25 +++++++------------------ block/io.c | 30 ++++++++++++++++++------------ include/block/block_int-common.h | 8 ++++---- tests/unit/test-bdrv-drain.c | 16 ++++++++++------ 4 files changed, 39 insertions(+), 40 deletions(-) diff --git a/block.c b/block.c index 7ea0b82049..b8bab06e55 100644 --- a/block.c +++ b/block.c @@ -2855,7 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, { BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; - int drain_saldo; assert(!child->frozen); assert(old_bs != new_bs); @@ -2865,16 +2864,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } - new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); - drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; - /* * If the new child node is drained but the old one was not, flush * all outstanding requests to the old child node. */ - while (drain_saldo > 0 && child->klass->drained_begin) { + new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); + if (new_bs_quiesce_counter && !child->quiesced_parent) { bdrv_parent_drained_begin_single(child, true); - drain_saldo--; } if (old_bs) { @@ -2890,16 +2886,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, if (new_bs) { assert_bdrv_graph_writable(new_bs); QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); - - /* - * Polling in bdrv_parent_drained_begin_single() may have led to the new - * node's quiesce_counter having been decreased. Not a problem, we just - * need to recognize this here and then invoke drained_end appropriately - * more often. - */ - assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); - drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; - if (child->klass->attach) { child->klass->attach(child); } @@ -2908,10 +2894,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, /* * If the old child node was drained but the new one is not, allow * requests to come in only after the new node has been attached. + * + * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() + * polls, which could have changed the value. */ - while (drain_saldo < 0 && child->klass->drained_end) { + new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); + if (!new_bs_quiesce_counter && child->quiesced_parent) { bdrv_parent_drained_end_single(child); - drain_saldo++; } } diff --git a/block/io.c b/block/io.c index 75224480d0..87d6f22ec4 100644 --- a/block/io.c +++ b/block/io.c @@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c) { IO_OR_GS_CODE(); - assert(c->parent_quiesce_counter > 0); - c->parent_quiesce_counter--; + assert(c->quiesced_parent); + c->quiesced_parent = false; + if (c->klass->drained_end) { c->klass->drained_end(c); } @@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) { AioContext *ctx = bdrv_child_get_parent_aio_context(c); IO_OR_GS_CODE(); - c->parent_quiesce_counter++; + + assert(!c->quiesced_parent); + c->quiesced_parent = true; + if (c->klass->drained_begin) { c->klass->drained_begin(c); } @@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, /* Stop things in parent-to-child order */ if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { aio_disable_external(bdrv_get_aio_context(bs)); - } - bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); - if (bs->drv && bs->drv->bdrv_drain_begin) { - bs->drv->bdrv_drain_begin(bs); + /* TODO Remove ignore_bds_parents, we don't consider it any more */ + bdrv_parent_drained_begin(bs, parent, false); + if (bs->drv && bs->drv->bdrv_drain_begin) { + bs->drv->bdrv_drain_begin(bs); + } } } @@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, assert(bs->quiesce_counter > 0); /* Re-enable things in child-to-parent order */ - if (bs->drv && bs->drv->bdrv_drain_end) { - bs->drv->bdrv_drain_end(bs); - } - bdrv_parent_drained_end(bs, parent, ignore_bds_parents); - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); if (old_quiesce_counter == 1) { + if (bs->drv && bs->drv->bdrv_drain_end) { + bs->drv->bdrv_drain_end(bs); + } + /* TODO Remove ignore_bds_parents, we don't consider it any more */ + bdrv_parent_drained_end(bs, parent, false); + aio_enable_external(bdrv_get_aio_context(bs)); } } diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 791dddfd7d..a6bc6b7fe9 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -980,13 +980,13 @@ struct BdrvChild { bool frozen; /* - * How many times the parent of this child has been drained + * True if the parent of this child has been drained by this BdrvChild * (through klass->drained_*). - * Usually, this is equal to bs->quiesce_counter (potentially - * reduced by bdrv_drain_all_count). It may differ while the + * + * It is generally true if bs->quiesce_counter > 0. It may differ while the * child is entering or leaving a drained section. */ - int parent_quiesce_counter; + bool quiesced_parent; QLIST_ENTRY(BdrvChild) next; QLIST_ENTRY(BdrvChild) next_parent; diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index dda08de8db..172bc6debc 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) do_drain_begin(drain_type, bs); - g_assert_cmpint(bs->quiesce_counter, ==, 1); + if (drain_type == BDRV_DRAIN_ALL) { + g_assert_cmpint(bs->quiesce_counter, ==, 2); + } else { + g_assert_cmpint(bs->quiesce_counter, ==, 1); + } g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); do_drain_end(drain_type, bs); @@ -348,8 +352,8 @@ static void test_nested(void) for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { - int backing_quiesce = (outer != BDRV_DRAIN) + - (inner != BDRV_DRAIN); + int backing_quiesce = (outer == BDRV_DRAIN_ALL) + + (inner == BDRV_DRAIN_ALL); g_assert_cmpint(bs->quiesce_counter, ==, 0); g_assert_cmpint(backing->quiesce_counter, ==, 0); @@ -359,10 +363,10 @@ static void test_nested(void) do_drain_begin(outer, bs); do_drain_begin(inner, bs); - g_assert_cmpint(bs->quiesce_counter, ==, 2); + g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce); g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); - g_assert_cmpint(s->drain_count, ==, 2); - g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce); + g_assert_cmpint(s->drain_count, ==, 1); + g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce); do_drain_end(inner, bs); do_drain_end(outer, bs); From a82a3bd135078d14f1bb4b5e50f51e77d3748270 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:07 +0100 Subject: [PATCH 16/50] block: Remove ignore_bds_parents parameter from drain_begin/end. ignore_bds_parents is now ignored during drain_begin and drain_end, so we can just remove it there. It is still a valid optimisation for drain_all in bdrv_drained_poll(), so leave it around there. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-13-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 2 +- block/io.c | 58 +++++++++++++++------------------------- include/block/block-io.h | 3 +-- 3 files changed, 24 insertions(+), 39 deletions(-) diff --git a/block.c b/block.c index b8bab06e55..1a2a8d9de9 100644 --- a/block.c +++ b/block.c @@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; - bdrv_do_drained_begin_quiesce(bs, NULL, false); + bdrv_do_drained_begin_quiesce(bs, NULL); } static bool bdrv_child_cb_drained_poll(BdrvChild *child) diff --git a/block/io.c b/block/io.c index 87d6f22ec4..2e9503df6a 100644 --- a/block/io.c +++ b/block/io.c @@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs); static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, BdrvRequestFlags flags); -static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, - bool ignore_bds_parents) +static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) { BdrvChild *c, *next; QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { - if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { + if (c == ignore) { continue; } bdrv_parent_drained_begin_single(c, false); @@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c) } } -static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, - bool ignore_bds_parents) +static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) { BdrvChild *c; QLIST_FOREACH(c, &bs->parents, next_parent) { - if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { + if (c == ignore) { continue; } bdrv_parent_drained_end_single(c); @@ -242,7 +240,6 @@ typedef struct { bool begin; bool poll; BdrvChild *parent; - bool ignore_bds_parents; } BdrvCoDrainData; /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ @@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, } static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - bool ignore_bds_parents, bool poll); -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - bool ignore_bds_parents); + bool poll); +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); static void bdrv_co_drain_bh_cb(void *opaque) { @@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque) aio_context_acquire(ctx); bdrv_dec_in_flight(bs); if (data->begin) { - bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, - data->poll); + bdrv_do_drained_begin(bs, data->parent, data->poll); } else { assert(!data->poll); - bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); + bdrv_do_drained_end(bs, data->parent); } aio_context_release(ctx); } else { @@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, bool begin, BdrvChild *parent, - bool ignore_bds_parents, bool poll) { BdrvCoDrainData data; @@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, .done = false, .begin = begin, .parent = parent, - .ignore_bds_parents = ignore_bds_parents, .poll = poll, }; @@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, } } -void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent, bool ignore_bds_parents) +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) { IO_OR_GS_CODE(); assert(!qemu_in_coroutine()); @@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, /* Stop things in parent-to-child order */ if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { aio_disable_external(bdrv_get_aio_context(bs)); - - /* TODO Remove ignore_bds_parents, we don't consider it any more */ - bdrv_parent_drained_begin(bs, parent, false); + bdrv_parent_drained_begin(bs, parent); if (bs->drv && bs->drv->bdrv_drain_begin) { bs->drv->bdrv_drain_begin(bs); } @@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, } static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - bool ignore_bds_parents, bool poll) + bool poll) { if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); + bdrv_co_yield_to_drain(bs, true, parent, poll); return; } - bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); + bdrv_do_drained_begin_quiesce(bs, parent); /* * Wait for drained requests to finish. @@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, * nodes. */ if (poll) { - assert(!ignore_bds_parents); BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); } } @@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, void bdrv_drained_begin(BlockDriverState *bs) { IO_OR_GS_CODE(); - bdrv_do_drained_begin(bs, NULL, false, true); + bdrv_do_drained_begin(bs, NULL, true); } /** * This function does not poll, nor must any of its recursively called * functions. */ -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - bool ignore_bds_parents) +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) { int old_quiesce_counter; if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); + bdrv_co_yield_to_drain(bs, false, parent, false); return; } assert(bs->quiesce_counter > 0); @@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, if (bs->drv && bs->drv->bdrv_drain_end) { bs->drv->bdrv_drain_end(bs); } - /* TODO Remove ignore_bds_parents, we don't consider it any more */ - bdrv_parent_drained_end(bs, parent, false); - + bdrv_parent_drained_end(bs, parent); aio_enable_external(bdrv_get_aio_context(bs)); } } @@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, void bdrv_drained_end(BlockDriverState *bs) { IO_OR_GS_CODE(); - bdrv_do_drained_end(bs, NULL, false); + bdrv_do_drained_end(bs, NULL); } void bdrv_drain(BlockDriverState *bs) @@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void) GLOBAL_STATE_CODE(); if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(NULL, true, NULL, true, true); + bdrv_co_yield_to_drain(NULL, true, NULL, true); return; } @@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_begin(bs, NULL, true, false); + bdrv_do_drained_begin(bs, NULL, false); aio_context_release(aio_context); } @@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) g_assert(!bs->refcnt); while (bs->quiesce_counter) { - bdrv_do_drained_end(bs, NULL, true); + bdrv_do_drained_end(bs, NULL); } } @@ -558,7 +544,7 @@ void bdrv_drain_all_end(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_end(bs, NULL, true); + bdrv_do_drained_end(bs, NULL); aio_context_release(aio_context); } diff --git a/include/block/block-io.h b/include/block/block-io.h index 9c36a16a1f..8f5e75756a 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs); * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already * running requests to complete. */ -void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent, bool ignore_bds_parents); +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent); /** * bdrv_drained_end: From 05c272ff0cf1b16cc3606f746182dd99b774f553 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:08 +0100 Subject: [PATCH 17/50] block: Drop out of coroutine in bdrv_do_drained_begin_quiesce() The next patch adds a parent drain to bdrv_attach_child_common(), which shouldn't be, but is currently called from coroutines in some cases (e.g. .bdrv_co_create implementations generally open new nodes). Therefore, the assertion that we're not in a coroutine doesn't hold true any more. We could just remove the assertion because there is nothing in the function that should be in conflict with running in a coroutine, but just to be on the safe side, we can reverse the caller relationship between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so that the latter also just drops out of coroutine context and we can still be certain in the future that any drain code doesn't run in coroutines. As a nice side effect, the structure of bdrv_do_drained_begin() is now symmetrical with bdrv_do_drained_end(). Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-14-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/io.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/block/io.c b/block/io.c index 2e9503df6a..5e9150d92c 100644 --- a/block/io.c +++ b/block/io.c @@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, } } -void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + bool poll) { IO_OR_GS_CODE(); - assert(!qemu_in_coroutine()); + + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs, true, parent, poll); + return; + } /* Stop things in parent-to-child order */ if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { @@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) bs->drv->bdrv_drain_begin(bs); } } -} - -static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - bool poll) -{ - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, parent, poll); - return; - } - - bdrv_do_drained_begin_quiesce(bs, parent); /* * Wait for drained requests to finish. @@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, } } +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) +{ + bdrv_do_drained_begin(bs, parent, false); +} + void bdrv_drained_begin(BlockDriverState *bs) { IO_OR_GS_CODE(); From 23987471285a26397e3152a9244b652445fd36c4 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:09 +0100 Subject: [PATCH 18/50] block: Don't poll in bdrv_replace_child_noperm() In order to make sure that bdrv_replace_child_noperm() doesn't have to poll any more, get rid of the bdrv_parent_drained_begin_single() call. This is possible now because we can require that the parent is already drained through the child in question when the function is called and we don't call the parent drain callbacks more than once. The additional drain calls needed in callers cause the test case to run its code in the drain handler too early (bdrv_attach_child() drains now), so modify it to only enable the code after the test setup has completed. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-15-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 103 ++++++++++++++++++++++++++++++----- block/io.c | 2 +- include/block/block-io.h | 8 +++ tests/unit/test-bdrv-drain.c | 10 ++++ 4 files changed, 108 insertions(+), 15 deletions(-) diff --git a/block.c b/block.c index 1a2a8d9de9..faaeca8472 100644 --- a/block.c +++ b/block.c @@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque) GLOBAL_STATE_CODE(); /* old_bs reference is transparently moved from @s to @s->child */ + if (!s->child->bs) { + /* + * The parents were undrained when removing old_bs from the child. New + * requests can't have been made, though, because the child was empty. + * + * TODO Make bdrv_replace_child_noperm() transactionable to avoid + * undraining the parent in the first place. Once this is done, having + * new_bs drained when calling bdrv_replace_child_tran() is not a + * requirement any more. + */ + bdrv_parent_drained_begin_single(s->child, false); + assert(!bdrv_parent_drained_poll_single(s->child)); + } + assert(s->child->quiesced_parent); bdrv_replace_child_noperm(s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = { * * Note: real unref of old_bs is done only on commit. * + * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be + * kept drained until the transaction is completed. + * * The function doesn't update permissions, caller is responsible for this. */ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, Transaction *tran) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); + + assert(child->quiesced_parent); + assert(!new_bs || new_bs->quiesce_counter); + *s = (BdrvReplaceChildState) { .child = child, .old_bs = child->bs, @@ -2850,6 +2871,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } +/* + * Replaces the node that a BdrvChild points to without updating permissions. + * + * If @new_bs is non-NULL, the parent of @child must already be drained through + * @child. + * + * This function does not poll. + */ static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) { @@ -2857,6 +2886,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child, int new_bs_quiesce_counter; assert(!child->frozen); + + /* + * If we want to change the BdrvChild to point to a drained node as its new + * child->bs, we need to make sure that its new parent is drained, too. In + * other words, either child->quiesce_parent must already be true or we must + * be able to set it and keep the parent's quiesce_counter consistent with + * that, but without polling or starting new requests (this function + * guarantees that it doesn't poll, and starting new requests would be + * against the invariants of drain sections). + * + * To keep things simple, we pick the first option (child->quiesce_parent + * must already be true). We also generalise the rule a bit to make it + * easier to verify in callers and more likely to be covered in test cases: + * The parent must be quiesced through this child even if new_bs isn't + * currently drained. + * + * The only exception is for callers that always pass new_bs == NULL. In + * this case, we obviously never need to consider the case of a drained + * new_bs, so we can keep the callers simpler by allowing them not to drain + * the parent. + */ + assert(!new_bs || child->quiesced_parent); assert(old_bs != new_bs); GLOBAL_STATE_CODE(); @@ -2864,15 +2915,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } - /* - * If the new child node is drained but the old one was not, flush - * all outstanding requests to the old child node. - */ - new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); - if (new_bs_quiesce_counter && !child->quiesced_parent) { - bdrv_parent_drained_begin_single(child, true); - } - if (old_bs) { if (child->klass->detach) { child->klass->detach(child); @@ -2892,11 +2934,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } /* - * If the old child node was drained but the new one is not, allow - * requests to come in only after the new node has been attached. - * - * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() - * polls, which could have changed the value. + * If the parent was drained through this BdrvChild previously, but new_bs + * is not drained, allow requests to come in only after the new node has + * been attached. */ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); if (!new_bs_quiesce_counter && child->quiesced_parent) { @@ -3033,6 +3073,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, } bdrv_ref(child_bs); + /* + * Let every new BdrvChild start with a drained parent. Inserting the child + * in the graph with bdrv_replace_child_noperm() will undrain it if + * @child_bs is not drained. + * + * The child was only just created and is not yet visible in global state + * until bdrv_replace_child_noperm() inserts it into the graph, so nobody + * could have sent requests and polling is not necessary. + * + * Note that this means that the parent isn't fully drained yet, we only + * stop new requests from coming in. This is fine, we don't care about the + * old requests here, they are not for this child. If another place enters a + * drain section for the same parent, but wants it to be fully quiesced, it + * will not run most of the the code in .drained_begin() again (which is not + * a problem, we already did this), but it will still poll until the parent + * is fully quiesced, so it will not be negatively affected either. + */ + bdrv_parent_drained_begin_single(new_child, false); bdrv_replace_child_noperm(new_child, child_bs); BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); @@ -5078,12 +5136,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) } if (child->bs) { + BlockDriverState *bs = child->bs; + bdrv_drained_begin(bs); bdrv_replace_child_tran(child, NULL, tran); + bdrv_drained_end(bs); } tran_add(tran, &bdrv_remove_child_drv, child); } +static void undrain_on_clean_cb(void *opaque) +{ + bdrv_drained_end(opaque); +} + +static TransactionActionDrv undrain_on_clean = { + .clean = undrain_on_clean_cb, +}; + static int bdrv_replace_node_noperm(BlockDriverState *from, BlockDriverState *to, bool auto_skip, Transaction *tran, @@ -5093,6 +5163,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, GLOBAL_STATE_CODE(); + bdrv_drained_begin(from); + bdrv_drained_begin(to); + tran_add(tran, &undrain_on_clean, from); + tran_add(tran, &undrain_on_clean, to); + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); if (!should_update_child(c, to)) { diff --git a/block/io.c b/block/io.c index 5e9150d92c..ae64830eac 100644 --- a/block/io.c +++ b/block/io.c @@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) } } -static bool bdrv_parent_drained_poll_single(BdrvChild *c) +bool bdrv_parent_drained_poll_single(BdrvChild *c) { if (c->klass->drained_poll) { return c->klass->drained_poll(c); diff --git a/include/block/block-io.h b/include/block/block-io.h index 8f5e75756a..65e6d2569b 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); */ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); +/** + * bdrv_parent_drained_poll_single: + * + * Returns true if there is any pending activity to cease before @c can be + * called quiesced, false otherwise. + */ +bool bdrv_parent_drained_poll_single(BdrvChild *c); + /** * bdrv_parent_drained_end_single: * diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 172bc6debc..2686a8acee 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void) typedef struct BDRVReplaceTestState { + bool setup_completed; bool was_drained; bool was_undrained; bool has_read; @@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs) { BDRVReplaceTestState *s = bs->opaque; + if (!s->setup_completed) { + return; + } + if (!s->drain_count) { s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); bdrv_inc_in_flight(bs); @@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs) { BDRVReplaceTestState *s = bs->opaque; + if (!s->setup_completed) { + return; + } + g_assert(s->drain_count > 0); if (!--s->drain_count) { s->was_undrained = true; @@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count, bdrv_ref(old_child_bs); bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, BDRV_CHILD_COW, &error_abort); + parent_s->setup_completed = true; for (i = 0; i < old_drain_count; i++) { bdrv_drained_begin(old_child_bs); From 606ed756c1d69cba4822be8923248d2fd714f069 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:10 +0100 Subject: [PATCH 19/50] block: Remove poll parameter from bdrv_parent_drained_begin_single() All callers of bdrv_parent_drained_begin_single() pass poll=false now, so we don't need the parameter any more. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-16-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 4 ++-- block/io.c | 8 ++------ include/block/block-io.h | 5 ++--- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/block.c b/block.c index faaeca8472..97073092c4 100644 --- a/block.c +++ b/block.c @@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque) * new_bs drained when calling bdrv_replace_child_tran() is not a * requirement any more. */ - bdrv_parent_drained_begin_single(s->child, false); + bdrv_parent_drained_begin_single(s->child); assert(!bdrv_parent_drained_poll_single(s->child)); } assert(s->child->quiesced_parent); @@ -3090,7 +3090,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, * a problem, we already did this), but it will still poll until the parent * is fully quiesced, so it will not be negatively affected either. */ - bdrv_parent_drained_begin_single(new_child, false); + bdrv_parent_drained_begin_single(new_child); bdrv_replace_child_noperm(new_child, child_bs); BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); diff --git a/block/io.c b/block/io.c index ae64830eac..38e57d1f67 100644 --- a/block/io.c +++ b/block/io.c @@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) if (c == ignore) { continue; } - bdrv_parent_drained_begin_single(c, false); + bdrv_parent_drained_begin_single(c); } } @@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, return busy; } -void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) +void bdrv_parent_drained_begin_single(BdrvChild *c) { - AioContext *ctx = bdrv_child_get_parent_aio_context(c); IO_OR_GS_CODE(); assert(!c->quiesced_parent); @@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) if (c->klass->drained_begin) { c->klass->drained_begin(c); } - if (poll) { - AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c)); - } } static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) diff --git a/include/block/block-io.h b/include/block/block-io.h index 65e6d2569b..92aaa7c1e9 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); /** * bdrv_parent_drained_begin_single: * - * Begin a quiesced section for the parent of @c. If @poll is true, wait for - * any pending activity to cease. + * Begin a quiesced section for the parent of @c. */ -void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); +void bdrv_parent_drained_begin_single(BdrvChild *c); /** * bdrv_parent_drained_poll_single: From 7b52a921c12c01be3b2ce331081dd9accea99948 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:24 -0500 Subject: [PATCH 20/50] block-io: introduce coroutine_fn duplicates for bdrv_common_block_status_above callers bdrv_common_block_status_above() is a g_c_w, and it is being called by many "wrapper" functions like bdrv_is_allocated(), bdrv_is_allocated_above() and bdrv_block_status_above(). Because we want to eventually split the coroutine from non-coroutine case in g_c_w, create duplicate wrappers that take care of directly calling the same coroutine functions called in the g_c_w. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-2-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block/io.c | 58 +++++++++++++++++++++++++++++++++++++--- include/block/block-io.h | 15 +++++++++++ 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/block/io.c b/block/io.c index 38e57d1f67..f4444b7777 100644 --- a/block/io.c +++ b/block/io.c @@ -2533,6 +2533,17 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, return ret; } +int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + IO_CODE(); + return bdrv_co_common_block_status_above(bs, base, false, true, offset, + bytes, pnum, map, file, NULL); +} + int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) @@ -2578,6 +2589,22 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO); } +int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum) +{ + int ret; + int64_t dummy; + IO_CODE(); + + ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset, + bytes, pnum ? pnum : &dummy, NULL, + NULL, NULL); + if (ret < 0) { + return ret; + } + return !!(ret & BDRV_BLOCK_ALLOCATED); +} + int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum) { @@ -2594,6 +2621,29 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, return !!(ret & BDRV_BLOCK_ALLOCATED); } +/* See bdrv_is_allocated_above for documentation */ +int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, + BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum) +{ + int depth; + int ret; + IO_CODE(); + + ret = bdrv_co_common_block_status_above(top, base, include_base, false, + offset, bytes, pnum, NULL, NULL, + &depth); + if (ret < 0) { + return ret; + } + + if (ret & BDRV_BLOCK_ALLOCATED) { + return depth; + } + return 0; +} + /* * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP] * @@ -2617,10 +2667,12 @@ int bdrv_is_allocated_above(BlockDriverState *top, int64_t bytes, int64_t *pnum) { int depth; - int ret = bdrv_common_block_status_above(top, base, include_base, false, - offset, bytes, pnum, NULL, NULL, - &depth); + int ret; IO_CODE(); + + ret = bdrv_common_block_status_above(top, base, include_base, false, + offset, bytes, pnum, NULL, NULL, + &depth); if (ret < 0) { return ret; } diff --git a/include/block/block-io.h b/include/block/block-io.h index 92aaa7c1e9..72919254cd 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -94,14 +94,29 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); + +int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file); int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); + +int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum); int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum); + +int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, + BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum); int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, bool include_base, int64_t offset, int64_t bytes, int64_t *pnum); + int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes); From 43a0d4f08b7a7bae90c0753db2b49441ef3e7f6e Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:25 -0500 Subject: [PATCH 21/50] block-copy: add coroutine_fn annotations These functions end up calling bdrv_common_block_status_above(), a generated_co_wrapper function. In addition, they also happen to be always called in coroutine context, meaning all callers are coroutine_fn. This means that the g_c_w function will enter the qemu_in_coroutine() case and eventually suspend (or in other words call qemu_coroutine_yield()). Therefore we can mark such functions coroutine_fn too. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Paolo Bonzini Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-3-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block/block-copy.c | 21 ++++++++++++--------- include/block/block-copy.h | 5 +++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/block/block-copy.c b/block/block-copy.c index bb947afdda..5e59d6262f 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -577,8 +577,9 @@ static coroutine_fn int block_copy_task_entry(AioTask *task) return ret; } -static int block_copy_block_status(BlockCopyState *s, int64_t offset, - int64_t bytes, int64_t *pnum) +static coroutine_fn int block_copy_block_status(BlockCopyState *s, + int64_t offset, + int64_t bytes, int64_t *pnum) { int64_t num; BlockDriverState *base; @@ -590,8 +591,8 @@ static int block_copy_block_status(BlockCopyState *s, int64_t offset, base = NULL; } - ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num, - NULL, NULL); + ret = bdrv_co_block_status_above(s->source->bs, base, offset, bytes, &num, + NULL, NULL); if (ret < 0 || num < s->cluster_size) { /* * On error or if failed to obtain large enough chunk just fallback to @@ -613,8 +614,9 @@ static int block_copy_block_status(BlockCopyState *s, int64_t offset, * Check if the cluster starting at offset is allocated or not. * return via pnum the number of contiguous clusters sharing this allocation. */ -static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset, - int64_t *pnum) +static int coroutine_fn block_copy_is_cluster_allocated(BlockCopyState *s, + int64_t offset, + int64_t *pnum) { BlockDriverState *bs = s->source->bs; int64_t count, total_count = 0; @@ -624,7 +626,7 @@ static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset, assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); while (true) { - ret = bdrv_is_allocated(bs, offset, bytes, &count); + ret = bdrv_co_is_allocated(bs, offset, bytes, &count); if (ret < 0) { return ret; } @@ -669,8 +671,9 @@ void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes) * @return 0 when the cluster at @offset was unallocated, * 1 otherwise, and -ret on error. */ -int64_t block_copy_reset_unallocated(BlockCopyState *s, - int64_t offset, int64_t *count) +int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s, + int64_t offset, + int64_t *count) { int ret; int64_t clusters, bytes; diff --git a/include/block/block-copy.h b/include/block/block-copy.h index ba0b425d78..8cea4f9b90 100644 --- a/include/block/block-copy.h +++ b/include/block/block-copy.h @@ -36,8 +36,9 @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm); void block_copy_state_free(BlockCopyState *s); void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes); -int64_t block_copy_reset_unallocated(BlockCopyState *s, - int64_t offset, int64_t *count); +int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s, + int64_t offset, + int64_t *count); int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes, bool ignore_ratelimit, uint64_t timeout_ns, From 6f58ac55396bc624c78e73939d5fe6a44a13d150 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:26 -0500 Subject: [PATCH 22/50] nbd/server.c: add coroutine_fn annotations These functions end up calling bdrv_*() implemented as generated_co_wrapper functions. In addition, they also happen to be always called in coroutine context, meaning all callers are coroutine_fn. This means that the g_c_w function will enter the qemu_in_coroutine() case and eventually suspend (or in other words call qemu_coroutine_yield()). Therefore we can mark such functions coroutine_fn too. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Paolo Bonzini Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-4-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- nbd/server.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 0570596312..47c70e62a3 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -2138,14 +2138,15 @@ static int nbd_extent_array_add(NBDExtentArray *ea, return 0; } -static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, NBDExtentArray *ea) +static int coroutine_fn blockstatus_to_extents(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + NBDExtentArray *ea) { while (bytes) { uint32_t flags; int64_t num; - int ret = bdrv_block_status_above(bs, NULL, offset, bytes, &num, - NULL, NULL); + int ret = bdrv_co_block_status_above(bs, NULL, offset, bytes, &num, + NULL, NULL); if (ret < 0) { return ret; @@ -2165,13 +2166,14 @@ static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset, return 0; } -static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, NBDExtentArray *ea) +static int coroutine_fn blockalloc_to_extents(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + NBDExtentArray *ea) { while (bytes) { int64_t num; - int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes, - &num); + int ret = bdrv_co_is_allocated_above(bs, NULL, false, offset, bytes, + &num); if (ret < 0) { return ret; @@ -2217,11 +2219,12 @@ static int nbd_co_send_extents(NBDClient *client, uint64_t handle, } /* Get block status from the exported device and send it to the client */ -static int nbd_co_send_block_status(NBDClient *client, uint64_t handle, - BlockDriverState *bs, uint64_t offset, - uint32_t length, bool dont_fragment, - bool last, uint32_t context_id, - Error **errp) +static int +coroutine_fn nbd_co_send_block_status(NBDClient *client, uint64_t handle, + BlockDriverState *bs, uint64_t offset, + uint32_t length, bool dont_fragment, + bool last, uint32_t context_id, + Error **errp) { int ret; unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS; From ff7e261bb91ecf44df12a551582187ddf6b187fe Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:27 -0500 Subject: [PATCH 23/50] block-backend: replace bdrv_*_above with blk_*_above Avoid mixing bdrv_* functions with blk_*, so create blk_* counterparts for bdrv_block_status_above and bdrv_is_allocated_above. Note that since blk_co_block_status_above only calls the g_c_w function bdrv_common_block_status_above and is marked as coroutine_fn, call directly bdrv_co_common_block_status_above() to avoid using a g_c_w. Same applies to blk_co_is_allocated_above. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-5-eesposit@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/block-backend.c | 21 ++++++++++++++++++++ block/commit.c | 4 ++-- include/sysemu/block-backend-io.h | 9 +++++++++ nbd/server.c | 32 +++++++++++++++---------------- 4 files changed, 48 insertions(+), 18 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 91e1d33a58..ba7bf1d6bc 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1424,6 +1424,27 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags); } +int coroutine_fn blk_co_block_status_above(BlockBackend *blk, + BlockDriverState *base, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + IO_CODE(); + return bdrv_co_block_status_above(blk_bs(blk), base, offset, bytes, pnum, + map, file); +} + +int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk, + BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum) +{ + IO_CODE(); + return bdrv_co_is_allocated_above(blk_bs(blk), base, include_base, offset, + bytes, pnum); +} + typedef struct BlkRwCo { BlockBackend *blk; int64_t offset; diff --git a/block/commit.c b/block/commit.c index 0029b31944..b346341767 100644 --- a/block/commit.c +++ b/block/commit.c @@ -155,8 +155,8 @@ static int coroutine_fn commit_run(Job *job, Error **errp) break; } /* Copy if allocated above the base */ - ret = bdrv_is_allocated_above(blk_bs(s->top), s->base_overlay, true, - offset, COMMIT_BUFFER_SIZE, &n); + ret = blk_co_is_allocated_above(s->top, s->base_overlay, true, + offset, COMMIT_BUFFER_SIZE, &n); copy = (ret > 0); trace_commit_one_iteration(s, offset, n, ret); if (copy) { diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h index 50f5aa2e07..ee3eb12610 100644 --- a/include/sysemu/block-backend-io.h +++ b/include/sysemu/block-backend-io.h @@ -92,6 +92,15 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); +int coroutine_fn blk_co_block_status_above(BlockBackend *blk, + BlockDriverState *base, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file); +int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk, + BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum); /* * "I/O or GS" API functions. These functions can run without diff --git a/nbd/server.c b/nbd/server.c index 47c70e62a3..67ed333578 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1988,7 +1988,7 @@ static int coroutine_fn nbd_co_send_structured_error(NBDClient *client, } /* Do a sparse read and send the structured reply to the client. - * Returns -errno if sending fails. bdrv_block_status_above() failure is + * Returns -errno if sending fails. blk_co_block_status_above() failure is * reported to the client, at which point this function succeeds. */ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, @@ -2004,10 +2004,10 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, while (progress < size) { int64_t pnum; - int status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL, - offset + progress, - size - progress, &pnum, NULL, - NULL); + int status = blk_co_block_status_above(exp->common.blk, NULL, + offset + progress, + size - progress, &pnum, NULL, + NULL); bool final; if (status < 0) { @@ -2138,15 +2138,15 @@ static int nbd_extent_array_add(NBDExtentArray *ea, return 0; } -static int coroutine_fn blockstatus_to_extents(BlockDriverState *bs, +static int coroutine_fn blockstatus_to_extents(BlockBackend *blk, uint64_t offset, uint64_t bytes, NBDExtentArray *ea) { while (bytes) { uint32_t flags; int64_t num; - int ret = bdrv_co_block_status_above(bs, NULL, offset, bytes, &num, - NULL, NULL); + int ret = blk_co_block_status_above(blk, NULL, offset, bytes, &num, + NULL, NULL); if (ret < 0) { return ret; @@ -2166,14 +2166,14 @@ static int coroutine_fn blockstatus_to_extents(BlockDriverState *bs, return 0; } -static int coroutine_fn blockalloc_to_extents(BlockDriverState *bs, +static int coroutine_fn blockalloc_to_extents(BlockBackend *blk, uint64_t offset, uint64_t bytes, NBDExtentArray *ea) { while (bytes) { int64_t num; - int ret = bdrv_co_is_allocated_above(bs, NULL, false, offset, bytes, - &num); + int ret = blk_co_is_allocated_above(blk, NULL, false, offset, bytes, + &num); if (ret < 0) { return ret; @@ -2221,7 +2221,7 @@ static int nbd_co_send_extents(NBDClient *client, uint64_t handle, /* Get block status from the exported device and send it to the client */ static int coroutine_fn nbd_co_send_block_status(NBDClient *client, uint64_t handle, - BlockDriverState *bs, uint64_t offset, + BlockBackend *blk, uint64_t offset, uint32_t length, bool dont_fragment, bool last, uint32_t context_id, Error **errp) @@ -2231,9 +2231,9 @@ coroutine_fn nbd_co_send_block_status(NBDClient *client, uint64_t handle, g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents); if (context_id == NBD_META_ID_BASE_ALLOCATION) { - ret = blockstatus_to_extents(bs, offset, length, ea); + ret = blockstatus_to_extents(blk, offset, length, ea); } else { - ret = blockalloc_to_extents(bs, offset, length, ea); + ret = blockalloc_to_extents(blk, offset, length, ea); } if (ret < 0) { return nbd_co_send_structured_error( @@ -2560,7 +2560,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, if (client->export_meta.base_allocation) { ret = nbd_co_send_block_status(client, request->handle, - blk_bs(exp->common.blk), + exp->common.blk, request->from, request->len, dont_fragment, !--contexts_remaining, @@ -2573,7 +2573,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, if (client->export_meta.allocation_depth) { ret = nbd_co_send_block_status(client, request->handle, - blk_bs(exp->common.blk), + exp->common.blk, request->from, request->len, dont_fragment, !--contexts_remaining, From f7f93a478a0c5d134d80afff7203e0aef788e5cd Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:28 -0500 Subject: [PATCH 24/50] block/vmdk: add coroutine_fn annotations These functions end up calling bdrv_create() implemented as generated_co_wrapper functions. In addition, they also happen to be always called in coroutine context, meaning all callers are coroutine_fn. This means that the g_c_w function will enter the qemu_in_coroutine() case and eventually suspend (or in other words call qemu_coroutine_yield()). Therefore we can mark such functions coroutine_fn too. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Paolo Bonzini Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-6-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block/vmdk.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/block/vmdk.c b/block/vmdk.c index bac3d8db50..8204eb9ff4 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2285,10 +2285,11 @@ exit: return ret; } -static int vmdk_create_extent(const char *filename, int64_t filesize, - bool flat, bool compress, bool zeroed_grain, - BlockBackend **pbb, - QemuOpts *opts, Error **errp) +static int coroutine_fn vmdk_create_extent(const char *filename, + int64_t filesize, bool flat, + bool compress, bool zeroed_grain, + BlockBackend **pbb, + QemuOpts *opts, Error **errp) { int ret; BlockBackend *blk = NULL; @@ -2366,14 +2367,14 @@ static int filename_decompose(const char *filename, char *path, char *prefix, * non-split format. * idx >= 1: get the n-th extent if in a split subformat */ -typedef BlockBackend *(*vmdk_create_extent_fn)(int64_t size, - int idx, - bool flat, - bool split, - bool compress, - bool zeroed_grain, - void *opaque, - Error **errp); +typedef BlockBackend * coroutine_fn (*vmdk_create_extent_fn)(int64_t size, + int idx, + bool flat, + bool split, + bool compress, + bool zeroed_grain, + void *opaque, + Error **errp); static void vmdk_desc_add_extent(GString *desc, const char *extent_line_fmt, @@ -2616,7 +2617,7 @@ typedef struct { QemuOpts *opts; } VMDKCreateOptsData; -static BlockBackend *vmdk_co_create_opts_cb(int64_t size, int idx, +static BlockBackend * coroutine_fn vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split, bool compress, bool zeroed_grain, void *opaque, Error **errp) @@ -2768,10 +2769,11 @@ exit: return ret; } -static BlockBackend *vmdk_co_create_cb(int64_t size, int idx, - bool flat, bool split, bool compress, - bool zeroed_grain, void *opaque, - Error **errp) +static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, + bool flat, bool split, + bool compress, + bool zeroed_grain, + void *opaque, Error **errp) { int ret; BlockDriverState *bs; From a212e675cde67fb783b7f8e5b31dd02e9c880f58 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:29 -0500 Subject: [PATCH 25/50] block: avoid duplicating filename string in bdrv_create We know that the string will stay around until the function returns, and the parameter of drv->bdrv_co_create_opts is const char*, so it must not be modified either. Suggested-by: Kevin Wolf Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-7-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/block.c b/block.c index 97073092c4..9d77ec8818 100644 --- a/block.c +++ b/block.c @@ -551,7 +551,7 @@ int bdrv_create(BlockDriver *drv, const char* filename, Coroutine *co; CreateCo cco = { .drv = drv, - .filename = g_strdup(filename), + .filename = filename, .opts = opts, .ret = NOT_DONE, .err = NULL, @@ -559,8 +559,7 @@ int bdrv_create(BlockDriver *drv, const char* filename, if (!drv->bdrv_co_create_opts) { error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); - ret = -ENOTSUP; - goto out; + return -ENOTSUP; } if (qemu_in_coroutine()) { @@ -583,8 +582,6 @@ int bdrv_create(BlockDriver *drv, const char* filename, } } -out: - g_free(cco.filename); return ret; } From 84bdf21f97e670d61615d44a95d2f33b41f849b1 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:30 -0500 Subject: [PATCH 26/50] block: distinguish between bdrv_create running in coroutine and not Call two different functions depending on whether bdrv_create is in coroutine or not, following the same pattern as generated_co_wrapper functions. This allows to also call the coroutine function directly, without using CreateCo or relying in bdrv_create(). Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-8-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 71 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/block.c b/block.c index 9d77ec8818..0a1d484a27 100644 --- a/block.c +++ b/block.c @@ -526,63 +526,62 @@ typedef struct CreateCo { Error *err; } CreateCo; +static int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp) +{ + int ret; + GLOBAL_STATE_CODE(); + ERRP_GUARD(); + + if (!drv->bdrv_co_create_opts) { + error_setg(errp, "Driver '%s' does not support image creation", + drv->format_name); + return -ENOTSUP; + } + + ret = drv->bdrv_co_create_opts(drv, filename, opts, errp); + if (ret < 0 && !*errp) { + error_setg_errno(errp, -ret, "Could not create image"); + } + + return ret; +} + static void coroutine_fn bdrv_create_co_entry(void *opaque) { - Error *local_err = NULL; - int ret; - CreateCo *cco = opaque; - assert(cco->drv); GLOBAL_STATE_CODE(); - ret = cco->drv->bdrv_co_create_opts(cco->drv, - cco->filename, cco->opts, &local_err); - error_propagate(&cco->err, local_err); - cco->ret = ret; + cco->ret = bdrv_co_create(cco->drv, cco->filename, cco->opts, &cco->err); + aio_wait_kick(); } int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp) { - int ret; - GLOBAL_STATE_CODE(); - Coroutine *co; - CreateCo cco = { - .drv = drv, - .filename = filename, - .opts = opts, - .ret = NOT_DONE, - .err = NULL, - }; - - if (!drv->bdrv_co_create_opts) { - error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); - return -ENOTSUP; - } - if (qemu_in_coroutine()) { /* Fast-path if already in coroutine context */ - bdrv_create_co_entry(&cco); + return bdrv_co_create(drv, filename, opts, errp); } else { + Coroutine *co; + CreateCo cco = { + .drv = drv, + .filename = filename, + .opts = opts, + .ret = NOT_DONE, + .err = NULL, + }; + co = qemu_coroutine_create(bdrv_create_co_entry, &cco); qemu_coroutine_enter(co); while (cco.ret == NOT_DONE) { aio_poll(qemu_get_aio_context(), true); } + error_propagate(errp, cco.err); + return cco.ret; } - - ret = cco.ret; - if (ret < 0) { - if (cco.err) { - error_propagate(errp, cco.err); - } else { - error_setg_errno(errp, -ret, "Could not create image"); - } - } - - return ret; } /** From 2475a0d0f4b0b450f25f7672c7072b4fdae6df00 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:31 -0500 Subject: [PATCH 27/50] block: bdrv_create_file is a coroutine_fn It is always called in coroutine_fn callbacks, therefore it can directly call bdrv_co_create(). Rename it to bdrv_co_create_file too. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-9-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 5 +++-- block/crypto.c | 2 +- block/parallels.c | 2 +- block/qcow.c | 2 +- block/qcow2.c | 4 ++-- block/qed.c | 2 +- block/raw-format.c | 2 +- block/vdi.c | 2 +- block/vhdx.c | 2 +- block/vmdk.c | 2 +- block/vpc.c | 2 +- include/block/block-global-state.h | 3 ++- 12 files changed, 16 insertions(+), 14 deletions(-) diff --git a/block.c b/block.c index 0a1d484a27..c8ac91eb63 100644 --- a/block.c +++ b/block.c @@ -719,7 +719,8 @@ out: return ret; } -int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) +int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts, + Error **errp) { QemuOpts *protocol_opts; BlockDriver *drv; @@ -760,7 +761,7 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) goto out; } - ret = bdrv_create(drv, filename, protocol_opts, errp); + ret = bdrv_co_create(drv, filename, protocol_opts, errp); out: qemu_opts_del(protocol_opts); qobject_unref(qdict); diff --git a/block/crypto.c b/block/crypto.c index 2fb8add458..bbeb9f437c 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -703,7 +703,7 @@ static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv, } /* Create protocol layer */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto fail; } diff --git a/block/parallels.c b/block/parallels.c index fa08c1104b..bbea2f2221 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -646,7 +646,7 @@ static int coroutine_fn parallels_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto done; } diff --git a/block/qcow.c b/block/qcow.c index 8bffc41531..5d99f00411 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -973,7 +973,7 @@ static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto fail; } diff --git a/block/qcow2.c b/block/qcow2.c index 941782a011..bafbd077b9 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3871,7 +3871,7 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto finish; } @@ -3886,7 +3886,7 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, /* Create and open an external data file (protocol layer) */ val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE); if (val) { - ret = bdrv_create_file(val, opts, errp); + ret = bdrv_co_create_file(val, opts, errp); if (ret < 0) { goto finish; } diff --git a/block/qed.c b/block/qed.c index e8ee332542..faa606618e 100644 --- a/block/qed.c +++ b/block/qed.c @@ -778,7 +778,7 @@ static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto fail; } diff --git a/block/raw-format.c b/block/raw-format.c index a68014ef0b..28905b09ee 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -433,7 +433,7 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, QemuOpts *opts, Error **errp) { - return bdrv_create_file(filename, opts, errp); + return bdrv_co_create_file(filename, opts, errp); } static int raw_open(BlockDriverState *bs, QDict *options, int flags, diff --git a/block/vdi.c b/block/vdi.c index c0c111c4b9..479bcfe820 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -934,7 +934,7 @@ static int coroutine_fn vdi_co_create_opts(BlockDriver *drv, qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vdi_create_opts, true); /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto done; } diff --git a/block/vhdx.c b/block/vhdx.c index bad9ca691b..4c929800fe 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -2084,7 +2084,7 @@ static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto fail; } diff --git a/block/vmdk.c b/block/vmdk.c index 8204eb9ff4..8894dac2d4 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2294,7 +2294,7 @@ static int coroutine_fn vmdk_create_extent(const char *filename, int ret; BlockBackend *blk = NULL; - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto exit; } diff --git a/block/vpc.c b/block/vpc.c index 95841f259a..6ee95dcb96 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -1111,7 +1111,7 @@ static int coroutine_fn vpc_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto fail; } diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 00e0cf8aea..387a7cbb2e 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -57,7 +57,8 @@ BlockDriver *bdrv_find_protocol(const char *filename, BlockDriver *bdrv_find_format(const char *format_name); int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp); -int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); +int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts, + Error **errp); BlockDriverState *bdrv_new(void); int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, From 1bd542016cc2c132e3d52dbc9e663966dfc10e72 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:32 -0500 Subject: [PATCH 28/50] block: rename generated_co_wrapper in co_wrapper_mixed In preparation to the incoming new function specifiers, rename g_c_w with a more meaningful name and document it. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-10-eesposit@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/coroutines.h | 4 +- docs/devel/block-coroutine-wrapper.rst | 6 +-- include/block/block-common.h | 11 +++-- include/block/block-io.h | 44 ++++++++--------- include/sysemu/block-backend-io.h | 68 +++++++++++++------------- scripts/block-coroutine-wrapper.py | 6 +-- 6 files changed, 71 insertions(+), 68 deletions(-) diff --git a/block/coroutines.h b/block/coroutines.h index 3a2bad564f..17da4db963 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -71,7 +71,7 @@ nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking, * the "I/O or GS" API. */ -int generated_co_wrapper +int co_wrapper_mixed bdrv_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, @@ -82,7 +82,7 @@ bdrv_common_block_status_above(BlockDriverState *bs, int64_t *map, BlockDriverState **file, int *depth); -int generated_co_wrapper +int co_wrapper_mixed nbd_do_establish_connection(BlockDriverState *bs, bool blocking, Error **errp); #endif /* BLOCK_COROUTINES_H */ diff --git a/docs/devel/block-coroutine-wrapper.rst b/docs/devel/block-coroutine-wrapper.rst index 412851986b..64acc8d65d 100644 --- a/docs/devel/block-coroutine-wrapper.rst +++ b/docs/devel/block-coroutine-wrapper.rst @@ -26,12 +26,12 @@ called ``bdrv_foo()``. In this case the script can help. To trigger the generation: 1. You need ``bdrv_foo`` declaration somewhere (for example, in - ``block/coroutines.h``) with the ``generated_co_wrapper`` mark, + ``block/coroutines.h``) with the ``co_wrapper_mixed`` mark, like this: .. code-block:: c - int generated_co_wrapper bdrv_foo(); + int co_wrapper_mixed bdrv_foo(); 2. You need to feed this declaration to block-coroutine-wrapper script. For this, add the .h (or .c) file with the declaration to the @@ -46,7 +46,7 @@ Links 1. The script location is ``scripts/block-coroutine-wrapper.py``. -2. Generic place for private ``generated_co_wrapper`` declarations is +2. Generic place for private ``co_wrapper_mixed`` declarations is ``block/coroutines.h``, for public declarations: ``include/block/block.h`` diff --git a/include/block/block-common.h b/include/block/block-common.h index 297704c1e9..ec2309055b 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -35,14 +35,17 @@ #include "qemu/transactions.h" /* - * generated_co_wrapper + * co_wrapper{*}: Function specifiers used by block-coroutine-wrapper.py * - * Function specifier, which does nothing but mark functions to be + * Function specifiers, which do nothing but mark functions to be * generated by scripts/block-coroutine-wrapper.py * - * Read more in docs/devel/block-coroutine-wrapper.rst + * Usage: read docs/devel/block-coroutine-wrapper.rst + * + * co_wrapper_mixed functions can be called by both coroutine and + * non-coroutine context. */ -#define generated_co_wrapper +#define co_wrapper_mixed /* block.c */ typedef struct BlockDriver BlockDriver; diff --git a/include/block/block-io.h b/include/block/block-io.h index 72919254cd..72cf45975b 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -39,19 +39,19 @@ * to catch when they are accidentally called by the wrong API. */ -int generated_co_wrapper bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, - int64_t bytes, - BdrvRequestFlags flags); +int co_wrapper_mixed bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, + int64_t bytes, + BdrvRequestFlags flags); int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); -int generated_co_wrapper bdrv_pread(BdrvChild *child, int64_t offset, - int64_t bytes, void *buf, - BdrvRequestFlags flags); -int generated_co_wrapper bdrv_pwrite(BdrvChild *child, int64_t offset, - int64_t bytes, const void *buf, - BdrvRequestFlags flags); -int generated_co_wrapper bdrv_pwrite_sync(BdrvChild *child, int64_t offset, - int64_t bytes, const void *buf, - BdrvRequestFlags flags); +int co_wrapper_mixed bdrv_pread(BdrvChild *child, int64_t offset, + int64_t bytes, void *buf, + BdrvRequestFlags flags); +int co_wrapper_mixed bdrv_pwrite(BdrvChild *child, int64_t offset, + int64_t bytes, const void *buf, + BdrvRequestFlags flags); +int co_wrapper_mixed bdrv_pwrite_sync(BdrvChild *child, int64_t offset, + int64_t bytes, const void *buf, + BdrvRequestFlags flags); int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags); @@ -281,22 +281,22 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, void bdrv_drain(BlockDriverState *bs); -int generated_co_wrapper +int co_wrapper_mixed bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); -int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, - BdrvCheckMode fix); +int co_wrapper_mixed bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix); /* Invalidate any cached metadata used by image formats */ -int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs, - Error **errp); -int generated_co_wrapper bdrv_flush(BlockDriverState *bs); -int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset, - int64_t bytes); -int generated_co_wrapper +int co_wrapper_mixed bdrv_invalidate_cache(BlockDriverState *bs, + Error **errp); +int co_wrapper_mixed bdrv_flush(BlockDriverState *bs); +int co_wrapper_mixed bdrv_pdiscard(BdrvChild *child, int64_t offset, + int64_t bytes); +int co_wrapper_mixed bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); -int generated_co_wrapper +int co_wrapper_mixed bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); /** diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h index ee3eb12610..7ec6d978d4 100644 --- a/include/sysemu/block-backend-io.h +++ b/include/sysemu/block-backend-io.h @@ -110,77 +110,77 @@ int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk, * the "I/O or GS" API. */ -int generated_co_wrapper blk_pread(BlockBackend *blk, int64_t offset, - int64_t bytes, void *buf, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_pread(BlockBackend *blk, int64_t offset, + int64_t bytes, void *buf, + BdrvRequestFlags flags); int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes, void *buf, BdrvRequestFlags flags); -int generated_co_wrapper blk_preadv(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_preadv(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); -int generated_co_wrapper blk_preadv_part(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_preadv_part(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset, + BdrvRequestFlags flags); int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); -int generated_co_wrapper blk_pwrite(BlockBackend *blk, int64_t offset, - int64_t bytes, const void *buf, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_pwrite(BlockBackend *blk, int64_t offset, + int64_t bytes, const void *buf, + BdrvRequestFlags flags); int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags); -int generated_co_wrapper blk_pwritev(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_pwritev(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); -int generated_co_wrapper blk_pwritev_part(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_pwritev_part(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset, + BdrvRequestFlags flags); int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); -int generated_co_wrapper blk_pwrite_compressed(BlockBackend *blk, - int64_t offset, int64_t bytes, - const void *buf); +int co_wrapper_mixed blk_pwrite_compressed(BlockBackend *blk, + int64_t offset, int64_t bytes, + const void *buf); int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset, int64_t bytes, const void *buf); -int generated_co_wrapper blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int64_t bytes, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, + BdrvRequestFlags flags); int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, int64_t bytes, BdrvRequestFlags flags); -int generated_co_wrapper blk_pdiscard(BlockBackend *blk, int64_t offset, - int64_t bytes); +int co_wrapper_mixed blk_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes); int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); -int generated_co_wrapper blk_flush(BlockBackend *blk); +int co_wrapper_mixed blk_flush(BlockBackend *blk); int coroutine_fn blk_co_flush(BlockBackend *blk); -int generated_co_wrapper blk_ioctl(BlockBackend *blk, unsigned long int req, - void *buf); +int co_wrapper_mixed blk_ioctl(BlockBackend *blk, unsigned long int req, + void *buf); int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf); -int generated_co_wrapper blk_truncate(BlockBackend *blk, int64_t offset, - bool exact, PreallocMode prealloc, - BdrvRequestFlags flags, Error **errp); +int co_wrapper_mixed blk_truncate(BlockBackend *blk, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp); int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py index 08be813407..56e6425356 100644 --- a/scripts/block-coroutine-wrapper.py +++ b/scripts/block-coroutine-wrapper.py @@ -2,7 +2,7 @@ """Generate coroutine wrappers for block subsystem. The program parses one or several concatenated c files from stdin, -searches for functions with the 'generated_co_wrapper' specifier +searches for functions with the 'co_wrapper_mixed' specifier and generates corresponding wrappers on stdout. Usage: block-coroutine-wrapper.py generated-file.c FILE.[ch]... @@ -74,8 +74,8 @@ class FuncDecl: return '\n'.join(format.format_map(arg.__dict__) for arg in self.args) -# Match wrappers declared with a generated_co_wrapper mark -func_decl_re = re.compile(r'^int\s*generated_co_wrapper\s*' +# Match wrappers declared with a co_wrapper_mixed mark +func_decl_re = re.compile(r'^int\s*co_wrapper_mixed\s*' r'(?P[a-z][a-z0-9_]*)' r'\((?P[^)]*)\);$', re.MULTILINE) From 76a2f554c1e7b8acb332f765034fdf0ab3525202 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:33 -0500 Subject: [PATCH 29/50] block-coroutine-wrapper.py: introduce co_wrapper This new annotation starts just a function wrapper that creates a new coroutine. It assumes the caller is not a coroutine. It will be the default annotation to be used in the future. This is much better as c_w_mixed, because it is clear if the caller is a coroutine or not, and provides the advantage of automating the code creation. In the future all c_w_mixed functions will be substituted by co_wrapper. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-11-eesposit@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- docs/devel/block-coroutine-wrapper.rst | 6 +- include/block/block-common.h | 8 +- scripts/block-coroutine-wrapper.py | 110 +++++++++++++++++-------- 3 files changed, 86 insertions(+), 38 deletions(-) diff --git a/docs/devel/block-coroutine-wrapper.rst b/docs/devel/block-coroutine-wrapper.rst index 64acc8d65d..6dd2cdcab3 100644 --- a/docs/devel/block-coroutine-wrapper.rst +++ b/docs/devel/block-coroutine-wrapper.rst @@ -26,12 +26,12 @@ called ``bdrv_foo()``. In this case the script can help. To trigger the generation: 1. You need ``bdrv_foo`` declaration somewhere (for example, in - ``block/coroutines.h``) with the ``co_wrapper_mixed`` mark, + ``block/coroutines.h``) with the ``co_wrapper`` mark, like this: .. code-block:: c - int co_wrapper_mixed bdrv_foo(); + int co_wrapper bdrv_foo(); 2. You need to feed this declaration to block-coroutine-wrapper script. For this, add the .h (or .c) file with the declaration to the @@ -46,7 +46,7 @@ Links 1. The script location is ``scripts/block-coroutine-wrapper.py``. -2. Generic place for private ``co_wrapper_mixed`` declarations is +2. Generic place for private ``co_wrapper`` declarations is ``block/coroutines.h``, for public declarations: ``include/block/block.h`` diff --git a/include/block/block-common.h b/include/block/block-common.h index ec2309055b..847e4d4626 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -42,9 +42,13 @@ * * Usage: read docs/devel/block-coroutine-wrapper.rst * - * co_wrapper_mixed functions can be called by both coroutine and - * non-coroutine context. + * There are 2 kind of specifiers: + * - co_wrapper functions can be called by only non-coroutine context, because + * they always generate a new coroutine. + * - co_wrapper_mixed functions can be called by both coroutine and + * non-coroutine context. */ +#define co_wrapper #define co_wrapper_mixed /* block.c */ diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py index 56e6425356..2090c3bf73 100644 --- a/scripts/block-coroutine-wrapper.py +++ b/scripts/block-coroutine-wrapper.py @@ -2,7 +2,7 @@ """Generate coroutine wrappers for block subsystem. The program parses one or several concatenated c files from stdin, -searches for functions with the 'co_wrapper_mixed' specifier +searches for functions with the 'co_wrapper' specifier and generates corresponding wrappers on stdout. Usage: block-coroutine-wrapper.py generated-file.c FILE.[ch]... @@ -62,10 +62,25 @@ class ParamDecl: class FuncDecl: - def __init__(self, return_type: str, name: str, args: str) -> None: + def __init__(self, return_type: str, name: str, args: str, + variant: str) -> None: self.return_type = return_type.strip() self.name = name.strip() + self.struct_name = snake_to_camel(self.name) self.args = [ParamDecl(arg.strip()) for arg in args.split(',')] + self.create_only_co = 'mixed' not in variant + + subsystem, subname = self.name.split('_', 1) + self.co_name = f'{subsystem}_co_{subname}' + + t = self.args[0].type + if t == 'BlockDriverState *': + bs = 'bs' + elif t == 'BdrvChild *': + bs = 'child->bs' + else: + bs = 'blk_bs(blk)' + self.bs = bs def gen_list(self, format: str) -> str: return ', '.join(format.format_map(arg.__dict__) for arg in self.args) @@ -74,8 +89,9 @@ class FuncDecl: return '\n'.join(format.format_map(arg.__dict__) for arg in self.args) -# Match wrappers declared with a co_wrapper_mixed mark -func_decl_re = re.compile(r'^int\s*co_wrapper_mixed\s*' +# Match wrappers declared with a co_wrapper mark +func_decl_re = re.compile(r'^int\s*co_wrapper' + r'(?P(_[a-z][a-z0-9_]*)?)\s*' r'(?P[a-z][a-z0-9_]*)' r'\((?P[^)]*)\);$', re.MULTILINE) @@ -84,7 +100,8 @@ def func_decl_iter(text: str) -> Iterator: for m in func_decl_re.finditer(text): yield FuncDecl(return_type='int', name=m.group('wrapper_name'), - args=m.group('args')) + args=m.group('args'), + variant=m.group('variant')) def snake_to_camel(func_name: str) -> str: @@ -97,24 +114,67 @@ def snake_to_camel(func_name: str) -> str: return ''.join(words) +def create_mixed_wrapper(func: FuncDecl) -> str: + """ + Checks if we are already in coroutine + """ + name = func.co_name + struct_name = func.struct_name + return f"""\ +int {func.name}({ func.gen_list('{decl}') }) +{{ + if (qemu_in_coroutine()) {{ + return {name}({ func.gen_list('{name}') }); + }} else {{ + {struct_name} s = {{ + .poll_state.bs = {func.bs}, + .poll_state.in_progress = true, + +{ func.gen_block(' .{name} = {name},') } + }}; + + s.poll_state.co = qemu_coroutine_create({name}_entry, &s); + + return bdrv_poll_co(&s.poll_state); + }} +}}""" + + +def create_co_wrapper(func: FuncDecl) -> str: + """ + Assumes we are not in coroutine, and creates one + """ + name = func.co_name + struct_name = func.struct_name + return f"""\ +int {func.name}({ func.gen_list('{decl}') }) +{{ + {struct_name} s = {{ + .poll_state.bs = {func.bs}, + .poll_state.in_progress = true, + +{ func.gen_block(' .{name} = {name},') } + }}; + assert(!qemu_in_coroutine()); + + s.poll_state.co = qemu_coroutine_create({name}_entry, &s); + + return bdrv_poll_co(&s.poll_state); +}}""" + + def gen_wrapper(func: FuncDecl) -> str: assert not '_co_' in func.name assert func.return_type == 'int' assert func.args[0].type in ['BlockDriverState *', 'BdrvChild *', 'BlockBackend *'] - subsystem, subname = func.name.split('_', 1) + name = func.co_name + struct_name = func.struct_name - name = f'{subsystem}_co_{subname}' - - t = func.args[0].type - if t == 'BlockDriverState *': - bs = 'bs' - elif t == 'BdrvChild *': - bs = 'child->bs' - else: - bs = 'blk_bs(blk)' - struct_name = snake_to_camel(name) + creation_function = create_mixed_wrapper + if func.create_only_co: + creation_function = create_co_wrapper return f"""\ /* @@ -136,23 +196,7 @@ static void coroutine_fn {name}_entry(void *opaque) aio_wait_kick(); }} -int {func.name}({ func.gen_list('{decl}') }) -{{ - if (qemu_in_coroutine()) {{ - return {name}({ func.gen_list('{name}') }); - }} else {{ - {struct_name} s = {{ - .poll_state.bs = {bs}, - .poll_state.in_progress = true, - -{ func.gen_block(' .{name} = {name},') } - }}; - - s.poll_state.co = qemu_coroutine_create({name}_entry, &s); - - return bdrv_poll_co(&s.poll_state); - }} -}}""" +{creation_function(func)}""" def gen_wrappers(input_code: str) -> str: From 0582fb8293ad4a5d67810fb362789eba6e0ae75e Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:34 -0500 Subject: [PATCH 30/50] block-coroutine-wrapper.py: support functions without bs arg Right now, we take the first parameter of the function to get the BlockDriverState to pass to bdrv_poll_co(), that internally calls functions that figure in which aiocontext the coroutine should run. However, it is useless to pass a bs just to get its own AioContext, so instead pass it directly, and default to the main loop if no BlockDriverState is passed as parameter. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-12-eesposit@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/block-gen.h | 6 +++--- scripts/block-coroutine-wrapper.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/block/block-gen.h b/block/block-gen.h index f80cf4897d..08d977f493 100644 --- a/block/block-gen.h +++ b/block/block-gen.h @@ -30,7 +30,7 @@ /* Base structure for argument packing structures */ typedef struct BdrvPollCo { - BlockDriverState *bs; + AioContext *ctx; bool in_progress; int ret; Coroutine *co; /* Keep pointer here for debugging */ @@ -40,8 +40,8 @@ static inline int bdrv_poll_co(BdrvPollCo *s) { assert(!qemu_in_coroutine()); - bdrv_coroutine_enter(s->bs, s->co); - BDRV_POLL_WHILE(s->bs, s->in_progress); + aio_co_enter(s->ctx, s->co); + AIO_WAIT_WHILE(s->ctx, s->in_progress); return s->ret; } diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py index 2090c3bf73..f540003af1 100644 --- a/scripts/block-coroutine-wrapper.py +++ b/scripts/block-coroutine-wrapper.py @@ -75,12 +75,14 @@ class FuncDecl: t = self.args[0].type if t == 'BlockDriverState *': - bs = 'bs' + ctx = 'bdrv_get_aio_context(bs)' elif t == 'BdrvChild *': - bs = 'child->bs' + ctx = 'bdrv_get_aio_context(child->bs)' + elif t == 'BlockBackend *': + ctx = 'blk_get_aio_context(blk)' else: - bs = 'blk_bs(blk)' - self.bs = bs + ctx = 'qemu_get_aio_context()' + self.ctx = ctx def gen_list(self, format: str) -> str: return ', '.join(format.format_map(arg.__dict__) for arg in self.args) @@ -127,7 +129,7 @@ int {func.name}({ func.gen_list('{decl}') }) return {name}({ func.gen_list('{name}') }); }} else {{ {struct_name} s = {{ - .poll_state.bs = {func.bs}, + .poll_state.ctx = {func.ctx}, .poll_state.in_progress = true, { func.gen_block(' .{name} = {name},') } @@ -150,7 +152,7 @@ def create_co_wrapper(func: FuncDecl) -> str: int {func.name}({ func.gen_list('{decl}') }) {{ {struct_name} s = {{ - .poll_state.bs = {func.bs}, + .poll_state.ctx = {func.ctx}, .poll_state.in_progress = true, { func.gen_block(' .{name} = {name},') } @@ -166,8 +168,6 @@ int {func.name}({ func.gen_list('{decl}') }) def gen_wrapper(func: FuncDecl) -> str: assert not '_co_' in func.name assert func.return_type == 'int' - assert func.args[0].type in ['BlockDriverState *', 'BdrvChild *', - 'BlockBackend *'] name = func.co_name struct_name = func.struct_name From 6700dfb1b8c2828aa0c851136892c4774de87c95 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:35 -0500 Subject: [PATCH 31/50] block-coroutine-wrapper.py: support also basic return types Extend the regex to cover also return type, pointers included. This implies that the value returned by the function cannot be a simple "int" anymore, but the custom return type. Therefore remove poll_state->ret and instead use a per-function custom "ret" field. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-13-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block/block-gen.h | 5 +---- scripts/block-coroutine-wrapper.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/block/block-gen.h b/block/block-gen.h index 08d977f493..89b7daaa1f 100644 --- a/block/block-gen.h +++ b/block/block-gen.h @@ -32,18 +32,15 @@ typedef struct BdrvPollCo { AioContext *ctx; bool in_progress; - int ret; Coroutine *co; /* Keep pointer here for debugging */ } BdrvPollCo; -static inline int bdrv_poll_co(BdrvPollCo *s) +static inline void bdrv_poll_co(BdrvPollCo *s) { assert(!qemu_in_coroutine()); aio_co_enter(s->ctx, s->co); AIO_WAIT_WHILE(s->ctx, s->in_progress); - - return s->ret; } #endif /* BLOCK_BLOCK_GEN_H */ diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py index f540003af1..71a06e917a 100644 --- a/scripts/block-coroutine-wrapper.py +++ b/scripts/block-coroutine-wrapper.py @@ -92,7 +92,8 @@ class FuncDecl: # Match wrappers declared with a co_wrapper mark -func_decl_re = re.compile(r'^int\s*co_wrapper' +func_decl_re = re.compile(r'^(?P[a-zA-Z][a-zA-Z0-9_]* [\*]?)' + r'\s*co_wrapper' r'(?P(_[a-z][a-z0-9_]*)?)\s*' r'(?P[a-z][a-z0-9_]*)' r'\((?P[^)]*)\);$', re.MULTILINE) @@ -100,7 +101,7 @@ func_decl_re = re.compile(r'^int\s*co_wrapper' def func_decl_iter(text: str) -> Iterator: for m in func_decl_re.finditer(text): - yield FuncDecl(return_type='int', + yield FuncDecl(return_type=m.group('return_type'), name=m.group('wrapper_name'), args=m.group('args'), variant=m.group('variant')) @@ -123,7 +124,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str: name = func.co_name struct_name = func.struct_name return f"""\ -int {func.name}({ func.gen_list('{decl}') }) +{func.return_type} {func.name}({ func.gen_list('{decl}') }) {{ if (qemu_in_coroutine()) {{ return {name}({ func.gen_list('{name}') }); @@ -137,7 +138,8 @@ int {func.name}({ func.gen_list('{decl}') }) s.poll_state.co = qemu_coroutine_create({name}_entry, &s); - return bdrv_poll_co(&s.poll_state); + bdrv_poll_co(&s.poll_state); + return s.ret; }} }}""" @@ -149,7 +151,7 @@ def create_co_wrapper(func: FuncDecl) -> str: name = func.co_name struct_name = func.struct_name return f"""\ -int {func.name}({ func.gen_list('{decl}') }) +{func.return_type} {func.name}({ func.gen_list('{decl}') }) {{ {struct_name} s = {{ .poll_state.ctx = {func.ctx}, @@ -161,13 +163,13 @@ int {func.name}({ func.gen_list('{decl}') }) s.poll_state.co = qemu_coroutine_create({name}_entry, &s); - return bdrv_poll_co(&s.poll_state); + bdrv_poll_co(&s.poll_state); + return s.ret; }}""" def gen_wrapper(func: FuncDecl) -> str: assert not '_co_' in func.name - assert func.return_type == 'int' name = func.co_name struct_name = func.struct_name @@ -183,6 +185,7 @@ def gen_wrapper(func: FuncDecl) -> str: typedef struct {struct_name} {{ BdrvPollCo poll_state; + {func.return_type} ret; { func.gen_block(' {decl};') } }} {struct_name}; @@ -190,7 +193,7 @@ static void coroutine_fn {name}_entry(void *opaque) {{ {struct_name} *s = opaque; - s->poll_state.ret = {name}({ func.gen_list('s->{name}') }); + s->ret = {name}({ func.gen_list('s->{name}') }); s->poll_state.in_progress = false; aio_wait_kick(); From 741443eb4301eb130dab812c7ae7cfd71a68a679 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:36 -0500 Subject: [PATCH 32/50] block: convert bdrv_create to co_wrapper This function is never called in coroutine context, therefore instead of manually creating a new coroutine, delegate it to the block-coroutine-wrapper script, defining it as co_wrapper. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-14-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 41 ++---------------------------- include/block/block-global-state.h | 8 ++++-- 2 files changed, 8 insertions(+), 41 deletions(-) diff --git a/block.c b/block.c index c8ac91eb63..6191ac1f44 100644 --- a/block.c +++ b/block.c @@ -526,8 +526,8 @@ typedef struct CreateCo { Error *err; } CreateCo; -static int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, - QemuOpts *opts, Error **errp) +int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp) { int ret; GLOBAL_STATE_CODE(); @@ -547,43 +547,6 @@ static int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, return ret; } -static void coroutine_fn bdrv_create_co_entry(void *opaque) -{ - CreateCo *cco = opaque; - GLOBAL_STATE_CODE(); - - cco->ret = bdrv_co_create(cco->drv, cco->filename, cco->opts, &cco->err); - aio_wait_kick(); -} - -int bdrv_create(BlockDriver *drv, const char* filename, - QemuOpts *opts, Error **errp) -{ - GLOBAL_STATE_CODE(); - - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - return bdrv_co_create(drv, filename, opts, errp); - } else { - Coroutine *co; - CreateCo cco = { - .drv = drv, - .filename = filename, - .opts = opts, - .ret = NOT_DONE, - .err = NULL, - }; - - co = qemu_coroutine_create(bdrv_create_co_entry, &cco); - qemu_coroutine_enter(co); - while (cco.ret == NOT_DONE) { - aio_poll(qemu_get_aio_context(), true); - } - error_propagate(errp, cco.err); - return cco.ret; - } -} - /** * Helper function for bdrv_create_file_fallback(): Resize @blk to at * least the given @minimum_size. diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 387a7cbb2e..1f8b54f2df 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -55,8 +55,12 @@ BlockDriver *bdrv_find_protocol(const char *filename, bool allow_protocol_prefix, Error **errp); BlockDriver *bdrv_find_format(const char *format_name); -int bdrv_create(BlockDriver *drv, const char* filename, - QemuOpts *opts, Error **errp); + +int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp); +int co_wrapper bdrv_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp); + int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp); From 0508d0be4b547be8da97c95ea9e1f433077dd2ec Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:37 -0500 Subject: [PATCH 33/50] block/dirty-bitmap: convert coroutine-only functions to co_wrapper bdrv_can_store_new_dirty_bitmap and bdrv_remove_persistent_dirty_bitmap check if they are running in a coroutine, directly calling the coroutine callback if it's the case. Except that no coroutine calls such functions, therefore that check can be removed, and function creation can be offloaded to c_w. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-15-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 88 +----------------------------------- block/meson.build | 1 + include/block/block-common.h | 5 +- include/block/block-io.h | 10 +++- include/block/dirty-bitmap.h | 10 +++- 5 files changed, 22 insertions(+), 92 deletions(-) diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 9c39550698..956feeb2ae 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -388,7 +388,7 @@ void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs) * not fail. * This function doesn't release corresponding BdrvDirtyBitmap. */ -static int coroutine_fn +int coroutine_fn bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, Error **errp) { @@ -399,45 +399,6 @@ bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, return 0; } -typedef struct BdrvRemovePersistentDirtyBitmapCo { - BlockDriverState *bs; - const char *name; - Error **errp; - int ret; -} BdrvRemovePersistentDirtyBitmapCo; - -static void coroutine_fn -bdrv_co_remove_persistent_dirty_bitmap_entry(void *opaque) -{ - BdrvRemovePersistentDirtyBitmapCo *s = opaque; - - s->ret = bdrv_co_remove_persistent_dirty_bitmap(s->bs, s->name, s->errp); - aio_wait_kick(); -} - -int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, - Error **errp) -{ - if (qemu_in_coroutine()) { - return bdrv_co_remove_persistent_dirty_bitmap(bs, name, errp); - } else { - Coroutine *co; - BdrvRemovePersistentDirtyBitmapCo s = { - .bs = bs, - .name = name, - .errp = errp, - .ret = -EINPROGRESS, - }; - - co = qemu_coroutine_create(bdrv_co_remove_persistent_dirty_bitmap_entry, - &s); - bdrv_coroutine_enter(bs, co); - BDRV_POLL_WHILE(bs, s.ret == -EINPROGRESS); - - return s.ret; - } -} - bool bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs) { @@ -447,7 +408,7 @@ bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs) return false; } -static bool coroutine_fn +bool coroutine_fn bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, uint32_t granularity, Error **errp) { @@ -470,51 +431,6 @@ bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, return drv->bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp); } -typedef struct BdrvCanStoreNewDirtyBitmapCo { - BlockDriverState *bs; - const char *name; - uint32_t granularity; - Error **errp; - bool ret; - - bool in_progress; -} BdrvCanStoreNewDirtyBitmapCo; - -static void coroutine_fn bdrv_co_can_store_new_dirty_bitmap_entry(void *opaque) -{ - BdrvCanStoreNewDirtyBitmapCo *s = opaque; - - s->ret = bdrv_co_can_store_new_dirty_bitmap(s->bs, s->name, s->granularity, - s->errp); - s->in_progress = false; - aio_wait_kick(); -} - -bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, - uint32_t granularity, Error **errp) -{ - IO_CODE(); - if (qemu_in_coroutine()) { - return bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp); - } else { - Coroutine *co; - BdrvCanStoreNewDirtyBitmapCo s = { - .bs = bs, - .name = name, - .granularity = granularity, - .errp = errp, - .in_progress = true, - }; - - co = qemu_coroutine_create(bdrv_co_can_store_new_dirty_bitmap_entry, - &s); - bdrv_coroutine_enter(bs, co); - BDRV_POLL_WHILE(bs, s.in_progress); - - return s.ret; - } -} - void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) { bdrv_dirty_bitmaps_lock(bitmap->bs); diff --git a/block/meson.build b/block/meson.build index b7c68b83a3..c48a59571e 100644 --- a/block/meson.build +++ b/block/meson.build @@ -137,6 +137,7 @@ block_gen_c = custom_target('block-gen.c', output: 'block-gen.c', input: files( '../include/block/block-io.h', + '../include/block/dirty-bitmap.h', '../include/block/block-global-state.h', '../include/sysemu/block-backend-io.h', 'coroutines.h' diff --git a/include/block/block-common.h b/include/block/block-common.h index 847e4d4626..6cf603ab06 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -29,8 +29,6 @@ #include "qemu/iov.h" #include "qemu/coroutine.h" #include "block/accounting.h" -#include "block/dirty-bitmap.h" -#include "block/blockjob.h" #include "qemu/hbitmap.h" #include "qemu/transactions.h" @@ -51,6 +49,9 @@ #define co_wrapper #define co_wrapper_mixed +#include "block/dirty-bitmap.h" +#include "block/blockjob.h" + /* block.c */ typedef struct BlockDriver BlockDriver; typedef struct BdrvChild BdrvChild; diff --git a/include/block/block-io.h b/include/block/block-io.h index 72cf45975b..52869ea08e 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -215,8 +215,14 @@ AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); void bdrv_io_plug(BlockDriverState *bs); void bdrv_io_unplug(BlockDriverState *bs); -bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, - uint32_t granularity, Error **errp); +bool coroutine_fn bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp); +bool co_wrapper bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp); /** * diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 6528336c4c..c3700cec76 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -34,8 +34,14 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags, Error **errp); void bdrv_release_dirty_bitmap(BdrvDirtyBitmap *bitmap); void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs); -int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, - Error **errp); + +int coroutine_fn bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp); +int co_wrapper bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp); + void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap); void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); void bdrv_enable_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap); From da0bd744344adb1f285002467d7fa84699dc2fce Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:21 +0100 Subject: [PATCH 34/50] block: Factor out bdrv_drain_all_begin_nopoll() Provide a separate function that just quiesces the users of a node to prevent new requests from coming in, but without waiting for the already in-flight I/O to complete. This function can be used in contexts where polling is not allowed. Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-2-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- block/io.c | 19 +++++++++++++------ include/block/block-global-state.h | 1 + 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/block/io.c b/block/io.c index f4444b7777..d160d2e273 100644 --- a/block/io.c +++ b/block/io.c @@ -466,16 +466,11 @@ static bool bdrv_drain_all_poll(void) * NOTE: no new block jobs or BlockDriverStates can be created between * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls. */ -void bdrv_drain_all_begin(void) +void bdrv_drain_all_begin_nopoll(void) { BlockDriverState *bs = NULL; GLOBAL_STATE_CODE(); - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(NULL, true, NULL, true); - return; - } - /* * bdrv queue is managed by record/replay, * waiting for finishing the I/O requests may @@ -500,6 +495,18 @@ void bdrv_drain_all_begin(void) bdrv_do_drained_begin(bs, NULL, false); aio_context_release(aio_context); } +} + +void bdrv_drain_all_begin(void) +{ + BlockDriverState *bs = NULL; + + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(NULL, true, NULL, true); + return; + } + + bdrv_drain_all_begin_nopoll(); /* Now poll the in-flight requests */ AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll()); diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 1f8b54f2df..b0a3cfe6b8 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -152,6 +152,7 @@ int bdrv_inactivate_all(void); int bdrv_flush_all(void); void bdrv_close_all(void); void bdrv_drain_all_begin(void); +void bdrv_drain_all_begin_nopoll(void); void bdrv_drain_all_end(void); void bdrv_drain_all(void); From aead9dc9d1acc5876951885c064ec4153fbd0ed8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 7 Dec 2022 14:18:22 +0100 Subject: [PATCH 35/50] graph-lock: Introduce a lock to protect block graph operations Block layer graph operations are always run under BQL in the main loop. This is proved by the assertion qemu_in_main_thread() and its wrapper macro GLOBAL_STATE_CODE. However, there are also concurrent coroutines running in other iothreads that always try to traverse the graph. Currently this is protected (among various other things) by the AioContext lock, but once this is removed, we need to make sure that reads do not happen while modifying the graph. We distinguish between writer (main loop, under BQL) that modifies the graph, and readers (all other coroutines running in various AioContext), that go through the graph edges, reading ->parents and->children. The writer (main loop) has "exclusive" access, so it first waits for any current read to finish, and then prevents incoming ones from entering while it has the exclusive access. The readers (coroutines in multiple AioContext) are free to access the graph as long the writer is not modifying the graph. In case it is, they go in a CoQueue and sleep until the writer is done. If a coroutine changes AioContext, the counter in the original and new AioContext are left intact, since the writer does not care where the reader is, but only if there is one. As a result, some AioContexts might have a negative reader count, to balance the positive count of the AioContext that took the lock. This also means that when an AioContext is deleted it may have a nonzero reader count. In that case we transfer the count to a global shared counter so that the writer is always aware of all readers. Signed-off-by: Paolo Bonzini Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-3-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/graph-lock.c | 261 +++++++++++++++++++++++++++++++++++++ block/meson.build | 1 + include/block/aio.h | 9 ++ include/block/block_int.h | 1 + include/block/graph-lock.h | 139 ++++++++++++++++++++ 5 files changed, 411 insertions(+) create mode 100644 block/graph-lock.c create mode 100644 include/block/graph-lock.h diff --git a/block/graph-lock.c b/block/graph-lock.c new file mode 100644 index 0000000000..e033c6d9ac --- /dev/null +++ b/block/graph-lock.c @@ -0,0 +1,261 @@ +/* + * Graph lock: rwlock to protect block layer graph manipulations (add/remove + * edges and nodes) + * + * Copyright (c) 2022 Red Hat + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "block/graph-lock.h" +#include "block/block.h" +#include "block/block_int.h" + +/* Protects the list of aiocontext and orphaned_reader_count */ +static QemuMutex aio_context_list_lock; + +/* Written and read with atomic operations. */ +static int has_writer; + +/* + * A reader coroutine could move from an AioContext to another. + * If this happens, there is no problem from the point of view of + * counters. The problem is that the total count becomes + * unbalanced if one of the two AioContexts gets deleted. + * The count of readers must remain correct, so the AioContext's + * balance is transferred to this glboal variable. + * Protected by aio_context_list_lock. + */ +static uint32_t orphaned_reader_count; + +/* Queue of readers waiting for the writer to finish */ +static CoQueue reader_queue; + +struct BdrvGraphRWlock { + /* How many readers are currently reading the graph. */ + uint32_t reader_count; + + /* + * List of BdrvGraphRWlock kept in graph-lock.c + * Protected by aio_context_list_lock + */ + QTAILQ_ENTRY(BdrvGraphRWlock) next_aio; +}; + +/* + * List of BdrvGraphRWlock. This list ensures that each BdrvGraphRWlock + * can safely modify only its own counter, avoid reading/writing + * others and thus improving performances by avoiding cacheline bounces. + */ +static QTAILQ_HEAD(, BdrvGraphRWlock) aio_context_list = + QTAILQ_HEAD_INITIALIZER(aio_context_list); + +static void __attribute__((__constructor__)) bdrv_init_graph_lock(void) +{ + qemu_mutex_init(&aio_context_list_lock); + qemu_co_queue_init(&reader_queue); +} + +void register_aiocontext(AioContext *ctx) +{ + ctx->bdrv_graph = g_new0(BdrvGraphRWlock, 1); + QEMU_LOCK_GUARD(&aio_context_list_lock); + assert(ctx->bdrv_graph->reader_count == 0); + QTAILQ_INSERT_TAIL(&aio_context_list, ctx->bdrv_graph, next_aio); +} + +void unregister_aiocontext(AioContext *ctx) +{ + QEMU_LOCK_GUARD(&aio_context_list_lock); + orphaned_reader_count += ctx->bdrv_graph->reader_count; + QTAILQ_REMOVE(&aio_context_list, ctx->bdrv_graph, next_aio); + g_free(ctx->bdrv_graph); +} + +static uint32_t reader_count(void) +{ + BdrvGraphRWlock *brdv_graph; + uint32_t rd; + + QEMU_LOCK_GUARD(&aio_context_list_lock); + + /* rd can temporarly be negative, but the total will *always* be >= 0 */ + rd = orphaned_reader_count; + QTAILQ_FOREACH(brdv_graph, &aio_context_list, next_aio) { + rd += qatomic_read(&brdv_graph->reader_count); + } + + /* shouldn't overflow unless there are 2^31 readers */ + assert((int32_t)rd >= 0); + return rd; +} + +void bdrv_graph_wrlock(void) +{ + GLOBAL_STATE_CODE(); + assert(!qatomic_read(&has_writer)); + + /* Make sure that constantly arriving new I/O doesn't cause starvation */ + bdrv_drain_all_begin_nopoll(); + + /* + * reader_count == 0: this means writer will read has_reader as 1 + * reader_count >= 1: we don't know if writer read has_writer == 0 or 1, + * but we need to wait. + * Wait by allowing other coroutine (and possible readers) to continue. + */ + do { + /* + * has_writer must be 0 while polling, otherwise we get a deadlock if + * any callback involved during AIO_WAIT_WHILE() tries to acquire the + * reader lock. + */ + qatomic_set(&has_writer, 0); + AIO_WAIT_WHILE(qemu_get_aio_context(), reader_count() >= 1); + qatomic_set(&has_writer, 1); + + /* + * We want to only check reader_count() after has_writer = 1 is visible + * to other threads. That way no more readers can sneak in after we've + * determined reader_count() == 0. + */ + smp_mb(); + } while (reader_count() >= 1); + + bdrv_drain_all_end(); +} + +void bdrv_graph_wrunlock(void) +{ + GLOBAL_STATE_CODE(); + QEMU_LOCK_GUARD(&aio_context_list_lock); + assert(qatomic_read(&has_writer)); + + /* + * No need for memory barriers, this works in pair with + * the slow path of rdlock() and both take the lock. + */ + qatomic_store_release(&has_writer, 0); + + /* Wake up all coroutine that are waiting to read the graph */ + qemu_co_enter_all(&reader_queue, &aio_context_list_lock); +} + +void coroutine_fn bdrv_graph_co_rdlock(void) +{ + BdrvGraphRWlock *bdrv_graph; + bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; + + /* Do not lock if in main thread */ + if (qemu_in_main_thread()) { + return; + } + + for (;;) { + qatomic_set(&bdrv_graph->reader_count, + bdrv_graph->reader_count + 1); + /* make sure writer sees reader_count before we check has_writer */ + smp_mb(); + + /* + * has_writer == 0: this means writer will read reader_count as >= 1 + * has_writer == 1: we don't know if writer read reader_count == 0 + * or > 0, but we need to wait anyways because + * it will write. + */ + if (!qatomic_read(&has_writer)) { + break; + } + + /* + * Synchronize access with reader_count() in bdrv_graph_wrlock(). + * Case 1: + * If this critical section gets executed first, reader_count will + * decrease and the reader will go to sleep. + * Then the writer will read reader_count that does not take into + * account this reader, and if there's no other reader it will + * enter the write section. + * Case 2: + * If reader_count() critical section gets executed first, + * then writer will read reader_count >= 1. + * It will wait in AIO_WAIT_WHILE(), but once it releases the lock + * we will enter this critical section and call aio_wait_kick(). + */ + WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) { + /* + * Additional check when we use the above lock to synchronize + * with bdrv_graph_wrunlock(). + * Case 1: + * If this gets executed first, has_writer is still 1, so we reduce + * reader_count and go to sleep. + * Then the writer will set has_writer to 0 and wake up all readers, + * us included. + * Case 2: + * If bdrv_graph_wrunlock() critical section gets executed first, + * then it will set has_writer to 0 and wake up all other readers. + * Then we execute this critical section, and therefore must check + * again for has_writer, otherwise we sleep without any writer + * actually running. + */ + if (!qatomic_read(&has_writer)) { + return; + } + + /* slow path where reader sleeps */ + bdrv_graph->reader_count--; + aio_wait_kick(); + qemu_co_queue_wait(&reader_queue, &aio_context_list_lock); + } + } +} + +void coroutine_fn bdrv_graph_co_rdunlock(void) +{ + BdrvGraphRWlock *bdrv_graph; + bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; + + /* Do not lock if in main thread */ + if (qemu_in_main_thread()) { + return; + } + + qatomic_store_release(&bdrv_graph->reader_count, + bdrv_graph->reader_count - 1); + /* make sure writer sees reader_count before we check has_writer */ + smp_mb(); + + /* + * has_writer == 0: this means reader will read reader_count decreased + * has_writer == 1: we don't know if writer read reader_count old or + * new. Therefore, kick again so on next iteration + * writer will for sure read the updated value. + */ + if (qatomic_read(&has_writer)) { + aio_wait_kick(); + } +} + +void bdrv_graph_rdlock_main_loop(void) +{ + GLOBAL_STATE_CODE(); + assert(!qemu_in_coroutine()); +} + +void bdrv_graph_rdunlock_main_loop(void) +{ + GLOBAL_STATE_CODE(); + assert(!qemu_in_coroutine()); +} diff --git a/block/meson.build b/block/meson.build index c48a59571e..90011a2805 100644 --- a/block/meson.build +++ b/block/meson.build @@ -10,6 +10,7 @@ block_ss.add(files( 'blkverify.c', 'block-backend.c', 'block-copy.c', + 'graph-lock.c', 'commit.c', 'copy-on-read.c', 'preallocate.c', diff --git a/include/block/aio.h b/include/block/aio.h index d128558f1d..0f65a3cc9e 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -22,6 +22,7 @@ #include "qemu/event_notifier.h" #include "qemu/thread.h" #include "qemu/timer.h" +#include "block/graph-lock.h" typedef struct BlockAIOCB BlockAIOCB; typedef void BlockCompletionFunc(void *opaque, int ret); @@ -127,6 +128,14 @@ struct AioContext { /* Used by AioContext users to protect from multi-threaded access. */ QemuRecMutex lock; + /* + * Keep track of readers and writers of the block layer graph. + * This is essential to avoid performing additions and removal + * of nodes and edges from block graph while some + * other thread is traversing it. + */ + BdrvGraphRWlock *bdrv_graph; + /* The list of registered AIO handlers. Protected by ctx->list_lock. */ AioHandlerList aio_handlers; diff --git a/include/block/block_int.h b/include/block/block_int.h index 7d50b6bbd1..b35b0138ed 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -26,6 +26,7 @@ #include "block_int-global-state.h" #include "block_int-io.h" +#include "block/graph-lock.h" /* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */ diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h new file mode 100644 index 0000000000..82edb62cfa --- /dev/null +++ b/include/block/graph-lock.h @@ -0,0 +1,139 @@ +/* + * Graph lock: rwlock to protect block layer graph manipulations (add/remove + * edges and nodes) + * + * Copyright (c) 2022 Red Hat + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#ifndef GRAPH_LOCK_H +#define GRAPH_LOCK_H + +#include "qemu/osdep.h" + +#include "qemu/coroutine.h" + +/** + * Graph Lock API + * This API provides a rwlock used to protect block layer + * graph modifications like edge (BdrvChild) and node (BlockDriverState) + * addition and removal. + * Currently we have 1 writer only, the Main loop, and many + * readers, mostly coroutines running in other AioContext thus other threads. + * + * We distinguish between writer (main loop, under BQL) that modifies the + * graph, and readers (all other coroutines running in various AioContext), + * that go through the graph edges, reading + * BlockDriverState ->parents and->children. + * + * The writer (main loop) has an "exclusive" access, so it first waits for + * current read to finish, and then prevents incoming ones from + * entering while it has the exclusive access. + * + * The readers (coroutines in multiple AioContext) are free to + * access the graph as long the writer is not modifying the graph. + * In case it is, they go in a CoQueue and sleep until the writer + * is done. + * + * If a coroutine changes AioContext, the counter in the original and new + * AioContext are left intact, since the writer does not care where is the + * reader, but only if there is one. + * As a result, some AioContexts might have a negative reader count, to + * balance the positive count of the AioContext that took the lock. + * This also means that when an AioContext is deleted it may have a nonzero + * reader count. In that case we transfer the count to a global shared counter + * so that the writer is always aware of all readers. + */ +typedef struct BdrvGraphRWlock BdrvGraphRWlock; + +/* + * register_aiocontext: + * Add AioContext @ctx to the list of AioContext. + * This list is used to obtain the total number of readers + * currently running the graph. + */ +void register_aiocontext(AioContext *ctx); + +/* + * unregister_aiocontext: + * Removes AioContext @ctx to the list of AioContext. + */ +void unregister_aiocontext(AioContext *ctx); + +/* + * bdrv_graph_wrlock: + * Start an exclusive write operation to modify the graph. This means we are + * adding or removing an edge or a node in the block layer graph. Nobody else + * is allowed to access the graph. + * + * Must only be called from outside bdrv_graph_co_rdlock. + * + * The wrlock can only be taken from the main loop, with BQL held, as only the + * main loop is allowed to modify the graph. + * + * This function polls. Callers must not hold the lock of any AioContext other + * than the current one. + */ +void bdrv_graph_wrlock(void); + +/* + * bdrv_graph_wrunlock: + * Write finished, reset global has_writer to 0 and restart + * all readers that are waiting. + */ +void bdrv_graph_wrunlock(void); + +/* + * bdrv_graph_co_rdlock: + * Read the bs graph. This usually means traversing all nodes in + * the graph, therefore it can't happen while another thread is + * modifying it. + * Increases the reader counter of the current aiocontext, + * and if has_writer is set, it means that the writer is modifying + * the graph, therefore wait in a coroutine queue. + * The writer will then wake this coroutine once it is done. + * + * This lock should be taken from Iothreads (IO_CODE() class of functions) + * because it signals the writer that there are some + * readers currently running, or waits until the current + * write is finished before continuing. + * Calling this function from the Main Loop with BQL held + * is not necessary, since the Main Loop itself is the only + * writer, thus won't be able to read and write at the same time. + * The only exception to that is when we can't take the lock in the + * function/coroutine itself, and need to delegate the caller (usually main + * loop) to take it and wait that the coroutine ends, so that + * we always signal that a reader is running. + */ +void coroutine_fn bdrv_graph_co_rdlock(void); + +/* + * bdrv_graph_rdunlock: + * Read terminated, decrease the count of readers in the current aiocontext. + * If the writer is waiting for reads to finish (has_writer == 1), signal + * the writer that we are done via aio_wait_kick() to let it continue. + */ +void coroutine_fn bdrv_graph_co_rdunlock(void); + +/* + * bdrv_graph_rd{un}lock_main_loop: + * Just a placeholder to mark where the graph rdlock should be taken + * in the main loop. It is just asserting that we are not + * in a coroutine and in GLOBAL_STATE_CODE. + */ +void bdrv_graph_rdlock_main_loop(void); +void bdrv_graph_rdunlock_main_loop(void); + +#endif /* GRAPH_LOCK_H */ + From 8aa77000c2ba8c234f72b9b4162529d02ea00188 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:23 +0100 Subject: [PATCH 36/50] graph-lock: Implement guard macros Similar to the implementation in lockable.h, implement macros to automatically take and release the rdlock. Create the empty GraphLockable and GraphLockableMainloop structs only to use it as a type for G_DEFINE_AUTOPTR_CLEANUP_FUNC. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-4-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/block/graph-lock.h | 66 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index 82edb62cfa..b27d8a5fb1 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -135,5 +135,71 @@ void coroutine_fn bdrv_graph_co_rdunlock(void); void bdrv_graph_rdlock_main_loop(void); void bdrv_graph_rdunlock_main_loop(void); +typedef struct GraphLockable { } GraphLockable; + +/* + * In C, compound literals have the lifetime of an automatic variable. + * In C++ it would be different, but then C++ wouldn't need QemuLockable + * either... + */ +#define GML_OBJ_() (&(GraphLockable) { }) + +static inline GraphLockable *graph_lockable_auto_lock(GraphLockable *x) +{ + bdrv_graph_co_rdlock(); + return x; +} + +static inline void graph_lockable_auto_unlock(GraphLockable *x) +{ + bdrv_graph_co_rdunlock(); +} + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(GraphLockable, graph_lockable_auto_unlock) + +#define WITH_GRAPH_RDLOCK_GUARD_(var) \ + for (g_autoptr(GraphLockable) var = graph_lockable_auto_lock(GML_OBJ_()); \ + var; \ + graph_lockable_auto_unlock(var), var = NULL) + +#define WITH_GRAPH_RDLOCK_GUARD() \ + WITH_GRAPH_RDLOCK_GUARD_(glue(graph_lockable_auto, __COUNTER__)) + +#define GRAPH_RDLOCK_GUARD(x) \ + g_autoptr(GraphLockable) \ + glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED = \ + graph_lockable_auto_lock(GML_OBJ_()) + + +typedef struct GraphLockableMainloop { } GraphLockableMainloop; + +/* + * In C, compound literals have the lifetime of an automatic variable. + * In C++ it would be different, but then C++ wouldn't need QemuLockable + * either... + */ +#define GMLML_OBJ_() (&(GraphLockableMainloop) { }) + +static inline GraphLockableMainloop * +graph_lockable_auto_lock_mainloop(GraphLockableMainloop *x) +{ + bdrv_graph_rdlock_main_loop(); + return x; +} + +static inline void +graph_lockable_auto_unlock_mainloop(GraphLockableMainloop *x) +{ + bdrv_graph_rdunlock_main_loop(); +} + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(GraphLockableMainloop, + graph_lockable_auto_unlock_mainloop) + +#define GRAPH_RDLOCK_GUARD_MAINLOOP(x) \ + g_autoptr(GraphLockableMainloop) \ + glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED = \ + graph_lockable_auto_lock_mainloop(GMLML_OBJ_()) + #endif /* GRAPH_LOCK_H */ From 587d82fae258794e33cacc9bf4ba61949184e822 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:24 +0100 Subject: [PATCH 37/50] async: Register/unregister aiocontext in graph lock list Add/remove the AioContext in aio_context_list in graph-lock.c when it is created/destroyed. This allows using the graph locking operations from this AioContext. In order to allow linking util/async.c with binaries that don't include the block layer, introduce stubs for (un)register_aiocontext(). Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-5-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- stubs/graph-lock.c | 10 ++++++++++ stubs/meson.build | 1 + util/async.c | 4 ++++ 3 files changed, 15 insertions(+) create mode 100644 stubs/graph-lock.c diff --git a/stubs/graph-lock.c b/stubs/graph-lock.c new file mode 100644 index 0000000000..177aa0a8ba --- /dev/null +++ b/stubs/graph-lock.c @@ -0,0 +1,10 @@ +#include "qemu/osdep.h" +#include "block/graph-lock.h" + +void register_aiocontext(AioContext *ctx) +{ +} + +void unregister_aiocontext(AioContext *ctx) +{ +} diff --git a/stubs/meson.build b/stubs/meson.build index c96a74f095..981585cbdf 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -13,6 +13,7 @@ stub_ss.add(files('error-printf.c')) stub_ss.add(files('fdset.c')) stub_ss.add(files('gdbstub.c')) stub_ss.add(files('get-vm-name.c')) +stub_ss.add(files('graph-lock.c')) if linux_io_uring.found() stub_ss.add(files('io_uring.c')) endif diff --git a/util/async.c b/util/async.c index 63434ddae4..14d63b3091 100644 --- a/util/async.c +++ b/util/async.c @@ -27,6 +27,7 @@ #include "qapi/error.h" #include "block/aio.h" #include "block/thread-pool.h" +#include "block/graph-lock.h" #include "qemu/main-loop.h" #include "qemu/atomic.h" #include "qemu/rcu_queue.h" @@ -376,6 +377,7 @@ aio_ctx_finalize(GSource *source) qemu_rec_mutex_destroy(&ctx->lock); qemu_lockcnt_destroy(&ctx->list_lock); timerlistgroup_deinit(&ctx->tlg); + unregister_aiocontext(ctx); aio_context_destroy(ctx); } @@ -574,6 +576,8 @@ AioContext *aio_context_new(Error **errp) ctx->thread_pool_min = 0; ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; + register_aiocontext(ctx); + return ctx; fail: g_source_destroy(&ctx->source); From 702152d1c72225d318bb1626b457f87719cc2f25 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:25 +0100 Subject: [PATCH 38/50] Import clang-tsa.h This defines macros that allow clang to perform Thread Safety Analysis based on function and variable annotations that specify the locking rules. On non-clang compilers, the annotations are ignored. Imported tsa.h from the original repository with the pthread_mutex_t wrapper removed: https://github.com/jhi/clang-thread-safety-analysis-for-c.git Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-6-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/qemu/clang-tsa.h | 101 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 include/qemu/clang-tsa.h diff --git a/include/qemu/clang-tsa.h b/include/qemu/clang-tsa.h new file mode 100644 index 0000000000..0a3361dfc8 --- /dev/null +++ b/include/qemu/clang-tsa.h @@ -0,0 +1,101 @@ +#ifndef CLANG_TSA_H +#define CLANG_TSA_H + +/* + * Copyright 2018 Jarkko Hietaniemi + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without + * limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* http://clang.llvm.org/docs/ThreadSafetyAnalysis.html + * + * TSA is available since clang 3.6-ish. + */ +#ifdef __clang__ +# define TSA(x) __attribute__((x)) +#else +# define TSA(x) /* No TSA, make TSA attributes no-ops. */ +#endif + +/* TSA_CAPABILITY() is used to annotate typedefs: + * + * typedef pthread_mutex_t TSA_CAPABILITY("mutex") tsa_mutex; + */ +#define TSA_CAPABILITY(x) TSA(capability(x)) + +/* TSA_GUARDED_BY() is used to annotate global variables, + * the data is guarded: + * + * Foo foo TSA_GUARDED_BY(mutex); + */ +#define TSA_GUARDED_BY(x) TSA(guarded_by(x)) + +/* TSA_PT_GUARDED_BY() is used to annotate global pointers, the data + * behind the pointer is guarded. + * + * Foo* ptr TSA_PT_GUARDED_BY(mutex); + */ +#define TSA_PT_GUARDED_BY(x) TSA(pt_guarded_by(x)) + +/* The TSA_REQUIRES() is used to annotate functions: the caller of the + * function MUST hold the resource, the function will NOT release it. + * + * More than one mutex may be specified, comma-separated. + * + * void Foo(void) TSA_REQUIRES(mutex); + */ +#define TSA_REQUIRES(...) TSA(requires_capability(__VA_ARGS__)) + +/* TSA_EXCLUDES() is used to annotate functions: the caller of the + * function MUST NOT hold resource, the function first acquires the + * resource, and then releases it. + * + * More than one mutex may be specified, comma-separated. + * + * void Foo(void) TSA_EXCLUDES(mutex); + */ +#define TSA_EXCLUDES(...) TSA(locks_excluded(__VA_ARGS__)) + +/* TSA_ACQUIRE() is used to annotate functions: the caller of the + * function MUST NOT hold the resource, the function will acquire the + * resource, but NOT release it. + * + * More than one mutex may be specified, comma-separated. + * + * void Foo(void) TSA_ACQUIRE(mutex); + */ +#define TSA_ACQUIRE(...) TSA(acquire_capability(__VA_ARGS__)) + +/* TSA_RELEASE() is used to annotate functions: the caller of the + * function MUST hold the resource, but the function will then release it. + * + * More than one mutex may be specified, comma-separated. + * + * void Foo(void) TSA_RELEASE(mutex); + */ +#define TSA_RELEASE(...) TSA(release_capability(__VA_ARGS__)) + +/* TSA_NO_TSA is used to annotate functions. Use only when you need to. + * + * void Foo(void) TSA_NO_TSA; + */ +#define TSA_NO_TSA TSA(no_thread_safety_analysis) + +#endif /* #ifndef CLANG_TSA_H */ From b1cc02e9463a406c6edd7ac55e356cf65a0681d7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:26 +0100 Subject: [PATCH 39/50] clang-tsa: Add TSA_ASSERT() macro Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-7-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/qemu/clang-tsa.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/qemu/clang-tsa.h b/include/qemu/clang-tsa.h index 0a3361dfc8..211ee0ae73 100644 --- a/include/qemu/clang-tsa.h +++ b/include/qemu/clang-tsa.h @@ -98,4 +98,13 @@ */ #define TSA_NO_TSA TSA(no_thread_safety_analysis) +/* + * TSA_ASSERT() is used to annotate functions: This function will assert that + * the lock is held. When it returns, the caller of the function is assumed to + * already hold the resource. + * + * More than one mutex may be specified, comma-separated. + */ +#define TSA_ASSERT(...) TSA(assert_capability(__VA_ARGS__)) + #endif /* #ifndef CLANG_TSA_H */ From d9bfb9de00d4ee15908cc2f76333d6657b580dea Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:27 +0100 Subject: [PATCH 40/50] clang-tsa: Add macros for shared locks Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-8-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/qemu/clang-tsa.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/qemu/clang-tsa.h b/include/qemu/clang-tsa.h index 211ee0ae73..ba06fb8c92 100644 --- a/include/qemu/clang-tsa.h +++ b/include/qemu/clang-tsa.h @@ -62,6 +62,7 @@ * void Foo(void) TSA_REQUIRES(mutex); */ #define TSA_REQUIRES(...) TSA(requires_capability(__VA_ARGS__)) +#define TSA_REQUIRES_SHARED(...) TSA(requires_shared_capability(__VA_ARGS__)) /* TSA_EXCLUDES() is used to annotate functions: the caller of the * function MUST NOT hold resource, the function first acquires the @@ -82,6 +83,7 @@ * void Foo(void) TSA_ACQUIRE(mutex); */ #define TSA_ACQUIRE(...) TSA(acquire_capability(__VA_ARGS__)) +#define TSA_ACQUIRE_SHARED(...) TSA(acquire_shared_capability(__VA_ARGS__)) /* TSA_RELEASE() is used to annotate functions: the caller of the * function MUST hold the resource, but the function will then release it. @@ -91,6 +93,7 @@ * void Foo(void) TSA_RELEASE(mutex); */ #define TSA_RELEASE(...) TSA(release_capability(__VA_ARGS__)) +#define TSA_RELEASE_SHARED(...) TSA(release_shared_capability(__VA_ARGS__)) /* TSA_NO_TSA is used to annotate functions. Use only when you need to. * @@ -106,5 +109,6 @@ * More than one mutex may be specified, comma-separated. */ #define TSA_ASSERT(...) TSA(assert_capability(__VA_ARGS__)) +#define TSA_ASSERT_SHARED(...) TSA(assert_shared_capability(__VA_ARGS__)) #endif /* #ifndef CLANG_TSA_H */ From 617f3a963589dbd54fe1f323eeac36411b352a0e Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:29 +0100 Subject: [PATCH 41/50] test-bdrv-drain: Fix incorrrect drain assumptions The test case assumes that a drain only happens in one specific place where it drains explicitly. This assumption happened to hold true until now, but block layer functions may drain interally (any graph modifications are going to do that through bdrv_graph_wrlock()), so this is incorrect. Make sure that the test code in .drained_begin only runs where we actually want it to run. When scheduling a BH from .drained_begin, we also need to increase the in_flight counter to make sure that the operation is actually completed in time before the node that it works on goes away. Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-10-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- tests/unit/test-bdrv-drain.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 2686a8acee..8cedea4959 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -1107,6 +1107,7 @@ struct detach_by_parent_data { BlockDriverState *c; BdrvChild *child_c; bool by_parent_cb; + bool detach_on_drain; }; static struct detach_by_parent_data detach_by_parent_data; @@ -1114,6 +1115,7 @@ static void detach_indirect_bh(void *opaque) { struct detach_by_parent_data *data = opaque; + bdrv_dec_in_flight(data->child_b->bs); bdrv_unref_child(data->parent_b, data->child_b); bdrv_ref(data->c); @@ -1128,12 +1130,21 @@ static void detach_by_parent_aio_cb(void *opaque, int ret) g_assert_cmpint(ret, ==, 0); if (data->by_parent_cb) { + bdrv_inc_in_flight(data->child_b->bs); detach_indirect_bh(data); } } static void detach_by_driver_cb_drained_begin(BdrvChild *child) { + struct detach_by_parent_data *data = &detach_by_parent_data; + + if (!data->detach_on_drain) { + return; + } + data->detach_on_drain = false; + + bdrv_inc_in_flight(data->child_b->bs); aio_bh_schedule_oneshot(qemu_get_current_aio_context(), detach_indirect_bh, &detach_by_parent_data); child_of_bds.drained_begin(child); @@ -1174,8 +1185,14 @@ static void test_detach_indirect(bool by_parent_cb) detach_by_driver_cb_class = child_of_bds; detach_by_driver_cb_class.drained_begin = detach_by_driver_cb_drained_begin; + detach_by_driver_cb_class.drained_end = NULL; + detach_by_driver_cb_class.drained_poll = NULL; } + detach_by_parent_data = (struct detach_by_parent_data) { + .detach_on_drain = false, + }; + /* Create all involved nodes */ parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR, &error_abort); @@ -1227,6 +1244,7 @@ static void test_detach_indirect(bool by_parent_cb) .child_b = child_b, .c = c, .by_parent_cb = by_parent_cb, + .detach_on_drain = true, }; acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL); g_assert(acb != NULL); From e13550558840422f980a0a71efe52ee83f37933d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:30 +0100 Subject: [PATCH 42/50] block: Fix locking in external_snapshot_prepare() bdrv_img_create() polls internally (when calling bdrv_create(), which is a co_wrapper), so it can't be called while holding the lock of any AioContext except the current one without causing deadlocks. Drop the lock around the call in external_snapshot_prepare(). Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-11-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- block.c | 4 ++++ blockdev.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/block.c b/block.c index 6191ac1f44..44d59362d6 100644 --- a/block.c +++ b/block.c @@ -6924,6 +6924,10 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs) return true; } +/* + * Must not be called while holding the lock of an AioContext other than the + * current one. + */ void bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, char *options, uint64_t img_size, int flags, bool quiet, diff --git a/blockdev.c b/blockdev.c index d2f80b0386..ebf952cd21 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1507,10 +1507,14 @@ static void external_snapshot_prepare(BlkActionState *common, goto out; } bdrv_refresh_filename(state->old_bs); + + aio_context_release(aio_context); bdrv_img_create(new_image_file, format, state->old_bs->filename, state->old_bs->drv->format_name, NULL, size, flags, false, &local_err); + aio_context_acquire(aio_context); + if (local_err) { error_propagate(errp, local_err); goto out; From 293125078086027ee625b3fae23b374ad08f98c8 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:31 +0100 Subject: [PATCH 43/50] block: wrlock in bdrv_replace_child_noperm Protect the main function where graph is modified. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-12-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index 44d59362d6..df52c6b012 100644 --- a/block.c +++ b/block.c @@ -2836,8 +2836,6 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) * * If @new_bs is non-NULL, the parent of @child must already be drained through * @child. - * - * This function does not poll. */ static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) @@ -2875,23 +2873,24 @@ static void bdrv_replace_child_noperm(BdrvChild *child, assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } + /* TODO Pull this up into the callers to avoid polling here */ + bdrv_graph_wrlock(); if (old_bs) { if (child->klass->detach) { child->klass->detach(child); } - assert_bdrv_graph_writable(old_bs); QLIST_REMOVE(child, next_parent); } child->bs = new_bs; if (new_bs) { - assert_bdrv_graph_writable(new_bs); QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); if (child->klass->attach) { child->klass->attach(child); } } + bdrv_graph_wrunlock(); /* * If the parent was drained through this BdrvChild previously, but new_bs From 1af823923541ddfa0bfe51af5f40e9a8469e8992 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:32 +0100 Subject: [PATCH 44/50] block: remove unnecessary assert_bdrv_graph_writable() We don't protect bdrv->aio_context with the graph rwlock, so these assertions are not needed Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-13-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/block.c b/block.c index df52c6b012..bdffadcdaa 100644 --- a/block.c +++ b/block.c @@ -7214,7 +7214,6 @@ static void bdrv_detach_aio_context(BlockDriverState *bs) if (bs->quiesce_counter) { aio_enable_external(bs->aio_context); } - assert_bdrv_graph_writable(bs); bs->aio_context = NULL; } @@ -7228,7 +7227,6 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, aio_disable_external(new_context); } - assert_bdrv_graph_writable(bs); bs->aio_context = new_context; if (bs->drv && bs->drv->bdrv_attach_aio_context) { @@ -7309,7 +7307,6 @@ static void bdrv_set_aio_context_commit(void *opaque) BlockDriverState *bs = (BlockDriverState *) state->bs; AioContext *new_context = state->new_ctx; AioContext *old_context = bdrv_get_aio_context(bs); - assert_bdrv_graph_writable(bs); /* * Take the old AioContex when detaching it from bs. From 3f35f82e04923affb3283b451b6d66880f266a5a Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:33 +0100 Subject: [PATCH 45/50] block: assert that graph read and writes are performed correctly Remove the old assert_bdrv_graph_writable, and replace it with the new version using graph-lock API. See the function documentation for more information. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-14-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 4 ++-- block/graph-lock.c | 11 +++++++++++ include/block/block_int-global-state.h | 17 ----------------- include/block/graph-lock.h | 15 +++++++++++++++ 4 files changed, 28 insertions(+), 19 deletions(-) diff --git a/block.c b/block.c index bdffadcdaa..ff53b41af3 100644 --- a/block.c +++ b/block.c @@ -1406,7 +1406,7 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; - assert_bdrv_graph_writable(bs); + assert_bdrv_graph_writable(); QLIST_INSERT_HEAD(&bs->children, child, next); if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) { /* @@ -1452,7 +1452,7 @@ static void bdrv_child_cb_detach(BdrvChild *child) bdrv_backing_detach(child); } - assert_bdrv_graph_writable(bs); + assert_bdrv_graph_writable(); QLIST_REMOVE(child, next); if (child == bs->backing) { assert(child != bs->file); diff --git a/block/graph-lock.c b/block/graph-lock.c index e033c6d9ac..c4d9d2c274 100644 --- a/block/graph-lock.c +++ b/block/graph-lock.c @@ -259,3 +259,14 @@ void bdrv_graph_rdunlock_main_loop(void) GLOBAL_STATE_CODE(); assert(!qemu_in_coroutine()); } + +void assert_bdrv_graph_readable(void) +{ + assert(qemu_in_main_thread() || reader_count()); +} + +void assert_bdrv_graph_writable(void) +{ + assert(qemu_in_main_thread()); + assert(qatomic_read(&has_writer)); +} diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h index b49f4eb35b..2f0993f6e9 100644 --- a/include/block/block_int-global-state.h +++ b/include/block/block_int-global-state.h @@ -310,21 +310,4 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs, */ void bdrv_drain_all_end_quiesce(BlockDriverState *bs); -/** - * Make sure that the function is running under both drain and BQL. - * The latter protects from concurrent writings - * from the GS API, while the former prevents concurrent reads - * from I/O. - */ -static inline void assert_bdrv_graph_writable(BlockDriverState *bs) -{ - /* - * TODO: this function is incomplete. Because the users of this - * assert lack the necessary drains, check only for BQL. - * Once the necessary drains are added, - * assert also for qatomic_read(&bs->quiesce_counter) > 0 - */ - assert(qemu_in_main_thread()); -} - #endif /* BLOCK_INT_GLOBAL_STATE_H */ diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index b27d8a5fb1..85e8a53b73 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -135,6 +135,21 @@ void coroutine_fn bdrv_graph_co_rdunlock(void); void bdrv_graph_rdlock_main_loop(void); void bdrv_graph_rdunlock_main_loop(void); +/* + * assert_bdrv_graph_readable: + * Make sure that the reader is either the main loop, + * or there is at least a reader helding the rdlock. + * In this way an incoming writer is aware of the read and waits. + */ +void assert_bdrv_graph_readable(void); + +/* + * assert_bdrv_graph_writable: + * Make sure that the writer is the main loop and has set @has_writer, + * so that incoming readers will pause. + */ +void assert_bdrv_graph_writable(void); + typedef struct GraphLockable { } GraphLockable; /* From 4002ffdc4fa7a2fd9b5a86772311c646a73775f7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:34 +0100 Subject: [PATCH 46/50] graph-lock: TSA annotations for lock/unlock functions Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-15-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- block/graph-lock.c | 3 ++ include/block/graph-lock.h | 80 +++++++++++++++++++++++++++++++++----- 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/block/graph-lock.c b/block/graph-lock.c index c4d9d2c274..454c31e691 100644 --- a/block/graph-lock.c +++ b/block/graph-lock.c @@ -24,6 +24,9 @@ #include "block/block.h" #include "block/block_int.h" +/* Dummy lock object to use for Thread Safety Analysis (TSA) */ +BdrvGraphLock graph_lock; + /* Protects the list of aiocontext and orphaned_reader_count */ static QemuMutex aio_context_list_lock; diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index 85e8a53b73..33c05b331a 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -21,6 +21,7 @@ #define GRAPH_LOCK_H #include "qemu/osdep.h" +#include "qemu/clang-tsa.h" #include "qemu/coroutine.h" @@ -57,6 +58,35 @@ */ typedef struct BdrvGraphRWlock BdrvGraphRWlock; +/* Dummy lock object to use for Thread Safety Analysis (TSA) */ +typedef struct TSA_CAPABILITY("mutex") BdrvGraphLock { +} BdrvGraphLock; + +extern BdrvGraphLock graph_lock; + +/* + * clang doesn't check consistency in locking annotations between forward + * declarations and the function definition. Having the annotation on the + * definition, but not the declaration in a header file, may give the reader + * a false sense of security because the condition actually remains unchecked + * for callers in other source files. + * + * Therefore, as a convention, for public functions, GRAPH_RDLOCK and + * GRAPH_WRLOCK annotations should be present only in the header file. + */ +#define GRAPH_WRLOCK TSA_REQUIRES(graph_lock) +#define GRAPH_RDLOCK TSA_REQUIRES_SHARED(graph_lock) + +/* + * TSA annotations are not part of function types, so checks are defeated when + * using a function pointer. As a workaround, annotate function pointers with + * this macro that will require that the lock is at least taken while reading + * the pointer. In most cases this is equivalent to actually protecting the + * function call. + */ +#define GRAPH_RDLOCK_PTR TSA_GUARDED_BY(graph_lock) +#define GRAPH_WRLOCK_PTR TSA_GUARDED_BY(graph_lock) + /* * register_aiocontext: * Add AioContext @ctx to the list of AioContext. @@ -85,14 +115,14 @@ void unregister_aiocontext(AioContext *ctx); * This function polls. Callers must not hold the lock of any AioContext other * than the current one. */ -void bdrv_graph_wrlock(void); +void bdrv_graph_wrlock(void) TSA_ACQUIRE(graph_lock) TSA_NO_TSA; /* * bdrv_graph_wrunlock: * Write finished, reset global has_writer to 0 and restart * all readers that are waiting. */ -void bdrv_graph_wrunlock(void); +void bdrv_graph_wrunlock(void) TSA_RELEASE(graph_lock) TSA_NO_TSA; /* * bdrv_graph_co_rdlock: @@ -116,7 +146,8 @@ void bdrv_graph_wrunlock(void); * loop) to take it and wait that the coroutine ends, so that * we always signal that a reader is running. */ -void coroutine_fn bdrv_graph_co_rdlock(void); +void coroutine_fn TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA +bdrv_graph_co_rdlock(void); /* * bdrv_graph_rdunlock: @@ -124,7 +155,8 @@ void coroutine_fn bdrv_graph_co_rdlock(void); * If the writer is waiting for reads to finish (has_writer == 1), signal * the writer that we are done via aio_wait_kick() to let it continue. */ -void coroutine_fn bdrv_graph_co_rdunlock(void); +void coroutine_fn TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA +bdrv_graph_co_rdunlock(void); /* * bdrv_graph_rd{un}lock_main_loop: @@ -132,8 +164,11 @@ void coroutine_fn bdrv_graph_co_rdunlock(void); * in the main loop. It is just asserting that we are not * in a coroutine and in GLOBAL_STATE_CODE. */ -void bdrv_graph_rdlock_main_loop(void); -void bdrv_graph_rdunlock_main_loop(void); +void TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA +bdrv_graph_rdlock_main_loop(void); + +void TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA +bdrv_graph_rdunlock_main_loop(void); /* * assert_bdrv_graph_readable: @@ -150,6 +185,17 @@ void assert_bdrv_graph_readable(void); */ void assert_bdrv_graph_writable(void); +/* + * Calling this function tells TSA that we know that the lock is effectively + * taken even though we cannot prove it (yet) with GRAPH_RDLOCK. This can be + * useful in intermediate stages of a conversion to using the GRAPH_RDLOCK + * macro. + */ +static inline void TSA_ASSERT_SHARED(graph_lock) TSA_NO_TSA +assume_graph_lock(void) +{ +} + typedef struct GraphLockable { } GraphLockable; /* @@ -159,13 +205,21 @@ typedef struct GraphLockable { } GraphLockable; */ #define GML_OBJ_() (&(GraphLockable) { }) -static inline GraphLockable *graph_lockable_auto_lock(GraphLockable *x) +/* + * This is not marked as TSA_ACQUIRE() because TSA doesn't understand the + * cleanup attribute and would therefore complain that the graph is never + * unlocked. TSA_ASSERT() makes sure that the following calls know that we + * hold the lock while unlocking is left unchecked. + */ +static inline GraphLockable * TSA_ASSERT(graph_lock) TSA_NO_TSA +graph_lockable_auto_lock(GraphLockable *x) { bdrv_graph_co_rdlock(); return x; } -static inline void graph_lockable_auto_unlock(GraphLockable *x) +static inline void TSA_NO_TSA +graph_lockable_auto_unlock(GraphLockable *x) { bdrv_graph_co_rdunlock(); } @@ -195,14 +249,20 @@ typedef struct GraphLockableMainloop { } GraphLockableMainloop; */ #define GMLML_OBJ_() (&(GraphLockableMainloop) { }) -static inline GraphLockableMainloop * +/* + * This is not marked as TSA_ACQUIRE() because TSA doesn't understand the + * cleanup attribute and would therefore complain that the graph is never + * unlocked. TSA_ASSERT() makes sure that the following calls know that we + * hold the lock while unlocking is left unchecked. + */ +static inline GraphLockableMainloop * TSA_ASSERT(graph_lock) TSA_NO_TSA graph_lockable_auto_lock_mainloop(GraphLockableMainloop *x) { bdrv_graph_rdlock_main_loop(); return x; } -static inline void +static inline void TSA_NO_TSA graph_lockable_auto_unlock_mainloop(GraphLockableMainloop *x) { bdrv_graph_rdunlock_main_loop(); From 303de47b2c1e20a7f326ad11976d6006f5498709 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:35 +0100 Subject: [PATCH 47/50] Mark assert_bdrv_graph_readable/writable() GRAPH_RD/WRLOCK Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-16-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- block.c | 4 ++-- include/block/block_int-common.h | 4 ++-- include/block/graph-lock.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index ff53b41af3..1a82fd101a 100644 --- a/block.c +++ b/block.c @@ -1402,7 +1402,7 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, *child_flags = flags; } -static void bdrv_child_cb_attach(BdrvChild *child) +static void GRAPH_WRLOCK bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; @@ -1444,7 +1444,7 @@ static void bdrv_child_cb_attach(BdrvChild *child) } } -static void bdrv_child_cb_detach(BdrvChild *child) +static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child) { BlockDriverState *bs = child->opaque; diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index a6bc6b7fe9..b1f0d88307 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -898,8 +898,8 @@ struct BdrvChildClass { void (*activate)(BdrvChild *child, Error **errp); int (*inactivate)(BdrvChild *child); - void (*attach)(BdrvChild *child); - void (*detach)(BdrvChild *child); + void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child); + void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child); /* * Notifies the parent that the filename of its child has changed (e.g. diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index 33c05b331a..4c92cd8edf 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -176,14 +176,14 @@ bdrv_graph_rdunlock_main_loop(void); * or there is at least a reader helding the rdlock. * In this way an incoming writer is aware of the read and waits. */ -void assert_bdrv_graph_readable(void); +void GRAPH_RDLOCK assert_bdrv_graph_readable(void); /* * assert_bdrv_graph_writable: * Make sure that the writer is the main loop and has set @has_writer, * so that incoming readers will pause. */ -void assert_bdrv_graph_writable(void); +void GRAPH_WRLOCK assert_bdrv_graph_writable(void); /* * Calling this function tells TSA that we know that the lock is effectively From e6d3f7a602a370362bc52b0aed7dfff1a0bf726d Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:36 +0100 Subject: [PATCH 48/50] block-coroutine-wrapper.py: introduce annotations that take the graph rdlock Add co_wrapper_bdrv_rdlock and co_wrapper_mixed_bdrv_rdlock option to the block-coroutine-wrapper.py script. This "_bdrv_rdlock" option takes and releases the graph rdlock when a coroutine function is created. This means that when used together with "_mixed", the function marked with co_wrapper_mixed_bdrv_rdlock will support both coroutine and non-coroutine case, and in the latter case it will create a coroutine that takes and releases the rdlock. When called from a coroutine, the caller must already hold the graph lock. Example: void co_wrapper_mixed_bdrv_rdlock bdrv_f1(); Becomes static void bdrv_co_enter_f1() { bdrv_graph_co_rdlock(); bdrv_co_function(); bdrv_graph_co_rdunlock(); } void bdrv_f1() { if (qemu_in_coroutine) { assume_graph_lock(); bdrv_co_function(); } else { qemu_co_enter(bdrv_co_enter_f1); ... } } When used alone, the function will not work in coroutine context, and when called in non-coroutine context it will create a new coroutine that takes care of taking and releasing the rdlock automatically. Example: void co_wrapper_bdrv_rdlock bdrv_f1(); Becomes static void bdrv_co_enter_f1() { bdrv_graph_co_rdlock(); bdrv_co_function(); bdrv_graph_co_rdunlock(); } void bdrv_f1() { assert(!qemu_in_coroutine()); qemu_co_enter(bdrv_co_enter_f1); ... } About their usage: - co_wrapper does not take the rdlock, so it can be used also outside the block layer. - co_wrapper_mixed will be used by many blk_* functions, since the coroutine function needs to call blk_wait_while_drained() and the rdlock *must* be taken afterwards, otherwise it's a deadlock. In the future this annotation will go away, and blk_* will use co_wrapper directly. - co_wrapper_bdrv_rdlock will be used by BlockDriver callbacks, ideally by all of them in the future. - co_wrapper_mixed_bdrv_rdlock will be used by the remaining functions that are still called by coroutine and non-coroutine context. In the future this annotation will go away, as we will split such mixed functions. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-17-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- include/block/block-common.h | 9 ++++++++- scripts/block-coroutine-wrapper.py | 12 ++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/block/block-common.h b/include/block/block-common.h index 6cf603ab06..4749c46a5e 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -40,14 +40,21 @@ * * Usage: read docs/devel/block-coroutine-wrapper.rst * - * There are 2 kind of specifiers: + * There are 4 kind of specifiers: * - co_wrapper functions can be called by only non-coroutine context, because * they always generate a new coroutine. * - co_wrapper_mixed functions can be called by both coroutine and * non-coroutine context. + * - co_wrapper_bdrv_rdlock are co_wrapper functions but automatically take and + * release the graph rdlock when creating a new coroutine + * - co_wrapper_mixed_bdrv_rdlock are co_wrapper_mixed functions but + * automatically take and release the graph rdlock when creating a new + * coroutine. */ #define co_wrapper #define co_wrapper_mixed +#define co_wrapper_bdrv_rdlock +#define co_wrapper_mixed_bdrv_rdlock #include "block/dirty-bitmap.h" #include "block/blockjob.h" diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py index 71a06e917a..6e087fa0b7 100644 --- a/scripts/block-coroutine-wrapper.py +++ b/scripts/block-coroutine-wrapper.py @@ -69,6 +69,7 @@ class FuncDecl: self.struct_name = snake_to_camel(self.name) self.args = [ParamDecl(arg.strip()) for arg in args.split(',')] self.create_only_co = 'mixed' not in variant + self.graph_rdlock = 'bdrv_rdlock' in variant subsystem, subname = self.name.split('_', 1) self.co_name = f'{subsystem}_co_{subname}' @@ -123,10 +124,13 @@ def create_mixed_wrapper(func: FuncDecl) -> str: """ name = func.co_name struct_name = func.struct_name + graph_assume_lock = 'assume_graph_lock();' if func.graph_rdlock else '' + return f"""\ {func.return_type} {func.name}({ func.gen_list('{decl}') }) {{ if (qemu_in_coroutine()) {{ + {graph_assume_lock} return {name}({ func.gen_list('{name}') }); }} else {{ {struct_name} s = {{ @@ -174,6 +178,12 @@ def gen_wrapper(func: FuncDecl) -> str: name = func.co_name struct_name = func.struct_name + graph_lock='' + graph_unlock='' + if func.graph_rdlock: + graph_lock=' bdrv_graph_co_rdlock();' + graph_unlock=' bdrv_graph_co_rdunlock();' + creation_function = create_mixed_wrapper if func.create_only_co: creation_function = create_co_wrapper @@ -193,7 +203,9 @@ static void coroutine_fn {name}_entry(void *opaque) {{ {struct_name} *s = opaque; +{graph_lock} s->ret = {name}({ func.gen_list('s->{name}') }); +{graph_unlock} s->poll_state.in_progress = false; aio_wait_kick(); From 90830f595062ecf0d2c96049f5e01f3d37a2107f Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:37 +0100 Subject: [PATCH 49/50] block: use co_wrapper_mixed_bdrv_rdlock in functions taking the rdlock Take the rdlock already, before we add the assertions. All these functions either read the graph recursively, or call BlockDriver callbacks that will eventually need to be protected by the graph rdlock. Do it now to all functions together, because many of these recursively call each other. For example, bdrv_co_truncate calls BlockDriver->bdrv_co_truncate, and some driver callbacks implement their own .bdrv_co_truncate by calling bdrv_flush inside. So if bdrv_flush asserts but bdrv_truncate does not take the rdlock yet, the assertion will always fail. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-18-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/coroutines.h | 2 +- include/block/block-io.h | 53 +++++++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/block/coroutines.h b/block/coroutines.h index 17da4db963..48e9081aa1 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -71,7 +71,7 @@ nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking, * the "I/O or GS" API. */ -int co_wrapper_mixed +int co_wrapper_mixed_bdrv_rdlock bdrv_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, diff --git a/include/block/block-io.h b/include/block/block-io.h index 52869ea08e..2ed6214909 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -39,19 +39,24 @@ * to catch when they are accidentally called by the wrong API. */ -int co_wrapper_mixed bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, - int64_t bytes, - BdrvRequestFlags flags); +int co_wrapper_mixed_bdrv_rdlock +bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes, + BdrvRequestFlags flags); + int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); -int co_wrapper_mixed bdrv_pread(BdrvChild *child, int64_t offset, - int64_t bytes, void *buf, - BdrvRequestFlags flags); -int co_wrapper_mixed bdrv_pwrite(BdrvChild *child, int64_t offset, - int64_t bytes, const void *buf, - BdrvRequestFlags flags); -int co_wrapper_mixed bdrv_pwrite_sync(BdrvChild *child, int64_t offset, - int64_t bytes, const void *buf, - BdrvRequestFlags flags); + +int co_wrapper_mixed_bdrv_rdlock +bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf, + BdrvRequestFlags flags); + +int co_wrapper_mixed_bdrv_rdlock +bdrv_pwrite(BdrvChild *child, int64_t offset,int64_t bytes, + const void *buf, BdrvRequestFlags flags); + +int co_wrapper_mixed_bdrv_rdlock +bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes, + const void *buf, BdrvRequestFlags flags); + int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags); @@ -287,22 +292,26 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, void bdrv_drain(BlockDriverState *bs); -int co_wrapper_mixed +int co_wrapper_mixed_bdrv_rdlock bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); -int co_wrapper_mixed bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, - BdrvCheckMode fix); +int co_wrapper_mixed_bdrv_rdlock +bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); /* Invalidate any cached metadata used by image formats */ -int co_wrapper_mixed bdrv_invalidate_cache(BlockDriverState *bs, - Error **errp); -int co_wrapper_mixed bdrv_flush(BlockDriverState *bs); -int co_wrapper_mixed bdrv_pdiscard(BdrvChild *child, int64_t offset, - int64_t bytes); -int co_wrapper_mixed +int co_wrapper_mixed_bdrv_rdlock +bdrv_invalidate_cache(BlockDriverState *bs, Error **errp); + +int co_wrapper_mixed_bdrv_rdlock bdrv_flush(BlockDriverState *bs); + +int co_wrapper_mixed_bdrv_rdlock +bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); + +int co_wrapper_mixed_bdrv_rdlock bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); -int co_wrapper_mixed + +int co_wrapper_mixed_bdrv_rdlock bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); /** From 1b3ff9feb942c2ad0b01ac931e99ad451ab0ef39 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:38 +0100 Subject: [PATCH 50/50] block: GRAPH_RDLOCK for functions only called by co_wrappers The generated coroutine wrappers already take care to take the lock in the non-coroutine path, and assume that the lock is already taken in the coroutine path. The only thing we need to do for the wrapped function is adding the GRAPH_RDLOCK annotation. Doing so also allows us to mark the corresponding callbacks in BlockDriver as GRAPH_RDLOCK_PTR. Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-19-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- block.c | 2 ++ block/coroutines.h | 17 ++++++++++------- block/io.c | 2 ++ include/block/block_int-common.h | 20 +++++++++----------- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/block.c b/block.c index 1a82fd101a..9c2ac757e4 100644 --- a/block.c +++ b/block.c @@ -5402,6 +5402,7 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { IO_CODE(); + assert_bdrv_graph_readable(); if (bs->drv == NULL) { return -ENOMEDIUM; } @@ -6617,6 +6618,7 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) IO_CODE(); assert(!(bs->open_flags & BDRV_O_INACTIVE)); + assert_bdrv_graph_readable(); if (bs->drv->bdrv_co_invalidate_cache) { bs->drv->bdrv_co_invalidate_cache(bs, &local_err); diff --git a/block/coroutines.h b/block/coroutines.h index 48e9081aa1..2a1e0b3c9d 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -37,9 +37,11 @@ * the I/O API. */ -int coroutine_fn bdrv_co_check(BlockDriverState *bs, - BdrvCheckResult *res, BdrvCheckMode fix); -int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp); +int coroutine_fn GRAPH_RDLOCK +bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); + +int coroutine_fn GRAPH_RDLOCK +bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp); int coroutine_fn bdrv_co_common_block_status_above(BlockDriverState *bs, @@ -53,10 +55,11 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState **file, int *depth); -int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs, - QEMUIOVector *qiov, int64_t pos); -int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, - QEMUIOVector *qiov, int64_t pos); +int coroutine_fn GRAPH_RDLOCK +bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + +int coroutine_fn GRAPH_RDLOCK +bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking, diff --git a/block/io.c b/block/io.c index d160d2e273..d87788dfbb 100644 --- a/block/io.c +++ b/block/io.c @@ -2697,6 +2697,7 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) BlockDriverState *child_bs = bdrv_primary_bs(bs); int ret; IO_CODE(); + assert_bdrv_graph_readable(); ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); if (ret < 0) { @@ -2729,6 +2730,7 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) BlockDriverState *child_bs = bdrv_primary_bs(bs); int ret; IO_CODE(); + assert_bdrv_graph_readable(); ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); if (ret < 0) { diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index b1f0d88307..c34c525fa6 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -641,8 +641,8 @@ struct BlockDriver { /* * Invalidate any cached meta-data. */ - void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs, - Error **errp); + void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_invalidate_cache)( + BlockDriverState *bs, Error **errp); /* * Flushes all data for all layers by calling bdrv_co_flush for underlying @@ -701,12 +701,11 @@ struct BlockDriver { Error **errp); BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs); - int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, - QEMUIOVector *qiov, - int64_t pos); - int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, - QEMUIOVector *qiov, - int64_t pos); + int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_save_vmstate)( + BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + + int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_load_vmstate)( + BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); /* removable device specific */ bool (*bdrv_is_inserted)(BlockDriverState *bs); @@ -724,9 +723,8 @@ struct BlockDriver { * Returns 0 for completed check, -errno for internal errors. * The check results are stored in result. */ - int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs, - BdrvCheckResult *result, - BdrvCheckMode fix); + int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_check)( + BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix); void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);