From 11cea42e176bebc5e85866b01ade4a86651b7633 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 21 Apr 2023 12:27:58 +0300 Subject: [PATCH 01/28] block: add configure options for excluding vmdk, vhdx and vpc Let's add --enable / --disable configure options for these formats, so that those who don't need them may not build them. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20230421092758.814122-1-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/meson.build | 18 +++++++++++++----- meson.build | 3 +++ meson_options.txt | 6 ++++++ scripts/meson-buildoptions.sh | 9 +++++++++ 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/block/meson.build b/block/meson.build index 382bec0e7d..13337bd070 100644 --- a/block/meson.build +++ b/block/meson.build @@ -38,11 +38,6 @@ block_ss.add(files( 'snapshot-access.c', 'throttle-groups.c', 'throttle.c', - 'vhdx-endian.c', - 'vhdx-log.c', - 'vhdx.c', - 'vmdk.c', - 'vpc.c', 'write-threshold.c', ), zstd, zlib, gnutls) @@ -55,6 +50,19 @@ endif if get_option('vdi').allowed() block_ss.add(files('vdi.c')) endif +if get_option('vhdx').allowed() + block_ss.add(files( + 'vhdx-endian.c', + 'vhdx-log.c', + 'vhdx.c' + )) +endif +if get_option('vmdk').allowed() + block_ss.add(files('vmdk.c')) +endif +if get_option('vpc').allowed() + block_ss.add(files('vpc.c')) +endif if get_option('cloop').allowed() block_ss.add(files('cloop.c')) endif diff --git a/meson.build b/meson.build index 27782f8f52..c56e0fec9e 100644 --- a/meson.build +++ b/meson.build @@ -3929,6 +3929,9 @@ if have_block summary_info += {'dmg support': get_option('dmg').allowed()} summary_info += {'qcow v1 support': get_option('qcow1').allowed()} summary_info += {'vdi support': get_option('vdi').allowed()} + summary_info += {'vhdx support': get_option('vhdx').allowed()} + summary_info += {'vmdk support': get_option('vmdk').allowed()} + summary_info += {'vpc support': get_option('vpc').allowed()} summary_info 
+= {'vvfat support': get_option('vvfat').allowed()} summary_info += {'qed support': get_option('qed').allowed()} summary_info += {'parallels support': get_option('parallels').allowed()} diff --git a/meson_options.txt b/meson_options.txt index ae2017702a..66ca350029 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -301,6 +301,12 @@ option('qcow1', type: 'feature', value: 'auto', description: 'qcow1 image format support') option('vdi', type: 'feature', value: 'auto', description: 'vdi image format support') +option('vhdx', type: 'feature', value: 'auto', + description: 'vhdx image format support') +option('vmdk', type: 'feature', value: 'auto', + description: 'vmdk image format support') +option('vpc', type: 'feature', value: 'auto', + description: 'vpc image format support') option('vvfat', type: 'feature', value: 'auto', description: 'vvfat image format support') option('qed', type: 'feature', value: 'auto', diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 0e888e6ecd..34d82dec53 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -169,6 +169,7 @@ meson_options_help() { printf "%s\n" ' VDUSE block export support' printf "%s\n" ' vfio-user-server' printf "%s\n" ' vfio-user server support' + printf "%s\n" ' vhdx vhdx image format support' printf "%s\n" ' vhost-crypto vhost-user crypto backend support' printf "%s\n" ' vhost-kernel vhost kernel backend support' printf "%s\n" ' vhost-net vhost-net kernel acceleration support' @@ -178,10 +179,12 @@ meson_options_help() { printf "%s\n" ' vhost-vdpa vhost-vdpa kernel backend support' printf "%s\n" ' virglrenderer virgl rendering support' printf "%s\n" ' virtfs virtio-9p support' + printf "%s\n" ' vmdk vmdk image format support' printf "%s\n" ' vmnet vmnet.framework network backend support' printf "%s\n" ' vnc VNC server' printf "%s\n" ' vnc-jpeg JPEG lossy compression for VNC server' printf "%s\n" ' vnc-sasl SASL authentication for VNC server' + printf 
"%s\n" ' vpc vpc image format support' printf "%s\n" ' vte vte support for the gtk UI' printf "%s\n" ' vvfat vvfat image format support' printf "%s\n" ' whpx WHPX acceleration support' @@ -449,6 +452,8 @@ _meson_option_parse() { --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;; --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;; --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;; + --enable-vhdx) printf "%s" -Dvhdx=enabled ;; + --disable-vhdx) printf "%s" -Dvhdx=disabled ;; --enable-vhost-crypto) printf "%s" -Dvhost_crypto=enabled ;; --disable-vhost-crypto) printf "%s" -Dvhost_crypto=disabled ;; --enable-vhost-kernel) printf "%s" -Dvhost_kernel=enabled ;; @@ -465,6 +470,8 @@ _meson_option_parse() { --disable-virglrenderer) printf "%s" -Dvirglrenderer=disabled ;; --enable-virtfs) printf "%s" -Dvirtfs=enabled ;; --disable-virtfs) printf "%s" -Dvirtfs=disabled ;; + --enable-vmdk) printf "%s" -Dvmdk=enabled ;; + --disable-vmdk) printf "%s" -Dvmdk=disabled ;; --enable-vmnet) printf "%s" -Dvmnet=enabled ;; --disable-vmnet) printf "%s" -Dvmnet=disabled ;; --enable-vnc) printf "%s" -Dvnc=enabled ;; @@ -473,6 +480,8 @@ _meson_option_parse() { --disable-vnc-jpeg) printf "%s" -Dvnc_jpeg=disabled ;; --enable-vnc-sasl) printf "%s" -Dvnc_sasl=enabled ;; --disable-vnc-sasl) printf "%s" -Dvnc_sasl=disabled ;; + --enable-vpc) printf "%s" -Dvpc=enabled ;; + --disable-vpc) printf "%s" -Dvpc=disabled ;; --enable-vte) printf "%s" -Dvte=enabled ;; --disable-vte) printf "%s" -Dvte=disabled ;; --enable-vvfat) printf "%s" -Dvvfat=enabled ;; From 17ac39c3e726173f737bb6cd898c160b406532d4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 6 Apr 2023 12:17:52 +0200 Subject: [PATCH 02/28] block: add missing coroutine_fn annotations After the recent introduction of many new coroutine callbacks, a couple calls from non-coroutine_fn to coroutine_fn have sneaked in; fix them. 
Signed-off-by: Paolo Bonzini Message-Id: <20230406101752.242125-1-pbonzini@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/mirror.c | 4 ++-- include/block/graph-lock.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index af9bbd23d4..80fa345071 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -270,8 +270,8 @@ static inline int64_t mirror_clip_bytes(MirrorBlockJob *s, /* Round offset and/or bytes to target cluster if COW is needed, and * return the offset of the adjusted tail against original. */ -static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, - uint64_t *bytes) +static int coroutine_fn mirror_cow_align(MirrorBlockJob *s, int64_t *offset, + uint64_t *bytes) { bool need_cow; int ret = 0; diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index 18cc14de22..ac0fef8605 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -208,14 +208,14 @@ typedef struct GraphLockable { } GraphLockable; * unlocked. TSA_ASSERT() makes sure that the following calls know that we * hold the lock while unlocking is left unchecked. */ -static inline GraphLockable * TSA_ASSERT(graph_lock) TSA_NO_TSA +static inline GraphLockable * TSA_ASSERT(graph_lock) TSA_NO_TSA coroutine_fn graph_lockable_auto_lock(GraphLockable *x) { bdrv_graph_co_rdlock(); return x; } -static inline void TSA_NO_TSA +static inline void TSA_NO_TSA coroutine_fn graph_lockable_auto_unlock(GraphLockable *x) { bdrv_graph_co_rdunlock(); From 3edf660a9155848d1d7bc6ad2586ceb0285105e3 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 4 Apr 2023 11:33:07 -0400 Subject: [PATCH 03/28] aio-wait: avoid AioContext lock in aio_wait_bh_oneshot() There is no need for the AioContext lock in aio_wait_bh_oneshot(). It's easy to remove the lock from existing callers and then switch from AIO_WAIT_WHILE() to AIO_WAIT_WHILE_UNLOCKED() in aio_wait_bh_oneshot(). 
Document that the AioContext lock should not be held across aio_wait_bh_oneshot(). Holding a lock across aio_poll() can cause deadlock so we don't want callers to do that. This is a step towards getting rid of the AioContext lock. Cc: Paolo Bonzini Signed-off-by: Stefan Hajnoczi Message-Id: <20230404153307.458883-1-stefanha@redhat.com> Reviewed-by: Paolo Bonzini Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- hw/block/dataplane/virtio-blk.c | 3 ++- hw/scsi/virtio-scsi-dataplane.c | 2 -- include/block/aio-wait.h | 2 +- util/aio-wait.c | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index a6202997ee..af1c24c40c 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -315,9 +315,10 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) s->stopping = true; trace_virtio_blk_data_plane_stop(s); - aio_context_acquire(s->ctx); aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); + aio_context_acquire(s->ctx); + /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ blk_drain(s->conf->conf.blk); diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 20bb91766e..f3214e1c57 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -197,9 +197,7 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) } s->dataplane_stopping = true; - aio_context_acquire(s->ctx); aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); - aio_context_release(s->ctx); blk_drain_all(); /* ensure there are no in-flight requests */ diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h index 6e43e3b7bb..5449b6d742 100644 --- a/include/block/aio-wait.h +++ b/include/block/aio-wait.h @@ -131,7 +131,7 @@ void aio_wait_kick(void); * * Run a BH in @ctx and wait for it to complete. 
* - * Must be called from the main loop thread with @ctx acquired exactly once. + * Must be called from the main loop thread without @ctx acquired. * Note that main loop event processing may occur. */ void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); diff --git a/util/aio-wait.c b/util/aio-wait.c index 98c5accd29..b5336cf5fd 100644 --- a/util/aio-wait.c +++ b/util/aio-wait.c @@ -82,5 +82,5 @@ void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) assert(qemu_get_current_aio_context() == qemu_get_aio_context()); aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data); - AIO_WAIT_WHILE(ctx, !data.done); + AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done); } From e2626874a32602d4e52971c786ef5ffb4430629d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 3 May 2023 16:01:42 +0200 Subject: [PATCH 04/28] block: Fix use after free in blockdev_mark_auto_del() job_cancel_locked() drops the job list lock temporarily and it may call aio_poll(). We must assume that the list has changed after this call. Also, with unlucky timing, it can end up freeing the job during job_completed_txn_abort_locked(), making the job pointer invalid, too. For both reasons, we can't just continue at block_job_next_locked(job). Instead, start at the head of the list again after job_cancel_locked() and skip those jobs that we already cancelled (or that are completing anyway). 
Cc: qemu-stable@nongnu.org Signed-off-by: Kevin Wolf Message-Id: <20230503140142.474404-1-kwolf@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- blockdev.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/blockdev.c b/blockdev.c index d7b5c18f0a..2c1752a403 100644 --- a/blockdev.c +++ b/blockdev.c @@ -153,12 +153,22 @@ void blockdev_mark_auto_del(BlockBackend *blk) JOB_LOCK_GUARD(); - for (job = block_job_next_locked(NULL); job; - job = block_job_next_locked(job)) { - if (block_job_has_bdrv(job, blk_bs(blk))) { + do { + job = block_job_next_locked(NULL); + while (job && (job->job.cancelled || + job->job.deferred_to_main_loop || + !block_job_has_bdrv(job, blk_bs(blk)))) + { + job = block_job_next_locked(job); + } + if (job) { + /* + * This drops the job lock temporarily and polls, so we need to + * restart processing the list from the start after this. + */ job_cancel_locked(&job->job, false); } - } + } while (job); dinfo->auto_del = 1; } From cf6052f111f8d6a2ef083f3e2848bc6f8180bb96 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 3 May 2023 18:50:19 +0200 Subject: [PATCH 05/28] iotests/nbd-reconnect-on-open: Fix NBD socket path Socket paths need to be short to avoid failures. This is why there is an iotests.sock_dir (defaulting to /tmp) separate from the disk image base directory. Make use of it to fix failures in too deeply nested test directories.
Fixes: ab7f7e67a7e7b49964109501dfcde4ec29bae60e Signed-off-by: Kevin Wolf Message-Id: <20230503165019.8867-1-kwolf@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- tests/qemu-iotests/tests/nbd-reconnect-on-open | 3 ++- tests/qemu-iotests/tests/nbd-reconnect-on-open.out | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/tests/nbd-reconnect-on-open b/tests/qemu-iotests/tests/nbd-reconnect-on-open index d0b401b060..3ce52021c3 100755 --- a/tests/qemu-iotests/tests/nbd-reconnect-on-open +++ b/tests/qemu-iotests/tests/nbd-reconnect-on-open @@ -26,7 +26,8 @@ from iotests import qemu_img_create, file_path, qemu_io_popen, qemu_nbd, \ iotests.script_initialize(supported_fmts=['qcow2']) -disk, nbd_sock = file_path('disk', 'nbd-sock') +disk = file_path('disk') +nbd_sock = file_path('nbd-sock', base_dir=iotests.sock_dir) def create_args(open_timeout): diff --git a/tests/qemu-iotests/tests/nbd-reconnect-on-open.out b/tests/qemu-iotests/tests/nbd-reconnect-on-open.out index a35ae30ea4..b3dd90f2a3 100644 --- a/tests/qemu-iotests/tests/nbd-reconnect-on-open.out +++ b/tests/qemu-iotests/tests/nbd-reconnect-on-open.out @@ -2,10 +2,10 @@ read 1048576/1048576 bytes at offset 0 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Check fail to connect with 0 seconds of timeout -qemu-io: can't open: Failed to connect to 'TEST_DIR/PID-nbd-sock': No such file or directory +qemu-io: can't open: Failed to connect to 'SOCK_DIR/PID-nbd-sock': No such file or directory qemu_io finished in 0..0.2 seconds, OK Check fail to connect with 1 seconds of timeout -qemu-io: can't open: Failed to connect to 'TEST_DIR/PID-nbd-sock': No such file or directory +qemu-io: can't open: Failed to connect to 'SOCK_DIR/PID-nbd-sock': No such file or directory qemu_io finished in 1..1.2 seconds, OK From 6dab4c93ecfae48e2e67b984d1032c1e988d3005 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Tue, 2 May 2023 15:52:12 
-0500 Subject: [PATCH 06/28] migration: Attempt disk reactivation in more failure scenarios Commit fe904ea824 added a fail_invalidate label, which tries to reactivate disks on the source after a failure while s->state == MIGRATION_STATUS_ACTIVE, but didn't actually use the label if qemu_savevm_state_complete_precopy() failed. This failure to reactivate is also present in commit 6039dd5b1c (also covering the new s->state == MIGRATION_STATUS_DEVICE state) and 403d18ae (ensuring s->block_inactive is set more reliably). Consolidate the two labels back into one - no matter HOW migration fails, if there is any chance we can reach vm_start() after having attempted inactivation, it is essential that we have tried to restart disks before then. This also makes the cleanup more like migrate_fd_cancel(). Suggested-by: Kevin Wolf Signed-off-by: Eric Blake Message-Id: <20230502205212.134680-1-eblake@redhat.com> Acked-by: Peter Xu Reviewed-by: Juan Quintela Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- migration/migration.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 0ee07802a5..f9f12a17b5 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2309,6 +2309,11 @@ static void migration_completion(MigrationState *s) MIGRATION_STATUS_DEVICE); } if (ret >= 0) { + /* + * Inactivate disks except in COLO, and track that we + * have done so in order to remember to reactivate + * them if migration fails or is cancelled.
+ */ s->block_inactive = !migrate_colo(); qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, @@ -2353,13 +2358,13 @@ static void migration_completion(MigrationState *s) rp_error = await_return_path_close_on_source(s); trace_migration_return_path_end_after(rp_error); if (rp_error) { - goto fail_invalidate; + goto fail; } } if (qemu_file_get_error(s->to_dst_file)) { trace_migration_completion_file_err(); - goto fail_invalidate; + goto fail; } if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) { @@ -2373,26 +2378,25 @@ static void migration_completion(MigrationState *s) return; -fail_invalidate: - /* If not doing postcopy, vm_start() will be called: let's regain - * control on images. - */ - if (s->state == MIGRATION_STATUS_ACTIVE || - s->state == MIGRATION_STATUS_DEVICE) { +fail: + if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE || + s->state == MIGRATION_STATUS_DEVICE)) { + /* + * If not doing postcopy, vm_start() will be called: let's + * regain control on images. + */ Error *local_err = NULL; qemu_mutex_lock_iothread(); bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); - s->block_inactive = true; } else { s->block_inactive = false; } qemu_mutex_unlock_iothread(); } -fail: migrate_set_state(&s->state, current_active_state, MIGRATION_STATUS_FAILED); } From 0050c163ff6d8167e07c9fc4be7f728316140cb6 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:31 +0200 Subject: [PATCH 07/28] qcow2: Don't call bdrv_getlength() in coroutine_fns There is a bdrv_co_getlength() now, which should be used in coroutine context. This requires adding GRAPH_RDLOCK to some functions so that this still compiles with TSA because bdrv_co_getlength() is GRAPH_RDLOCK. 
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-2-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 2 +- block/qcow2.c | 19 +++++++++---------- block/qcow2.h | 4 +++- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index b2a81ff707..4cf91bd955 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -3715,7 +3715,7 @@ int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs) qemu_co_mutex_assert_locked(&s->lock); - file_length = bdrv_getlength(bs->file->bs); + file_length = bdrv_co_getlength(bs->file->bs); if (file_length < 0) { return file_length; } diff --git a/block/qcow2.c b/block/qcow2.c index fe5def438e..94cf59af8b 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2089,11 +2089,10 @@ static void qcow2_join_options(QDict *options, QDict *old_options) } } -static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, - bool want_zero, - int64_t offset, int64_t count, - int64_t *pnum, int64_t *map, - BlockDriverState **file) +static int coroutine_fn GRAPH_RDLOCK +qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, + int64_t count, int64_t *pnum, int64_t *map, + BlockDriverState **file) { BDRVQcow2State *s = bs->opaque; uint64_t host_offset; @@ -3235,7 +3234,7 @@ preallocate_co(BlockDriverState *bs, uint64_t offset, uint64_t new_length, * all of the allocated clusters (otherwise we get failing reads after * EOF). Extend the image to the last allocated sector. 
*/ - file_length = bdrv_getlength(s->data_file->bs); + file_length = bdrv_co_getlength(s->data_file->bs); if (file_length < 0) { error_setg_errno(errp, -file_length, "Could not get file size"); ret = file_length; @@ -4098,7 +4097,7 @@ qcow2_co_copy_range_from(BlockDriverState *bs, case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: if (bs->backing && bs->backing->bs) { - int64_t backing_length = bdrv_getlength(bs->backing->bs); + int64_t backing_length = bdrv_co_getlength(bs->backing->bs); if (src_offset >= backing_length) { cur_write_flags |= BDRV_REQ_ZERO_WRITE; } else { @@ -4293,7 +4292,7 @@ qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, goto fail; } - old_file_size = bdrv_getlength(bs->file->bs); + old_file_size = bdrv_co_getlength(bs->file->bs); if (old_file_size < 0) { error_setg_errno(errp, -old_file_size, "Failed to inquire current file length"); @@ -4386,7 +4385,7 @@ qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, break; } - old_file_size = bdrv_getlength(bs->file->bs); + old_file_size = bdrv_co_getlength(bs->file->bs); if (old_file_size < 0) { error_setg_errno(errp, -old_file_size, "Failed to inquire current file length"); @@ -4694,7 +4693,7 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, * align end of file to a sector boundary to ease reading with * sector based I/Os */ - int64_t len = bdrv_getlength(bs->file->bs); + int64_t len = bdrv_co_getlength(bs->file->bs); if (len < 0) { return len; } diff --git a/block/qcow2.h b/block/qcow2.h index c75decc38a..4f67eb912a 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -895,7 +895,9 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, void *cb_opaque, Error **errp); int coroutine_fn GRAPH_RDLOCK qcow2_shrink_reftable(BlockDriverState *bs); int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); -int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs); + +int 
coroutine_fn GRAPH_RDLOCK +qcow2_detect_metadata_preallocation(BlockDriverState *bs); /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, From da4afaff074e56b0fa0d25abf865784148018895 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:32 +0200 Subject: [PATCH 08/28] block: Consistently call bdrv_activate() outside coroutine Migration code can call bdrv_activate() in coroutine context, whereas other callers call it outside of coroutines. As it calls other code that is not supposed to run in coroutines, standardise on running outside of coroutines. This adds a no_co_wrapper to switch to the main loop before calling bdrv_activate(). Cc: qemu-stable@nongnu.org Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-3-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/block-backend.c | 10 +++++++++- include/block/block-global-state.h | 6 +++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index fc530ded6a..e37d55d3e9 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -2024,7 +2024,15 @@ void blk_activate(BlockBackend *blk, Error **errp) return; } - bdrv_activate(bs, errp); + /* + * Migration code can call this function in coroutine context, so leave + * coroutine context if necessary. 
+ */ + if (qemu_in_coroutine()) { + bdrv_co_activate(bs, errp); + } else { + bdrv_activate(bs, errp); + } } bool coroutine_fn blk_co_is_inserted(BlockBackend *blk) diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 399200a9a3..2c312cc774 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -166,7 +166,11 @@ int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp); -int bdrv_activate(BlockDriverState *bs, Error **errp); +int no_coroutine_fn bdrv_activate(BlockDriverState *bs, Error **errp); + +int coroutine_fn no_co_wrapper +bdrv_co_activate(BlockDriverState *bs, Error **errp); + void bdrv_activate_all(Error **errp); int bdrv_inactivate_all(void); From b2ab5f545fa1eaaf2955dd617bee19a8b3279786 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:33 +0200 Subject: [PATCH 09/28] block: bdrv/blk_co_unref() for calls in coroutine context These functions must not be called in coroutine context, because they need write access to the graph. 
Cc: qemu-stable@nongnu.org Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-4-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 2 +- block/crypto.c | 6 +++--- block/parallels.c | 6 +++--- block/qcow.c | 6 +++--- block/qcow2.c | 14 +++++++------- block/qed.c | 6 +++--- block/vdi.c | 6 +++--- block/vhdx.c | 6 +++--- block/vmdk.c | 18 +++++++++--------- block/vpc.c | 6 +++--- include/block/block-global-state.h | 3 ++- include/sysemu/block-backend-global-state.h | 5 ++++- 12 files changed, 44 insertions(+), 40 deletions(-) diff --git a/block.c b/block.c index 5ec1a3897e..20d5ee0959 100644 --- a/block.c +++ b/block.c @@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, ret = 0; out: - blk_unref(blk); + blk_co_unref(blk); return ret; } diff --git a/block/crypto.c b/block/crypto.c index ca67289187..8fd3ad0054 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size, ret = 0; cleanup: qcrypto_block_free(crypto); - blk_unref(blk); + blk_co_unref(blk); return ret; } @@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp) ret = 0; fail: - bdrv_unref(bs); + bdrv_co_unref(bs); return ret; } @@ -730,7 +730,7 @@ fail: bdrv_co_delete_file_noerr(bs); } - bdrv_unref(bs); + bdrv_co_unref(bs); qapi_free_QCryptoBlockCreateOptions(create_opts); qobject_unref(cryptoopts); return ret; diff --git a/block/parallels.c b/block/parallels.c index 013684801a..b49c35929e 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, ret = 0; out: - blk_unref(blk); - bdrv_unref(bs); + blk_co_unref(blk); + bdrv_co_unref(bs); return ret; exit: @@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename, done: qobject_unref(qdict); - bdrv_unref(bs); + bdrv_co_unref(bs); 
qapi_free_BlockdevCreateOptions(create_options); return ret; } diff --git a/block/qcow.c b/block/qcow.c index 490e4f819e..a0c701f578 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, g_free(tmp); ret = 0; exit: - blk_unref(qcow_blk); - bdrv_unref(bs); + blk_co_unref(qcow_blk); + bdrv_co_unref(bs); qcrypto_block_free(crypto); return ret; } @@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename, fail: g_free(backing_fmt); qobject_unref(qdict); - bdrv_unref(bs); + bdrv_co_unref(bs); qapi_free_BlockdevCreateOptions(create_options); return ret; } diff --git a/block/qcow2.c b/block/qcow2.c index 94cf59af8b..01742b3ebe 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3705,7 +3705,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) goto out; } - blk_unref(blk); + blk_co_unref(blk); blk = NULL; /* @@ -3785,7 +3785,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) } } - blk_unref(blk); + blk_co_unref(blk); blk = NULL; /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. 
@@ -3810,9 +3810,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) ret = 0; out: - blk_unref(blk); - bdrv_unref(bs); - bdrv_unref(data_bs); + blk_co_unref(blk); + bdrv_co_unref(bs); + bdrv_co_unref(data_bs); return ret; } @@ -3943,8 +3943,8 @@ finish: } qobject_unref(qdict); - bdrv_unref(bs); - bdrv_unref(data_bs); + bdrv_co_unref(bs); + bdrv_co_unref(data_bs); qapi_free_BlockdevCreateOptions(create_options); return ret; } diff --git a/block/qed.c b/block/qed.c index 0705a7b4e2..aff2a2076e 100644 --- a/block/qed.c +++ b/block/qed.c @@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, ret = 0; /* success */ out: g_free(l1_table); - blk_unref(blk); - bdrv_unref(bs); + blk_co_unref(blk); + bdrv_co_unref(bs); return ret; } @@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename, fail: qobject_unref(qdict); - bdrv_unref(bs); + bdrv_co_unref(bs); qapi_free_BlockdevCreateOptions(create_options); return ret; } diff --git a/block/vdi.c b/block/vdi.c index f2434d6153..08331d2dd7 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, ret = 0; exit: - blk_unref(blk); - bdrv_unref(bs_file); + blk_co_unref(blk); + bdrv_co_unref(bs_file); g_free(bmap); return ret; } @@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename, done: qobject_unref(qdict); qapi_free_BlockdevCreateOptions(create_options); - bdrv_unref(bs_file); + bdrv_co_unref(bs_file); return ret; } diff --git a/block/vhdx.c b/block/vhdx.c index 81420722a1..00777da91a 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, ret = 0; delete_and_exit: - blk_unref(blk); - bdrv_unref(bs); + blk_co_unref(blk); + bdrv_co_unref(bs); g_free(creator); return ret; } @@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename, fail: 
qobject_unref(qdict); - bdrv_unref(bs); + bdrv_co_unref(bs); qapi_free_BlockdevCreateOptions(create_options); return ret; } diff --git a/block/vmdk.c b/block/vmdk.c index 3f8c731e32..11b553ef25 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2306,7 +2306,7 @@ exit: if (pbb) { *pbb = blk; } else { - blk_unref(blk); + blk_co_unref(blk); blk = NULL; } } @@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size, if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) { error_setg(errp, "Invalid backing file format: %s. Must be vmdk", blk_bs(backing)->drv->format_name); - blk_unref(backing); + blk_co_unref(backing); ret = -EINVAL; goto exit; } ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid); - blk_unref(backing); + blk_co_unref(backing); if (ret) { error_setg(errp, "Failed to read parent CID"); goto exit; @@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size, blk_bs(extent_blk)->filename); created_size += cur_size; extent_idx++; - blk_unref(extent_blk); + blk_co_unref(extent_blk); } /* Check whether we got excess extents */ extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain, opaque, NULL); if (extent_blk) { - blk_unref(extent_blk); + blk_co_unref(extent_blk); error_setg(errp, "List of extents contains unused extents"); ret = -EINVAL; goto exit; @@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size, ret = 0; exit: if (blk) { - blk_unref(blk); + blk_co_unref(blk); } g_free(desc); g_free(parent_desc_line); @@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split, errp)) { goto exit; } - bdrv_unref(bs); + bdrv_co_unref(bs); exit: g_free(ext_filename); return blk; @@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, return NULL; } blk_set_allow_write_beyond_eof(blk, true); - bdrv_unref(bs); + bdrv_co_unref(bs); if (size != -1) { ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp); if (ret) { - blk_unref(blk); + blk_co_unref(blk); blk = NULL; } } diff 
--git a/block/vpc.c b/block/vpc.c index b89b0ff8e2..07ddda5b99 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, } out: - blk_unref(blk); - bdrv_unref(bs); + blk_co_unref(blk); + bdrv_co_unref(bs); return ret; } @@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename, fail: qobject_unref(qdict); - bdrv_unref(bs); + bdrv_co_unref(bs); qapi_free_BlockdevCreateOptions(create_options); return ret; } diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 2c312cc774..ec3ddb17a8 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -218,7 +218,8 @@ void bdrv_img_create(const char *filename, const char *fmt, bool quiet, Error **errp); void bdrv_ref(BlockDriverState *bs); -void bdrv_unref(BlockDriverState *bs); +void no_coroutine_fn bdrv_unref(BlockDriverState *bs); +void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs); void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h index 2b6d27db7c..fa83f9389c 100644 --- a/include/sysemu/block-backend-global-state.h +++ b/include/sysemu/block-backend-global-state.h @@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options, int blk_get_refcnt(BlockBackend *blk); void blk_ref(BlockBackend *blk); -void blk_unref(BlockBackend *blk); + +void no_coroutine_fn blk_unref(BlockBackend *blk); +void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk); + void blk_remove_all_bs(void); BlockBackend *blk_by_name(const char *name); BlockBackend *blk_next(BlockBackend *blk); From 0c7d204f50c382c6baac8c94bd57af4a022b3888 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:34 +0200 Subject: [PATCH 
10/28] block: Don't call no_coroutine_fns in qmp_block_resize() This QMP handler runs in a coroutine, so it must use the corresponding no_co_wrappers instead. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688 Cc: qemu-stable@nongnu.org Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-5-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- blockdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blockdev.c b/blockdev.c index 2c1752a403..e464daea58 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2440,7 +2440,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, return; } - blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); + blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); if (!blk) { return; } @@ -2455,7 +2455,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, bdrv_co_lock(bs); bdrv_drained_end(bs); - blk_unref(blk); + blk_co_unref(blk); bdrv_co_unlock(bs); } From e113362e4cdfdcfe1d497e569527f70a0021333a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 9 May 2023 15:41:33 +0200 Subject: [PATCH 11/28] iotests: Test resizing image attached to an iothread This tests that trying to resize an image with QMP block_resize doesn't hang or otherwise fail when the image is attached to a device running in an iothread. This is a regression test for the recent fix that changed qmp_block_resize, which is a coroutine based QMP handler, to avoid calling no_coroutine_fns directly. 
Signed-off-by: Kevin Wolf Message-Id: <20230509134133.373408-1-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/tests/iothreads-resize | 71 +++++++++++++++++++ tests/qemu-iotests/tests/iothreads-resize.out | 11 +++ 2 files changed, 82 insertions(+) create mode 100755 tests/qemu-iotests/tests/iothreads-resize create mode 100644 tests/qemu-iotests/tests/iothreads-resize.out diff --git a/tests/qemu-iotests/tests/iothreads-resize b/tests/qemu-iotests/tests/iothreads-resize new file mode 100755 index 0000000000..36e4598c62 --- /dev/null +++ b/tests/qemu-iotests/tests/iothreads-resize @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# group: rw auto quick +# +# Test resizing an image that is attached to a separate iothread +# +# Copyright (C) 2023 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +cd .. +. ./common.rc +. 
./common.filter + +# Resizing images is only supported by a few block drivers +_supported_fmt raw qcow2 qed +_supported_proto file +_require_devices virtio-scsi-pci + +size=64M +_make_test_img $size + +qmp() { +cat < Date: Thu, 4 May 2023 13:57:35 +0200 Subject: [PATCH 12/28] test-bdrv-drain: Don't modify the graph in coroutines test-bdrv-drain contains a few test cases that are run both in coroutine and non-coroutine context. Running the entire code including the setup and shutdown in coroutines is incorrect because graph modifications can generally not happen in coroutines. Change the test so that creating and destroying the test nodes and BlockBackends always happens outside of coroutine context. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Message-Id: <20230504115750.54437-6-kwolf@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/unit/test-bdrv-drain.c | 112 +++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 37 deletions(-) diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index d9d3807062..9a4c5e59d6 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -188,6 +188,25 @@ static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState } } +static BlockBackend * no_coroutine_fn test_setup(void) +{ + BlockBackend *blk; + BlockDriverState *bs, *backing; + + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, + &error_abort); + blk_insert_bs(blk, bs, &error_abort); + + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); + bdrv_set_backing_hd(bs, backing, &error_abort); + + bdrv_unref(backing); + bdrv_unref(bs); + + return blk; +} + static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs) { if (drain_type != BDRV_DRAIN_ALL) { @@ -199,25 +218,19 @@ static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState 
* } } -static void test_drv_cb_common(enum drain_type drain_type, bool recursive) +static void test_drv_cb_common(BlockBackend *blk, enum drain_type drain_type, + bool recursive) { - BlockBackend *blk; - BlockDriverState *bs, *backing; + BlockDriverState *bs = blk_bs(blk); + BlockDriverState *backing = bs->backing->bs; BDRVTestState *s, *backing_s; BlockAIOCB *acb; int aio_ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); - blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, - &error_abort); s = bs->opaque; - blk_insert_bs(blk, bs, &error_abort); - - backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); backing_s = backing->opaque; - bdrv_set_backing_hd(bs, backing, &error_abort); /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */ g_assert_cmpint(s->drain_count, ==, 0); @@ -252,44 +265,53 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive) g_assert_cmpint(s->drain_count, ==, 0); g_assert_cmpint(backing_s->drain_count, ==, 0); - - bdrv_unref(backing); - bdrv_unref(bs); - blk_unref(blk); } static void test_drv_cb_drain_all(void) { - test_drv_cb_common(BDRV_DRAIN_ALL, true); + BlockBackend *blk = test_setup(); + test_drv_cb_common(blk, BDRV_DRAIN_ALL, true); + blk_unref(blk); } static void test_drv_cb_drain(void) { - test_drv_cb_common(BDRV_DRAIN, false); + BlockBackend *blk = test_setup(); + test_drv_cb_common(blk, BDRV_DRAIN, false); + blk_unref(blk); +} + +static void coroutine_fn test_drv_cb_co_drain_all_entry(void) +{ + BlockBackend *blk = blk_all_next(NULL); + test_drv_cb_common(blk, BDRV_DRAIN_ALL, true); } static void test_drv_cb_co_drain_all(void) { - call_in_coroutine(test_drv_cb_drain_all); + BlockBackend *blk = test_setup(); + call_in_coroutine(test_drv_cb_co_drain_all_entry); + blk_unref(blk); +} + +static void coroutine_fn test_drv_cb_co_drain_entry(void) +{ + BlockBackend *blk = blk_all_next(NULL); + 
test_drv_cb_common(blk, BDRV_DRAIN, false); } static void test_drv_cb_co_drain(void) { - call_in_coroutine(test_drv_cb_drain); + BlockBackend *blk = test_setup(); + call_in_coroutine(test_drv_cb_co_drain_entry); + blk_unref(blk); } -static void test_quiesce_common(enum drain_type drain_type, bool recursive) +static void test_quiesce_common(BlockBackend *blk, enum drain_type drain_type, + bool recursive) { - BlockBackend *blk; - BlockDriverState *bs, *backing; - - blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, - &error_abort); - blk_insert_bs(blk, bs, &error_abort); - - backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); - bdrv_set_backing_hd(bs, backing, &error_abort); + BlockDriverState *bs = blk_bs(blk); + BlockDriverState *backing = bs->backing->bs; g_assert_cmpint(bs->quiesce_counter, ==, 0); g_assert_cmpint(backing->quiesce_counter, ==, 0); @@ -307,30 +329,46 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) g_assert_cmpint(bs->quiesce_counter, ==, 0); g_assert_cmpint(backing->quiesce_counter, ==, 0); - - bdrv_unref(backing); - bdrv_unref(bs); - blk_unref(blk); } static void test_quiesce_drain_all(void) { - test_quiesce_common(BDRV_DRAIN_ALL, true); + BlockBackend *blk = test_setup(); + test_quiesce_common(blk, BDRV_DRAIN_ALL, true); + blk_unref(blk); } static void test_quiesce_drain(void) { - test_quiesce_common(BDRV_DRAIN, false); + BlockBackend *blk = test_setup(); + test_quiesce_common(blk, BDRV_DRAIN, false); + blk_unref(blk); +} + +static void coroutine_fn test_quiesce_co_drain_all_entry(void) +{ + BlockBackend *blk = blk_all_next(NULL); + test_quiesce_common(blk, BDRV_DRAIN_ALL, true); } static void test_quiesce_co_drain_all(void) { - call_in_coroutine(test_quiesce_drain_all); + BlockBackend *blk = test_setup(); + call_in_coroutine(test_quiesce_co_drain_all_entry); + blk_unref(blk); +} + +static void coroutine_fn 
test_quiesce_co_drain_entry(void) +{ + BlockBackend *blk = blk_all_next(NULL); + test_quiesce_common(blk, BDRV_DRAIN, false); } static void test_quiesce_co_drain(void) { - call_in_coroutine(test_quiesce_drain); + BlockBackend *blk = test_setup(); + call_in_coroutine(test_quiesce_co_drain_entry); + blk_unref(blk); } static void test_nested(void) From d51c349b642d2c50611085db521cf31f5f985488 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:36 +0200 Subject: [PATCH 13/28] graph-lock: Add GRAPH_UNLOCKED(_PTR) For some functions, it is part of their interface to be called without holding the graph lock. Add a new macro to document this. The macro expands to TSA_EXCLUDES(), which is a relatively weak check because it passes in cases where the compiler just doesn't know if the lock is held. Function pointers can't be checked at all. Therefore, its primary purpose is documentation. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-7-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- include/block/graph-lock.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index ac0fef8605..f17d1588e7 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -73,6 +73,7 @@ extern BdrvGraphLock graph_lock; */ #define GRAPH_WRLOCK TSA_REQUIRES(graph_lock) #define GRAPH_RDLOCK TSA_REQUIRES_SHARED(graph_lock) +#define GRAPH_UNLOCKED TSA_EXCLUDES(graph_lock) /* * TSA annotations are not part of function types, so checks are defeated when @@ -83,6 +84,7 @@ extern BdrvGraphLock graph_lock; */ #define GRAPH_RDLOCK_PTR TSA_GUARDED_BY(graph_lock) #define GRAPH_WRLOCK_PTR TSA_GUARDED_BY(graph_lock) +#define GRAPH_UNLOCKED_PTR /* * register_aiocontext: From 4ee1f854ecbe81492e913bd7699091266992fee7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:37 +0200 Subject: [PATCH 14/28] graph-lock: Fix GRAPH_RDLOCK_GUARD*() to be 
reader lock GRAPH_RDLOCK_GUARD() and GRAPH_RDLOCK_GUARD_MAINLOOP() only take a reader lock for the graph, so the correct annotation for them to use is TSA_ASSERT_SHARED rather than TSA_ASSERT. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Message-Id: <20230504115750.54437-8-kwolf@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- include/block/graph-lock.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index f17d1588e7..7574a2de5b 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -205,12 +205,12 @@ typedef struct GraphLockable { } GraphLockable; #define GML_OBJ_() (&(GraphLockable) { }) /* - * This is not marked as TSA_ACQUIRE() because TSA doesn't understand the + * This is not marked as TSA_ACQUIRE_SHARED() because TSA doesn't understand the * cleanup attribute and would therefore complain that the graph is never - * unlocked. TSA_ASSERT() makes sure that the following calls know that we - * hold the lock while unlocking is left unchecked. + * unlocked. TSA_ASSERT_SHARED() makes sure that the following calls know that + * we hold the lock while unlocking is left unchecked. */ -static inline GraphLockable * TSA_ASSERT(graph_lock) TSA_NO_TSA coroutine_fn +static inline GraphLockable * TSA_ASSERT_SHARED(graph_lock) TSA_NO_TSA coroutine_fn graph_lockable_auto_lock(GraphLockable *x) { bdrv_graph_co_rdlock(); @@ -249,12 +249,12 @@ typedef struct GraphLockableMainloop { } GraphLockableMainloop; #define GMLML_OBJ_() (&(GraphLockableMainloop) { }) /* - * This is not marked as TSA_ACQUIRE() because TSA doesn't understand the + * This is not marked as TSA_ACQUIRE_SHARED() because TSA doesn't understand the * cleanup attribute and would therefore complain that the graph is never - * unlocked. TSA_ASSERT() makes sure that the following calls know that we - * hold the lock while unlocking is left unchecked. + * unlocked. 
TSA_ASSERT_SHARED() makes sure that the following calls know that + * we hold the lock while unlocking is left unchecked. */ -static inline GraphLockableMainloop * TSA_ASSERT(graph_lock) TSA_NO_TSA +static inline GraphLockableMainloop * TSA_ASSERT_SHARED(graph_lock) TSA_NO_TSA graph_lockable_auto_lock_mainloop(GraphLockableMainloop *x) { bdrv_graph_rdlock_main_loop(); From 1a30b0f5d76f842576de2ab9a29ab9e8a7c9eb09 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:38 +0200 Subject: [PATCH 15/28] block: .bdrv_open is non-coroutine and unlocked Drivers were a bit confused about whether .bdrv_open can run in a coroutine and whether or not it holds a graph lock. It cannot keep a graph lock from the caller across the whole function because it both changes the graph (requires a writer lock) and does I/O (requires a reader lock). Therefore, it should take these locks internally as needed. The functions used to be called in coroutine context during image creation. This was buggy for other reasons, and as of commit 32192301, all block drivers go through no_co_wrappers. So it is not called in coroutine context any more. Fix qcow2 and qed to work with the correct assumptions: The graph lock needs to be taken internally instead of just assuming it's already there, and the coroutine path is dead code that can be removed. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-9-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 6 +++--- block/qcow2.c | 15 ++++++--------- block/qed.c | 18 ++++++++---------- include/block/block_int-common.h | 8 ++++---- 4 files changed, 21 insertions(+), 26 deletions(-) diff --git a/block.c b/block.c index 20d5ee0959..abec940867 100644 --- a/block.c +++ b/block.c @@ -1610,9 +1610,9 @@ out: * bdrv_refresh_total_sectors() which polls when called from non-coroutine * context. 
*/ -static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, - const char *node_name, QDict *options, - int open_flags, Error **errp) +static int no_coroutine_fn GRAPH_UNLOCKED +bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, + QDict *options, int open_flags, Error **errp) { Error *local_err = NULL; int i, ret; diff --git a/block/qcow2.c b/block/qcow2.c index 01742b3ebe..5bde3b8401 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1891,7 +1891,7 @@ static void coroutine_fn qcow2_open_entry(void *opaque) QCow2OpenCo *qoc = opaque; BDRVQcow2State *s = qoc->bs->opaque; - assume_graph_lock(); /* FIXME */ + GRAPH_RDLOCK_GUARD(); qemu_co_mutex_lock(&s->lock); qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, true, @@ -1920,14 +1920,11 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, /* Initialise locks */ qemu_co_mutex_init(&s->lock); - if (qemu_in_coroutine()) { - /* From bdrv_co_create. */ - qcow2_open_entry(&qoc); - } else { - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); - qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc)); - BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); - } + assert(!qemu_in_coroutine()); + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc)); + BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); + return qoc.ret; } diff --git a/block/qed.c b/block/qed.c index aff2a2076e..be9ff0fb34 100644 --- a/block/qed.c +++ b/block/qed.c @@ -557,11 +557,13 @@ typedef struct QEDOpenCo { int ret; } QEDOpenCo; -static void coroutine_fn GRAPH_RDLOCK bdrv_qed_open_entry(void *opaque) +static void coroutine_fn bdrv_qed_open_entry(void *opaque) { QEDOpenCo *qoc = opaque; BDRVQEDState *s = qoc->bs->opaque; + GRAPH_RDLOCK_GUARD(); + qemu_co_mutex_lock(&s->table_lock); qoc->ret = bdrv_qed_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp); qemu_co_mutex_unlock(&s->table_lock); @@ 
-579,21 +581,17 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, }; int ret; - assume_graph_lock(); /* FIXME */ - ret = bdrv_open_file_child(NULL, options, "file", bs, errp); if (ret < 0) { return ret; } bdrv_qed_init_state(bs); - if (qemu_in_coroutine()) { - bdrv_qed_open_entry(&qoc); - } else { - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); - qemu_coroutine_enter(qemu_coroutine_create(bdrv_qed_open_entry, &qoc)); - BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); - } + assert(!qemu_in_coroutine()); + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + qemu_coroutine_enter(qemu_coroutine_create(bdrv_qed_open_entry, &qoc)); + BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); + return qoc.ret; } diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 013d419444..6fb28cd8fa 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -236,12 +236,12 @@ struct BlockDriver { void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); void (*bdrv_join_options)(QDict *options, QDict *old_options); - int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags, - Error **errp); + int GRAPH_UNLOCKED_PTR (*bdrv_open)( + BlockDriverState *bs, QDict *options, int flags, Error **errp); /* Protocol drivers should implement this instead of bdrv_open */ - int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, - Error **errp); + int GRAPH_UNLOCKED_PTR (*bdrv_file_open)( + BlockDriverState *bs, QDict *options, int flags, Error **errp); void (*bdrv_close)(BlockDriverState *bs); int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create)( From 5d934513f8ca21f48709103991099c25f7bf9f6c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:39 +0200 Subject: [PATCH 16/28] nbd: Remove nbd_co_flush() wrapper function The only thing nbd_co_flush() does is call nbd_client_co_flush(). 
Just use that function directly in the BlockDriver definitions and remove the wrapper. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-10-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/nbd.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index bf2894ad5c..d3ee256844 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -1920,11 +1920,6 @@ fail: return ret; } -static int coroutine_fn nbd_co_flush(BlockDriverState *bs) -{ - return nbd_client_co_flush(bs); -} - static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; @@ -2120,7 +2115,7 @@ static BlockDriver bdrv_nbd = { .bdrv_co_pwritev = nbd_client_co_pwritev, .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, - .bdrv_co_flush_to_os = nbd_co_flush, + .bdrv_co_flush_to_os = nbd_client_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_co_truncate = nbd_co_truncate, @@ -2148,7 +2143,7 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_co_pwritev = nbd_client_co_pwritev, .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, - .bdrv_co_flush_to_os = nbd_co_flush, + .bdrv_co_flush_to_os = nbd_client_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_co_truncate = nbd_co_truncate, @@ -2176,7 +2171,7 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_co_pwritev = nbd_client_co_pwritev, .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, - .bdrv_co_flush_to_os = nbd_co_flush, + .bdrv_co_flush_to_os = nbd_client_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_co_truncate = nbd_co_truncate, From 69aa0d371f67b1c042ed4f3ff4a481d561b54d21 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Thu, 4 May 2023 13:57:40 
+0200 Subject: [PATCH 17/28] nbd: Mark nbd_co_do_establish_connection() and callers GRAPH_RDLOCK This adds GRAPH_RDLOCK annotations to declare that callers of nbd_co_do_establish_connection() need to hold a reader lock for the graph. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-11-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/coroutines.h | 5 +++-- block/nbd.c | 39 +++++++++++++++++++++------------------ 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/block/coroutines.h b/block/coroutines.h index dd9f3d449b..f3226682d6 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -61,7 +61,7 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); int coroutine_fn GRAPH_RDLOCK bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); -int coroutine_fn +int coroutine_fn GRAPH_RDLOCK nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking, Error **errp); @@ -85,7 +85,8 @@ bdrv_common_block_status_above(BlockDriverState *bs, int64_t *map, BlockDriverState **file, int *depth); -int co_wrapper_mixed + +int co_wrapper_mixed_bdrv_rdlock nbd_do_establish_connection(BlockDriverState *bs, bool blocking, Error **errp); #endif /* BLOCK_COROUTINES_H */ diff --git a/block/nbd.c b/block/nbd.c index d3ee256844..a3f8f8a9d5 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -322,6 +322,7 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, int ret; IO_CODE(); + assert_bdrv_graph_readable(); assert(!s->ioc); s->ioc = nbd_co_establish_connection(s->conn, &s->info, blocking, errp); @@ -369,7 +370,7 @@ static bool nbd_client_connecting(BDRVNBDState *s) } /* Called with s->requests_lock taken. 
*/ -static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) +static void coroutine_fn GRAPH_RDLOCK nbd_reconnect_attempt(BDRVNBDState *s) { int ret; bool blocking = s->state == NBD_CLIENT_CONNECTING_WAIT; @@ -480,9 +481,9 @@ static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) } } -static int coroutine_fn nbd_co_send_request(BlockDriverState *bs, - NBDRequest *request, - QEMUIOVector *qiov) +static int coroutine_fn GRAPH_RDLOCK +nbd_co_send_request(BlockDriverState *bs, NBDRequest *request, + QEMUIOVector *qiov) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; int rc, i = -1; @@ -1171,8 +1172,9 @@ static int coroutine_fn nbd_co_receive_blockstatus_reply(BDRVNBDState *s, return iter.ret; } -static int coroutine_fn nbd_co_request(BlockDriverState *bs, NBDRequest *request, - QEMUIOVector *write_qiov) +static int coroutine_fn GRAPH_RDLOCK +nbd_co_request(BlockDriverState *bs, NBDRequest *request, + QEMUIOVector *write_qiov) { int ret, request_ret; Error *local_err = NULL; @@ -1208,9 +1210,9 @@ static int coroutine_fn nbd_co_request(BlockDriverState *bs, NBDRequest *request return ret ? ret : request_ret; } -static int coroutine_fn nbd_client_co_preadv(BlockDriverState *bs, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn GRAPH_RDLOCK +nbd_client_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret, request_ret; Error *local_err = NULL; @@ -1266,9 +1268,9 @@ static int coroutine_fn nbd_client_co_preadv(BlockDriverState *bs, int64_t offse return ret ? 
ret : request_ret; } -static int coroutine_fn nbd_client_co_pwritev(BlockDriverState *bs, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn GRAPH_RDLOCK +nbd_client_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { @@ -1291,8 +1293,9 @@ static int coroutine_fn nbd_client_co_pwritev(BlockDriverState *bs, int64_t offs return nbd_co_request(bs, &request, qiov); } -static int coroutine_fn nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int64_t bytes, BdrvRequestFlags flags) +static int coroutine_fn GRAPH_RDLOCK +nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, + BdrvRequestFlags flags) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { @@ -1326,7 +1329,7 @@ static int coroutine_fn nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_ return nbd_co_request(bs, &request, NULL); } -static int coroutine_fn nbd_client_co_flush(BlockDriverState *bs) +static int coroutine_fn GRAPH_RDLOCK nbd_client_co_flush(BlockDriverState *bs) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { .type = NBD_CMD_FLUSH }; @@ -1341,8 +1344,8 @@ static int coroutine_fn nbd_client_co_flush(BlockDriverState *bs) return nbd_co_request(bs, &request, NULL); } -static int coroutine_fn nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, - int64_t bytes) +static int coroutine_fn GRAPH_RDLOCK +nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { @@ -1361,7 +1364,7 @@ static int coroutine_fn nbd_client_co_pdiscard(BlockDriverState *bs, int64_t off return nbd_co_request(bs, &request, NULL); } -static int coroutine_fn nbd_client_co_block_status( +static int coroutine_fn GRAPH_RDLOCK nbd_client_co_block_status( BlockDriverState *bs, 
bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { From 622d30af9930e9eb791cf6344633c256e328a7ef Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:41 +0200 Subject: [PATCH 18/28] vhdx: Require GRAPH_RDLOCK for accessing a node's parent list This adds GRAPH_RDLOCK annotations to declare that functions accessing the parent list of a node need to hold a reader lock for the graph. As it happens, they already do. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-12-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/vhdx.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/vhdx.c b/block/vhdx.c index 00777da91a..b20b1edf11 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1506,8 +1506,9 @@ exit: * There are 2 headers, and the highest sequence number will represent * the active header */ -static int vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size, - uint32_t log_size) +static int coroutine_fn GRAPH_RDLOCK +vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size, + uint32_t log_size) { BlockDriverState *bs = blk_bs(blk); BdrvChild *child; @@ -1897,8 +1898,8 @@ exit: * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. * 1MB */ -static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, - Error **errp) +static int coroutine_fn GRAPH_RDLOCK +vhdx_co_create(BlockdevCreateOptions *opts, Error **errp) { BlockdevCreateOptionsVhdx *vhdx_opts; BlockBackend *blk = NULL; From 9c93652da6784314519968c65c05fcaccfe56193 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:42 +0200 Subject: [PATCH 19/28] mirror: Require GRAPH_RDLOCK for accessing a node's parent list This adds GRAPH_RDLOCK annotations to declare that functions accessing the parent list of a node need to hold a reader lock for the graph. As it happens, they already do. 
Signed-off-by: Kevin Wolf Message-Id: <20230504115750.54437-13-kwolf@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/mirror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/mirror.c b/block/mirror.c index 80fa345071..b5c4ae31f3 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1416,7 +1416,7 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, return op; } -static void coroutine_fn active_write_settle(MirrorOp *op) +static void coroutine_fn GRAPH_RDLOCK active_write_settle(MirrorOp *op) { uint64_t start_chunk = op->offset / op->s->granularity; uint64_t end_chunk = DIV_ROUND_UP(op->offset + op->bytes, From de335638a399b614d510b978b5c6d1b237e0ac79 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Thu, 4 May 2023 13:57:43 +0200 Subject: [PATCH 20/28] block: Mark bdrv_co_get_allocated_file_size() and callers GRAPH_RDLOCK This adds GRAPH_RDLOCK annotations to declare that callers of bdrv_co_get_allocated_file_size() need to hold a reader lock for the graph. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Message-Id: <20230504115750.54437-14-kwolf@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block.c | 4 +++- block/vmdk.c | 2 +- include/block/block-io.h | 7 +++++-- include/block/block_int-common.h | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/block.c b/block.c index abec940867..3ccb935950 100644 --- a/block.c +++ b/block.c @@ -5750,7 +5750,8 @@ exit: * sums the size of all data-bearing children. (This excludes backing * children.) 
*/ -static int64_t coroutine_fn bdrv_sum_allocated_file_size(BlockDriverState *bs) +static int64_t coroutine_fn GRAPH_RDLOCK +bdrv_sum_allocated_file_size(BlockDriverState *bs) { BdrvChild *child; int64_t child_size, sum = 0; @@ -5778,6 +5779,7 @@ int64_t coroutine_fn bdrv_co_get_allocated_file_size(BlockDriverState *bs) { BlockDriver *drv = bs->drv; IO_CODE(); + assert_bdrv_graph_readable(); if (!drv) { return -ENOMEDIUM; diff --git a/block/vmdk.c b/block/vmdk.c index 11b553ef25..fddbd1c86c 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2845,7 +2845,7 @@ static void vmdk_close(BlockDriverState *bs) error_free(s->migration_blocker); } -static int64_t coroutine_fn +static int64_t coroutine_fn GRAPH_RDLOCK vmdk_co_get_allocated_file_size(BlockDriverState *bs) { int i; diff --git a/include/block/block-io.h b/include/block/block-io.h index 5dab88521d..fb2adb31c7 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -84,8 +84,11 @@ int64_t coroutine_mixed_fn bdrv_nb_sectors(BlockDriverState *bs); int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_getlength(BlockDriverState *bs); int64_t co_wrapper_mixed_bdrv_rdlock bdrv_getlength(BlockDriverState *bs); -int64_t coroutine_fn bdrv_co_get_allocated_file_size(BlockDriverState *bs); -int64_t co_wrapper bdrv_get_allocated_file_size(BlockDriverState *bs); +int64_t coroutine_fn GRAPH_RDLOCK +bdrv_co_get_allocated_file_size(BlockDriverState *bs); + +int64_t co_wrapper_bdrv_rdlock +bdrv_get_allocated_file_size(BlockDriverState *bs); BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, BlockDriverState *in_bs, Error **errp); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 6fb28cd8fa..6e0365d8f2 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -685,7 +685,7 @@ struct BlockDriver { int64_t coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_getlength)( BlockDriverState *bs); - int64_t coroutine_fn (*bdrv_co_get_allocated_file_size)( + int64_t 
coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_get_allocated_file_size)( BlockDriverState *bs); BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs, From a00e70c01241590b6c80dca4ee39b9de0b10097e Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Thu, 4 May 2023 13:57:44 +0200 Subject: [PATCH 21/28] block: Mark bdrv_co_get_info() and callers GRAPH_RDLOCK This adds GRAPH_RDLOCK annotations to declare that callers of bdrv_co_get_info() need to hold a reader lock for the graph. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-15-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 2 ++ block/crypto.c | 2 +- block/io.c | 11 +++++------ block/mirror.c | 8 ++++++-- block/raw-format.c | 2 +- include/block/block-io.h | 7 +++++-- include/block/block_int-common.h | 4 ++-- 7 files changed, 22 insertions(+), 14 deletions(-) diff --git a/block.c b/block.c index 3ccb935950..a6deaf8ad1 100644 --- a/block.c +++ b/block.c @@ -6349,6 +6349,8 @@ int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) int ret; BlockDriver *drv = bs->drv; IO_CODE(); + assert_bdrv_graph_readable(); + /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ if (!drv) { return -ENOMEDIUM; diff --git a/block/crypto.c b/block/crypto.c index 8fd3ad0054..30093cff9b 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -736,7 +736,7 @@ fail: return ret; } -static int coroutine_fn +static int coroutine_fn GRAPH_RDLOCK block_crypto_co_get_info_luks(BlockDriverState *bs, BlockDriverInfo *bdi) { BlockDriverInfo subbdi; diff --git a/block/io.c b/block/io.c index 6fa1993374..3bf9ef9d87 100644 --- a/block/io.c +++ b/block/io.c @@ -727,10 +727,9 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs) /** * Round a region to cluster boundaries */ -void coroutine_fn bdrv_round_to_clusters(BlockDriverState *bs, - int64_t 
offset, int64_t bytes, - int64_t *cluster_offset, - int64_t *cluster_bytes) +void coroutine_fn GRAPH_RDLOCK +bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *cluster_offset, int64_t *cluster_bytes) { BlockDriverInfo bdi; IO_CODE(); @@ -744,7 +743,7 @@ void coroutine_fn bdrv_round_to_clusters(BlockDriverState *bs, } } -static coroutine_fn int bdrv_get_cluster_size(BlockDriverState *bs) +static int coroutine_fn GRAPH_RDLOCK bdrv_get_cluster_size(BlockDriverState *bs) { BlockDriverInfo bdi; int ret; @@ -1800,7 +1799,7 @@ fail: return ret; } -static inline int coroutine_fn +static inline int coroutine_fn GRAPH_RDLOCK bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, BdrvTrackedRequest *req, int flags) { diff --git a/block/mirror.c b/block/mirror.c index b5c4ae31f3..e48ed0af31 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -576,8 +576,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) { int64_t target_offset; int64_t target_bytes; - bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes, - &target_offset, &target_bytes); + WITH_GRAPH_RDLOCK_GUARD() { + bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes, + &target_offset, &target_bytes); + } if (target_offset == offset && target_bytes == io_bytes) { mirror_method = ret & BDRV_BLOCK_ZERO ? 
@@ -966,11 +968,13 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) */ bdrv_get_backing_filename(target_bs, backing_filename, sizeof(backing_filename)); + bdrv_graph_co_rdlock(); if (!bdrv_co_get_info(target_bs, &bdi) && bdi.cluster_size) { s->target_cluster_size = bdi.cluster_size; } else { s->target_cluster_size = BDRV_SECTOR_SIZE; } + bdrv_graph_co_rdunlock(); if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) && s->granularity < s->target_cluster_size) { s->buf_size = MAX(s->buf_size, s->target_cluster_size); diff --git a/block/raw-format.c b/block/raw-format.c index 06b8030d9d..fd9e61f58e 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -369,7 +369,7 @@ static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs, return info; } -static int coroutine_fn +static int coroutine_fn GRAPH_RDLOCK raw_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { return bdrv_co_get_info(bs->file->bs, bdi); diff --git a/include/block/block-io.h b/include/block/block-io.h index fb2adb31c7..bba7f957e1 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -167,8 +167,11 @@ const char *bdrv_get_node_name(const BlockDriverState *bs); const char *bdrv_get_device_name(const BlockDriverState *bs); const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); -int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); -int co_wrapper_mixed bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); +int coroutine_fn GRAPH_RDLOCK +bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); + +int co_wrapper_mixed_bdrv_rdlock +bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, Error **errp); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 6e0365d8f2..ee77903f72 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -699,8 +699,8 @@ struct 
BlockDriver { BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset); - int coroutine_fn (*bdrv_co_get_info)(BlockDriverState *bs, - BlockDriverInfo *bdi); + int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_get_info)( + BlockDriverState *bs, BlockDriverInfo *bdi); ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs, Error **errp); From cb2bfaa450dd65b717e27c9090169be05bd73b93 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Thu, 4 May 2023 13:57:45 +0200 Subject: [PATCH 22/28] block: Mark bdrv_co_debug_event() GRAPH_RDLOCK This adds GRAPH_RDLOCK annotations to declare that callers of bdrv_co_debug_event() need to hold a reader lock for the graph. Unfortunately we cannot use a co_wrapper_bdrv_rdlock (i.e. make the coroutine wrapper a no_coroutine_fn), because the function is called (using the BLKDBG_EVENT macro) by mixed functions that run both in coroutine and non-coroutine context (for example many of the functions in qcow2-cluster.c and qcow2-refcount.c). 
Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-16-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 2 ++ include/block/block-io.h | 9 +++++---- include/block/block_int-common.h | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index a6deaf8ad1..1bc766c778 100644 --- a/block.c +++ b/block.c @@ -6399,6 +6399,8 @@ BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs) void coroutine_fn bdrv_co_debug_event(BlockDriverState *bs, BlkdebugEvent event) { IO_CODE(); + assert_bdrv_graph_readable(); + if (!bs || !bs->drv || !bs->drv->bdrv_co_debug_event) { return; } diff --git a/include/block/block-io.h b/include/block/block-io.h index bba7f957e1..1f612ec5bd 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -205,10 +205,11 @@ void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); void bdrv_enable_copy_on_read(BlockDriverState *bs); void bdrv_disable_copy_on_read(BlockDriverState *bs); -void coroutine_fn bdrv_co_debug_event(BlockDriverState *bs, - BlkdebugEvent event); -void co_wrapper_mixed bdrv_debug_event(BlockDriverState *bs, - BlkdebugEvent event); +void coroutine_fn GRAPH_RDLOCK +bdrv_co_debug_event(BlockDriverState *bs, BlkdebugEvent event); + +void co_wrapper_mixed_bdrv_rdlock +bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event); #define BLKDBG_EVENT(child, evt) \ do { \ diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index ee77903f72..88ce7f9d9e 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -735,8 +735,8 @@ struct BlockDriver { int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_check)( BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix); - void coroutine_fn (*bdrv_co_debug_event)(BlockDriverState *bs, - BlkdebugEvent event); + void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_debug_event)( + 
BlockDriverState *bs, BlkdebugEvent event); /* io queue for linux-aio */ void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_plug)(BlockDriverState *bs); From 840428a2669c90f75cf0d26a06bdfe6f51755fae Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Thu, 4 May 2023 13:57:46 +0200 Subject: [PATCH 23/28] block: Mark BlockDriver callbacks for amend job GRAPH_RDLOCK This adds GRAPH_RDLOCK annotations to declare that callers of amend callbacks in BlockDriver need to hold a reader lock for the graph. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-17-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/amend.c | 8 +++++++- include/block/block_int-common.h | 12 ++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/block/amend.c b/block/amend.c index bc4bb7b416..53a410247c 100644 --- a/block/amend.c +++ b/block/amend.c @@ -46,6 +46,7 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp) { BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common); int ret; + GRAPH_RDLOCK_GUARD(); job_progress_set_remaining(&s->common, 1); ret = s->bs->drv->bdrv_co_amend(s->bs, s->opts, s->force, errp); @@ -54,7 +55,8 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp) return ret; } -static int blockdev_amend_pre_run(BlockdevAmendJob *s, Error **errp) +static int GRAPH_RDLOCK +blockdev_amend_pre_run(BlockdevAmendJob *s, Error **errp) { if (s->bs->drv->bdrv_amend_pre_run) { return s->bs->drv->bdrv_amend_pre_run(s->bs, errp); @@ -67,9 +69,11 @@ static void blockdev_amend_free(Job *job) { BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common); + bdrv_graph_rdlock_main_loop(); if (s->bs->drv->bdrv_amend_clean) { s->bs->drv->bdrv_amend_clean(s->bs); } + bdrv_graph_rdunlock_main_loop(); bdrv_unref(s->bs); } @@ -93,6 +97,8 @@ void qmp_x_blockdev_amend(const char *job_id, BlockDriver *drv = 
bdrv_find_format(fmt); BlockDriverState *bs; + GRAPH_RDLOCK_GUARD_MAINLOOP(); + bs = bdrv_lookup_bs(NULL, node_name, errp); if (!bs) { return; diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 88ce7f9d9e..37d094796e 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -204,12 +204,13 @@ struct BlockDriver { * to allow driver-specific initialization code that requires * the BQL, like setting up specific permission flags. */ - int (*bdrv_amend_pre_run)(BlockDriverState *bs, Error **errp); + int GRAPH_RDLOCK_PTR (*bdrv_amend_pre_run)( + BlockDriverState *bs, Error **errp); /* * This function is invoked under BQL after .bdrv_co_amend() * to allow cleaning up what was done in .bdrv_amend_pre_run(). */ - void (*bdrv_amend_clean)(BlockDriverState *bs); + void GRAPH_RDLOCK_PTR (*bdrv_amend_clean)(BlockDriverState *bs); /* * Return true if @to_replace can be replaced by a BDS with the @@ -463,10 +464,9 @@ struct BlockDriver { int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); - int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs, - BlockdevAmendOptions *opts, - bool force, - Error **errp); + int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_amend)( + BlockDriverState *bs, BlockdevAmendOptions *opts, bool force, + Error **errp); /* aio */ BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_preadv)(BlockDriverState *bs, From 6ec75a6a3ed9d6ff3161d1f97c497909906495bd Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:47 +0200 Subject: [PATCH 24/28] block: Mark bdrv_query_bds_stats() and callers GRAPH_RDLOCK This adds GRAPH_RDLOCK annotations to declare that callers of bdrv_query_bds_stats() need to hold a reader lock for the graph because it accesses the children list of a node. 
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-18-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/qapi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/qapi.c b/block/qapi.c index c84147849d..71f2751257 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -604,8 +604,8 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk) = bdrv_latency_histogram_stats(&hgram[BLOCK_ACCT_FLUSH]); } -static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs, - bool blk_level) +static BlockStats * GRAPH_RDLOCK +bdrv_query_bds_stats(BlockDriverState *bs, bool blk_level) { BdrvChild *parent_child; BlockDriverState *filter_or_cow_bs; @@ -713,6 +713,8 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, BlockBackend *blk; BlockDriverState *bs; + GRAPH_RDLOCK_GUARD_MAINLOOP(); + /* Just to be safe if query_nodes is not always initialized */ if (has_query_nodes && query_nodes) { for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) { From 4f0bef8b36eb33efbba0a216b233265d734bdbfb Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:48 +0200 Subject: [PATCH 25/28] block: Mark bdrv_query_block_graph_info() and callers GRAPH_RDLOCK This adds GRAPH_RDLOCK annotations to declare that callers of bdrv_query_block_graph_info() need to hold a reader lock for the graph because it accesses the children list of a node. 
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-19-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- include/block/qapi.h | 7 ++++--- qemu-img.c | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/block/qapi.h b/include/block/qapi.h index 8773b9b191..18d48ddb70 100644 --- a/include/block/qapi.h +++ b/include/block/qapi.h @@ -25,6 +25,7 @@ #ifndef BLOCK_QAPI_H #define BLOCK_QAPI_H +#include "block/graph-lock.h" #include "block/snapshot.h" #include "qapi/qapi-types-block-core.h" @@ -43,9 +44,9 @@ void bdrv_query_image_info(BlockDriverState *bs, bool flat, bool skip_implicit_filters, Error **errp); -void bdrv_query_block_graph_info(BlockDriverState *bs, - BlockGraphInfo **p_info, - Error **errp); +void GRAPH_RDLOCK +bdrv_query_block_graph_info(BlockDriverState *bs, BlockGraphInfo **p_info, + Error **errp); void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, diff --git a/qemu-img.c b/qemu-img.c index 9aeac69fa6..9f9f0a7629 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2938,6 +2938,8 @@ static BlockGraphInfoList *collect_image_info_list(bool image_opts, } bs = blk_bs(blk); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + /* * Note that the returned BlockGraphInfo object will not have * information about this image's backing node, because we have opened From 533c6e4ee8885cb9e7c6ac36e8e9fa92bea64f97 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:49 +0200 Subject: [PATCH 26/28] block: Mark bdrv_recurse_can_replace() and callers GRAPH_RDLOCK This adds GRAPH_RDLOCK annotations to declare that callers of bdrv_recurse_can_replace() need to hold a reader lock for the graph because it accesses the children list of a node. 
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-20-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/blkverify.c | 5 +++-- block/mirror.c | 4 ++++ block/quorum.c | 4 ++-- blockdev.c | 3 +++ include/block/block-global-state.h | 5 +++-- include/block/block_int-common.h | 4 ++-- include/block/block_int-global-state.h | 4 ++-- 7 files changed, 19 insertions(+), 10 deletions(-) diff --git a/block/blkverify.c b/block/blkverify.c index 1c16f86b2e..7326461f30 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -265,8 +265,9 @@ static int coroutine_fn GRAPH_RDLOCK blkverify_co_flush(BlockDriverState *bs) return bdrv_co_flush(s->test_file->bs); } -static bool blkverify_recurse_can_replace(BlockDriverState *bs, - BlockDriverState *to_replace) +static bool GRAPH_RDLOCK +blkverify_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace) { BDRVBlkverifyState *s = bs->opaque; diff --git a/block/mirror.c b/block/mirror.c index e48ed0af31..717442ca4d 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -747,7 +747,10 @@ static int mirror_exit_common(Job *job) * Cannot use check_to_replace_node() here, because that would * check for an op blocker on @to_replace, and we have our own * there. 
+ * + * TODO Pull out the writer lock from bdrv_replace_node() to here */ + bdrv_graph_rdlock_main_loop(); if (bdrv_recurse_can_replace(src, to_replace)) { bdrv_replace_node(to_replace, target_bs, &local_err); } else { @@ -756,6 +759,7 @@ static int mirror_exit_common(Job *job) "would not lead to an abrupt change of visible data", to_replace->node_name, target_bs->node_name); } + bdrv_graph_rdunlock_main_loop(); bdrv_drained_end(target_bs); if (local_err) { error_report_err(local_err); diff --git a/block/quorum.c b/block/quorum.c index ff5a0a2da3..f28758cf2b 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -825,8 +825,8 @@ static coroutine_fn GRAPH_RDLOCK int quorum_co_flush(BlockDriverState *bs) return result; } -static bool quorum_recurse_can_replace(BlockDriverState *bs, - BlockDriverState *to_replace) +static bool GRAPH_RDLOCK +quorum_recurse_can_replace(BlockDriverState *bs, BlockDriverState *to_replace) { BDRVQuorumState *s = bs->opaque; int i; diff --git a/blockdev.c b/blockdev.c index e464daea58..d141ca7a2d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2961,6 +2961,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, BlockDriverState *unfiltered_bs; int job_flags = JOB_DEFAULT; + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + if (!has_speed) { speed = 0; } diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index ec3ddb17a8..f234bca0b6 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -163,8 +163,9 @@ int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, Error **errp); /* check if a named node can be replaced when doing drive-mirror */ -BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, - const char *node_name, Error **errp); +BlockDriverState * GRAPH_RDLOCK +check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, + Error **errp); int no_coroutine_fn bdrv_activate(BlockDriverState *bs, Error 
**errp); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 37d094796e..024ded4fc2 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -217,8 +217,8 @@ struct BlockDriver { * same data as @bs without it affecting @bs's behavior (that is, * without it being visible to @bs's parents). */ - bool (*bdrv_recurse_can_replace)(BlockDriverState *bs, - BlockDriverState *to_replace); + bool GRAPH_RDLOCK_PTR (*bdrv_recurse_can_replace)( + BlockDriverState *bs, BlockDriverState *to_replace); int (*bdrv_probe_device)(const char *filename); diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h index 902406eb99..da5fb31089 100644 --- a/include/block/block_int-global-state.h +++ b/include/block/block_int-global-state.h @@ -225,8 +225,8 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, */ int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp); -bool bdrv_recurse_can_replace(BlockDriverState *bs, - BlockDriverState *to_replace); +bool GRAPH_RDLOCK bdrv_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace); /* * Default implementation for BlockDriver.bdrv_child_perm() that can From e19b157f3c66c44e3b89cb50a2030f0187b968e9 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2023 13:57:50 +0200 Subject: [PATCH 27/28] block: Mark bdrv_refresh_limits() and callers GRAPH_RDLOCK This adds GRAPH_RDLOCK annotations to declare that callers of bdrv_refresh_limits() need to hold a reader lock for the graph because it accesses the children list of a node. 
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-Id: <20230504115750.54437-21-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 9 +++++++++ block/io.c | 1 - include/block/block-global-state.h | 5 ++++- include/block/block_int-common.h | 3 ++- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/block.c b/block.c index 1bc766c778..dad9a4fa43 100644 --- a/block.c +++ b/block.c @@ -1667,7 +1667,10 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, return ret; } + bdrv_graph_rdlock_main_loop(); bdrv_refresh_limits(bs, NULL, &local_err); + bdrv_graph_rdunlock_main_loop(); + if (local_err) { error_propagate(errp, local_err); return -EINVAL; @@ -3419,7 +3422,9 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, } out: + bdrv_graph_rdlock_main_loop(); bdrv_refresh_limits(parent_bs, tran, NULL); + bdrv_graph_rdunlock_main_loop(); return 0; } @@ -4917,7 +4922,9 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state) qdict_del(bs->explicit_options, "backing"); qdict_del(bs->options, "backing"); + bdrv_graph_rdlock_main_loop(); bdrv_refresh_limits(bs, NULL, NULL); + bdrv_graph_rdunlock_main_loop(); bdrv_refresh_total_sectors(bs, bs->total_sectors); } @@ -5316,7 +5323,9 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, out: tran_finalize(tran, ret); + bdrv_graph_rdlock_main_loop(); bdrv_refresh_limits(bs_top, NULL, NULL); + bdrv_graph_rdunlock_main_loop(); if (new_context && old_context != new_context) { aio_context_release(new_context); diff --git a/block/io.c b/block/io.c index 3bf9ef9d87..58557f2f96 100644 --- a/block/io.c +++ b/block/io.c @@ -160,7 +160,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) bool have_limits; GLOBAL_STATE_CODE(); - assume_graph_lock(); /* FIXME */ if (tran) { BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1); diff --git a/include/block/block-global-state.h 
b/include/block/block-global-state.h index f234bca0b6..2d93423d35 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -133,7 +133,10 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file); void bdrv_refresh_filename(BlockDriverState *bs); -void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp); + +void GRAPH_RDLOCK +bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp); + int bdrv_commit(BlockDriverState *bs); int bdrv_make_empty(BdrvChild *c, Error **errp); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 024ded4fc2..4909876756 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -334,7 +334,8 @@ struct BlockDriver { int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); - void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp); + void GRAPH_RDLOCK_PTR (*bdrv_refresh_limits)( + BlockDriverState *bs, Error **errp); /* * Returns 1 if newly created images are guaranteed to contain only From 58a2e3f5c37be02dac3086b81bdda9414b931edf Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 1 May 2023 13:34:43 -0400 Subject: [PATCH 28/28] block: compile out assert_bdrv_graph_readable() by default reader_count() is a performance bottleneck because the global aio_context_list_lock mutex causes thread contention. Put this debugging assertion behind a new ./configure --enable-debug-graph-lock option and disable it by default. The --enable-debug-graph-lock option is also enabled by the more general --enable-debug option. 
Signed-off-by: Stefan Hajnoczi Message-Id: <20230501173443.153062-1-stefanha@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/graph-lock.c | 3 +++ configure | 1 + meson.build | 2 ++ meson_options.txt | 2 ++ scripts/meson-buildoptions.sh | 4 ++++ 5 files changed, 12 insertions(+) diff --git a/block/graph-lock.c b/block/graph-lock.c index 639526608f..377884c3a9 100644 --- a/block/graph-lock.c +++ b/block/graph-lock.c @@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void) void assert_bdrv_graph_readable(void) { + /* reader_count() is slow due to aio_context_list_lock lock contention */ +#ifdef CONFIG_DEBUG_GRAPH_LOCK assert(qemu_in_main_thread() || reader_count()); +#endif } void assert_bdrv_graph_writable(void) diff --git a/configure b/configure index 77c03315f8..243e2e0a0d 100755 --- a/configure +++ b/configure @@ -816,6 +816,7 @@ for opt do --enable-debug) # Enable debugging options that aren't excessively noisy debug_tcg="yes" + meson_option_parse --enable-debug-graph-lock "" meson_option_parse --enable-debug-mutex "" meson_option_add -Doptimization=0 fortify_source="no" diff --git a/meson.build b/meson.build index c56e0fec9e..646555420f 100644 --- a/meson.build +++ b/meson.build @@ -1963,6 +1963,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool have_coroutine_pool = false endif config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool) +config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock')) config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex')) config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage')) config_host_data.set('CONFIG_GPROF', get_option('gprof')) @@ -3841,6 +3842,7 @@ summary_info += {'PIE': get_option('b_pie')} summary_info += {'static build': config_host.has_key('CONFIG_STATIC')} summary_info += {'malloc trim support': has_malloc_trim} summary_info += {'membarrier': have_membarrier} +summary_info += {'debug graph lock': 
get_option('debug_graph_lock')} summary_info += {'debug stack usage': get_option('debug_stack_usage')} summary_info += {'mutex debugging': get_option('debug_mutex')} summary_info += {'memory allocator': get_option('malloc')} diff --git a/meson_options.txt b/meson_options.txt index 66ca350029..d8330a1f71 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -319,6 +319,8 @@ option('rng_none', type: 'boolean', value: false, description: 'dummy RNG, avoid using /dev/(u)random and getrandom()') option('coroutine_pool', type: 'boolean', value: true, description: 'coroutine freelist (better performance)') +option('debug_graph_lock', type: 'boolean', value: false, + description: 'graph lock debugging support') option('debug_mutex', type: 'boolean', value: false, description: 'mutex debugging support') option('debug_stack_usage', type: 'boolean', value: false, diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 34d82dec53..2805d1c145 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -23,6 +23,8 @@ meson_options_help() { printf "%s\n" ' QEMU' printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' + printf "%s\n" ' --enable-debug-graph-lock' + printf "%s\n" ' graph lock debugging support' printf "%s\n" ' --enable-debug-mutex mutex debugging support' printf "%s\n" ' --enable-debug-stack-usage' printf "%s\n" ' measure coroutine stack usage' @@ -254,6 +256,8 @@ _meson_option_parse() { --datadir=*) quote_sh "-Ddatadir=$2" ;; --enable-dbus-display) printf "%s" -Ddbus_display=enabled ;; --disable-dbus-display) printf "%s" -Ddbus_display=disabled ;; + --enable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=true ;; + --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;; --enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;; --disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;; --enable-debug-stack-usage) printf "%s" 
-Ddebug_stack_usage=true ;;