diff --git a/block.c b/block.c
index f04a6ad4e8..a2f8d5a0c0 100644
--- a/block.c
+++ b/block.c
@@ -533,7 +533,6 @@ int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
int ret;
GLOBAL_STATE_CODE();
ERRP_GUARD();
- assert_bdrv_graph_readable();
if (!drv->bdrv_co_create_opts) {
error_setg(errp, "Driver '%s' does not support image creation",
diff --git a/block/commit.c b/block/commit.c
index 2b20fd0fd4..aa45beb0f0 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -116,7 +116,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
int64_t offset;
- uint64_t delay_ns = 0;
int ret = 0;
int64_t n = 0; /* bytes */
QEMU_AUTO_VFREE void *buf = NULL;
@@ -149,7 +148,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
- job_sleep_ns(&s->common.job, delay_ns);
+ block_job_ratelimit_sleep(&s->common);
if (job_is_cancelled(&s->common.job)) {
break;
}
@@ -184,9 +183,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
job_progress_update(&s->common.job, n);
if (copy) {
- delay_ns = block_job_ratelimit_get_delay(&s->common, n);
- } else {
- delay_ns = 0;
+ block_job_ratelimit_processed_bytes(&s->common, n);
}
}
diff --git a/block/create.c b/block/create.c
index bf67b9947c..6b23a21675 100644
--- a/block/create.c
+++ b/block/create.c
@@ -43,7 +43,6 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp)
int ret;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD();
job_progress_set_remaining(&s->common, 1);
ret = s->drv->bdrv_co_create(s->opts, errp);
diff --git a/block/crypto.c b/block/crypto.c
index 30093cff9b..6ee8d46d30 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -99,12 +99,10 @@ struct BlockCryptoCreateData {
};
-static int block_crypto_create_write_func(QCryptoBlock *block,
- size_t offset,
- const uint8_t *buf,
- size_t buflen,
- void *opaque,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+block_crypto_create_write_func(QCryptoBlock *block, size_t offset,
+ const uint8_t *buf, size_t buflen, void *opaque,
+ Error **errp)
{
struct BlockCryptoCreateData *data = opaque;
ssize_t ret;
@@ -117,10 +115,9 @@ static int block_crypto_create_write_func(QCryptoBlock *block,
return 0;
}
-static int block_crypto_create_init_func(QCryptoBlock *block,
- size_t headerlen,
- void *opaque,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+block_crypto_create_init_func(QCryptoBlock *block, size_t headerlen,
+ void *opaque, Error **errp)
{
struct BlockCryptoCreateData *data = opaque;
Error *local_error = NULL;
@@ -314,7 +311,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_UNLOCKED
block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
QCryptoBlockCreateOptions *opts,
PreallocMode prealloc, Error **errp)
@@ -627,7 +624,7 @@ static int block_crypto_open_luks(BlockDriverState *bs,
bs, options, flags, errp);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_UNLOCKED
block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
{
BlockdevCreateOptionsLUKS *luks_opts;
@@ -665,7 +662,7 @@ fail:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
@@ -727,7 +724,9 @@ fail:
* beforehand, it has been truncated and corrupted in the process.
*/
if (ret) {
+ bdrv_graph_co_rdlock();
bdrv_co_delete_file_noerr(bs);
+ bdrv_graph_co_rdunlock();
}
bdrv_co_unref(bs);
diff --git a/block/export/export.c b/block/export/export.c
index 62c7c22d45..a5c8f42f53 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -192,8 +192,10 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
return exp;
fail:
- blk_set_dev_ops(exp->blk, NULL, NULL);
- blk_unref(blk);
+ if (blk) {
+ blk_set_dev_ops(blk, NULL, NULL);
+ blk_unref(blk);
+ }
aio_context_release(ctx);
if (exp) {
g_free(exp->id);
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 377884c3a9..a92c6ae219 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
/* Protects the list of aiocontext and orphaned_reader_count */
static QemuMutex aio_context_list_lock;
+#if 0
/* Written and read with atomic operations. */
static int has_writer;
+#endif
/*
* A reader coroutine could move from an AioContext to another.
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
g_free(ctx->bdrv_graph);
}
+#if 0
static uint32_t reader_count(void)
{
BdrvGraphRWlock *brdv_graph;
@@ -105,10 +108,17 @@ static uint32_t reader_count(void)
assert((int32_t)rd >= 0);
return rd;
}
+#endif
void bdrv_graph_wrlock(void)
{
GLOBAL_STATE_CODE();
+ /*
+ * TODO Some callers hold an AioContext lock when this is called, which
+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
+ * AioContext locks are gone).
+ */
+#if 0
assert(!qatomic_read(&has_writer));
/* Make sure that constantly arriving new I/O doesn't cause starvation */
@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void)
} while (reader_count() >= 1);
bdrv_drain_all_end();
+#endif
}
void bdrv_graph_wrunlock(void)
{
GLOBAL_STATE_CODE();
+#if 0
QEMU_LOCK_GUARD(&aio_context_list_lock);
assert(qatomic_read(&has_writer));
@@ -155,18 +167,16 @@ void bdrv_graph_wrunlock(void)
/* Wake up all coroutine that are waiting to read the graph */
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+#endif
}
void coroutine_fn bdrv_graph_co_rdlock(void)
{
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
- /* Do not lock if in main thread */
- if (qemu_in_main_thread()) {
- return;
- }
-
for (;;) {
qatomic_set(&bdrv_graph->reader_count,
bdrv_graph->reader_count + 1);
@@ -223,18 +233,15 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
}
}
+#endif
}
void coroutine_fn bdrv_graph_co_rdunlock(void)
{
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
- /* Do not lock if in main thread */
- if (qemu_in_main_thread()) {
- return;
- }
-
qatomic_store_release(&bdrv_graph->reader_count,
bdrv_graph->reader_count - 1);
/* make sure writer sees reader_count before we check has_writer */
@@ -249,6 +256,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
if (qatomic_read(&has_writer)) {
aio_wait_kick();
}
+#endif
}
void bdrv_graph_rdlock_main_loop(void)
@@ -266,13 +274,19 @@ void bdrv_graph_rdunlock_main_loop(void)
void assert_bdrv_graph_readable(void)
{
/* reader_count() is slow due to aio_context_list_lock lock contention */
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
#ifdef CONFIG_DEBUG_GRAPH_LOCK
assert(qemu_in_main_thread() || reader_count());
#endif
+#endif
}
void assert_bdrv_graph_writable(void)
{
assert(qemu_in_main_thread());
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
assert(qatomic_read(&has_writer));
+#endif
}
diff --git a/block/mirror.c b/block/mirror.c
index 717442ca4d..b7d92d1378 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -471,12 +471,11 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
return bytes_handled;
}
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
BlockDriverState *source = s->mirror_top_bs->backing->bs;
MirrorOp *pseudo_op;
int64_t offset;
- uint64_t delay_ns = 0, ret = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
int nb_chunks = 1;
bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
@@ -608,16 +607,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
assert(io_bytes);
offset += io_bytes;
nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
- delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct);
+ block_job_ratelimit_processed_bytes(&s->common, io_bytes_acct);
}
- ret = delay_ns;
fail:
QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next);
qemu_co_queue_restart_all(&pseudo_op->waiting_requests);
g_free(pseudo_op);
-
- return ret;
}
static void mirror_free_init(MirrorBlockJob *s)
@@ -1011,7 +1007,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
assert(!s->dbi);
s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
for (;;) {
- uint64_t delay_ns = 0;
int64_t cnt, delta;
bool should_complete;
@@ -1051,7 +1046,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
mirror_wait_for_free_in_flight_slot(s);
continue;
} else if (cnt != 0) {
- delay_ns = mirror_iteration(s);
+ mirror_iteration(s);
}
}
@@ -1114,12 +1109,14 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
}
if (job_is_ready(&s->common.job) && !should_complete) {
- delay_ns = (s->in_flight == 0 &&
- cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0);
+ if (s->in_flight == 0 && cnt == 0) {
+ trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job),
+ BLOCK_JOB_SLICE_TIME);
+ job_sleep_ns(&s->common.job, BLOCK_JOB_SLICE_TIME);
+ }
+ } else {
+ block_job_ratelimit_sleep(&s->common);
}
- trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job),
- delay_ns);
- job_sleep_ns(&s->common.job, delay_ns);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
}
diff --git a/block/parallels.c b/block/parallels.c
index b49c35929e..d8a3f13e24 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -522,8 +522,8 @@ out:
}
-static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+parallels_co_create(BlockdevCreateOptions* opts, Error **errp)
{
BlockdevCreateOptionsParallels *parallels_opts;
BlockDriverState *bs;
@@ -622,7 +622,7 @@ exit:
goto out;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
parallels_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/qcow.c b/block/qcow.c
index a0c701f578..3644bbf5cb 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -800,8 +800,8 @@ static void qcow_close(BlockDriverState *bs)
error_free(s->migration_blocker);
}
-static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+qcow_co_create(BlockdevCreateOptions *opts, Error **errp)
{
BlockdevCreateOptionsQcow *qcow_opts;
int header_size, backing_filename_len, l1_size, shift, i;
@@ -921,7 +921,7 @@ exit:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
qcow_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/qcow2.c b/block/qcow2.c
index 5bde3b8401..b00b4e7575 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -118,8 +118,9 @@ static int qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
}
-static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
- void *opaque, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, void *opaque,
+ Error **errp)
{
BlockDriverState *bs = opaque;
BDRVQcow2State *s = bs->opaque;
@@ -144,9 +145,7 @@ static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
*/
clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
- ret = bdrv_pwrite_zeroes(bs->file,
- ret,
- clusterlen, 0);
+ ret = bdrv_co_pwrite_zeroes(bs->file, ret, clusterlen, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not zero fill encryption header");
return -1;
@@ -156,9 +155,11 @@ static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
}
-static int qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
- const uint8_t *buf, size_t buflen,
- void *opaque, Error **errp)
+/* The graph lock must be held when called in coroutine context */
+static int coroutine_mixed_fn
+qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
+ const uint8_t *buf, size_t buflen,
+ void *opaque, Error **errp)
{
BlockDriverState *bs = opaque;
BDRVQcow2State *s = bs->opaque;
@@ -1618,9 +1619,11 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
if (open_data_file) {
/* Open external data file */
+ bdrv_graph_co_rdunlock();
s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs,
&child_of_bds, BDRV_CHILD_DATA,
true, errp);
+ bdrv_graph_co_rdlock();
if (*errp) {
ret = -EINVAL;
goto fail;
@@ -1628,10 +1631,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
if (!s->data_file && s->image_data_file) {
+ bdrv_graph_co_rdunlock();
s->data_file = bdrv_co_open_child(s->image_data_file, options,
"data-file", bs,
&child_of_bds,
BDRV_CHILD_DATA, false, errp);
+ bdrv_graph_co_rdlock();
if (!s->data_file) {
ret = -EINVAL;
goto fail;
@@ -1856,7 +1861,9 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
fail:
g_free(s->image_data_file);
if (open_data_file && has_data_file(bs)) {
+ bdrv_graph_co_rdunlock();
bdrv_unref_child(bs, s->data_file);
+ bdrv_graph_co_rdlock();
s->data_file = NULL;
}
g_free(s->unknown_header_fields);
@@ -3137,9 +3144,10 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
return qcow2_update_header(bs);
}
-static int qcow2_set_up_encryption(BlockDriverState *bs,
- QCryptoBlockCreateOptions *cryptoopts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_set_up_encryption(BlockDriverState *bs,
+ QCryptoBlockCreateOptions *cryptoopts,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
QCryptoBlock *crypto = NULL;
@@ -3426,7 +3434,7 @@ static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
return refcount_bits;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_UNLOCKED
qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
{
BlockdevCreateOptionsQcow2 *qcow2_opts;
@@ -3724,8 +3732,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
goto out;
}
+ bdrv_graph_co_rdlock();
ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
if (ret < 0) {
+ bdrv_graph_co_rdunlock();
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
"header and refcount table");
goto out;
@@ -3743,6 +3753,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
/* Create a full header (including things like feature table) */
ret = qcow2_update_header(blk_bs(blk));
+ bdrv_graph_co_rdunlock();
+
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not update qcow2 header");
goto out;
@@ -3776,7 +3788,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
/* Want encryption? There you go. */
if (qcow2_opts->encrypt) {
+ bdrv_graph_co_rdlock();
ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
+ bdrv_graph_co_rdunlock();
+
if (ret < 0) {
goto out;
}
@@ -3813,7 +3828,7 @@ out:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
Error **errp)
{
@@ -3933,8 +3948,10 @@ qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
ret = qcow2_co_create(create_options, errp);
finish:
if (ret < 0) {
+ bdrv_graph_co_rdlock();
bdrv_co_delete_file_noerr(bs);
bdrv_co_delete_file_noerr(data_bs);
+ bdrv_graph_co_rdunlock();
} else {
ret = 0;
}
diff --git a/block/qed.c b/block/qed.c
index be9ff0fb34..9a0350b534 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -630,8 +630,8 @@ static void bdrv_qed_close(BlockDriverState *bs)
qemu_vfree(s->l1_table);
}
-static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+bdrv_qed_co_create(BlockdevCreateOptions *opts, Error **errp)
{
BlockdevCreateOptionsQed *qed_opts;
BlockBackend *blk = NULL;
@@ -751,7 +751,7 @@ out:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/raw-format.c b/block/raw-format.c
index 3a3946213f..918fe4fb7e 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -457,7 +457,7 @@ static int raw_has_zero_init(BlockDriverState *bs)
return bdrv_has_zero_init(bs->file->bs);
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
raw_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/stream.c b/block/stream.c
index 7f9e1ecdbb..e522bbdec5 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -131,7 +131,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
int64_t len;
int64_t offset = 0;
- uint64_t delay_ns = 0;
int error = 0;
int64_t n = 0; /* bytes */
@@ -155,7 +154,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
- job_sleep_ns(&s->common.job, delay_ns);
+ block_job_ratelimit_sleep(&s->common);
if (job_is_cancelled(&s->common.job)) {
break;
}
@@ -204,9 +203,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
/* Publish progress */
job_progress_update(&s->common.job, n);
if (copy) {
- delay_ns = block_job_ratelimit_get_delay(&s->common, n);
- } else {
- delay_ns = 0;
+ block_job_ratelimit_processed_bytes(&s->common, n);
}
}
diff --git a/block/vdi.c b/block/vdi.c
index 08331d2dd7..6c35309e04 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -734,8 +734,9 @@ nonallocating_write:
return ret;
}
-static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
- size_t block_size, Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vdi_co_do_create(BlockdevCreateOptions *create_options, size_t block_size,
+ Error **errp)
{
BlockdevCreateOptionsVdi *vdi_opts;
int ret = 0;
@@ -892,13 +893,13 @@ exit:
return ret;
}
-static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vdi_co_create(BlockdevCreateOptions *create_options, Error **errp)
{
return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp);
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vdi_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/vhdx.c b/block/vhdx.c
index b20b1edf11..89913cba87 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1506,7 +1506,7 @@ exit:
* There are 2 headers, and the highest sequence number will represent
* the active header
*/
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
uint32_t log_size)
{
@@ -1515,6 +1515,8 @@ vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
int ret = 0;
VHDXHeader *hdr = NULL;
+ GRAPH_RDLOCK_GUARD();
+
hdr = g_new0(VHDXHeader, 1);
hdr->signature = VHDX_HEADER_SIGNATURE;
@@ -1898,7 +1900,7 @@ exit:
* .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
* 1MB
*/
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vhdx_co_create(BlockdevCreateOptions *opts, Error **errp)
{
BlockdevCreateOptionsVhdx *vhdx_opts;
@@ -2060,7 +2062,7 @@ delete_and_exit:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vhdx_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/block/vmdk.c b/block/vmdk.c
index fddbd1c86c..e3e86608ec 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2165,10 +2165,9 @@ vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
return ret;
}
-static int vmdk_init_extent(BlockBackend *blk,
- int64_t filesize, bool flat,
- bool compress, bool zeroed_grain,
- Error **errp)
+static int GRAPH_UNLOCKED
+vmdk_init_extent(BlockBackend *blk, int64_t filesize, bool flat, bool compress,
+ bool zeroed_grain, Error **errp)
{
int ret, i;
VMDK4Header header;
@@ -2277,7 +2276,7 @@ exit:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vmdk_create_extent(const char *filename, int64_t filesize, bool flat,
bool compress, bool zeroed_grain, BlockBackend **pbb,
QemuOpts *opts, Error **errp)
@@ -2358,7 +2357,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
* non-split format.
* idx >= 1: get the n-th extent if in a split subformat
*/
-typedef BlockBackend * coroutine_fn /* GRAPH_RDLOCK */
+typedef BlockBackend * coroutine_fn GRAPH_UNLOCKED_PTR
(*vmdk_create_extent_fn)(int64_t size, int idx, bool flat, bool split,
bool compress, bool zeroed_grain, void *opaque,
Error **errp);
@@ -2374,7 +2373,7 @@ static void vmdk_desc_add_extent(GString *desc,
g_free(basename);
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_do_create(int64_t size,
BlockdevVmdkSubformat subformat,
BlockdevVmdkAdapterType adapter_type,
@@ -2605,7 +2604,7 @@ typedef struct {
QemuOpts *opts;
} VMDKCreateOptsData;
-static BlockBackend * coroutine_fn GRAPH_RDLOCK
+static BlockBackend * coroutine_fn GRAPH_UNLOCKED
vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
bool compress, bool zeroed_grain, void *opaque,
Error **errp)
@@ -2647,7 +2646,7 @@ exit:
return blk;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
@@ -2756,11 +2755,9 @@ exit:
return ret;
}
-static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
- bool flat, bool split,
- bool compress,
- bool zeroed_grain,
- void *opaque, Error **errp)
+static BlockBackend * coroutine_fn GRAPH_UNLOCKED
+vmdk_co_create_cb(int64_t size, int idx, bool flat, bool split, bool compress,
+ bool zeroed_grain, void *opaque, Error **errp)
{
int ret;
BlockDriverState *bs;
@@ -2809,7 +2806,7 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
return blk;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vmdk_co_create(BlockdevCreateOptions *create_options, Error **errp)
{
BlockdevCreateOptionsVmdk *opts;
diff --git a/block/vpc.c b/block/vpc.c
index 07ddda5b99..7ee7c7b4e0 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -967,8 +967,8 @@ static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts,
return 0;
}
-static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
- Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vpc_co_create(BlockdevCreateOptions *opts, Error **errp)
{
BlockdevCreateOptionsVpc *vpc_opts;
BlockBackend *blk = NULL;
@@ -1087,7 +1087,7 @@ out:
return ret;
}
-static int coroutine_fn GRAPH_RDLOCK
+static int coroutine_fn GRAPH_UNLOCKED
vpc_co_create_opts(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
diff --git a/blockdev.c b/blockdev.c
index d141ca7a2d..5d56b79df4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1188,79 +1188,22 @@ out_aio_context:
return NULL;
}
-/* New and old BlockDriverState structs for atomic group operations */
-
-typedef struct BlkActionState BlkActionState;
-
-/**
- * BlkActionOps:
- * Table of operations that define an Action.
- *
- * @instance_size: Size of state struct, in bytes.
- * @prepare: Prepare the work, must NOT be NULL.
- * @commit: Commit the changes, can be NULL.
- * @abort: Abort the changes on fail, can be NULL.
- * @clean: Clean up resources after all transaction actions have called
- * commit() or abort(). Can be NULL.
- *
- * Only prepare() may fail. In a single transaction, only one of commit() or
- * abort() will be called. clean() will always be called if it is present.
- *
- * Always run under BQL.
- */
-typedef struct BlkActionOps {
- size_t instance_size;
- void (*prepare)(BlkActionState *common, Error **errp);
- void (*commit)(BlkActionState *common);
- void (*abort)(BlkActionState *common);
- void (*clean)(BlkActionState *common);
-} BlkActionOps;
-
-/**
- * BlkActionState:
- * Describes one Action's state within a Transaction.
- *
- * @action: QAPI-defined enum identifying which Action to perform.
- * @ops: Table of ActionOps this Action can perform.
- * @block_job_txn: Transaction which this action belongs to.
- * @entry: List membership for all Actions in this Transaction.
- *
- * This structure must be arranged as first member in a subclassed type,
- * assuming that the compiler will also arrange it to the same offsets as the
- * base class.
- */
-struct BlkActionState {
- TransactionAction *action;
- const BlkActionOps *ops;
- JobTxn *block_job_txn;
- TransactionProperties *txn_props;
- QTAILQ_ENTRY(BlkActionState) entry;
-};
-
/* internal snapshot private data */
typedef struct InternalSnapshotState {
- BlkActionState common;
BlockDriverState *bs;
QEMUSnapshotInfo sn;
bool created;
} InternalSnapshotState;
+static void internal_snapshot_abort(void *opaque);
+static void internal_snapshot_clean(void *opaque);
+TransactionActionDrv internal_snapshot_drv = {
+ .abort = internal_snapshot_abort,
+ .clean = internal_snapshot_clean,
+};
-static int action_check_completion_mode(BlkActionState *s, Error **errp)
-{
- if (s->txn_props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
- error_setg(errp,
- "Action '%s' does not support Transaction property "
- "completion-mode = %s",
- TransactionActionKind_str(s->action->type),
- ActionCompletionMode_str(s->txn_props->completion_mode));
- return -1;
- }
- return 0;
-}
-
-static void internal_snapshot_prepare(BlkActionState *common,
- Error **errp)
+static void internal_snapshot_action(BlockdevSnapshotInternal *internal,
+ Transaction *tran, Error **errp)
{
Error *local_err = NULL;
const char *device;
@@ -1269,25 +1212,15 @@ static void internal_snapshot_prepare(BlkActionState *common,
QEMUSnapshotInfo old_sn, *sn;
bool ret;
int64_t rt;
- BlockdevSnapshotInternal *internal;
- InternalSnapshotState *state;
+ InternalSnapshotState *state = g_new0(InternalSnapshotState, 1);
AioContext *aio_context;
int ret1;
- g_assert(common->action->type ==
- TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC);
- internal = common->action->u.blockdev_snapshot_internal_sync.data;
- state = DO_UPCAST(InternalSnapshotState, common, common);
+ tran_add(tran, &internal_snapshot_drv, state);
- /* 1. parse input */
device = internal->device;
name = internal->name;
- /* 2. check for validation */
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
-
bs = qmp_get_root_bs(device, errp);
if (!bs) {
return;
@@ -1363,10 +1296,9 @@ out:
aio_context_release(aio_context);
}
-static void internal_snapshot_abort(BlkActionState *common)
+static void internal_snapshot_abort(void *opaque)
{
- InternalSnapshotState *state =
- DO_UPCAST(InternalSnapshotState, common, common);
+ InternalSnapshotState *state = opaque;
BlockDriverState *bs = state->bs;
QEMUSnapshotInfo *sn = &state->sn;
AioContext *aio_context;
@@ -1390,10 +1322,9 @@ static void internal_snapshot_abort(BlkActionState *common)
aio_context_release(aio_context);
}
-static void internal_snapshot_clean(BlkActionState *common)
+static void internal_snapshot_clean(void *opaque)
{
- InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState,
- common, common);
+ g_autofree InternalSnapshotState *state = opaque;
AioContext *aio_context;
if (!state->bs) {
@@ -1410,14 +1341,22 @@ static void internal_snapshot_clean(BlkActionState *common)
/* external snapshot private data */
typedef struct ExternalSnapshotState {
- BlkActionState common;
BlockDriverState *old_bs;
BlockDriverState *new_bs;
bool overlay_appended;
} ExternalSnapshotState;
-static void external_snapshot_prepare(BlkActionState *common,
- Error **errp)
+static void external_snapshot_commit(void *opaque);
+static void external_snapshot_abort(void *opaque);
+static void external_snapshot_clean(void *opaque);
+TransactionActionDrv external_snapshot_drv = {
+ .commit = external_snapshot_commit,
+ .abort = external_snapshot_abort,
+ .clean = external_snapshot_clean,
+};
+
+static void external_snapshot_action(TransactionAction *action,
+ Transaction *tran, Error **errp)
{
int ret;
int flags = 0;
@@ -1430,12 +1369,12 @@ static void external_snapshot_prepare(BlkActionState *common,
const char *snapshot_ref;
/* File name of the new image (for 'blockdev-snapshot-sync') */
const char *new_image_file;
- ExternalSnapshotState *state =
- DO_UPCAST(ExternalSnapshotState, common, common);
- TransactionAction *action = common->action;
+ ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1);
AioContext *aio_context;
uint64_t perm, shared;
+ tran_add(tran, &external_snapshot_drv, state);
+
/* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
* purpose but a different set of parameters */
switch (action->type) {
@@ -1462,9 +1401,6 @@ static void external_snapshot_prepare(BlkActionState *common,
}
/* start processing */
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
state->old_bs = bdrv_lookup_bs(device, node_name, errp);
if (!state->old_bs) {
@@ -1588,10 +1524,9 @@ out:
aio_context_release(aio_context);
}
-static void external_snapshot_commit(BlkActionState *common)
+static void external_snapshot_commit(void *opaque)
{
- ExternalSnapshotState *state =
- DO_UPCAST(ExternalSnapshotState, common, common);
+ ExternalSnapshotState *state = opaque;
AioContext *aio_context;
aio_context = bdrv_get_aio_context(state->old_bs);
@@ -1607,10 +1542,9 @@ static void external_snapshot_commit(BlkActionState *common)
aio_context_release(aio_context);
}
-static void external_snapshot_abort(BlkActionState *common)
+static void external_snapshot_abort(void *opaque)
{
- ExternalSnapshotState *state =
- DO_UPCAST(ExternalSnapshotState, common, common);
+ ExternalSnapshotState *state = opaque;
if (state->new_bs) {
if (state->overlay_appended) {
AioContext *aio_context;
@@ -1650,10 +1584,9 @@ static void external_snapshot_abort(BlkActionState *common)
}
}
-static void external_snapshot_clean(BlkActionState *common)
+static void external_snapshot_clean(void *opaque)
{
- ExternalSnapshotState *state =
- DO_UPCAST(ExternalSnapshotState, common, common);
+ g_autofree ExternalSnapshotState *state = opaque;
AioContext *aio_context;
if (!state->old_bs) {
@@ -1670,7 +1603,6 @@ static void external_snapshot_clean(BlkActionState *common)
}
typedef struct DriveBackupState {
- BlkActionState common;
BlockDriverState *bs;
BlockJob *job;
} DriveBackupState;
@@ -1681,10 +1613,20 @@ static BlockJob *do_backup_common(BackupCommon *backup,
AioContext *aio_context,
JobTxn *txn, Error **errp);
-static void drive_backup_prepare(BlkActionState *common, Error **errp)
+static void drive_backup_commit(void *opaque);
+static void drive_backup_abort(void *opaque);
+static void drive_backup_clean(void *opaque);
+TransactionActionDrv drive_backup_drv = {
+ .commit = drive_backup_commit,
+ .abort = drive_backup_abort,
+ .clean = drive_backup_clean,
+};
+
+static void drive_backup_action(DriveBackup *backup,
+ JobTxn *block_job_txn,
+ Transaction *tran, Error **errp)
{
- DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
- DriveBackup *backup;
+ DriveBackupState *state = g_new0(DriveBackupState, 1);
BlockDriverState *bs;
BlockDriverState *target_bs;
BlockDriverState *source = NULL;
@@ -1698,8 +1640,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
bool set_backing_hd = false;
int ret;
- assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
- backup = common->action->u.drive_backup.data;
+ tran_add(tran, &drive_backup_drv, state);
if (!backup->has_mode) {
backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
@@ -1820,7 +1761,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
state->job = do_backup_common(qapi_DriveBackup_base(backup),
bs, target_bs, aio_context,
- common->block_job_txn, errp);
+ block_job_txn, errp);
unref:
bdrv_unref(target_bs);
@@ -1828,9 +1769,9 @@ out:
aio_context_release(aio_context);
}
-static void drive_backup_commit(BlkActionState *common)
+static void drive_backup_commit(void *opaque)
{
- DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+ DriveBackupState *state = opaque;
AioContext *aio_context;
aio_context = bdrv_get_aio_context(state->bs);
@@ -1842,18 +1783,18 @@ static void drive_backup_commit(BlkActionState *common)
aio_context_release(aio_context);
}
-static void drive_backup_abort(BlkActionState *common)
+static void drive_backup_abort(void *opaque)
{
- DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+ DriveBackupState *state = opaque;
if (state->job) {
job_cancel_sync(&state->job->job, true);
}
}
-static void drive_backup_clean(BlkActionState *common)
+static void drive_backup_clean(void *opaque)
{
- DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+ g_autofree DriveBackupState *state = opaque;
AioContext *aio_context;
if (!state->bs) {
@@ -1869,23 +1810,31 @@ static void drive_backup_clean(BlkActionState *common)
}
typedef struct BlockdevBackupState {
- BlkActionState common;
BlockDriverState *bs;
BlockJob *job;
} BlockdevBackupState;
-static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
+static void blockdev_backup_commit(void *opaque);
+static void blockdev_backup_abort(void *opaque);
+static void blockdev_backup_clean(void *opaque);
+TransactionActionDrv blockdev_backup_drv = {
+ .commit = blockdev_backup_commit,
+ .abort = blockdev_backup_abort,
+ .clean = blockdev_backup_clean,
+};
+
+static void blockdev_backup_action(BlockdevBackup *backup,
+ JobTxn *block_job_txn,
+ Transaction *tran, Error **errp)
{
- BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
- BlockdevBackup *backup;
+ BlockdevBackupState *state = g_new0(BlockdevBackupState, 1);
BlockDriverState *bs;
BlockDriverState *target_bs;
AioContext *aio_context;
AioContext *old_context;
int ret;
- assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
- backup = common->action->u.blockdev_backup.data;
+ tran_add(tran, &blockdev_backup_drv, state);
bs = bdrv_lookup_bs(backup->device, backup->device, errp);
if (!bs) {
@@ -1917,14 +1866,14 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
state->job = do_backup_common(qapi_BlockdevBackup_base(backup),
bs, target_bs, aio_context,
- common->block_job_txn, errp);
+ block_job_txn, errp);
aio_context_release(aio_context);
}
-static void blockdev_backup_commit(BlkActionState *common)
+static void blockdev_backup_commit(void *opaque)
{
- BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+ BlockdevBackupState *state = opaque;
AioContext *aio_context;
aio_context = bdrv_get_aio_context(state->bs);
@@ -1936,18 +1885,18 @@ static void blockdev_backup_commit(BlkActionState *common)
aio_context_release(aio_context);
}
-static void blockdev_backup_abort(BlkActionState *common)
+static void blockdev_backup_abort(void *opaque)
{
- BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+ BlockdevBackupState *state = opaque;
if (state->job) {
job_cancel_sync(&state->job->job, true);
}
}
-static void blockdev_backup_clean(BlkActionState *common)
+static void blockdev_backup_clean(void *opaque)
{
- BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+ g_autofree BlockdevBackupState *state = opaque;
AioContext *aio_context;
if (!state->bs) {
@@ -1963,27 +1912,26 @@ static void blockdev_backup_clean(BlkActionState *common)
}
typedef struct BlockDirtyBitmapState {
- BlkActionState common;
BdrvDirtyBitmap *bitmap;
BlockDriverState *bs;
HBitmap *backup;
- bool prepared;
bool was_enabled;
} BlockDirtyBitmapState;
-static void block_dirty_bitmap_add_prepare(BlkActionState *common,
- Error **errp)
+static void block_dirty_bitmap_add_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_add_drv = {
+ .abort = block_dirty_bitmap_add_abort,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_add_action(BlockDirtyBitmapAdd *action,
+ Transaction *tran, Error **errp)
{
Error *local_err = NULL;
- BlockDirtyBitmapAdd *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
+ tran_add(tran, &block_dirty_bitmap_add_drv, state);
- action = common->action->u.block_dirty_bitmap_add.data;
/* AIO context taken and released within qmp_block_dirty_bitmap_add */
qmp_block_dirty_bitmap_add(action->node, action->name,
action->has_granularity, action->granularity,
@@ -1992,39 +1940,37 @@ static void block_dirty_bitmap_add_prepare(BlkActionState *common,
&local_err);
if (!local_err) {
- state->prepared = true;
+ state->bitmap = block_dirty_bitmap_lookup(action->node, action->name,
+ NULL, &error_abort);
} else {
error_propagate(errp, local_err);
}
}
-static void block_dirty_bitmap_add_abort(BlkActionState *common)
+static void block_dirty_bitmap_add_abort(void *opaque)
{
- BlockDirtyBitmapAdd *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
- action = common->action->u.block_dirty_bitmap_add.data;
- /* Should not be able to fail: IF the bitmap was added via .prepare(),
- * then the node reference and bitmap name must have been valid.
- */
- if (state->prepared) {
- qmp_block_dirty_bitmap_remove(action->node, action->name, &error_abort);
+ if (state->bitmap) {
+ bdrv_release_dirty_bitmap(state->bitmap);
}
}
-static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
- Error **errp)
+static void block_dirty_bitmap_restore(void *opaque);
+static void block_dirty_bitmap_free_backup(void *opaque);
+TransactionActionDrv block_dirty_bitmap_clear_drv = {
+ .abort = block_dirty_bitmap_restore,
+ .commit = block_dirty_bitmap_free_backup,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_clear_action(BlockDirtyBitmap *action,
+ Transaction *tran, Error **errp)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
- BlockDirtyBitmap *action;
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
+ tran_add(tran, &block_dirty_bitmap_clear_drv, state);
- action = common->action->u.block_dirty_bitmap_clear.data;
state->bitmap = block_dirty_bitmap_lookup(action->node,
action->name,
&state->bs,
@@ -2040,36 +1986,35 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
}
-static void block_dirty_bitmap_restore(BlkActionState *common)
+static void block_dirty_bitmap_restore(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
if (state->backup) {
bdrv_restore_dirty_bitmap(state->bitmap, state->backup);
}
}
-static void block_dirty_bitmap_free_backup(BlkActionState *common)
+static void block_dirty_bitmap_free_backup(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
hbitmap_free(state->backup);
}
-static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
- Error **errp)
+static void block_dirty_bitmap_enable_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_enable_drv = {
+ .abort = block_dirty_bitmap_enable_abort,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_enable_action(BlockDirtyBitmap *action,
+ Transaction *tran, Error **errp)
{
- BlockDirtyBitmap *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
+ tran_add(tran, &block_dirty_bitmap_enable_drv, state);
- action = common->action->u.block_dirty_bitmap_enable.data;
state->bitmap = block_dirty_bitmap_lookup(action->node,
action->name,
NULL,
@@ -2086,28 +2031,28 @@ static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
bdrv_enable_dirty_bitmap(state->bitmap);
}
-static void block_dirty_bitmap_enable_abort(BlkActionState *common)
+static void block_dirty_bitmap_enable_abort(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
if (!state->was_enabled) {
bdrv_disable_dirty_bitmap(state->bitmap);
}
}
-static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
- Error **errp)
+static void block_dirty_bitmap_disable_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_disable_drv = {
+ .abort = block_dirty_bitmap_disable_abort,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_disable_action(BlockDirtyBitmap *action,
+ Transaction *tran, Error **errp)
{
- BlockDirtyBitmap *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
+ tran_add(tran, &block_dirty_bitmap_disable_drv, state);
- action = common->action->u.block_dirty_bitmap_disable.data;
state->bitmap = block_dirty_bitmap_lookup(action->node,
action->name,
NULL,
@@ -2124,46 +2069,48 @@ static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
bdrv_disable_dirty_bitmap(state->bitmap);
}
-static void block_dirty_bitmap_disable_abort(BlkActionState *common)
+static void block_dirty_bitmap_disable_abort(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
if (state->was_enabled) {
bdrv_enable_dirty_bitmap(state->bitmap);
}
}
-static void block_dirty_bitmap_merge_prepare(BlkActionState *common,
- Error **errp)
+TransactionActionDrv block_dirty_bitmap_merge_drv = {
+ .commit = block_dirty_bitmap_free_backup,
+ .abort = block_dirty_bitmap_restore,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_merge_action(BlockDirtyBitmapMerge *action,
+ Transaction *tran, Error **errp)
{
- BlockDirtyBitmapMerge *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
-
- action = common->action->u.block_dirty_bitmap_merge.data;
+ tran_add(tran, &block_dirty_bitmap_merge_drv, state);
state->bitmap = block_dirty_bitmap_merge(action->node, action->target,
action->bitmaps, &state->backup,
errp);
}
-static void block_dirty_bitmap_remove_prepare(BlkActionState *common,
- Error **errp)
+static void block_dirty_bitmap_remove_commit(void *opaque);
+static void block_dirty_bitmap_remove_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_remove_drv = {
+ .commit = block_dirty_bitmap_remove_commit,
+ .abort = block_dirty_bitmap_remove_abort,
+ .clean = g_free,
+};
+
+static void block_dirty_bitmap_remove_action(BlockDirtyBitmap *action,
+ Transaction *tran, Error **errp)
{
- BlockDirtyBitmap *action;
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
- if (action_check_completion_mode(common, errp) < 0) {
- return;
- }
+ tran_add(tran, &block_dirty_bitmap_remove_drv, state);
- action = common->action->u.block_dirty_bitmap_remove.data;
state->bitmap = block_dirty_bitmap_remove(action->node, action->name,
false, &state->bs, errp);
@@ -2173,10 +2120,9 @@ static void block_dirty_bitmap_remove_prepare(BlkActionState *common,
}
}
-static void block_dirty_bitmap_remove_abort(BlkActionState *common)
+static void block_dirty_bitmap_remove_abort(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
if (state->bitmap) {
bdrv_dirty_bitmap_skip_store(state->bitmap, false);
@@ -2184,210 +2130,156 @@ static void block_dirty_bitmap_remove_abort(BlkActionState *common)
}
}
-static void block_dirty_bitmap_remove_commit(BlkActionState *common)
+static void block_dirty_bitmap_remove_commit(void *opaque)
{
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
- common, common);
+ BlockDirtyBitmapState *state = opaque;
bdrv_dirty_bitmap_set_busy(state->bitmap, false);
bdrv_release_dirty_bitmap(state->bitmap);
}
-static void abort_prepare(BlkActionState *common, Error **errp)
+static void abort_commit(void *opaque);
+TransactionActionDrv abort_drv = {
+ .commit = abort_commit,
+};
+
+static void abort_action(Transaction *tran, Error **errp)
{
+ tran_add(tran, &abort_drv, NULL);
error_setg(errp, "Transaction aborted using Abort action");
}
-static void abort_commit(BlkActionState *common)
+static void abort_commit(void *opaque)
{
g_assert_not_reached(); /* this action never succeeds */
}
-static const BlkActionOps actions[] = {
- [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = {
- .instance_size = sizeof(ExternalSnapshotState),
- .prepare = external_snapshot_prepare,
- .commit = external_snapshot_commit,
- .abort = external_snapshot_abort,
- .clean = external_snapshot_clean,
- },
- [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = {
- .instance_size = sizeof(ExternalSnapshotState),
- .prepare = external_snapshot_prepare,
- .commit = external_snapshot_commit,
- .abort = external_snapshot_abort,
- .clean = external_snapshot_clean,
- },
- [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
- .instance_size = sizeof(DriveBackupState),
- .prepare = drive_backup_prepare,
- .commit = drive_backup_commit,
- .abort = drive_backup_abort,
- .clean = drive_backup_clean,
- },
- [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
- .instance_size = sizeof(BlockdevBackupState),
- .prepare = blockdev_backup_prepare,
- .commit = blockdev_backup_commit,
- .abort = blockdev_backup_abort,
- .clean = blockdev_backup_clean,
- },
- [TRANSACTION_ACTION_KIND_ABORT] = {
- .instance_size = sizeof(BlkActionState),
- .prepare = abort_prepare,
- .commit = abort_commit,
- },
- [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC] = {
- .instance_size = sizeof(InternalSnapshotState),
- .prepare = internal_snapshot_prepare,
- .abort = internal_snapshot_abort,
- .clean = internal_snapshot_clean,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_add_prepare,
- .abort = block_dirty_bitmap_add_abort,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_clear_prepare,
- .commit = block_dirty_bitmap_free_backup,
- .abort = block_dirty_bitmap_restore,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_enable_prepare,
- .abort = block_dirty_bitmap_enable_abort,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_disable_prepare,
- .abort = block_dirty_bitmap_disable_abort,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_merge_prepare,
- .commit = block_dirty_bitmap_free_backup,
- .abort = block_dirty_bitmap_restore,
- },
- [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = {
- .instance_size = sizeof(BlockDirtyBitmapState),
- .prepare = block_dirty_bitmap_remove_prepare,
- .commit = block_dirty_bitmap_remove_commit,
- .abort = block_dirty_bitmap_remove_abort,
- },
- /* Where are transactions for MIRROR, COMMIT and STREAM?
+static void transaction_action(TransactionAction *act, JobTxn *block_job_txn,
+ Transaction *tran, Error **errp)
+{
+ switch (act->type) {
+ case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT:
+ case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
+ external_snapshot_action(act, tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_DRIVE_BACKUP:
+ drive_backup_action(act->u.drive_backup.data,
+ block_job_txn, tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP:
+ blockdev_backup_action(act->u.blockdev_backup.data,
+ block_job_txn, tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_ABORT:
+ abort_action(tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC:
+ internal_snapshot_action(act->u.blockdev_snapshot_internal_sync.data,
+ tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD:
+ block_dirty_bitmap_add_action(act->u.block_dirty_bitmap_add.data,
+ tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR:
+ block_dirty_bitmap_clear_action(act->u.block_dirty_bitmap_clear.data,
+ tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE:
+ block_dirty_bitmap_enable_action(act->u.block_dirty_bitmap_enable.data,
+ tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE:
+ block_dirty_bitmap_disable_action(
+ act->u.block_dirty_bitmap_disable.data, tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE:
+ block_dirty_bitmap_merge_action(act->u.block_dirty_bitmap_merge.data,
+ tran, errp);
+ return;
+ case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE:
+ block_dirty_bitmap_remove_action(act->u.block_dirty_bitmap_remove.data,
+ tran, errp);
+ return;
+ /*
+ * Where are transactions for MIRROR, COMMIT and STREAM?
* Although these blockjobs use transaction callbacks like the backup job,
* these jobs do not necessarily adhere to transaction semantics.
* These jobs may not fully undo all of their actions on abort, nor do they
* necessarily work in transactions with more than one job in them.
*/
-};
-
-/**
- * Allocate a TransactionProperties structure if necessary, and fill
- * that structure with desired defaults if they are unset.
- */
-static TransactionProperties *get_transaction_properties(
- TransactionProperties *props)
-{
- if (!props) {
- props = g_new0(TransactionProperties, 1);
- }
-
- if (!props->has_completion_mode) {
- props->has_completion_mode = true;
- props->completion_mode = ACTION_COMPLETION_MODE_INDIVIDUAL;
- }
-
- return props;
+ case TRANSACTION_ACTION_KIND__MAX:
+ default:
+ g_assert_not_reached();
+ };
}
+
/*
* 'Atomic' group operations. The operations are performed as a set, and if
* any fail then we roll back all operations in the group.
*
* Always run under BQL.
*/
-void qmp_transaction(TransactionActionList *dev_list,
- struct TransactionProperties *props,
+void qmp_transaction(TransactionActionList *actions,
+ struct TransactionProperties *properties,
Error **errp)
{
- TransactionActionList *dev_entry = dev_list;
- bool has_props = !!props;
+ TransactionActionList *act;
JobTxn *block_job_txn = NULL;
- BlkActionState *state, *next;
Error *local_err = NULL;
+ Transaction *tran;
+ ActionCompletionMode comp_mode =
+ properties ? properties->completion_mode :
+ ACTION_COMPLETION_MODE_INDIVIDUAL;
GLOBAL_STATE_CODE();
- QTAILQ_HEAD(, BlkActionState) snap_bdrv_states;
- QTAILQ_INIT(&snap_bdrv_states);
-
/* Does this transaction get canceled as a group on failure?
* If not, we don't really need to make a JobTxn.
*/
- props = get_transaction_properties(props);
- if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
+ if (comp_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
+ for (act = actions; act; act = act->next) {
+ TransactionActionKind type = act->value->type;
+
+ if (type != TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP &&
+ type != TRANSACTION_ACTION_KIND_DRIVE_BACKUP)
+ {
+ error_setg(errp,
+ "Action '%s' does not support transaction property "
+ "completion-mode = %s",
+ TransactionActionKind_str(type),
+ ActionCompletionMode_str(comp_mode));
+ return;
+ }
+ }
+
block_job_txn = job_txn_new();
}
/* drain all i/o before any operations */
bdrv_drain_all();
+ tran = tran_new();
+
/* We don't do anything in this loop that commits us to the operations */
- while (NULL != dev_entry) {
- TransactionAction *dev_info = NULL;
- const BlkActionOps *ops;
-
- dev_info = dev_entry->value;
- dev_entry = dev_entry->next;
-
- assert(dev_info->type < ARRAY_SIZE(actions));
-
- ops = &actions[dev_info->type];
- assert(ops->instance_size > 0);
-
- state = g_malloc0(ops->instance_size);
- state->ops = ops;
- state->action = dev_info;
- state->block_job_txn = block_job_txn;
- state->txn_props = props;
- QTAILQ_INSERT_TAIL(&snap_bdrv_states, state, entry);
-
- state->ops->prepare(state, &local_err);
+ for (act = actions; act; act = act->next) {
+ transaction_action(act->value, block_job_txn, tran, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto delete_and_fail;
}
}
- QTAILQ_FOREACH(state, &snap_bdrv_states, entry) {
- if (state->ops->commit) {
- state->ops->commit(state);
- }
- }
+ tran_commit(tran);
/* success */
goto exit;
delete_and_fail:
/* failure, and it is all-or-none; roll back all operations */
- QTAILQ_FOREACH_REVERSE(state, &snap_bdrv_states, entry) {
- if (state->ops->abort) {
- state->ops->abort(state);
- }
- }
+ tran_abort(tran);
exit:
- QTAILQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) {
- if (state->ops->clean) {
- state->ops->clean(state);
- }
- g_free(state);
- }
- if (!has_props) {
- qapi_free_TransactionProperties(props);
- }
job_txn_unref(block_job_txn);
}
diff --git a/blockjob.c b/blockjob.c
index 659c3cb9de..913da3cbf7 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -319,10 +319,28 @@ static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
return block_job_set_speed_locked(job, speed, errp);
}
-int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n)
+void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n)
{
IO_CODE();
- return ratelimit_calculate_delay(&job->limit, n);
+ ratelimit_calculate_delay(&job->limit, n);
+}
+
+void block_job_ratelimit_sleep(BlockJob *job)
+{
+ uint64_t delay_ns;
+
+ /*
+ * Sleep at least once. If the job is reentered early, keep waiting until
+ * we've waited for the full time that is necessary to keep the job at the
+ * right speed.
+ *
+ * Make sure to recalculate the delay after each (possibly interrupted)
+ * sleep because the speed can change while the job has yielded.
+ */
+ do {
+ delay_ns = ratelimit_calculate_delay(&job->limit, 0);
+ job_sleep_ns(&job->job, delay_ns);
+ } while (delay_ns && !job_is_cancelled(&job->job));
}
BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp)
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index f7dc304ff6..e7f036c286 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -214,14 +214,18 @@ version 2.
type.
If the incompatible bit "Compression type" is set: the field
- must be present and non-zero (which means non-zlib
+ must be present and non-zero (which means non-deflate
compression type). Otherwise, this field must not be present
- or must be zero (which means zlib).
+ or must be zero (which means deflate).
Available compression type values:
- 0: zlib
+ 0: deflate
1: zstd
+ The deflate compression type is called "zlib"
+ in QEMU. However, clusters with the
+ deflate compression type do not have zlib headers.
+
=== Header padding ===
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index 2d93423d35..f347199bff 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -58,14 +58,14 @@ BlockDriver *bdrv_find_protocol(const char *filename,
Error **errp);
BlockDriver *bdrv_find_format(const char *format_name);
-int coroutine_fn GRAPH_RDLOCK
+int coroutine_fn GRAPH_UNLOCKED
bdrv_co_create(BlockDriver *drv, const char *filename, QemuOpts *opts,
Error **errp);
-int co_wrapper_bdrv_rdlock bdrv_create(BlockDriver *drv, const char *filename,
- QemuOpts *opts, Error **errp);
+int co_wrapper bdrv_create(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp);
-int coroutine_fn GRAPH_RDLOCK
+int coroutine_fn GRAPH_UNLOCKED
bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp);
BlockDriverState *bdrv_new(void);
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index dbec0e3bb4..6492a1e538 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -250,10 +250,10 @@ struct BlockDriver {
BlockDriverState *bs, QDict *options, int flags, Error **errp);
void (*bdrv_close)(BlockDriverState *bs);
- int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create)(
+ int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create)(
BlockdevCreateOptions *opts, Error **errp);
- int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create_opts)(
+ int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create_opts)(
BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp);
int (*bdrv_amend_options)(BlockDriverState *bs,
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index f008446285..104824040c 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -126,12 +126,18 @@ void block_job_user_resume(Job *job);
*/
/**
- * block_job_ratelimit_get_delay:
+ * block_job_ratelimit_processed_bytes:
*
- * Calculate and return delay for the next request in ns. See the documentation
- * of ratelimit_calculate_delay() for details.
+ * To be called after some work has been done. Adjusts the delay for the next
+ * request. See the documentation of ratelimit_calculate_delay() for details.
*/
-int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n);
+void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n);
+
+/**
+ * Put the job to sleep (assuming that it wasn't canceled) to throttle it to the
+ * right speed according to its rate limiting.
+ */
+void block_job_ratelimit_sleep(BlockJob *job);
/**
* block_job_error_action:
diff --git a/include/io/channel.h b/include/io/channel.h
index 446a566e5e..229bf36910 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -757,6 +757,16 @@ void qio_channel_detach_aio_context(QIOChannel *ioc);
void coroutine_fn qio_channel_yield(QIOChannel *ioc,
GIOCondition condition);
+/**
+ * qio_channel_wake_read:
+ * @ioc: the channel object
+ *
+ * If qio_channel_yield() is currently waiting for the channel to become
+ * readable, interrupt it and reenter immediately. This function is safe to call
+ * from any thread.
+ */
+void qio_channel_wake_read(QIOChannel *ioc);
+
/**
* qio_channel_wait:
* @ioc: the channel object
diff --git a/io/channel.c b/io/channel.c
index 375a130a39..72f0066af5 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -19,6 +19,7 @@
*/
#include "qemu/osdep.h"
+#include "block/aio-wait.h"
#include "io/channel.h"
#include "qapi/error.h"
#include "qemu/main-loop.h"
@@ -514,7 +515,11 @@ int qio_channel_flush(QIOChannel *ioc,
static void qio_channel_restart_read(void *opaque)
{
QIOChannel *ioc = opaque;
- Coroutine *co = ioc->read_coroutine;
+ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL);
+
+ if (!co) {
+ return;
+ }
/* Assert that aio_co_wake() reenters the coroutine directly */
assert(qemu_get_current_aio_context() ==
@@ -525,7 +530,11 @@ static void qio_channel_restart_read(void *opaque)
static void qio_channel_restart_write(void *opaque)
{
QIOChannel *ioc = opaque;
- Coroutine *co = ioc->write_coroutine;
+ Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL);
+
+ if (!co) {
+ return;
+ }
/* Assert that aio_co_wake() reenters the coroutine directly */
assert(qemu_get_current_aio_context() ==
@@ -568,7 +577,11 @@ void qio_channel_detach_aio_context(QIOChannel *ioc)
void coroutine_fn qio_channel_yield(QIOChannel *ioc,
GIOCondition condition)
{
+ AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context();
+
assert(qemu_in_coroutine());
+ assert(in_aio_context_home_thread(ioc_ctx));
+
if (condition == G_IO_IN) {
assert(!ioc->read_coroutine);
ioc->read_coroutine = qemu_coroutine_self();
@@ -580,18 +593,26 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc,
}
qio_channel_set_aio_fd_handlers(ioc);
qemu_coroutine_yield();
+ assert(in_aio_context_home_thread(ioc_ctx));
/* Allow interrupting the operation by reentering the coroutine other than
* through the aio_fd_handlers. */
- if (condition == G_IO_IN && ioc->read_coroutine) {
- ioc->read_coroutine = NULL;
+ if (condition == G_IO_IN) {
+ assert(ioc->read_coroutine == NULL);
qio_channel_set_aio_fd_handlers(ioc);
- } else if (condition == G_IO_OUT && ioc->write_coroutine) {
- ioc->write_coroutine = NULL;
+ } else if (condition == G_IO_OUT) {
+ assert(ioc->write_coroutine == NULL);
qio_channel_set_aio_fd_handlers(ioc);
}
}
+void qio_channel_wake_read(QIOChannel *ioc)
+{
+ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL);
+ if (co) {
+ aio_co_wake(co);
+ }
+}
static gboolean qio_channel_wait_complete(QIOChannel *ioc,
GIOCondition condition,
diff --git a/nbd/server.c b/nbd/server.c
index e239c2890f..2664d43bff 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1599,8 +1599,7 @@ static bool nbd_drained_poll(void *opaque)
* enter it here so we don't depend on the client to wake it up.
*/
if (client->recv_coroutine != NULL && client->read_yielding) {
- qemu_aio_coroutine_enter(exp->common.ctx,
- client->recv_coroutine);
+ qio_channel_wake_read(client->ioc);
}
return true;
diff --git a/qemu-img.c b/qemu-img.c
index 9f9f0a7629..27f48051b0 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2938,8 +2938,6 @@ static BlockGraphInfoList *collect_image_info_list(bool image_opts,
}
bs = blk_bs(blk);
- GRAPH_RDLOCK_GUARD_MAINLOOP();
-
/*
* Note that the returned BlockGraphInfo object will not have
* information about this image's backing node, because we have opened
@@ -2947,7 +2945,10 @@ static BlockGraphInfoList *collect_image_info_list(bool image_opts,
* duplicate the backing chain information that we obtain by walking
* the chain manually here.
*/
+ bdrv_graph_rdlock_main_loop();
bdrv_query_block_graph_info(bs, &info, &err);
+ bdrv_graph_rdunlock_main_loop();
+
if (err) {
error_report_err(err);
blk_unref(blk);
diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245
index edaf29094b..92b28c79be 100755
--- a/tests/qemu-iotests/245
+++ b/tests/qemu-iotests/245
@@ -611,6 +611,7 @@ class TestBlockdevReopen(iotests.QMPTestCase):
self.reopen(hd0_opts, {'file': 'hd0-file'})
# Insert (and remove) a compress filter
+ @iotests.skip_if_unsupported(['compress'])
def test_insert_compress_filter(self):
# Add an image to the VM: hd (raw) -> hd0 (qcow2) -> hd0-file (file)
opts = {'driver': 'raw', 'node-name': 'hd', 'file': hd_opts(0)}
@@ -650,9 +651,9 @@ class TestBlockdevReopen(iotests.QMPTestCase):
# Check the first byte of the first three L2 entries and verify that
# the second one is compressed (0x40) while the others are not (0x80)
- iotests.qemu_io_log('-f', 'raw', '-c', 'read -P 0x80 0x40000 1',
- '-c', 'read -P 0x40 0x40008 1',
- '-c', 'read -P 0x80 0x40010 1', hd_path[0])
+ iotests.qemu_io('-f', 'raw', '-c', 'read -P 0x80 0x40000 1',
+ '-c', 'read -P 0x40 0x40008 1',
+ '-c', 'read -P 0x80 0x40010 1', hd_path[0])
# Swap the disk images of two active block devices
def test_swap_files(self):
diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out
index a4e04a3266..0970ced62a 100644
--- a/tests/qemu-iotests/245.out
+++ b/tests/qemu-iotests/245.out
@@ -10,14 +10,7 @@
{"return": {}}
{"data": {"id": "stream0", "type": "stream"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
{"data": {"device": "stream0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
-....read 1/1 bytes at offset 262144
-1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 1/1 bytes at offset 262152
-1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 1/1 bytes at offset 262160
-1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-
-................
+....................
----------------------------------------------------------------------
Ran 26 tests
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 3e82c634cf..7073579a7d 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -462,6 +462,10 @@ class QemuStorageDaemon:
assert self._qmp is not None
return self._qmp.cmd(cmd, args)
+ def get_qmp(self) -> QEMUMonitorProtocol:
+ assert self._qmp is not None
+ return self._qmp
+
def stop(self, kill_signal=15):
self._p.send_signal(kill_signal)
self._p.wait()
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io
index 7664f33689..750e7d4d38 100755
--- a/tests/qemu-iotests/tests/graph-changes-while-io
+++ b/tests/qemu-iotests/tests/graph-changes-while-io
@@ -22,19 +22,19 @@
import os
from threading import Thread
import iotests
-from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \
- QemuStorageDaemon
+from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \
+ QMPTestCase, QemuStorageDaemon
top = os.path.join(iotests.test_dir, 'top.img')
nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
-def do_qemu_img_bench() -> None:
+def do_qemu_img_bench(count: int = 2000000) -> None:
"""
Do some I/O requests on `nbd_sock`.
"""
- qemu_img('bench', '-f', 'raw', '-c', '2000000',
+ qemu_img('bench', '-f', 'raw', '-c', str(count),
f'nbd+unix:///node0?socket={nbd_sock}')
@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase):
bench_thr.join()
+ def test_commit_while_io(self) -> None:
+ # Run qemu-img bench in the background
+ bench_thr = Thread(target=do_qemu_img_bench, args=(200000, ))
+ bench_thr.start()
+
+ qemu_io('-c', 'write 0 64k', top)
+ qemu_io('-c', 'write 128k 64k', top)
+
+ result = self.qsd.qmp('blockdev-add', {
+ 'driver': imgfmt,
+ 'node-name': 'overlay',
+ 'backing': None,
+ 'file': {
+ 'driver': 'file',
+ 'filename': top
+ }
+ })
+ self.assert_qmp(result, 'return', {})
+
+ result = self.qsd.qmp('blockdev-snapshot', {
+ 'node': 'node0',
+ 'overlay': 'overlay',
+ })
+ self.assert_qmp(result, 'return', {})
+
+ # While qemu-img bench is running, repeatedly commit overlay to node0
+ while bench_thr.is_alive():
+ result = self.qsd.qmp('block-commit', {
+ 'job-id': 'job0',
+ 'device': 'overlay',
+ })
+ self.assert_qmp(result, 'return', {})
+
+ result = self.qsd.qmp('block-job-cancel', {
+ 'device': 'job0',
+ })
+ self.assert_qmp(result, 'return', {})
+
+ cancelled = False
+ while not cancelled:
+ for event in self.qsd.get_qmp().get_events(wait=10.0):
+ if event['event'] != 'JOB_STATUS_CHANGE':
+ continue
+ if event['data']['status'] == 'null':
+ cancelled = True
+
+ bench_thr.join()
+
if __name__ == '__main__':
# Format must support raw backing files
iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'],
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out
index ae1213e6f8..fbc63e62f8 100644
--- a/tests/qemu-iotests/tests/graph-changes-while-io.out
+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out
@@ -1,5 +1,5 @@
-.
+..
----------------------------------------------------------------------
-Ran 1 tests
+Ran 2 tests
OK
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index 48ae66011b..3a6314269b 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -114,7 +114,10 @@ if have_block
tests += {'test-crypto-xts': [crypto, io]}
endif
if 'CONFIG_POSIX' in config_host
- tests += {'test-image-locking': [testblock]}
+ tests += {
+ 'test-image-locking': [testblock],
+ 'test-nested-aio-poll': [testblock],
+ }
endif
if config_host_data.get('CONFIG_REPLICATION')
tests += {'test-replication': [testblock]}
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 9a4c5e59d6..08bb0f9984 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -1004,8 +1004,6 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
void *buffer = g_malloc(65536);
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536);
- GRAPH_RDLOCK_GUARD();
-
/* Pretend some internal write operation from parent to child.
* Important: We have to read from the child, not from the parent!
* Draining works by first propagating it all up the tree to the
@@ -1014,12 +1012,14 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
* everything will be drained before we go back down the tree, but
* we do not want that. We want to be in the middle of draining
* when this following requests returns. */
+ bdrv_graph_co_rdlock();
bdrv_co_preadv(tts->wait_child, 0, 65536, &qiov, 0);
+ bdrv_graph_co_rdunlock();
g_assert_cmpint(bs->refcnt, ==, 1);
if (!dbdd->detach_instead_of_delete) {
- blk_unref(blk);
+ blk_co_unref(blk);
} else {
BdrvChild *c, *next_c;
QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
diff --git a/tests/unit/test-nested-aio-poll.c b/tests/unit/test-nested-aio-poll.c
new file mode 100644
index 0000000000..9bbe18b839
--- /dev/null
+++ b/tests/unit/test-nested-aio-poll.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Test that poll handlers are not re-entrant in nested aio_poll()
+ *
+ * Copyright Red Hat
+ *
+ * Poll handlers are usually level-triggered. That means they continue firing
+ * until the condition is reset (e.g. a virtqueue becomes empty). If a poll
+ * handler calls nested aio_poll() before the condition is reset, then infinite
+ * recursion occurs.
+ *
+ * aio_poll() is supposed to prevent this by disabling poll handlers in nested
+ * aio_poll() calls. This test case checks that this is indeed what happens.
+ */
+#include "qemu/osdep.h"
+#include "block/aio.h"
+#include "qapi/error.h"
+
+typedef struct {
+ AioContext *ctx;
+
+ /* This is the EventNotifier that drives the test */
+ EventNotifier poll_notifier;
+
+ /* This EventNotifier is only used to wake aio_poll() */
+ EventNotifier dummy_notifier;
+
+ bool nested;
+} TestData;
+
+static void io_read(EventNotifier *notifier)
+{
+ fprintf(stderr, "%s %p\n", __func__, notifier);
+ event_notifier_test_and_clear(notifier);
+}
+
+static bool io_poll_true(void *opaque)
+{
+ fprintf(stderr, "%s %p\n", __func__, opaque);
+ return true;
+}
+
+static bool io_poll_false(void *opaque)
+{
+ fprintf(stderr, "%s %p\n", __func__, opaque);
+ return false;
+}
+
+static void io_poll_ready(EventNotifier *notifier)
+{
+ TestData *td = container_of(notifier, TestData, poll_notifier);
+
+ fprintf(stderr, "> %s\n", __func__);
+
+ g_assert(!td->nested);
+ td->nested = true;
+
+ /* Wake the following nested aio_poll() call */
+ event_notifier_set(&td->dummy_notifier);
+
+ /* This nested event loop must not call io_poll()/io_poll_ready() */
+ g_assert(aio_poll(td->ctx, true));
+
+ td->nested = false;
+
+ fprintf(stderr, "< %s\n", __func__);
+}
+
+/* dummy_notifier never triggers */
+static void io_poll_never_ready(EventNotifier *notifier)
+{
+ g_assert_not_reached();
+}
+
+static void test(void)
+{
+ TestData td = {
+ .ctx = aio_context_new(&error_abort),
+ };
+
+ qemu_set_current_aio_context(td.ctx);
+
+ /* Enable polling */
+ aio_context_set_poll_params(td.ctx, 1000000, 2, 2, &error_abort);
+
+ /*
+ * The GSource is unused but this has the side-effect of changing the fdmon
+ * that AioContext uses.
+ */
+ aio_get_g_source(td.ctx);
+
+ /* Make the event notifier active (set) right away */
+ event_notifier_init(&td.poll_notifier, 1);
+ aio_set_event_notifier(td.ctx, &td.poll_notifier, false,
+ io_read, io_poll_true, io_poll_ready);
+
+ /* This event notifier will be used later */
+ event_notifier_init(&td.dummy_notifier, 0);
+ aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
+ io_read, io_poll_false, io_poll_never_ready);
+
+ /* Consume aio_notify() */
+ g_assert(!aio_poll(td.ctx, false));
+
+ /*
+ * Run the io_read() handler. This has the side-effect of activating
+ * polling in future aio_poll() calls.
+ */
+ g_assert(aio_poll(td.ctx, true));
+
+ /* The second time around the io_poll()/io_poll_ready() handler runs */
+ g_assert(aio_poll(td.ctx, true));
+
+ /* Run io_poll()/io_poll_ready() one more time to show it keeps working */
+ g_assert(aio_poll(td.ctx, true));
+
+ aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
+ NULL, NULL, NULL);
+ aio_set_event_notifier(td.ctx, &td.poll_notifier, false, NULL, NULL, NULL);
+ event_notifier_cleanup(&td.dummy_notifier);
+ event_notifier_cleanup(&td.poll_notifier);
+ aio_context_unref(td.ctx);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+ g_test_add_func("/nested-aio-poll", test);
+ return g_test_run();
+}
diff --git a/util/aio-posix.c b/util/aio-posix.c
index a8be940f76..34bc2a64d8 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -353,8 +353,19 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
poll_ready && revents == 0 &&
aio_node_check(ctx, node->is_external) &&
node->io_poll_ready) {
+ /*
+ * Remove temporarily to avoid infinite loops when ->io_poll_ready()
+ * calls aio_poll() before clearing the condition that made the poll
+ * handler become ready.
+ */
+ QLIST_SAFE_REMOVE(node, node_poll);
+
node->io_poll_ready(node->opaque);
+ if (!QLIST_IS_INSERTED(node, node_poll)) {
+ QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
+ }
+
/*
* Return early since revents was zero. aio_notify() does not count as
* progress.